linux/arch/powerpc/platforms/powernv/idle.c
<<
>>
Prefs
   1/*
   2 * PowerNV cpuidle code
   3 *
   4 * Copyright 2015 IBM Corp.
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * as published by the Free Software Foundation; either version
   9 * 2 of the License, or (at your option) any later version.
  10 */
  11
  12#include <linux/types.h>
  13#include <linux/mm.h>
  14#include <linux/slab.h>
  15#include <linux/of.h>
  16#include <linux/device.h>
  17#include <linux/cpu.h>
  18
  19#include <asm/firmware.h>
  20#include <asm/machdep.h>
  21#include <asm/opal.h>
  22#include <asm/cputhreads.h>
  23#include <asm/cpuidle.h>
  24#include <asm/code-patching.h>
  25#include <asm/smp.h>
  26#include <asm/runlatch.h>
  27
  28#include "powernv.h"
  29#include "subcore.h"
  30
/* Power ISA 3.0 allows for stop states 0x0 - 0xF */
#define MAX_STOP_STATE  0xF

/*
 * Register identifiers handed to opal_slw_set_reg() for the MSR and the
 * PSSCR on ISA 3.0 CPUs (see pnv_save_sprs_for_deep_states()).
 * NOTE(review): presumably these numbers are defined by the firmware
 * stop-api rather than being architected SPR numbers - confirm.
 */
#define P9_STOP_SPR_MSR 2000
#define P9_STOP_SPR_PSSCR      855

/*
 * Bitwise OR of all "ibm,cpu-idle-state-flags" entries read from the
 * device tree. Reported by pnv_get_supported_cpuidle_states().
 */
static u32 supported_cpuidle_states;

/*
 * The default stop state that will be used by ppc_md.power_save
 * function on platforms that support stop instruction.
 */
static u64 pnv_default_stop_val;
static u64 pnv_default_stop_mask;
/* Set once a valid OPAL_PM_STOP_INST_FAST state has been recorded above. */
static bool default_stop_found;

/*
 * First deep stop state. Used to figure out when to save/restore
 * hypervisor context.
 */
u64 pnv_first_deep_stop_state = MAX_STOP_STATE;

/*
 * psscr value and mask of the deepest stop idle state.
 * Used when a cpu is offlined.
 */
static u64 pnv_deepest_stop_psscr_val;
static u64 pnv_deepest_stop_psscr_mask;
/* Device-tree flags of the state recorded as the deepest stop state. */
static u64 pnv_deepest_stop_flag;
/* True while a usable deepest stop state is recorded above. */
static bool deepest_stop_found;
  61
  62static int pnv_save_sprs_for_deep_states(void)
  63{
  64        int cpu;
  65        int rc;
  66
  67        /*
  68         * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
  69         * all cpus at boot. Get these reg values of current cpu and use the
  70         * same across all cpus.
  71         */
  72        uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
  73        uint64_t hid0_val = mfspr(SPRN_HID0);
  74        uint64_t hid1_val = mfspr(SPRN_HID1);
  75        uint64_t hid4_val = mfspr(SPRN_HID4);
  76        uint64_t hid5_val = mfspr(SPRN_HID5);
  77        uint64_t hmeer_val = mfspr(SPRN_HMEER);
  78        uint64_t msr_val = MSR_IDLE;
  79        uint64_t psscr_val = pnv_deepest_stop_psscr_val;
  80
  81        for_each_possible_cpu(cpu) {
  82                uint64_t pir = get_hard_smp_processor_id(cpu);
  83                uint64_t hsprg0_val = (uint64_t)&paca[cpu];
  84
  85                rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
  86                if (rc != 0)
  87                        return rc;
  88
  89                rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
  90                if (rc != 0)
  91                        return rc;
  92
  93                if (cpu_has_feature(CPU_FTR_ARCH_300)) {
  94                        rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val);
  95                        if (rc)
  96                                return rc;
  97
  98                        rc = opal_slw_set_reg(pir,
  99                                              P9_STOP_SPR_PSSCR, psscr_val);
 100
 101                        if (rc)
 102                                return rc;
 103                }
 104
 105                /* HIDs are per core registers */
 106                if (cpu_thread_in_core(cpu) == 0) {
 107
 108                        rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
 109                        if (rc != 0)
 110                                return rc;
 111
 112                        rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
 113                        if (rc != 0)
 114                                return rc;
 115
 116                        /* Only p8 needs to set extra HID regiters */
 117                        if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
 118
 119                                rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
 120                                if (rc != 0)
 121                                        return rc;
 122
 123                                rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
 124                                if (rc != 0)
 125                                        return rc;
 126
 127                                rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
 128                                if (rc != 0)
 129                                        return rc;
 130                        }
 131                }
 132        }
 133
 134        return 0;
 135}
 136
 137static void pnv_alloc_idle_core_states(void)
 138{
 139        int i, j;
 140        int nr_cores = cpu_nr_cores();
 141        u32 *core_idle_state;
 142
 143        /*
 144         * core_idle_state - The lower 8 bits track the idle state of
 145         * each thread of the core.
 146         *
 147         * The most significant bit is the lock bit.
 148         *
 149         * Initially all the bits corresponding to threads_per_core
 150         * are set. They are cleared when the thread enters deep idle
 151         * state like sleep and winkle/stop.
 152         *
 153         * Initially the lock bit is cleared.  The lock bit has 2
 154         * purposes:
 155         *      a. While the first thread in the core waking up from
 156         *         idle is restoring core state, it prevents other
 157         *         threads in the core from switching to process
 158         *         context.
 159         *      b. While the last thread in the core is saving the
 160         *         core state, it prevents a different thread from
 161         *         waking up.
 162         */
 163        for (i = 0; i < nr_cores; i++) {
 164                int first_cpu = i * threads_per_core;
 165                int node = cpu_to_node(first_cpu);
 166                size_t paca_ptr_array_size;
 167
 168                core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
 169                *core_idle_state = (1 << threads_per_core) - 1;
 170                paca_ptr_array_size = (threads_per_core *
 171                                       sizeof(struct paca_struct *));
 172
 173                for (j = 0; j < threads_per_core; j++) {
 174                        int cpu = first_cpu + j;
 175
 176                        paca[cpu].core_idle_state_ptr = core_idle_state;
 177                        paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
 178                        paca[cpu].thread_mask = 1 << j;
 179                        if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
 180                                continue;
 181                        paca[cpu].thread_sibling_pacas =
 182                                kmalloc_node(paca_ptr_array_size,
 183                                             GFP_KERNEL, node);
 184                }
 185        }
 186
 187        update_subcore_sibling_mask();
 188
 189        if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
 190                int rc = pnv_save_sprs_for_deep_states();
 191
 192                if (likely(!rc))
 193                        return;
 194
 195                /*
 196                 * The stop-api is unable to restore hypervisor
 197                 * resources on wakeup from platform idle states which
 198                 * lose full context. So disable such states.
 199                 */
 200                supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
 201                pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
 202                pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
 203
 204                if (cpu_has_feature(CPU_FTR_ARCH_300) &&
 205                    (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
 206                        /*
 207                         * Use the default stop state for CPU-Hotplug
 208                         * if available.
 209                         */
 210                        if (default_stop_found) {
 211                                pnv_deepest_stop_psscr_val =
 212                                        pnv_default_stop_val;
 213                                pnv_deepest_stop_psscr_mask =
 214                                        pnv_default_stop_mask;
 215                                pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
 216                                        pnv_deepest_stop_psscr_val);
 217                        } else { /* Fallback to snooze loop for CPU-Hotplug */
 218                                deepest_stop_found = false;
 219                                pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
 220                        }
 221                }
 222        }
 223}
 224
/*
 * Return the idle-state flag bits currently held in
 * supported_cpuidle_states. Exported (GPL-only) so that code outside
 * this file can query it.
 */
u32 pnv_get_supported_cpuidle_states(void)
{
        return supported_cpuidle_states;
}
EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
 230
 231static void pnv_fastsleep_workaround_apply(void *info)
 232
 233{
 234        int rc;
 235        int *err = info;
 236
 237        rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
 238                                        OPAL_CONFIG_IDLE_APPLY);
 239        if (rc)
 240                *err = 1;
 241}
 242
 243/*
 244 * Used to store fastsleep workaround state
 245 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
 246 * 1 - Workaround applied once, never undone.
 247 */
 248static u8 fastsleep_workaround_applyonce;
 249
 250static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
 251                struct device_attribute *attr, char *buf)
 252{
 253        return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
 254}
 255
 256static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
 257                struct device_attribute *attr, const char *buf,
 258                size_t count)
 259{
 260        cpumask_t primary_thread_mask;
 261        int err;
 262        u8 val;
 263
 264        if (kstrtou8(buf, 0, &val) || val != 1)
 265                return -EINVAL;
 266
 267        if (fastsleep_workaround_applyonce == 1)
 268                return count;
 269
 270        /*
 271         * fastsleep_workaround_applyonce = 1 implies
 272         * fastsleep workaround needs to be left in 'applied' state on all
 273         * the cores. Do this by-
 274         * 1. Patching out the call to 'undo' workaround in fastsleep exit path
 275         * 2. Sending ipi to all the cores which have at least one online thread
 276         * 3. Patching out the call to 'apply' workaround in fastsleep entry
 277         * path
 278         * There is no need to send ipi to cores which have all threads
 279         * offlined, as last thread of the core entering fastsleep or deeper
 280         * state would have applied workaround.
 281         */
 282        err = patch_instruction(
 283                (unsigned int *)pnv_fastsleep_workaround_at_exit,
 284                PPC_INST_NOP);
 285        if (err) {
 286                pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit");
 287                goto fail;
 288        }
 289
 290        get_online_cpus();
 291        primary_thread_mask = cpu_online_cores_map();
 292        on_each_cpu_mask(&primary_thread_mask,
 293                                pnv_fastsleep_workaround_apply,
 294                                &err, 1);
 295        put_online_cpus();
 296        if (err) {
 297                pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply");
 298                goto fail;
 299        }
 300
 301        err = patch_instruction(
 302                (unsigned int *)pnv_fastsleep_workaround_at_entry,
 303                PPC_INST_NOP);
 304        if (err) {
 305                pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry");
 306                goto fail;
 307        }
 308
 309        fastsleep_workaround_applyonce = 1;
 310
 311        return count;
 312fail:
 313        return -EIO;
 314}
 315
 316static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
 317                        show_fastsleep_workaround_applyonce,
 318                        store_fastsleep_workaround_applyonce);
 319
 320static unsigned long __power7_idle_type(unsigned long type)
 321{
 322        unsigned long srr1;
 323
 324        if (!prep_irq_for_idle_irqsoff())
 325                return 0;
 326
 327        __ppc64_runlatch_off();
 328        srr1 = power7_idle_insn(type);
 329        __ppc64_runlatch_on();
 330
 331        fini_irq_for_idle_irqsoff();
 332
 333        return srr1;
 334}
 335
 336void power7_idle_type(unsigned long type)
 337{
 338        unsigned long srr1;
 339
 340        srr1 = __power7_idle_type(type);
 341        irq_set_pending_from_srr1(srr1);
 342}
 343
 344void power7_idle(void)
 345{
 346        if (!powersave_nap)
 347                return;
 348
 349        power7_idle_type(PNV_THREAD_NAP);
 350}
 351
 352static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
 353                                      unsigned long stop_psscr_mask)
 354{
 355        unsigned long psscr;
 356        unsigned long srr1;
 357
 358        if (!prep_irq_for_idle_irqsoff())
 359                return 0;
 360
 361        psscr = mfspr(SPRN_PSSCR);
 362        psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
 363
 364        __ppc64_runlatch_off();
 365        srr1 = power9_idle_stop(psscr);
 366        __ppc64_runlatch_on();
 367
 368        fini_irq_for_idle_irqsoff();
 369
 370        return srr1;
 371}
 372
 373void power9_idle_type(unsigned long stop_psscr_val,
 374                                      unsigned long stop_psscr_mask)
 375{
 376        unsigned long srr1;
 377
 378        srr1 = __power9_idle_type(stop_psscr_val, stop_psscr_mask);
 379        irq_set_pending_from_srr1(srr1);
 380}
 381
/*
 * Used for ppc_md.power_save which needs a function with no parameters.
 * Enters the default stop state described by pnv_default_stop_val and
 * pnv_default_stop_mask.
 */
void power9_idle(void)
{
        power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
}
 389
 390#ifdef CONFIG_HOTPLUG_CPU
 391/*
 392 * pnv_cpu_offline: A function that puts the CPU into the deepest
 393 * available platform idle state on a CPU-Offline.
 394 * interrupts hard disabled and no lazy irq pending.
 395 */
 396unsigned long pnv_cpu_offline(unsigned int cpu)
 397{
 398        unsigned long srr1;
 399        u32 idle_states = pnv_get_supported_cpuidle_states();
 400
 401        __ppc64_runlatch_off();
 402
 403        if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
 404                unsigned long psscr;
 405
 406                psscr = mfspr(SPRN_PSSCR);
 407                psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
 408                                                pnv_deepest_stop_psscr_val;
 409                srr1 = power9_idle_stop(psscr);
 410
 411        } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
 412                   (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
 413                srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
 414        } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
 415                   (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
 416                srr1 = power7_idle_insn(PNV_THREAD_SLEEP);
 417        } else if (idle_states & OPAL_PM_NAP_ENABLED) {
 418                srr1 = power7_idle_insn(PNV_THREAD_NAP);
 419        } else {
 420                /* This is the fallback method. We emulate snooze */
 421                while (!generic_check_cpu_restart(cpu)) {
 422                        HMT_low();
 423                        HMT_very_low();
 424                }
 425                srr1 = 0;
 426                HMT_medium();
 427        }
 428
 429        __ppc64_runlatch_on();
 430
 431        return srr1;
 432}
 433#endif
 434
 435/*
 436 * Power ISA 3.0 idle initialization.
 437 *
 438 * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
 439 * Register (PSSCR) to control idle behavior.
 440 *
 441 * PSSCR layout:
 442 * ----------------------------------------------------------
 443 * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
 444 * ----------------------------------------------------------
 445 * 0      4     41   42    43   44     48    54   56    60
 446 *
 447 * PSSCR key fields:
 448 *      Bits 0:3  - Power-Saving Level Status (PLS). This field indicates the
 449 *      lowest power-saving state the thread entered since stop instruction was
 450 *      last executed.
 451 *
 452 *      Bit 41 - Status Disable(SD)
 453 *      0 - Shows PLS entries
 454 *      1 - PLS entries are all 0
 455 *
 456 *      Bit 42 - Enable State Loss
 457 *      0 - No state is lost irrespective of other fields
 458 *      1 - Allows state loss
 459 *
 460 *      Bit 43 - Exit Criterion
 461 *      0 - Exit from power-save mode on any interrupt
 462 *      1 - Exit from power-save mode controlled by LPCR's PECE bits
 463 *
 464 *      Bits 44:47 - Power-Saving Level Limit
 465 *      This limits the power-saving level that can be entered into.
 466 *
 467 *      Bits 60:63 - Requested Level
 468 *      Used to specify which power-saving level must be entered on executing
 469 *      stop instruction
 470 */
 471
 472int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
 473{
 474        int err = 0;
 475
 476        /*
 477         * psscr_mask == 0xf indicates an older firmware.
 478         * Set remaining fields of psscr to the default values.
 479         * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
 480         */
 481        if (*psscr_mask == 0xf) {
 482                *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL;
 483                *psscr_mask = PSSCR_HV_DEFAULT_MASK;
 484                return err;
 485        }
 486
 487        /*
 488         * New firmware is expected to set the psscr_val bits correctly.
 489         * Validate that the following invariants are correctly maintained by
 490         * the new firmware.
 491         * - ESL bit value matches the EC bit value.
 492         * - ESL bit is set for all the deep stop states.
 493         */
 494        if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) {
 495                err = ERR_EC_ESL_MISMATCH;
 496        } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
 497                GET_PSSCR_ESL(*psscr_val) == 0) {
 498                err = ERR_DEEP_STATE_ESL_MISMATCH;
 499        }
 500
 501        return err;
 502}
 503
 504/*
 505 * pnv_arch300_idle_init: Initializes the default idle state, first
 506 *                        deep idle state and deepest idle state on
 507 *                        ISA 3.0 CPUs.
 508 *
 509 * @np: /ibm,opal/power-mgt device node
 510 * @flags: cpu-idle-state-flags array
 511 * @dt_idle_states: Number of idle state entries
 512 * Returns 0 on success
 513 */
 514static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
 515                                        int dt_idle_states)
 516{
 517        u64 *psscr_val = NULL;
 518        u64 *psscr_mask = NULL;
 519        u32 *residency_ns = NULL;
 520        u64 max_residency_ns = 0;
 521        int rc = 0, i;
 522
 523        psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL);
 524        psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL);
 525        residency_ns = kcalloc(dt_idle_states, sizeof(*residency_ns),
 526                               GFP_KERNEL);
 527
 528        if (!psscr_val || !psscr_mask || !residency_ns) {
 529                rc = -1;
 530                goto out;
 531        }
 532
 533        if (of_property_read_u64_array(np,
 534                "ibm,cpu-idle-state-psscr",
 535                psscr_val, dt_idle_states)) {
 536                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
 537                rc = -1;
 538                goto out;
 539        }
 540
 541        if (of_property_read_u64_array(np,
 542                                       "ibm,cpu-idle-state-psscr-mask",
 543                                       psscr_mask, dt_idle_states)) {
 544                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
 545                rc = -1;
 546                goto out;
 547        }
 548
 549        if (of_property_read_u32_array(np,
 550                                       "ibm,cpu-idle-state-residency-ns",
 551                                        residency_ns, dt_idle_states)) {
 552                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
 553                rc = -1;
 554                goto out;
 555        }
 556
 557        /*
 558         * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
 559         * and the pnv_default_stop_{val,mask}.
 560         *
 561         * pnv_first_deep_stop_state should be set to the first stop
 562         * level to cause hypervisor state loss.
 563         *
 564         * pnv_deepest_stop_{val,mask} should be set to values corresponding to
 565         * the deepest stop state.
 566         *
 567         * pnv_default_stop_{val,mask} should be set to values corresponding to
 568         * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state.
 569         */
 570        pnv_first_deep_stop_state = MAX_STOP_STATE;
 571        for (i = 0; i < dt_idle_states; i++) {
 572                int err;
 573                u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK;
 574
 575                if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) &&
 576                     (pnv_first_deep_stop_state > psscr_rl))
 577                        pnv_first_deep_stop_state = psscr_rl;
 578
 579                err = validate_psscr_val_mask(&psscr_val[i], &psscr_mask[i],
 580                                              flags[i]);
 581                if (err) {
 582                        report_invalid_psscr_val(psscr_val[i], err);
 583                        continue;
 584                }
 585
 586                if (max_residency_ns < residency_ns[i]) {
 587                        max_residency_ns = residency_ns[i];
 588                        pnv_deepest_stop_psscr_val = psscr_val[i];
 589                        pnv_deepest_stop_psscr_mask = psscr_mask[i];
 590                        pnv_deepest_stop_flag = flags[i];
 591                        deepest_stop_found = true;
 592                }
 593
 594                if (!default_stop_found &&
 595                    (flags[i] & OPAL_PM_STOP_INST_FAST)) {
 596                        pnv_default_stop_val = psscr_val[i];
 597                        pnv_default_stop_mask = psscr_mask[i];
 598                        default_stop_found = true;
 599                }
 600        }
 601
 602        if (unlikely(!default_stop_found)) {
 603                pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
 604        } else {
 605                ppc_md.power_save = power9_idle;
 606                pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
 607                        pnv_default_stop_val, pnv_default_stop_mask);
 608        }
 609
 610        if (unlikely(!deepest_stop_found)) {
 611                pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait");
 612        } else {
 613                pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n",
 614                        pnv_deepest_stop_psscr_val,
 615                        pnv_deepest_stop_psscr_mask);
 616        }
 617
 618        pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n",
 619                pnv_first_deep_stop_state);
 620out:
 621        kfree(psscr_val);
 622        kfree(psscr_mask);
 623        kfree(residency_ns);
 624        return rc;
 625}
 626
 627/*
 628 * Probe device tree for supported idle states
 629 */
 630static void __init pnv_probe_idle_states(void)
 631{
 632        struct device_node *np;
 633        int dt_idle_states;
 634        u32 *flags = NULL;
 635        int i;
 636
 637        np = of_find_node_by_path("/ibm,opal/power-mgt");
 638        if (!np) {
 639                pr_warn("opal: PowerMgmt Node not found\n");
 640                goto out;
 641        }
 642        dt_idle_states = of_property_count_u32_elems(np,
 643                        "ibm,cpu-idle-state-flags");
 644        if (dt_idle_states < 0) {
 645                pr_warn("cpuidle-powernv: no idle states found in the DT\n");
 646                goto out;
 647        }
 648
 649        flags = kcalloc(dt_idle_states, sizeof(*flags),  GFP_KERNEL);
 650
 651        if (of_property_read_u32_array(np,
 652                        "ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
 653                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
 654                goto out;
 655        }
 656
 657        if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 658                if (pnv_power9_idle_init(np, flags, dt_idle_states))
 659                        goto out;
 660        }
 661
 662        for (i = 0; i < dt_idle_states; i++)
 663                supported_cpuidle_states |= flags[i];
 664
 665out:
 666        kfree(flags);
 667}
 668static int __init pnv_init_idle_states(void)
 669{
 670
 671        supported_cpuidle_states = 0;
 672
 673        if (cpuidle_disable != IDLE_NO_OVERRIDE)
 674                goto out;
 675
 676        pnv_probe_idle_states();
 677
 678        if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
 679                patch_instruction(
 680                        (unsigned int *)pnv_fastsleep_workaround_at_entry,
 681                        PPC_INST_NOP);
 682                patch_instruction(
 683                        (unsigned int *)pnv_fastsleep_workaround_at_exit,
 684                        PPC_INST_NOP);
 685        } else {
 686                /*
 687                 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
 688                 * workaround is needed to use fastsleep. Provide sysfs
 689                 * control to choose how this workaround has to be applied.
 690                 */
 691                device_create_file(cpu_subsys.dev_root,
 692                                &dev_attr_fastsleep_workaround_applyonce);
 693        }
 694
 695        pnv_alloc_idle_core_states();
 696
 697        /*
 698         * For each CPU, record its PACA address in each of it's
 699         * sibling thread's PACA at the slot corresponding to this
 700         * CPU's index in the core.
 701         */
 702        if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
 703                int cpu;
 704
 705                pr_info("powernv: idle: Saving PACA pointers of all CPUs in their thread sibling PACA\n");
 706                for_each_possible_cpu(cpu) {
 707                        int base_cpu = cpu_first_thread_sibling(cpu);
 708                        int idx = cpu_thread_in_core(cpu);
 709                        int i;
 710
 711                        for (i = 0; i < threads_per_core; i++) {
 712                                int j = base_cpu + i;
 713
 714                                paca[j].thread_sibling_pacas[idx] = &paca[cpu];
 715                        }
 716                }
 717        }
 718
 719        if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
 720                ppc_md.power_save = power7_idle;
 721
 722out:
 723        return 0;
 724}
 725machine_subsys_initcall(powernv, pnv_init_idle_states);
 726