linux/arch/powerpc/platforms/powernv/idle.c
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * PowerNV cpuidle code
   4 *
   5 * Copyright 2015 IBM Corp.
   6 */
   7
   8#include <linux/types.h>
   9#include <linux/mm.h>
  10#include <linux/slab.h>
  11#include <linux/of.h>
  12#include <linux/device.h>
  13#include <linux/cpu.h>
  14
  15#include <asm/asm-prototypes.h>
  16#include <asm/firmware.h>
  17#include <asm/interrupt.h>
  18#include <asm/machdep.h>
  19#include <asm/opal.h>
  20#include <asm/cputhreads.h>
  21#include <asm/cpuidle.h>
  22#include <asm/code-patching.h>
  23#include <asm/smp.h>
  24#include <asm/runlatch.h>
  25#include <asm/dbell.h>
  26
  27#include "powernv.h"
  28#include "subcore.h"
  29
  30/* Power ISA 3.0 allows for stop states 0x0 - 0xF */
  31#define MAX_STOP_STATE  0xF
  32
  33#define P9_STOP_SPR_MSR 2000
  34#define P9_STOP_SPR_PSSCR      855
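
/*
 * A note on the two values above: 855 (0x357) is the architected SPR
 * number of PSSCR, but the MSR is not an SPR at all, so the stop-api
 * presumably uses 2000 as a firmware-defined token for it. Both values
 * are only ever passed to opal_slw_set_reg() below.
 */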
  35
  36static u32 supported_cpuidle_states;
  37struct pnv_idle_states_t *pnv_idle_states;
  38int nr_pnv_idle_states;
  39
  40/*
   41 * The default stop state that will be used by the ppc_md.power_save
   42 * function on platforms that support the stop instruction.
  43 */
  44static u64 pnv_default_stop_val;
  45static u64 pnv_default_stop_mask;
  46static bool default_stop_found;
  47
  48/*
   49 * First stop state levels at which SPR loss and TB loss can occur.
  50 */
  51static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
  52static u64 deep_spr_loss_state = MAX_STOP_STATE + 1;
  53
  54/*
  55 * psscr value and mask of the deepest stop idle state.
  56 * Used when a cpu is offlined.
  57 */
  58static u64 pnv_deepest_stop_psscr_val;
  59static u64 pnv_deepest_stop_psscr_mask;
  60static u64 pnv_deepest_stop_flag;
  61static bool deepest_stop_found;
  62
  63static unsigned long power7_offline_type;
  64
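/*
 * Hand a set of SPR values to firmware via the stop-api
 * (opal_slw_set_reg()) so that they can be restored when a thread wakes
 * from an idle state that loses full hypervisor context. Called from
 * pnv_init_idle_states() when such states are advertised.
 */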
  65static int pnv_save_sprs_for_deep_states(void)
  66{
  67        int cpu;
  68        int rc;
  69
  70        /*
  71         * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
   72         * all cpus at boot. Read these register values on the current cpu
   73         * and program the same values on all cpus.
  74         */
  75        uint64_t lpcr_val       = mfspr(SPRN_LPCR);
  76        uint64_t hid0_val       = mfspr(SPRN_HID0);
  77        uint64_t hmeer_val      = mfspr(SPRN_HMEER);
  78        uint64_t msr_val = MSR_IDLE;
  79        uint64_t psscr_val = pnv_deepest_stop_psscr_val;
  80
  81        for_each_present_cpu(cpu) {
  82                uint64_t pir = get_hard_smp_processor_id(cpu);
  83                uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu];
  84
  85                rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
  86                if (rc != 0)
  87                        return rc;
  88
  89                rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
  90                if (rc != 0)
  91                        return rc;
  92
  93                if (cpu_has_feature(CPU_FTR_ARCH_300)) {
  94                        rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val);
  95                        if (rc)
  96                                return rc;
  97
  98                        rc = opal_slw_set_reg(pir,
  99                                              P9_STOP_SPR_PSSCR, psscr_val);
 100
 101                        if (rc)
 102                                return rc;
 103                }
 104
 105                /* HIDs are per core registers */
 106                if (cpu_thread_in_core(cpu) == 0) {
 107
 108                        rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
 109                        if (rc != 0)
 110                                return rc;
 111
 112                        rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
 113                        if (rc != 0)
 114                                return rc;
 115
  116                        /* Only p8 needs to set extra HID registers */
 117                        if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
 118                                uint64_t hid1_val = mfspr(SPRN_HID1);
 119                                uint64_t hid4_val = mfspr(SPRN_HID4);
 120                                uint64_t hid5_val = mfspr(SPRN_HID5);
 121
 122                                rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
 123                                if (rc != 0)
 124                                        return rc;
 125
 126                                rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
 127                                if (rc != 0)
 128                                        return rc;
 129
 130                                rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
 131                                if (rc != 0)
 132                                        return rc;
 133                        }
 134                }
 135        }
 136
 137        return 0;
 138}
 139
 140u32 pnv_get_supported_cpuidle_states(void)
 141{
 142        return supported_cpuidle_states;
 143}
 144EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
 145
 146static void pnv_fastsleep_workaround_apply(void *info)
 147
 148{
 149        int rc;
 150        int *err = info;
 151
 152        rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
 153                                        OPAL_CONFIG_IDLE_APPLY);
 154        if (rc)
 155                *err = 1;
 156}
 157
 158static bool power7_fastsleep_workaround_entry = true;
 159static bool power7_fastsleep_workaround_exit = true;
 160
 161/*
 162 * Used to store fastsleep workaround state
 163 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
 164 * 1 - Workaround applied once, never undone.
 165 */
 166static u8 fastsleep_workaround_applyonce;
 167
 168static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
 169                struct device_attribute *attr, char *buf)
 170{
 171        return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
 172}
 173
 174static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
 175                struct device_attribute *attr, const char *buf,
 176                size_t count)
 177{
 178        cpumask_t primary_thread_mask;
 179        int err;
 180        u8 val;
 181
 182        if (kstrtou8(buf, 0, &val) || val != 1)
 183                return -EINVAL;
 184
 185        if (fastsleep_workaround_applyonce == 1)
 186                return count;
 187
  188        /*
  189         * fastsleep_workaround_applyonce = 1 implies that the fastsleep
  190         * workaround needs to be left in the 'applied' state on all
  191         * the cores. Do this by:
  192         * 1. Disabling the 'undo' workaround in the fastsleep exit path
  193         * 2. Sending IPIs to all the cores which have at least one online thread
  194         * 3. Disabling the 'apply' workaround in the fastsleep entry path
  195         *
  196         * There is no need to send an IPI to cores whose threads are all
  197         * offline, as the last thread of the core entering fastsleep or a
  198         * deeper state will already have applied the workaround.
  199         */
 200        power7_fastsleep_workaround_exit = false;
 201
 202        cpus_read_lock();
 203        primary_thread_mask = cpu_online_cores_map();
 204        on_each_cpu_mask(&primary_thread_mask,
 205                                pnv_fastsleep_workaround_apply,
 206                                &err, 1);
 207        cpus_read_unlock();
 208        if (err) {
  209                pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply\n");
 210                goto fail;
 211        }
 212
 213        power7_fastsleep_workaround_entry = false;
 214
 215        fastsleep_workaround_applyonce = 1;
 216
 217        return count;
 218fail:
 219        return -EIO;
 220}
 221
 222static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
 223                        show_fastsleep_workaround_applyonce,
 224                        store_fastsleep_workaround_applyonce);
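
/*
 * The attribute above is registered against the cpu subsystem in
 * pnv_init_idle_states(), so it typically appears as
 * /sys/devices/system/cpu/fastsleep_workaround_applyonce. Writing "1" to
 * it applies the workaround once on every core with an online thread and
 * skips the apply/undo sequence on subsequent fastsleep entry/exit:
 *
 *	echo 1 > /sys/devices/system/cpu/fastsleep_workaround_applyonce
 */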
 225
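/*
 * The atomic_*_thread_idle() helpers below operate on the per-core
 * idle_state word kept in the first thread's paca. Roughly (see
 * asm/cpuidle.h for the exact layout): the low bits hold one bit per
 * thread, set while the thread is awake and cleared while it is idle;
 * further fields track threads that entered winkle; and
 * NR_PNV_CORE_IDLE_LOCK_BIT is a lock bit taken with
 * test_and_set_bit_lock(). A plain bit in the paca is used rather than a
 * generic spinlock because these paths can run in real mode with the
 * MMU off.
 */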
 226static inline void atomic_start_thread_idle(void)
 227{
 228        int cpu = raw_smp_processor_id();
 229        int first = cpu_first_thread_sibling(cpu);
 230        int thread_nr = cpu_thread_in_core(cpu);
 231        unsigned long *state = &paca_ptrs[first]->idle_state;
 232
 233        clear_bit(thread_nr, state);
 234}
 235
 236static inline void atomic_stop_thread_idle(void)
 237{
 238        int cpu = raw_smp_processor_id();
 239        int first = cpu_first_thread_sibling(cpu);
 240        int thread_nr = cpu_thread_in_core(cpu);
 241        unsigned long *state = &paca_ptrs[first]->idle_state;
 242
 243        set_bit(thread_nr, state);
 244}
 245
 246static inline void atomic_lock_thread_idle(void)
 247{
 248        int cpu = raw_smp_processor_id();
 249        int first = cpu_first_thread_sibling(cpu);
 250        unsigned long *state = &paca_ptrs[first]->idle_state;
 251
 252        while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state)))
 253                barrier();
 254}
 255
 256static inline void atomic_unlock_and_stop_thread_idle(void)
 257{
 258        int cpu = raw_smp_processor_id();
 259        int first = cpu_first_thread_sibling(cpu);
 260        unsigned long thread = 1UL << cpu_thread_in_core(cpu);
 261        unsigned long *state = &paca_ptrs[first]->idle_state;
 262        u64 s = READ_ONCE(*state);
 263        u64 new, tmp;
 264
 265        BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT));
 266        BUG_ON(s & thread);
 267
 268again:
 269        new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT;
 270        tmp = cmpxchg(state, s, new);
 271        if (unlikely(tmp != s)) {
 272                s = tmp;
 273                goto again;
 274        }
 275}
 276
 277static inline void atomic_unlock_thread_idle(void)
 278{
 279        int cpu = raw_smp_processor_id();
 280        int first = cpu_first_thread_sibling(cpu);
 281        unsigned long *state = &paca_ptrs[first]->idle_state;
 282
 283        BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state));
 284        clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
 285}
 286
 287/* P7 and P8 */
 288struct p7_sprs {
 289        /* per core */
 290        u64 tscr;
 291        u64 worc;
 292
 293        /* per subcore */
 294        u64 sdr1;
 295        u64 rpr;
 296
 297        /* per thread */
 298        u64 lpcr;
 299        u64 hfscr;
 300        u64 fscr;
 301        u64 purr;
 302        u64 spurr;
 303        u64 dscr;
 304        u64 wort;
 305
 306        /* per thread SPRs that get lost in shallow states */
 307        u64 amr;
 308        u64 iamr;
 309        u64 amor;
 310        u64 uamor;
 311};
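
/*
 * The per core/subcore/thread split above matters on wakeup: in
 * power7_idle_insn(), per-core and per-subcore SPRs are restored only by
 * the first thread of the core (or subcore) to wake from a full winkle;
 * see the core_woken/subcore_woken labels below.
 */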
 312
 313static unsigned long power7_idle_insn(unsigned long type)
 314{
 315        int cpu = raw_smp_processor_id();
 316        int first = cpu_first_thread_sibling(cpu);
 317        unsigned long *state = &paca_ptrs[first]->idle_state;
 318        unsigned long thread = 1UL << cpu_thread_in_core(cpu);
 319        unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
 320        unsigned long srr1;
 321        bool full_winkle;
 322        struct p7_sprs sprs = {}; /* avoid false use-uninitialised */
 323        bool sprs_saved = false;
 324        int rc;
 325
 326        if (unlikely(type != PNV_THREAD_NAP)) {
 327                atomic_lock_thread_idle();
 328
 329                BUG_ON(!(*state & thread));
 330                *state &= ~thread;
 331
 332                if (power7_fastsleep_workaround_entry) {
 333                        if ((*state & core_thread_mask) == 0) {
 334                                rc = opal_config_cpu_idle_state(
 335                                                OPAL_CONFIG_IDLE_FASTSLEEP,
 336                                                OPAL_CONFIG_IDLE_APPLY);
 337                                BUG_ON(rc);
 338                        }
 339                }
 340
 341                if (type == PNV_THREAD_WINKLE) {
 342                        sprs.tscr       = mfspr(SPRN_TSCR);
 343                        sprs.worc       = mfspr(SPRN_WORC);
 344
 345                        sprs.sdr1       = mfspr(SPRN_SDR1);
 346                        sprs.rpr        = mfspr(SPRN_RPR);
 347
 348                        sprs.lpcr       = mfspr(SPRN_LPCR);
 349                        if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
 350                                sprs.hfscr      = mfspr(SPRN_HFSCR);
 351                                sprs.fscr       = mfspr(SPRN_FSCR);
 352                        }
 353                        sprs.purr       = mfspr(SPRN_PURR);
 354                        sprs.spurr      = mfspr(SPRN_SPURR);
 355                        sprs.dscr       = mfspr(SPRN_DSCR);
 356                        sprs.wort       = mfspr(SPRN_WORT);
 357
 358                        sprs_saved = true;
 359
 360                        /*
 361                         * Increment winkle counter and set all winkle bits if
 362                         * all threads are winkling. This allows wakeup side to
 363                         * distinguish between fast sleep and winkle state
 364                         * loss. Fast sleep still has to resync the timebase so
 365                         * this may not be a really big win.
 366                         */
 367                        *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
 368                        if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS)
 369                                        >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT
 370                                        == threads_per_core)
 371                                *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS;
 372                        WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
 373                }
 374
 375                atomic_unlock_thread_idle();
 376        }
 377
 378        if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
 379                sprs.amr        = mfspr(SPRN_AMR);
 380                sprs.iamr       = mfspr(SPRN_IAMR);
 381                sprs.amor       = mfspr(SPRN_AMOR);
 382                sprs.uamor      = mfspr(SPRN_UAMOR);
 383        }
 384
 385        local_paca->thread_idle_state = type;
 386        srr1 = isa206_idle_insn_mayloss(type);          /* go idle */
 387        local_paca->thread_idle_state = PNV_THREAD_RUNNING;
 388
 389        WARN_ON_ONCE(!srr1);
 390        WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
 391
 392        if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
 393                if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
 394                        /*
 395                         * We don't need an isync after the mtsprs here because
 396                         * the upcoming mtmsrd is execution synchronizing.
 397                         */
 398                        mtspr(SPRN_AMR,         sprs.amr);
 399                        mtspr(SPRN_IAMR,        sprs.iamr);
 400                        mtspr(SPRN_AMOR,        sprs.amor);
 401                        mtspr(SPRN_UAMOR,       sprs.uamor);
 402                }
 403        }
 404
 405        if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
 406                hmi_exception_realmode(NULL);
 407
 408        if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) {
 409                if (unlikely(type != PNV_THREAD_NAP)) {
 410                        atomic_lock_thread_idle();
 411                        if (type == PNV_THREAD_WINKLE) {
 412                                WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
 413                                *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
 414                                *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
 415                        }
 416                        atomic_unlock_and_stop_thread_idle();
 417                }
 418                return srr1;
 419        }
 420
 421        /* HV state loss */
 422        BUG_ON(type == PNV_THREAD_NAP);
 423
 424        atomic_lock_thread_idle();
 425
 426        full_winkle = false;
 427        if (type == PNV_THREAD_WINKLE) {
 428                WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
 429                *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
 430                if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) {
 431                        *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
 432                        full_winkle = true;
 433                        BUG_ON(!sprs_saved);
 434                }
 435        }
 436
 437        WARN_ON(*state & thread);
 438
 439        if ((*state & core_thread_mask) != 0)
 440                goto core_woken;
 441
 442        /* Per-core SPRs */
 443        if (full_winkle) {
 444                mtspr(SPRN_TSCR,        sprs.tscr);
 445                mtspr(SPRN_WORC,        sprs.worc);
 446        }
 447
 448        if (power7_fastsleep_workaround_exit) {
 449                rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
 450                                                OPAL_CONFIG_IDLE_UNDO);
 451                BUG_ON(rc);
 452        }
 453
 454        /* TB */
 455        if (opal_resync_timebase() != OPAL_SUCCESS)
 456                BUG();
 457
 458core_woken:
 459        if (!full_winkle)
 460                goto subcore_woken;
 461
 462        if ((*state & local_paca->subcore_sibling_mask) != 0)
 463                goto subcore_woken;
 464
 465        /* Per-subcore SPRs */
 466        mtspr(SPRN_SDR1,        sprs.sdr1);
 467        mtspr(SPRN_RPR,         sprs.rpr);
 468
 469subcore_woken:
 470        /*
 471         * isync after restoring shared SPRs and before unlocking. Unlock
 472         * only contains hwsync which does not necessarily do the right
 473         * thing for SPRs.
 474         */
 475        isync();
 476        atomic_unlock_and_stop_thread_idle();
 477
 478        /* Fast sleep does not lose SPRs */
 479        if (!full_winkle)
 480                return srr1;
 481
 482        /* Per-thread SPRs */
 483        mtspr(SPRN_LPCR,        sprs.lpcr);
 484        if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
 485                mtspr(SPRN_HFSCR,       sprs.hfscr);
 486                mtspr(SPRN_FSCR,        sprs.fscr);
 487        }
 488        mtspr(SPRN_PURR,        sprs.purr);
 489        mtspr(SPRN_SPURR,       sprs.spurr);
 490        mtspr(SPRN_DSCR,        sprs.dscr);
 491        mtspr(SPRN_WORT,        sprs.wort);
 492
 493        mtspr(SPRN_SPRG3,       local_paca->sprg_vdso);
 494
 495        /*
 496         * The SLB has to be restored here, but it sometimes still
 497         * contains entries, so the __ variant must be used to prevent
 498         * multi hits.
 499         */
 500        __slb_restore_bolted_realmode();
 501
 502        return srr1;
 503}
 504
 505extern unsigned long idle_kvm_start_guest(unsigned long srr1);
 506
 507#ifdef CONFIG_HOTPLUG_CPU
 508static unsigned long power7_offline(void)
 509{
 510        unsigned long srr1;
 511
 512        mtmsr(MSR_IDLE);
 513
 514#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 515        /* Tell KVM we're entering idle. */
 516        /******************************************************/
 517        /*  N O T E   W E L L    ! ! !    N O T E   W E L L   */
 518        /* The following store to HSTATE_HWTHREAD_STATE(r13)  */
 519        /* MUST occur in real mode, i.e. with the MMU off,    */
 520        /* and the MMU must stay off until we clear this flag */
 521        /* and test HSTATE_HWTHREAD_REQ(r13) in               */
 522        /* pnv_powersave_wakeup in this file.                 */
 523        /* The reason is that another thread can switch the   */
 524        /* MMU to a guest context whenever this flag is set   */
 525        /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,    */
 526        /* that would potentially cause this thread to start  */
 527        /* executing instructions from guest memory in        */
 528        /* hypervisor mode, leading to a host crash or data   */
 529        /* corruption, or worse.                              */
 530        /******************************************************/
 531        local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
 532#endif
 533
 534        __ppc64_runlatch_off();
 535        srr1 = power7_idle_insn(power7_offline_type);
 536        __ppc64_runlatch_on();
 537
 538#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 539        local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
 540        /* Order setting hwthread_state vs. testing hwthread_req */
 541        smp_mb();
 542        if (local_paca->kvm_hstate.hwthread_req)
 543                srr1 = idle_kvm_start_guest(srr1);
 544#endif
 545
 546        mtmsr(MSR_KERNEL);
 547
 548        return srr1;
 549}
 550#endif
 551
 552void power7_idle_type(unsigned long type)
 553{
 554        unsigned long srr1;
 555
 556        if (!prep_irq_for_idle_irqsoff())
 557                return;
 558
 559        mtmsr(MSR_IDLE);
 560        __ppc64_runlatch_off();
 561        srr1 = power7_idle_insn(type);
 562        __ppc64_runlatch_on();
 563        mtmsr(MSR_KERNEL);
 564
 565        fini_irq_for_idle_irqsoff();
 566        irq_set_pending_from_srr1(srr1);
 567}
 568
 569static void power7_idle(void)
 570{
 571        if (!powersave_nap)
 572                return;
 573
 574        power7_idle_type(PNV_THREAD_NAP);
 575}
 576
 577struct p9_sprs {
 578        /* per core */
 579        u64 ptcr;
 580        u64 rpr;
 581        u64 tscr;
 582        u64 ldbar;
 583
 584        /* per thread */
 585        u64 lpcr;
 586        u64 hfscr;
 587        u64 fscr;
 588        u64 pid;
 589        u64 purr;
 590        u64 spurr;
 591        u64 dscr;
 592        u64 wort;
 593        u64 ciabr;
 594
 595        u64 mmcra;
 596        u32 mmcr0;
 597        u32 mmcr1;
 598        u64 mmcr2;
 599
 600        /* per thread SPRs that get lost in shallow states */
 601        u64 amr;
 602        u64 iamr;
 603        u64 amor;
 604        u64 uamor;
 605};
 606
 607static unsigned long power9_idle_stop(unsigned long psscr)
 608{
 609        int cpu = raw_smp_processor_id();
 610        int first = cpu_first_thread_sibling(cpu);
 611        unsigned long *state = &paca_ptrs[first]->idle_state;
 612        unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
 613        unsigned long srr1;
 614        unsigned long pls;
 615        unsigned long mmcr0 = 0;
 616        unsigned long mmcra = 0;
 617        struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
 618        bool sprs_saved = false;
 619
 620        if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
 621                /* EC=ESL=0 case */
 622
 623                /*
 624                 * Wake synchronously. SRESET via xscom may still cause
 625                 * a 0x100 powersave wakeup with SRR1 reason!
 626                 */
 627                srr1 = isa300_idle_stop_noloss(psscr);          /* go idle */
 628                if (likely(!srr1))
 629                        return 0;
 630
 631                /*
 632                 * Registers not saved, can't recover!
 633                 * This would be a hardware bug
 634                 */
 635                BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
 636
 637                goto out;
 638        }
 639
 640        /* EC=ESL=1 case */
 641#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 642        if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) {
 643                local_paca->requested_psscr = psscr;
 644                /* order setting requested_psscr vs testing dont_stop */
 645                smp_mb();
 646                if (atomic_read(&local_paca->dont_stop)) {
 647                        local_paca->requested_psscr = 0;
 648                        return 0;
 649                }
 650        }
 651#endif
 652
 653        if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
 654                 /*
 655                  * POWER9 DD2 can incorrectly set PMAO when waking up
 656                  * after a state-loss idle. Saving and restoring MMCR0
 657                  * over idle is a workaround.
 658                  */
 659                mmcr0           = mfspr(SPRN_MMCR0);
 660        }
 661
 662        if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
 663                sprs.lpcr       = mfspr(SPRN_LPCR);
 664                sprs.hfscr      = mfspr(SPRN_HFSCR);
 665                sprs.fscr       = mfspr(SPRN_FSCR);
 666                sprs.pid        = mfspr(SPRN_PID);
 667                sprs.purr       = mfspr(SPRN_PURR);
 668                sprs.spurr      = mfspr(SPRN_SPURR);
 669                sprs.dscr       = mfspr(SPRN_DSCR);
 670                sprs.ciabr      = mfspr(SPRN_CIABR);
 671
 672                sprs.mmcra      = mfspr(SPRN_MMCRA);
 673                sprs.mmcr0      = mfspr(SPRN_MMCR0);
 674                sprs.mmcr1      = mfspr(SPRN_MMCR1);
 675                sprs.mmcr2      = mfspr(SPRN_MMCR2);
 676
 677                sprs.ptcr       = mfspr(SPRN_PTCR);
 678                sprs.rpr        = mfspr(SPRN_RPR);
 679                sprs.tscr       = mfspr(SPRN_TSCR);
 680                if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
 681                        sprs.ldbar = mfspr(SPRN_LDBAR);
 682
 683                sprs_saved = true;
 684
 685                atomic_start_thread_idle();
 686        }
 687
 688        sprs.amr        = mfspr(SPRN_AMR);
 689        sprs.iamr       = mfspr(SPRN_IAMR);
 690        sprs.amor       = mfspr(SPRN_AMOR);
 691        sprs.uamor      = mfspr(SPRN_UAMOR);
 692
 693        srr1 = isa300_idle_stop_mayloss(psscr);         /* go idle */
 694
 695#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 696        local_paca->requested_psscr = 0;
 697#endif
 698
 699        psscr = mfspr(SPRN_PSSCR);
 700
 701        WARN_ON_ONCE(!srr1);
 702        WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
 703
 704        if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
 705                /*
 706                 * We don't need an isync after the mtsprs here because the
 707                 * upcoming mtmsrd is execution synchronizing.
 708                 */
 709                mtspr(SPRN_AMR,         sprs.amr);
 710                mtspr(SPRN_IAMR,        sprs.iamr);
 711                mtspr(SPRN_AMOR,        sprs.amor);
 712                mtspr(SPRN_UAMOR,       sprs.uamor);
 713
 714                /*
 715                 * Workaround for POWER9 DD2.0, if we lost resources, the ERAT
 716                 * might have been corrupted and needs flushing. We also need
 717                 * to reload MMCR0 (see mmcr0 comment above).
 718                 */
 719                if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
 720                        asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT);
 721                        mtspr(SPRN_MMCR0, mmcr0);
 722                }
 723
 724                /*
 725                 * DD2.2 and earlier need to set then clear bit 60 in MMCRA
 726                 * to ensure the PMU starts running.
 727                 */
 728                mmcra = mfspr(SPRN_MMCRA);
 729                mmcra |= PPC_BIT(60);
 730                mtspr(SPRN_MMCRA, mmcra);
 731                mmcra &= ~PPC_BIT(60);
 732                mtspr(SPRN_MMCRA, mmcra);
 733        }
 734
 735        if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
 736                hmi_exception_realmode(NULL);
 737
 738        /*
 739         * On POWER9, SRR1 bits do not match exactly as expected.
 740         * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
 741         * just always test PSSCR for SPR/TB state loss.
 742         */
 743        pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
 744        if (likely(pls < deep_spr_loss_state)) {
 745                if (sprs_saved)
 746                        atomic_stop_thread_idle();
 747                goto out;
 748        }
 749
 750        /* HV state loss */
 751        BUG_ON(!sprs_saved);
 752
 753        atomic_lock_thread_idle();
 754
 755        if ((*state & core_thread_mask) != 0)
 756                goto core_woken;
 757
 758        /* Per-core SPRs */
 759        mtspr(SPRN_PTCR,        sprs.ptcr);
 760        mtspr(SPRN_RPR,         sprs.rpr);
 761        mtspr(SPRN_TSCR,        sprs.tscr);
 762
 763        if (pls >= pnv_first_tb_loss_level) {
 764                /* TB loss */
 765                if (opal_resync_timebase() != OPAL_SUCCESS)
 766                        BUG();
 767        }
 768
 769        /*
 770         * isync after restoring shared SPRs and before unlocking. Unlock
 771         * only contains hwsync which does not necessarily do the right
 772         * thing for SPRs.
 773         */
 774        isync();
 775
 776core_woken:
 777        atomic_unlock_and_stop_thread_idle();
 778
 779        /* Per-thread SPRs */
 780        mtspr(SPRN_LPCR,        sprs.lpcr);
 781        mtspr(SPRN_HFSCR,       sprs.hfscr);
 782        mtspr(SPRN_FSCR,        sprs.fscr);
 783        mtspr(SPRN_PID,         sprs.pid);
 784        mtspr(SPRN_PURR,        sprs.purr);
 785        mtspr(SPRN_SPURR,       sprs.spurr);
 786        mtspr(SPRN_DSCR,        sprs.dscr);
 787        mtspr(SPRN_CIABR,       sprs.ciabr);
 788
 789        mtspr(SPRN_MMCRA,       sprs.mmcra);
 790        mtspr(SPRN_MMCR0,       sprs.mmcr0);
 791        mtspr(SPRN_MMCR1,       sprs.mmcr1);
 792        mtspr(SPRN_MMCR2,       sprs.mmcr2);
 793        if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
 794                mtspr(SPRN_LDBAR, sprs.ldbar);
 795
 796        mtspr(SPRN_SPRG3,       local_paca->sprg_vdso);
 797
 798        if (!radix_enabled())
 799                __slb_restore_bolted_realmode();
 800
 801out:
 802        mtmsr(MSR_KERNEL);
 803
 804        return srr1;
 805}
 806
 807#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 808/*
 809 * This is used in working around bugs in thread reconfiguration
 810 * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
 811 * memory and the way that XER[SO] is checkpointed.
 812 * This function forces the core into SMT4 in order by asking
 813 * all other threads not to stop, and sending a message to any
 814 * that are in a stop state.
 815 * Must be called with preemption disabled.
 816 */
 817void pnv_power9_force_smt4_catch(void)
 818{
 819        int cpu, cpu0, thr;
 820        int awake_threads = 1;          /* this thread is awake */
 821        int poke_threads = 0;
 822        int need_awake = threads_per_core;
 823
 824        cpu = smp_processor_id();
 825        cpu0 = cpu & ~(threads_per_core - 1);
 826        for (thr = 0; thr < threads_per_core; ++thr) {
 827                if (cpu != cpu0 + thr)
 828                        atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
 829        }
 830        /* order setting dont_stop vs testing requested_psscr */
 831        smp_mb();
 832        for (thr = 0; thr < threads_per_core; ++thr) {
 833                if (!paca_ptrs[cpu0+thr]->requested_psscr)
 834                        ++awake_threads;
 835                else
 836                        poke_threads |= (1 << thr);
 837        }
 838
 839        /* If at least 3 threads are awake, the core is in SMT4 already */
 840        if (awake_threads < need_awake) {
 841                /* We have to wake some threads; we'll use msgsnd */
 842                for (thr = 0; thr < threads_per_core; ++thr) {
 843                        if (poke_threads & (1 << thr)) {
 844                                ppc_msgsnd_sync();
 845                                ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
 846                                           paca_ptrs[cpu0+thr]->hw_cpu_id);
 847                        }
 848                }
 849                /* now spin until at least 3 threads are awake */
 850                do {
 851                        for (thr = 0; thr < threads_per_core; ++thr) {
 852                                if ((poke_threads & (1 << thr)) &&
 853                                    !paca_ptrs[cpu0+thr]->requested_psscr) {
 854                                        ++awake_threads;
 855                                        poke_threads &= ~(1 << thr);
 856                                }
 857                        }
 858                } while (awake_threads < need_awake);
 859        }
 860}
 861EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);
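
/*
 * Callers are expected to pair pnv_power9_force_smt4_catch() with
 * pnv_power9_force_smt4_release() below once the SMT4-sensitive section
 * is done, so the dont_stop counts are dropped and the sibling threads
 * may enter stop again.
 */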
 862
 863void pnv_power9_force_smt4_release(void)
 864{
 865        int cpu, cpu0, thr;
 866
 867        cpu = smp_processor_id();
 868        cpu0 = cpu & ~(threads_per_core - 1);
 869
 870        /* clear all the dont_stop flags */
 871        for (thr = 0; thr < threads_per_core; ++thr) {
 872                if (cpu != cpu0 + thr)
 873                        atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop);
 874        }
 875}
 876EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
 877#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 878
 879struct p10_sprs {
 880        /*
 881         * SPRs that get lost in shallow states:
 882         *
 883         * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1
 884         * isa300 idle routines restore CR, LR.
 885         * CTR is volatile
 886         * idle thread doesn't use FP or VEC
 887         * kernel doesn't use TAR
 888         * HSPRG1 is only live in HV interrupt entry
 889         * SPRG2 is only live in KVM guests, KVM handles it.
 890         */
 891};
 892
 893static unsigned long power10_idle_stop(unsigned long psscr)
 894{
 895        int cpu = raw_smp_processor_id();
 896        int first = cpu_first_thread_sibling(cpu);
 897        unsigned long *state = &paca_ptrs[first]->idle_state;
 898        unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
 899        unsigned long srr1;
 900        unsigned long pls;
 901//      struct p10_sprs sprs = {}; /* avoid false used-uninitialised */
 902        bool sprs_saved = false;
 903
 904        if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
 905                /* EC=ESL=0 case */
 906
 907                /*
 908                 * Wake synchronously. SRESET via xscom may still cause
 909                 * a 0x100 powersave wakeup with SRR1 reason!
 910                 */
 911                srr1 = isa300_idle_stop_noloss(psscr);          /* go idle */
 912                if (likely(!srr1))
 913                        return 0;
 914
 915                /*
 916                 * Registers not saved, can't recover!
 917                 * This would be a hardware bug
 918                 */
 919                BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
 920
 921                goto out;
 922        }
 923
 924        /* EC=ESL=1 case */
 925        if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
 926                /* XXX: save SPRs for deep state loss here. */
 927
 928                sprs_saved = true;
 929
 930                atomic_start_thread_idle();
 931        }
 932
 933        srr1 = isa300_idle_stop_mayloss(psscr);         /* go idle */
 934
 935        psscr = mfspr(SPRN_PSSCR);
 936
 937        WARN_ON_ONCE(!srr1);
 938        WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
 939
 940        if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
 941                hmi_exception_realmode(NULL);
 942
 943        /*
 944         * On POWER10, SRR1 bits do not match exactly as expected.
 945         * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
 946         * just always test PSSCR for SPR/TB state loss.
 947         */
 948        pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
 949        if (likely(pls < deep_spr_loss_state)) {
 950                if (sprs_saved)
 951                        atomic_stop_thread_idle();
 952                goto out;
 953        }
 954
 955        /* HV state loss */
 956        BUG_ON(!sprs_saved);
 957
 958        atomic_lock_thread_idle();
 959
 960        if ((*state & core_thread_mask) != 0)
 961                goto core_woken;
 962
 963        /* XXX: restore per-core SPRs here */
 964
 965        if (pls >= pnv_first_tb_loss_level) {
 966                /* TB loss */
 967                if (opal_resync_timebase() != OPAL_SUCCESS)
 968                        BUG();
 969        }
 970
 971        /*
 972         * isync after restoring shared SPRs and before unlocking. Unlock
 973         * only contains hwsync which does not necessarily do the right
 974         * thing for SPRs.
 975         */
 976        isync();
 977
 978core_woken:
 979        atomic_unlock_and_stop_thread_idle();
 980
 981        /* XXX: restore per-thread SPRs here */
 982
 983        if (!radix_enabled())
 984                __slb_restore_bolted_realmode();
 985
 986out:
 987        mtmsr(MSR_KERNEL);
 988
 989        return srr1;
 990}
 991
 992#ifdef CONFIG_HOTPLUG_CPU
 993static unsigned long arch300_offline_stop(unsigned long psscr)
 994{
 995        unsigned long srr1;
 996
 997        if (cpu_has_feature(CPU_FTR_ARCH_31))
 998                srr1 = power10_idle_stop(psscr);
 999        else
1000                srr1 = power9_idle_stop(psscr);
1001
1002        return srr1;
1003}
1004#endif
1005
1006void arch300_idle_type(unsigned long stop_psscr_val,
1007                                      unsigned long stop_psscr_mask)
1008{
1009        unsigned long psscr;
1010        unsigned long srr1;
1011
1012        if (!prep_irq_for_idle_irqsoff())
1013                return;
1014
1015        psscr = mfspr(SPRN_PSSCR);
1016        psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
1017
1018        __ppc64_runlatch_off();
1019        if (cpu_has_feature(CPU_FTR_ARCH_31))
1020                srr1 = power10_idle_stop(psscr);
1021        else
1022                srr1 = power9_idle_stop(psscr);
1023        __ppc64_runlatch_on();
1024
1025        fini_irq_for_idle_irqsoff();
1026
1027        irq_set_pending_from_srr1(srr1);
1028}
1029
1030/*
1031 * Used for ppc_md.power_save which needs a function with no parameters
1032 */
1033static void arch300_idle(void)
1034{
1035        arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
1036}
1037
1038#ifdef CONFIG_HOTPLUG_CPU
1039
1040void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
1041{
1042        u64 pir = get_hard_smp_processor_id(cpu);
1043
1044        mtspr(SPRN_LPCR, lpcr_val);
1045
1046        /*
1047         * Program the LPCR via stop-api only if the deepest stop state
1048         * can lose hypervisor context.
1049         */
1050        if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
1051                opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
1052}
1053
1054/*
 1055 * pnv_cpu_offline: Put the CPU into the deepest available platform
 1056 * idle state when it is offlined. Called with interrupts hard
 1057 * disabled and no lazy irq pending.
1058 */
1059unsigned long pnv_cpu_offline(unsigned int cpu)
1060{
1061        unsigned long srr1;
1062
1063        __ppc64_runlatch_off();
1064
1065        if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
1066                unsigned long psscr;
1067
1068                psscr = mfspr(SPRN_PSSCR);
1069                psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
1070                                                pnv_deepest_stop_psscr_val;
1071                srr1 = arch300_offline_stop(psscr);
1072        } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
1073                srr1 = power7_offline();
1074        } else {
1075                /* This is the fallback method. We emulate snooze */
1076                while (!generic_check_cpu_restart(cpu)) {
1077                        HMT_low();
1078                        HMT_very_low();
1079                }
1080                srr1 = 0;
1081                HMT_medium();
1082        }
1083
1084        __ppc64_runlatch_on();
1085
1086        return srr1;
1087}
1088#endif
1089
1090/*
1091 * Power ISA 3.0 idle initialization.
1092 *
1093 * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
1094 * Register (PSSCR) to control idle behavior.
1095 *
1096 * PSSCR layout:
1097 * ----------------------------------------------------------
1098 * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
1099 * ----------------------------------------------------------
1100 * 0      4     41   42    43   44     48    54   56    60
1101 *
1102 * PSSCR key fields:
1103 *      Bits 0:3  - Power-Saving Level Status (PLS). This field indicates the
1104 *      lowest power-saving state the thread entered since stop instruction was
1105 *      last executed.
1106 *
1107 *      Bit 41 - Status Disable(SD)
1108 *      0 - Shows PLS entries
1109 *      1 - PLS entries are all 0
1110 *
1111 *      Bit 42 - Enable State Loss
1112 *      0 - No state is lost irrespective of other fields
1113 *      1 - Allows state loss
1114 *
1115 *      Bit 43 - Exit Criterion
1116 *      0 - Exit from power-save mode on any interrupt
1117 *      1 - Exit from power-save mode controlled by LPCR's PECE bits
1118 *
1119 *      Bits 44:47 - Power-Saving Level Limit
1120 *      This limits the power-saving level that can be entered into.
1121 *
1122 *      Bits 60:63 - Requested Level
1123 *      Used to specify which power-saving level must be entered on executing
1124 *      stop instruction
1125 */
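
/*
 * Worked example, derived from the field positions above: requesting
 * stop level 5 with both ESL (bit 42) and EC (bit 43) set means
 * PSSCR[42] = PSSCR[43] = 1 and PSSCR[60:63] = 5, i.e. a register value
 * of 0x0000000000300005 (bit 42 is 1 << 21 and bit 43 is 1 << 20 when
 * counted from the least-significant end of the 64-bit register).
 */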
1126
1127int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
1128{
1129        int err = 0;
1130
1131        /*
1132         * psscr_mask == 0xf indicates an older firmware.
1133         * Set remaining fields of psscr to the default values.
1134         * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
1135         */
1136        if (*psscr_mask == 0xf) {
1137                *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL;
1138                *psscr_mask = PSSCR_HV_DEFAULT_MASK;
1139                return err;
1140        }
1141
1142        /*
1143         * New firmware is expected to set the psscr_val bits correctly.
1144         * Validate that the following invariants are correctly maintained by
1145         * the new firmware.
1146         * - ESL bit value matches the EC bit value.
1147         * - ESL bit is set for all the deep stop states.
1148         */
1149        if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) {
1150                err = ERR_EC_ESL_MISMATCH;
1151        } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
1152                GET_PSSCR_ESL(*psscr_val) == 0) {
1153                err = ERR_DEEP_STATE_ESL_MISMATCH;
1154        }
1155
1156        return err;
1157}
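
/*
 * For example, with an older firmware that reports a psscr-mask of 0xf
 * and a psscr value of 0x3 (requested level 3 only), the first branch
 * above rewrites the pair to (0x3 | PSSCR_HV_DEFAULT_VAL,
 * PSSCR_HV_DEFAULT_MASK) so that the remaining PSSCR fields get the
 * hypervisor defaults.
 */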
1158
1159/*
1160 * pnv_arch300_idle_init: Initializes the default idle state, first
1161 *                        deep idle state and deepest idle state on
1162 *                        ISA 3.0 CPUs.
 1163 *
 1164 * Operates on the pnv_idle_states array populated from the
 1165 * /ibm,opal/power-mgt device node by pnv_parse_cpuidle_dt().
1168 */
1169static void __init pnv_arch300_idle_init(void)
1170{
1171        u64 max_residency_ns = 0;
1172        int i;
1173
1174        /* stop is not really architected, we only have p9,p10 drivers */
1175        if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9))
1176                return;
1177
1178        /*
1179         * pnv_deepest_stop_{val,mask} should be set to values corresponding to
1180         * the deepest stop state.
1181         *
1182         * pnv_default_stop_{val,mask} should be set to values corresponding to
1183         * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state.
1184         */
1185        pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
1186        deep_spr_loss_state = MAX_STOP_STATE + 1;
1187        for (i = 0; i < nr_pnv_idle_states; i++) {
1188                int err;
1189                struct pnv_idle_states_t *state = &pnv_idle_states[i];
1190                u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
1191
1192                /* No deep loss driver implemented for POWER10 yet */
1193                if (pvr_version_is(PVR_POWER10) &&
1194                                state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT))
1195                        continue;
1196
1197                if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
1198                     (pnv_first_tb_loss_level > psscr_rl))
1199                        pnv_first_tb_loss_level = psscr_rl;
1200
1201                if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
1202                     (deep_spr_loss_state > psscr_rl))
1203                        deep_spr_loss_state = psscr_rl;
1204
1205                /*
1206                 * The idle code does not deal with TB loss occurring
1207                 * in a shallower state than SPR loss, so force it to
1208                 * behave like SPRs are lost if TB is lost. POWER9 would
 1209                 * never encounter this, but a POWER8 core would if it
1210                 * implemented the stop instruction. So this is for forward
1211                 * compatibility.
1212                 */
1213                if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
1214                     (deep_spr_loss_state > psscr_rl))
1215                        deep_spr_loss_state = psscr_rl;
1216
1217                err = validate_psscr_val_mask(&state->psscr_val,
1218                                              &state->psscr_mask,
1219                                              state->flags);
1220                if (err) {
1221                        report_invalid_psscr_val(state->psscr_val, err);
1222                        continue;
1223                }
1224
1225                state->valid = true;
1226
1227                if (max_residency_ns < state->residency_ns) {
1228                        max_residency_ns = state->residency_ns;
1229                        pnv_deepest_stop_psscr_val = state->psscr_val;
1230                        pnv_deepest_stop_psscr_mask = state->psscr_mask;
1231                        pnv_deepest_stop_flag = state->flags;
1232                        deepest_stop_found = true;
1233                }
1234
1235                if (!default_stop_found &&
1236                    (state->flags & OPAL_PM_STOP_INST_FAST)) {
1237                        pnv_default_stop_val = state->psscr_val;
1238                        pnv_default_stop_mask = state->psscr_mask;
1239                        default_stop_found = true;
1240                        WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT);
1241                }
1242        }
1243
1244        if (unlikely(!default_stop_found)) {
1245                pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
1246        } else {
1247                ppc_md.power_save = arch300_idle;
1248                pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
1249                        pnv_default_stop_val, pnv_default_stop_mask);
1250        }
1251
1252        if (unlikely(!deepest_stop_found)) {
 1253                pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait\n");
1254        } else {
1255                pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n",
1256                        pnv_deepest_stop_psscr_val,
1257                        pnv_deepest_stop_psscr_mask);
1258        }
1259
1260        pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n",
1261                deep_spr_loss_state);
1262
1263        pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n",
1264                pnv_first_tb_loss_level);
1265}
1266
1267static void __init pnv_disable_deep_states(void)
1268{
1269        /*
1270         * The stop-api is unable to restore hypervisor
1271         * resources on wakeup from platform idle states which
1272         * lose full context. So disable such states.
1273         */
1274        supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
1275        pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
1276        pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
1277
1278        if (cpu_has_feature(CPU_FTR_ARCH_300) &&
1279            (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
1280                /*
1281                 * Use the default stop state for CPU-Hotplug
1282                 * if available.
1283                 */
1284                if (default_stop_found) {
1285                        pnv_deepest_stop_psscr_val = pnv_default_stop_val;
1286                        pnv_deepest_stop_psscr_mask = pnv_default_stop_mask;
1287                        pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
1288                                pnv_deepest_stop_psscr_val);
1289                } else { /* Fallback to snooze loop for CPU-Hotplug */
1290                        deepest_stop_found = false;
1291                        pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
1292                }
1293        }
1294}
1295
1296/*
1297 * Probe device tree for supported idle states
1298 */
1299static void __init pnv_probe_idle_states(void)
1300{
1301        int i;
1302
1303        if (nr_pnv_idle_states < 0) {
1304                pr_warn("cpuidle-powernv: no idle states found in the DT\n");
1305                return;
1306        }
1307
1308        if (cpu_has_feature(CPU_FTR_ARCH_300))
1309                pnv_arch300_idle_init();
1310
1311        for (i = 0; i < nr_pnv_idle_states; i++)
1312                supported_cpuidle_states |= pnv_idle_states[i].flags;
1313}
1314
1315/*
 1316 * This function parses the device tree and populates all the information
 1317 * into the pnv_idle_states structure. It also sets nr_pnv_idle_states,
 1318 * the number of cpuidle states discovered through the device tree.
1319 */
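
/*
 * The properties read below are parallel arrays indexed by idle state:
 * ibm,cpu-idle-state-flags, ibm,cpu-idle-state-latencies-ns,
 * ibm,cpu-idle-state-residency-ns, ibm,cpu-idle-state-names and, on ISA
 * 3.0 CPUs, ibm,cpu-idle-state-psscr / ibm,cpu-idle-state-psscr-mask.
 * Each is expected to have nr_idle_states entries.
 */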
1320
1321static int pnv_parse_cpuidle_dt(void)
1322{
1323        struct device_node *np;
1324        int nr_idle_states, i;
1325        int rc = 0;
1326        u32 *temp_u32;
1327        u64 *temp_u64;
1328        const char **temp_string;
1329
1330        np = of_find_node_by_path("/ibm,opal/power-mgt");
1331        if (!np) {
1332                pr_warn("opal: PowerMgmt Node not found\n");
1333                return -ENODEV;
1334        }
1335        nr_idle_states = of_property_count_u32_elems(np,
1336                                                "ibm,cpu-idle-state-flags");
1337
1338        pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states),
1339                                  GFP_KERNEL);
1340        temp_u32 = kcalloc(nr_idle_states, sizeof(u32),  GFP_KERNEL);
1341        temp_u64 = kcalloc(nr_idle_states, sizeof(u64),  GFP_KERNEL);
1342        temp_string = kcalloc(nr_idle_states, sizeof(char *),  GFP_KERNEL);
1343
1344        if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) {
1345                pr_err("Could not allocate memory for dt parsing\n");
1346                rc = -ENOMEM;
1347                goto out;
1348        }
1349
1350        /* Read flags */
1351        if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags",
1352                                       temp_u32, nr_idle_states)) {
1353                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
1354                rc = -EINVAL;
1355                goto out;
1356        }
1357        for (i = 0; i < nr_idle_states; i++)
1358                pnv_idle_states[i].flags = temp_u32[i];
1359
1360        /* Read latencies */
1361        if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns",
1362                                       temp_u32, nr_idle_states)) {
1363                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
1364                rc = -EINVAL;
1365                goto out;
1366        }
1367        for (i = 0; i < nr_idle_states; i++)
1368                pnv_idle_states[i].latency_ns = temp_u32[i];
1369
1370        /* Read residencies */
1371        if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns",
1372                                       temp_u32, nr_idle_states)) {
1373                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
1374                rc = -EINVAL;
1375                goto out;
1376        }
1377        for (i = 0; i < nr_idle_states; i++)
1378                pnv_idle_states[i].residency_ns = temp_u32[i];
1379
1380        /* For power9 and later */
1381        if (cpu_has_feature(CPU_FTR_ARCH_300)) {
1382                /* Read pm_crtl_val */
1383                if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr",
1384                                               temp_u64, nr_idle_states)) {
1385                        pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
1386                        rc = -EINVAL;
1387                        goto out;
1388                }
1389                for (i = 0; i < nr_idle_states; i++)
1390                        pnv_idle_states[i].psscr_val = temp_u64[i];
1391
1392                /* Read pm_crtl_mask */
1393                if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask",
1394                                               temp_u64, nr_idle_states)) {
1395                        pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
1396                        rc = -EINVAL;
1397                        goto out;
1398                }
1399                for (i = 0; i < nr_idle_states; i++)
1400                        pnv_idle_states[i].psscr_mask = temp_u64[i];
1401        }
1402
1403        /*
 1404         * The power8 specific properties ibm,cpu-idle-state-pmicr-mask and
 1405         * ibm,cpu-idle-state-pmicr-val were never used and there is no plan
 1406         * to use them in the near future, so these properties are not parsed.
1407         */
1408
1409        if (of_property_read_string_array(np, "ibm,cpu-idle-state-names",
1410                                          temp_string, nr_idle_states) < 0) {
1411                pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n");
1412                rc = -EINVAL;
1413                goto out;
1414        }
1415        for (i = 0; i < nr_idle_states; i++)
1416                strlcpy(pnv_idle_states[i].name, temp_string[i],
1417                        PNV_IDLE_NAME_LEN);
1418        nr_pnv_idle_states = nr_idle_states;
1419        rc = 0;
1420out:
1421        kfree(temp_u32);
1422        kfree(temp_u64);
1423        kfree(temp_string);
1424        return rc;
1425}
1426
1427static int __init pnv_init_idle_states(void)
1428{
1429        int cpu;
1430        int rc = 0;
1431
1432        /* Set up PACA fields */
1433        for_each_present_cpu(cpu) {
1434                struct paca_struct *p = paca_ptrs[cpu];
1435
1436                p->idle_state = 0;
1437                if (cpu == cpu_first_thread_sibling(cpu))
1438                        p->idle_state = (1 << threads_per_core) - 1;
1439
1440                if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
1441                        /* P7/P8 nap */
1442                        p->thread_idle_state = PNV_THREAD_RUNNING;
1443                } else if (pvr_version_is(PVR_POWER9)) {
1444                        /* P9 stop workarounds */
1445#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
1446                        p->requested_psscr = 0;
1447                        atomic_set(&p->dont_stop, 0);
1448#endif
1449                }
1450        }
1451
1452        /* In case we error out nr_pnv_idle_states will be zero */
1453        nr_pnv_idle_states = 0;
1454        supported_cpuidle_states = 0;
1455
1456        if (cpuidle_disable != IDLE_NO_OVERRIDE)
1457                goto out;
1458        rc = pnv_parse_cpuidle_dt();
1459        if (rc)
1460                return rc;
1461        pnv_probe_idle_states();
1462
1463        if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
1464                if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
1465                        power7_fastsleep_workaround_entry = false;
1466                        power7_fastsleep_workaround_exit = false;
1467                } else {
1468                        /*
1469                         * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
1470                         * workaround is needed to use fastsleep. Provide sysfs
1471                         * control to choose how this workaround has to be
1472                         * applied.
1473                         */
1474                        device_create_file(cpu_subsys.dev_root,
1475                                &dev_attr_fastsleep_workaround_applyonce);
1476                }
1477
1478                update_subcore_sibling_mask();
1479
1480                if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) {
1481                        ppc_md.power_save = power7_idle;
1482                        power7_offline_type = PNV_THREAD_NAP;
1483                }
1484
1485                if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) &&
1486                           (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT))
1487                        power7_offline_type = PNV_THREAD_WINKLE;
1488                else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) ||
1489                           (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1))
1490                        power7_offline_type = PNV_THREAD_SLEEP;
1491        }
1492
1493        if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
1494                if (pnv_save_sprs_for_deep_states())
1495                        pnv_disable_deep_states();
1496        }
1497
1498out:
1499        return 0;
1500}
1501machine_subsys_initcall(powernv, pnv_init_idle_states);
1502