linux/arch/powerpc/platforms/powernv/smp.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * SMP support for PowerNV machines.
 *
 * Copyright 2011 IBM Corp.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/sched/hotplug.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/cpu.h>

#include <asm/irq.h>
#include <asm/smp.h>
#include <asm/paca.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/firmware.h>
#include <asm/vdso_datapage.h>
#include <asm/cputhreads.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/opal.h>
#include <asm/runlatch.h>
#include <asm/code-patching.h>
#include <asm/dbell.h>
#include <asm/kvm_ppc.h>
#include <asm/ppc-opcode.h>
#include <asm/cpuidle.h>
#include <asm/kexec.h>
#include <asm/reg.h>
#include <asm/powernv.h>

#include "powernv.h"

#ifdef DEBUG
#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...) do { } while (0)
#endif

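/*
 * Usage sketch, assuming a udbg backend has been registered (e.g. the
 * OPAL console): with DEBUG defined above, DBG("CPU%d offline\n", cpu)
 * expands to udbg_printf(); without it, the call compiles away.
 */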
static void pnv_smp_setup_cpu(int cpu)
{
        /*
         * P9 workaround for CI vector load (see traps.c):
         * enable the corresponding HMI interrupt
         */
        if (pvr_version_is(PVR_POWER9))
                mtspr(SPRN_HMEER, mfspr(SPRN_HMEER) | PPC_BIT(17));

        if (xive_enabled())
                xive_smp_setup_cpu();
        else if (cpu != boot_cpuid)
                xics_setup_cpu();
}

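/*
 * Bring a secondary out of the firmware / kexec holding pattern.  The
 * release step at the end is the generic one; a minimal sketch of what
 * smp_generic_kick_cpu() does for an already-spinning thread is:
 *
 *      paca_ptrs[nr]->cpu_start = 1;
 *      smp_mb();
 *
 * which lets the secondary fall out of its spin loop in
 * generic_secondary_smp_init().
 */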
static int pnv_smp_kick_cpu(int nr)
{
        unsigned int pcpu;
        unsigned long start_here =
                        __pa(ppc_function_entry(generic_secondary_smp_init));
        long rc;
        uint8_t status;

        if (nr < 0 || nr >= nr_cpu_ids)
                return -EINVAL;

        pcpu = get_hard_smp_processor_id(nr);
        /*
         * If the CPU has already started, or OPAL is not supported,
         * just kick it via the PACA
         */
        if (paca_ptrs[nr]->cpu_start || !firmware_has_feature(FW_FEATURE_OPAL))
                goto kick;

        /*
         * At this point, the CPU can either be spinning on the way in
         * from kexec or be inside OPAL waiting to be started for the
         * first time. OPAL v3 allows us to query whether OPAL still
         * owns the CPU, so do that
         */
        rc = opal_query_cpu_status(pcpu, &status);
        if (rc != OPAL_SUCCESS) {
                pr_warn("OPAL Error %ld querying CPU %d state\n", rc, nr);
                return -ENODEV;
        }

        /*
         * Already started, just kick it, probably coming from
         * kexec and spinning
         */
        if (status == OPAL_THREAD_STARTED)
                goto kick;

        /*
         * Available/inactive, let's kick it
         */
        if (status == OPAL_THREAD_INACTIVE) {
                pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", nr, pcpu);
                rc = opal_start_cpu(pcpu, start_here);
                if (rc != OPAL_SUCCESS) {
                        pr_warn("OPAL Error %ld starting CPU %d\n", rc, nr);
                        return -ENODEV;
                }
        } else {
                /*
                 * An unavailable CPU (or any other unknown status)
                 * shouldn't be started. It also shouldn't be in the
                 * possible map, but currently that can happen
                 */
                pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable (status %d)...\n",
                         nr, pcpu, status);
                return -ENODEV;
        }

kick:
        return smp_generic_kick_cpu(nr);
}

#ifdef CONFIG_HOTPLUG_CPU

static int pnv_smp_cpu_disable(void)
{
        int cpu = smp_processor_id();

        /* This is identical to pSeries... might consolidate by
         * moving migrate_irqs_away to a ppc_md with default to
         * the generic fixup_irqs. --BenH.
         */
        set_cpu_online(cpu, false);
        vdso_data->processorCount--;
        if (cpu == boot_cpuid)
                boot_cpuid = cpumask_any(cpu_online_mask);
        if (xive_enabled())
                xive_smp_disable_cpu();
        else
                xics_migrate_irqs_away();

        cleanup_cpu_mmu_context();

        return 0;
}

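/*
 * Acknowledge and complete any interrupt pending at the presentation
 * controller, so an offline thread doesn't leave a stale external
 * interrupt latched.  Which flush routine applies depends on the
 * interrupt controller in use (XIVE, OPAL ICP, or native ICP).
 */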
static void pnv_flush_interrupts(void)
{
        if (cpu_has_feature(CPU_FTR_ARCH_300)) {
                if (xive_enabled())
                        xive_flush_interrupt();
                else
                        icp_opal_flush_interrupt();
        } else {
                icp_native_flush_interrupt();
        }
}

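/*
 * Offline-self flow, in outline: mark the CPU dead, hard-disable
 * interrupts, clear LPCR:PECE1 so the decrementer cannot wake us, then
 * loop in the deepest available idle state via pnv_cpu_offline().  Each
 * wakeup decodes the SRR1 wake reason, clears whatever woke us (EE,
 * hypervisor doorbell, ...), and goes back to sleep until
 * generic_check_cpu_restart() says the CPU is wanted online again.
 */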
static void pnv_cpu_offline_self(void)
{
        unsigned long srr1, unexpected_mask, wmask;
        unsigned int cpu;
        u64 lpcr_val;

        /* Standard hot unplug procedure */

        idle_task_exit();
        cpu = smp_processor_id();
        DBG("CPU%d offline\n", cpu);
        generic_set_cpu_dead(cpu);
        smp_wmb();

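        /*
         * Wake-reason mask for the SRR1 decode below; the wider
         * ISA 2.07 mask also covers the hypervisor doorbell and (on P9)
         * hypervisor virtualization interrupt wake reasons handled in
         * the loop.
         */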
        wmask = SRR1_WAKEMASK;
        if (cpu_has_feature(CPU_FTR_ARCH_207S))
                wmask = SRR1_WAKEMASK_P8;

        /*
         * This turns the irq soft-disabled state we're called with into a
         * hard-disabled state with pending irq_happened interrupts cleared.
         *
         * PACA_IRQ_DEC   - Decrementer should be ignored.
         * PACA_IRQ_HMI   - Can be ignored, processing is done in real mode.
         * PACA_IRQ_DBELL, EE, PMI - Unexpected.
         */
        hard_irq_disable();
        if (generic_check_cpu_restart(cpu))
                goto out;

        unexpected_mask = ~(PACA_IRQ_DEC | PACA_IRQ_HMI | PACA_IRQ_HARD_DIS);
        if (local_paca->irq_happened & unexpected_mask) {
                if (local_paca->irq_happened & PACA_IRQ_EE)
                        pnv_flush_interrupts();
                DBG("CPU%d Unexpected exit while offline irq_happened=%lx!\n",
                                cpu, local_paca->irq_happened);
        }
        local_paca->irq_happened = PACA_IRQ_HARD_DIS;

        /*
         * We don't want to take decrementer interrupts while we are
         * offline, so clear LPCR:PECE1. We keep PECE2 (and
         * LPCR_PECE_HVEE on P9) enabled so as to let IPIs in.
         *
         * If the CPU gets woken up by a special wakeup, ensure that
         * the SLW engine sets LPCR with the decrementer bit cleared,
         * else the CPU will come back to the kernel due to a spurious
         * wakeup.
         */
        lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
        pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);

        while (!generic_check_cpu_restart(cpu)) {
                /*
                 * Clear the IPI flag, since we don't handle IPIs while
                 * offline, except for those when changing micro-threading
                 * mode, which are handled explicitly below, and those
                 * for coming online, which are handled via
                 * generic_check_cpu_restart() calls.
                 */
                kvmppc_clear_host_ipi(cpu);

                srr1 = pnv_cpu_offline(cpu);

                WARN_ON_ONCE(!irqs_disabled());
                WARN_ON(lazy_irq_pending());

                /*
                 * If the SRR1 value indicates that we woke up due to
                 * an external interrupt, then clear the interrupt.
                 * We clear the interrupt before checking for the
                 * reason, so as to avoid a race where we wake up for
                 * some other reason, find nothing and clear the interrupt
                 * just as some other CPU is sending us an interrupt.
                 * If we returned from power7_nap as a result of
                 * having finished executing in a KVM guest, then srr1
                 * contains 0.
                 */
                if (((srr1 & wmask) == SRR1_WAKEEE) ||
                    ((srr1 & wmask) == SRR1_WAKEHVI)) {
                        pnv_flush_interrupts();
                } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
                        unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
                        asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
                } else if ((srr1 & wmask) == SRR1_WAKERESET) {
                        irq_set_pending_from_srr1(srr1);
                        /* Does not return */
                }

                smp_mb();

                /*
                 * For kdump kernels, we process the IPI and jump to
                 * crash_ipi_callback
                 */
                if (kdump_in_progress()) {
                        /*
                         * If we got to this point, we haven't used
                         * NMIs, otherwise we would have gone via the
                         * SRR1_WAKERESET path. We are using regular
                         * IPIs for waking up offline threads.
                         */
                        struct pt_regs regs;

                        ppc_save_regs(&regs);
                        crash_ipi_callback(&regs);
                        /* Does not return */
                }

                if (cpu_core_split_required())
                        continue;

                if (srr1 && !generic_check_cpu_restart(cpu))
                        DBG("CPU%d Unexpected exit while offline srr1=%lx!\n",
                                        cpu, srr1);

        }

        /*
         * Re-enable decrementer interrupts in LPCR.
         *
         * Further, we want stop states to be woken up by the
         * decrementer for non-hotplug cases. So program the LPCR via
         * the stop api as well.
         */
        lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
        pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
out:
        DBG("CPU%d coming online...\n", cpu);
}


#endif /* CONFIG_HOTPLUG_CPU */

static int pnv_cpu_bootable(unsigned int nr)
{
        /*
         * Starting with POWER8, the subcore logic relies on all threads of a
         * core being booted so that they can participate in split mode
         * switches. So on those machines we ignore the smt_enabled_at_boot
         * setting (smt-enabled on the kernel command line).
         */
        if (cpu_has_feature(CPU_FTR_ARCH_207S))
                return 1;

        return smp_generic_cpu_bootable(nr);
}

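/*
 * Per-CPU interrupt controller setup that must happen before the CPU
 * is kicked.  For XIVE this provisions the thread's event queue pages;
 * XICS needs nothing at this stage.
 */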
static int pnv_smp_prepare_cpu(int cpu)
{
        if (xive_enabled())
                return xive_smp_prepare_cpu(cpu);
        return 0;
}

/* Cause IPI as set up by the interrupt controller (xics or xive) */
static void (*ic_cause_ipi)(int cpu);

static void pnv_cause_ipi(int cpu)
{
        if (doorbell_try_core_ipi(cpu))
                return;

        ic_cause_ipi(cpu);
}

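/*
 * IPI strategy wired up below: on POWER9 (ARCH_300) msgsnd doorbells
 * reach any thread in the system, so they are used directly.  On
 * POWER8, msgsndp doorbells only reach sibling threads of the same
 * core, so pnv_cause_ipi() above tries a core-local doorbell first and
 * falls back to the interrupt controller (XICS/XIVE) IPI.
 */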
static void __init pnv_smp_probe(void)
{
        if (xive_enabled())
                xive_smp_probe();
        else
                xics_smp_probe();

        if (cpu_has_feature(CPU_FTR_DBELL)) {
                ic_cause_ipi = smp_ops->cause_ipi;
                WARN_ON(!ic_cause_ipi);

                if (cpu_has_feature(CPU_FTR_ARCH_300))
                        smp_ops->cause_ipi = doorbell_global_ipi;
                else
                        smp_ops->cause_ipi = pnv_cause_ipi;
        }
}

static int pnv_system_reset_exception(struct pt_regs *regs)
{
        if (smp_handle_nmi_ipi(regs))
                return 1;
        return 0;
}

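/*
 * Deliver an NMI by asking OPAL to system-reset the target.  The
 * quiesce calls are, roughly, there to hold other CPUs out of OPAL
 * while the reset is signalled, so a target currently executing
 * firmware code can be interrupted safely.  Returns 1 on success so
 * the caller skips its fallback delivery path.
 */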
static int pnv_cause_nmi_ipi(int cpu)
{
        int64_t rc;

        if (cpu >= 0) {
                int h = get_hard_smp_processor_id(cpu);

                if (opal_check_token(OPAL_QUIESCE))
                        opal_quiesce(QUIESCE_HOLD, h);

                rc = opal_signal_system_reset(h);

                if (opal_check_token(OPAL_QUIESCE))
                        opal_quiesce(QUIESCE_RESUME, h);

                if (rc != OPAL_SUCCESS)
                        return 0;
                return 1;

        } else if (cpu == NMI_IPI_ALL_OTHERS) {
                bool success = true;
                int c;

                if (opal_check_token(OPAL_QUIESCE))
                        opal_quiesce(QUIESCE_HOLD, -1);

                /*
                 * We do not use broadcasts (yet), because it's not clear
                 * exactly what semantics Linux wants or the firmware should
                 * provide.
                 */
                for_each_online_cpu(c) {
                        if (c == smp_processor_id())
                                continue;

                        rc = opal_signal_system_reset(
                                                get_hard_smp_processor_id(c));
                        if (rc != OPAL_SUCCESS)
                                success = false;
                }

                if (opal_check_token(OPAL_QUIESCE))
                        opal_quiesce(QUIESCE_RESUME, -1);

                if (success)
                        return 1;

                /*
                 * Caller will fall back to doorbells, which may pick
                 * up the remainders.
                 */
        }

        return 0;
}

static struct smp_ops_t pnv_smp_ops = {
        .message_pass   = NULL, /* Use smp_muxed_ipi_message_pass */
        .cause_ipi      = NULL, /* Filled at runtime by pnv_smp_probe() */
        .cause_nmi_ipi  = NULL,
        .probe          = pnv_smp_probe,
        .prepare_cpu    = pnv_smp_prepare_cpu,
        .kick_cpu       = pnv_smp_kick_cpu,
        .setup_cpu      = pnv_smp_setup_cpu,
        .cpu_bootable   = pnv_cpu_bootable,
#ifdef CONFIG_HOTPLUG_CPU
        .cpu_disable    = pnv_smp_cpu_disable,
        .cpu_die        = generic_cpu_die,
        .cpu_offline_self = pnv_cpu_offline_self,
#endif /* CONFIG_HOTPLUG_CPU */
};

/* This is called very early during platform setup_arch */
void __init pnv_smp_init(void)
{
        if (opal_check_token(OPAL_SIGNAL_SYSTEM_RESET)) {
                ppc_md.system_reset_exception = pnv_system_reset_exception;
                pnv_smp_ops.cause_nmi_ipi = pnv_cause_nmi_ipi;
        }
        smp_ops = &pnv_smp_ops;

#ifdef CONFIG_HOTPLUG_CPU
#ifdef CONFIG_KEXEC_CORE
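        /*
         * Have the kexec crash path wake offlined (stopped/napping)
         * threads with an IPI so they can save their register state;
         * see the kdump_in_progress() handling in pnv_cpu_offline_self().
         */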
        crash_wake_offline = 1;
#endif
#endif
}