linux/arch/powerpc/platforms/powernv/setup.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * PowerNV setup code.
   4 *
   5 * Copyright 2011 IBM Corp.
   6 */
   7
   8#undef DEBUG
   9
  10#include <linux/cpu.h>
  11#include <linux/errno.h>
  12#include <linux/sched.h>
  13#include <linux/kernel.h>
  14#include <linux/tty.h>
  15#include <linux/reboot.h>
  16#include <linux/init.h>
  17#include <linux/console.h>
  18#include <linux/delay.h>
  19#include <linux/irq.h>
  20#include <linux/seq_file.h>
  21#include <linux/of.h>
  22#include <linux/of_fdt.h>
  23#include <linux/interrupt.h>
  24#include <linux/bug.h>
  25#include <linux/pci.h>
  26#include <linux/cpufreq.h>
  27#include <linux/memblock.h>
  28
  29#include <asm/machdep.h>
  30#include <asm/firmware.h>
  31#include <asm/xics.h>
  32#include <asm/xive.h>
  33#include <asm/opal.h>
  34#include <asm/kexec.h>
  35#include <asm/smp.h>
  36#include <asm/tm.h>
  37#include <asm/setup.h>
  38#include <asm/security_features.h>
  39
  40#include "powernv.h"
  41
  42
  43static bool fw_feature_is(const char *state, const char *name,
  44                          struct device_node *fw_features)
  45{
  46        struct device_node *np;
  47        bool rc = false;
  48
  49        np = of_get_child_by_name(fw_features, name);
  50        if (np) {
  51                rc = of_property_read_bool(np, state);
  52                of_node_put(np);
  53        }
  54
  55        return rc;
  56}
  57
  58static void init_fw_feat_flags(struct device_node *np)
  59{
  60        if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np))
  61                security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
  62
  63        if (fw_feature_is("enabled", "fw-bcctrl-serialized", np))
  64                security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
  65
  66        if (fw_feature_is("enabled", "inst-l1d-flush-ori30,30,0", np))
  67                security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
  68
  69        if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np))
  70                security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
  71
  72        if (fw_feature_is("enabled", "fw-l1d-thread-split", np))
  73                security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
  74
  75        if (fw_feature_is("enabled", "fw-count-cache-disabled", np))
  76                security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
  77
  78        if (fw_feature_is("enabled", "fw-count-cache-flush-bcctr2,0,0", np))
  79                security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
  80
  81        if (fw_feature_is("enabled", "needs-count-cache-flush-on-context-switch", np))
  82                security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
  83
  84        /*
  85         * The features below are enabled by default, so we instead look to see
  86         * if firmware has *disabled* them, and clear them if so.
  87         */
  88        if (fw_feature_is("disabled", "speculation-policy-favor-security", np))
  89                security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
  90
  91        if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np))
  92                security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
  93
  94        if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np))
  95                security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
  96
  97        if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np))
  98                security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
  99}
 100
 101static void pnv_setup_security_mitigations(void)
 102{
 103        struct device_node *np, *fw_features;
 104        enum l1d_flush_type type;
 105        bool enable;
 106
 107        /* Default to fallback in case fw-features are not available */
 108        type = L1D_FLUSH_FALLBACK;
 109
 110        np = of_find_node_by_name(NULL, "ibm,opal");
 111        fw_features = of_get_child_by_name(np, "fw-features");
 112        of_node_put(np);
 113
 114        if (fw_features) {
 115                init_fw_feat_flags(fw_features);
 116                of_node_put(fw_features);
 117
 118                if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
 119                        type = L1D_FLUSH_MTTRIG;
 120
 121                if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
 122                        type = L1D_FLUSH_ORI;
 123        }
 124
 125        /*
 126         * If we are non-Power9 bare metal, we don't need to flush on kernel
 127         * entry or after user access: they fix a P9 specific vulnerability.
 128         */
 129        if (!pvr_version_is(PVR_POWER9)) {
 130                security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
 131                security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
 132        }
 133
 134        enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
 135                 (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)   || \
 136                  security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
 137
 138        setup_rfi_flush(type, enable);
 139        setup_count_cache_flush();
 140
 141        enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
 142                 security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
 143        setup_entry_flush(enable);
 144
 145        enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
 146                 security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
 147        setup_uaccess_flush(enable);
 148
 149        setup_stf_barrier();
 150}
 151
 152static void __init pnv_check_guarded_cores(void)
 153{
 154        struct device_node *dn;
 155        int bad_count = 0;
 156
 157        for_each_node_by_type(dn, "cpu") {
 158                if (of_property_match_string(dn, "status", "bad") >= 0)
 159                        bad_count++;
 160        }
 161
 162        if (bad_count) {
 163                printk("  _     _______________\n");
 164                pr_cont(" | |   /               \\\n");
 165                pr_cont(" | |   |    WARNING!   |\n");
 166                pr_cont(" | |   |               |\n");
 167                pr_cont(" | |   | It looks like |\n");
 168                pr_cont(" |_|   |  you have %*d |\n", 3, bad_count);
 169                pr_cont("  _    | guarded cores |\n");
 170                pr_cont(" (_)   \\_______________/\n");
 171        }
 172}
 173
 174static void __init pnv_setup_arch(void)
 175{
 176        set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
 177
 178        pnv_setup_security_mitigations();
 179
 180        /* Initialize SMP */
 181        pnv_smp_init();
 182
 183        /* Setup RTC and NVRAM callbacks */
 184        if (firmware_has_feature(FW_FEATURE_OPAL))
 185                opal_nvram_init();
 186
 187        /* Enable NAP mode */
 188        powersave_nap = 1;
 189
 190        pnv_check_guarded_cores();
 191
 192        /* XXX PMCS */
 193}
 194
 195static void __init pnv_init(void)
 196{
 197        /*
 198         * Initialize the LPC bus now so that legacy serial
 199         * ports can be found on it
 200         */
 201        opal_lpc_init();
 202
 203#ifdef CONFIG_HVC_OPAL
 204        if (firmware_has_feature(FW_FEATURE_OPAL))
 205                hvc_opal_init_early();
 206        else
 207#endif
 208                add_preferred_console("hvc", 0, NULL);
 209
 210        if (!radix_enabled()) {
 211                size_t size = sizeof(struct slb_entry) * mmu_slb_size;
 212                int i;
 213
 214                /* Allocate per cpu area to save old slb contents during MCE */
 215                for_each_possible_cpu(i) {
 216                        paca_ptrs[i]->mce_faulty_slbs =
 217                                        memblock_alloc_node(size,
 218                                                __alignof__(struct slb_entry),
 219                                                cpu_to_node(i));
 220                }
 221        }
 222}
 223
 224static void __init pnv_init_IRQ(void)
 225{
 226        /* Try using a XIVE if available, otherwise use a XICS */
 227        if (!xive_native_init())
 228                xics_init();
 229
 230        WARN_ON(!ppc_md.get_irq);
 231}
 232
 233static void pnv_show_cpuinfo(struct seq_file *m)
 234{
 235        struct device_node *root;
 236        const char *model = "";
 237
 238        root = of_find_node_by_path("/");
 239        if (root)
 240                model = of_get_property(root, "model", NULL);
 241        seq_printf(m, "machine\t\t: PowerNV %s\n", model);
 242        if (firmware_has_feature(FW_FEATURE_OPAL))
 243                seq_printf(m, "firmware\t: OPAL\n");
 244        else
 245                seq_printf(m, "firmware\t: BML\n");
 246        of_node_put(root);
 247        if (radix_enabled())
 248                seq_printf(m, "MMU\t\t: Radix\n");
 249        else
 250                seq_printf(m, "MMU\t\t: Hash\n");
 251}
 252
 253static void pnv_prepare_going_down(void)
 254{
 255        /*
 256         * Disable all notifiers from OPAL, we can't
 257         * service interrupts anymore anyway
 258         */
 259        opal_event_shutdown();
 260
 261        /* Print flash update message if one is scheduled. */
 262        opal_flash_update_print_message();
 263
 264        smp_send_stop();
 265
 266        hard_irq_disable();
 267}
 268
 269static void  __noreturn pnv_restart(char *cmd)
 270{
 271        long rc;
 272
 273        pnv_prepare_going_down();
 274
 275        do {
 276                if (!cmd || !strlen(cmd))
 277                        rc = opal_cec_reboot();
 278                else if (strcmp(cmd, "full") == 0)
 279                        rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL);
 280                else if (strcmp(cmd, "mpipl") == 0)
 281                        rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, NULL);
 282                else if (strcmp(cmd, "error") == 0)
 283                        rc = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, NULL);
 284                else if (strcmp(cmd, "fast") == 0)
 285                        rc = opal_cec_reboot2(OPAL_REBOOT_FAST, NULL);
 286                else
 287                        rc = OPAL_UNSUPPORTED;
 288
 289                if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 290                        /* Opal is busy wait for some time and retry */
 291                        opal_poll_events(NULL);
 292                        mdelay(10);
 293
 294                } else  if (cmd && rc) {
 295                        /* Unknown error while issuing reboot */
 296                        if (rc == OPAL_UNSUPPORTED)
 297                                pr_err("Unsupported '%s' reboot.\n", cmd);
 298                        else
 299                                pr_err("Unable to issue '%s' reboot. Err=%ld\n",
 300                                       cmd, rc);
 301                        pr_info("Forcing a cec-reboot\n");
 302                        cmd = NULL;
 303                        rc = OPAL_BUSY;
 304
 305                } else if (rc != OPAL_SUCCESS) {
 306                        /* Unknown error while issuing cec-reboot */
 307                        pr_err("Unable to reboot. Err=%ld\n", rc);
 308                }
 309
 310        } while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT);
 311
 312        for (;;)
 313                opal_poll_events(NULL);
 314}
 315
 316static void __noreturn pnv_power_off(void)
 317{
 318        long rc = OPAL_BUSY;
 319
 320        pnv_prepare_going_down();
 321
 322        while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 323                rc = opal_cec_power_down(0);
 324                if (rc == OPAL_BUSY_EVENT)
 325                        opal_poll_events(NULL);
 326                else
 327                        mdelay(10);
 328        }
 329        for (;;)
 330                opal_poll_events(NULL);
 331}
 332
 333static void __noreturn pnv_halt(void)
 334{
 335        pnv_power_off();
 336}
 337
 338static void pnv_progress(char *s, unsigned short hex)
 339{
 340}
 341
 342static void pnv_shutdown(void)
 343{
 344        /* Let the PCI code clear up IODA tables */
 345        pnv_pci_shutdown();
 346
 347        /*
 348         * Stop OPAL activity: Unregister all OPAL interrupts so they
 349         * don't fire up while we kexec and make sure all potentially
 350         * DMA'ing ops are complete (such as dump retrieval).
 351         */
 352        opal_shutdown();
 353}
 354
 355#ifdef CONFIG_KEXEC_CORE
 356static void pnv_kexec_wait_secondaries_down(void)
 357{
 358        int my_cpu, i, notified = -1;
 359
 360        my_cpu = get_cpu();
 361
 362        for_each_online_cpu(i) {
 363                uint8_t status;
 364                int64_t rc, timeout = 1000;
 365
 366                if (i == my_cpu)
 367                        continue;
 368
 369                for (;;) {
 370                        rc = opal_query_cpu_status(get_hard_smp_processor_id(i),
 371                                                   &status);
 372                        if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED)
 373                                break;
 374                        barrier();
 375                        if (i != notified) {
 376                                printk(KERN_INFO "kexec: waiting for cpu %d "
 377                                       "(physical %d) to enter OPAL\n",
 378                                       i, paca_ptrs[i]->hw_cpu_id);
 379                                notified = i;
 380                        }
 381
 382                        /*
 383                         * On crash secondaries might be unreachable or hung,
 384                         * so timeout if we've waited too long
 385                         * */
 386                        mdelay(1);
 387                        if (timeout-- == 0) {
 388                                printk(KERN_ERR "kexec: timed out waiting for "
 389                                       "cpu %d (physical %d) to enter OPAL\n",
 390                                       i, paca_ptrs[i]->hw_cpu_id);
 391                                break;
 392                        }
 393                }
 394        }
 395}
 396
 397static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
 398{
 399        u64 reinit_flags;
 400
 401        if (xive_enabled())
 402                xive_teardown_cpu();
 403        else
 404                xics_kexec_teardown_cpu(secondary);
 405
 406        /* On OPAL, we return all CPUs to firmware */
 407        if (!firmware_has_feature(FW_FEATURE_OPAL))
 408                return;
 409
 410        if (secondary) {
 411                /* Return secondary CPUs to firmware on OPAL v3 */
 412                mb();
 413                get_paca()->kexec_state = KEXEC_STATE_REAL_MODE;
 414                mb();
 415
 416                /* Return the CPU to OPAL */
 417                opal_return_cpu();
 418        } else {
 419                /* Primary waits for the secondaries to have reached OPAL */
 420                pnv_kexec_wait_secondaries_down();
 421
 422                /* Switch XIVE back to emulation mode */
 423                if (xive_enabled())
 424                        xive_shutdown();
 425
 426                /*
 427                 * We might be running as little-endian - now that interrupts
 428                 * are disabled, reset the HILE bit to big-endian so we don't
 429                 * take interrupts in the wrong endian later
 430                 *
 431                 * We reinit to enable both radix and hash on P9 to ensure
 432                 * the mode used by the next kernel is always supported.
 433                 */
 434                reinit_flags = OPAL_REINIT_CPUS_HILE_BE;
 435                if (cpu_has_feature(CPU_FTR_ARCH_300))
 436                        reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX |
 437                                OPAL_REINIT_CPUS_MMU_HASH;
 438                opal_reinit_cpus(reinit_flags);
 439        }
 440}
 441#endif /* CONFIG_KEXEC_CORE */
 442
 443#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 444static unsigned long pnv_memory_block_size(void)
 445{
 446        /*
 447         * We map the kernel linear region with 1GB large pages on radix. For
 448         * memory hot unplug to work our memory block size must be at least
 449         * this size.
 450         */
 451        if (radix_enabled())
 452                return radix_mem_block_size;
 453        else
 454                return 256UL * 1024 * 1024;
 455}
 456#endif
 457
 458static void __init pnv_setup_machdep_opal(void)
 459{
 460        ppc_md.get_boot_time = opal_get_boot_time;
 461        ppc_md.restart = pnv_restart;
 462        pm_power_off = pnv_power_off;
 463        ppc_md.halt = pnv_halt;
 464        /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
 465        ppc_md.machine_check_exception = opal_machine_check;
 466        ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
 467        if (opal_check_token(OPAL_HANDLE_HMI2))
 468                ppc_md.hmi_exception_early = opal_hmi_exception_early2;
 469        else
 470                ppc_md.hmi_exception_early = opal_hmi_exception_early;
 471        ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
 472}
 473
 474static int __init pnv_probe(void)
 475{
 476        if (!of_machine_is_compatible("ibm,powernv"))
 477                return 0;
 478
 479        if (firmware_has_feature(FW_FEATURE_OPAL))
 480                pnv_setup_machdep_opal();
 481
 482        pr_debug("PowerNV detected !\n");
 483
 484        pnv_init();
 485
 486        return 1;
 487}
 488
 489#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 490void __init pnv_tm_init(void)
 491{
 492        if (!firmware_has_feature(FW_FEATURE_OPAL) ||
 493            !pvr_version_is(PVR_POWER9) ||
 494            early_cpu_has_feature(CPU_FTR_TM))
 495                return;
 496
 497        if (opal_reinit_cpus(OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) != OPAL_SUCCESS)
 498                return;
 499
 500        pr_info("Enabling TM (Transactional Memory) with Suspend Disabled\n");
 501        cur_cpu_spec->cpu_features |= CPU_FTR_TM;
 502        /* Make sure "normal" HTM is off (it should be) */
 503        cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_HTM;
 504        /* Turn on no suspend mode, and HTM no SC */
 505        cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NO_SUSPEND | \
 506                                            PPC_FEATURE2_HTM_NOSC;
 507        tm_suspend_disabled = true;
 508}
 509#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 510
 511/*
 512 * Returns the cpu frequency for 'cpu' in Hz. This is used by
 513 * /proc/cpuinfo
 514 */
 515static unsigned long pnv_get_proc_freq(unsigned int cpu)
 516{
 517        unsigned long ret_freq;
 518
 519        ret_freq = cpufreq_get(cpu) * 1000ul;
 520
 521        /*
 522         * If the backend cpufreq driver does not exist,
 523         * then fallback to old way of reporting the clockrate.
 524         */
 525        if (!ret_freq)
 526                ret_freq = ppc_proc_freq;
 527        return ret_freq;
 528}
 529
 530static long pnv_machine_check_early(struct pt_regs *regs)
 531{
 532        long handled = 0;
 533
 534        if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
 535                handled = cur_cpu_spec->machine_check_early(regs);
 536
 537        return handled;
 538}
 539
 540define_machine(powernv) {
 541        .name                   = "PowerNV",
 542        .probe                  = pnv_probe,
 543        .setup_arch             = pnv_setup_arch,
 544        .init_IRQ               = pnv_init_IRQ,
 545        .show_cpuinfo           = pnv_show_cpuinfo,
 546        .get_proc_freq          = pnv_get_proc_freq,
 547        .discover_phbs          = pnv_pci_init,
 548        .progress               = pnv_progress,
 549        .machine_shutdown       = pnv_shutdown,
 550        .power_save             = NULL,
 551        .calibrate_decr         = generic_calibrate_decr,
 552        .machine_check_early    = pnv_machine_check_early,
 553#ifdef CONFIG_KEXEC_CORE
 554        .kexec_cpu_down         = pnv_kexec_cpu_down,
 555#endif
 556#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 557        .memory_block_size      = pnv_memory_block_size,
 558#endif
 559};
 560