linux/arch/powerpc/platforms/powernv/setup.c
<<
>>
Prefs
   1/*
   2 * PowerNV setup code.
   3 *
   4 * Copyright 2011 IBM Corp.
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * as published by the Free Software Foundation; either version
   9 * 2 of the License, or (at your option) any later version.
  10 */
  11
  12#undef DEBUG
  13
  14#include <linux/cpu.h>
  15#include <linux/errno.h>
  16#include <linux/sched.h>
  17#include <linux/kernel.h>
  18#include <linux/tty.h>
  19#include <linux/reboot.h>
  20#include <linux/init.h>
  21#include <linux/console.h>
  22#include <linux/delay.h>
  23#include <linux/irq.h>
  24#include <linux/seq_file.h>
  25#include <linux/of.h>
  26#include <linux/of_fdt.h>
  27#include <linux/interrupt.h>
  28#include <linux/bug.h>
  29#include <linux/pci.h>
  30#include <linux/cpufreq.h>
  31
  32#include <asm/machdep.h>
  33#include <asm/firmware.h>
  34#include <asm/xics.h>
  35#include <asm/xive.h>
  36#include <asm/opal.h>
  37#include <asm/kexec.h>
  38#include <asm/smp.h>
  39#include <asm/tm.h>
  40#include <asm/setup.h>
  41#include <asm/security_features.h>
  42
  43#include "powernv.h"
  44
  45
  46static bool fw_feature_is(const char *state, const char *name,
  47                          struct device_node *fw_features)
  48{
  49        struct device_node *np;
  50        bool rc = false;
  51
  52        np = of_get_child_by_name(fw_features, name);
  53        if (np) {
  54                rc = of_property_read_bool(np, state);
  55                of_node_put(np);
  56        }
  57
  58        return rc;
  59}
  60
  61static void init_fw_feat_flags(struct device_node *np)
  62{
  63        if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np))
  64                security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
  65
  66        if (fw_feature_is("enabled", "fw-bcctrl-serialized", np))
  67                security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
  68
  69        if (fw_feature_is("enabled", "inst-l1d-flush-ori30,30,0", np))
  70                security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
  71
  72        if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np))
  73                security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
  74
  75        if (fw_feature_is("enabled", "fw-l1d-thread-split", np))
  76                security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
  77
  78        if (fw_feature_is("enabled", "fw-count-cache-disabled", np))
  79                security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
  80
  81        if (fw_feature_is("enabled", "fw-count-cache-flush-bcctr2,0,0", np))
  82                security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
  83
  84        if (fw_feature_is("enabled", "needs-count-cache-flush-on-context-switch", np))
  85                security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
  86
  87        /*
  88         * The features below are enabled by default, so we instead look to see
  89         * if firmware has *disabled* them, and clear them if so.
  90         */
  91        if (fw_feature_is("disabled", "speculation-policy-favor-security", np))
  92                security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
  93
  94        if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np))
  95                security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
  96
  97        if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np))
  98                security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
  99
 100        if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np))
 101                security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
 102}
 103
 104static void pnv_setup_rfi_flush(void)
 105{
 106        struct device_node *np, *fw_features;
 107        enum l1d_flush_type type;
 108        bool enable;
 109
 110        /* Default to fallback in case fw-features are not available */
 111        type = L1D_FLUSH_FALLBACK;
 112
 113        np = of_find_node_by_name(NULL, "ibm,opal");
 114        fw_features = of_get_child_by_name(np, "fw-features");
 115        of_node_put(np);
 116
 117        if (fw_features) {
 118                init_fw_feat_flags(fw_features);
 119                of_node_put(fw_features);
 120
 121                if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
 122                        type = L1D_FLUSH_MTTRIG;
 123
 124                if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
 125                        type = L1D_FLUSH_ORI;
 126        }
 127
 128        /*
 129         * The issues addressed by the entry and uaccess flush don't affect P7
 130         * or P8, so on bare metal disable them explicitly in case firmware does
 131         * not include the features to disable them. POWER9 and newer processors
 132         * should have the appropriate firmware flags.
 133         */
 134        if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p) ||
 135            pvr_version_is(PVR_POWER8E) || pvr_version_is(PVR_POWER8NVL) ||
 136            pvr_version_is(PVR_POWER8)) {
 137                security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
 138                security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
 139        }
 140
 141        enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
 142                 (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)   || \
 143                  security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
 144
 145        setup_rfi_flush(type, enable);
 146        setup_count_cache_flush();
 147
 148        enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
 149                 security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
 150        setup_entry_flush(enable);
 151
 152        enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
 153                 security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
 154        setup_uaccess_flush(enable);
 155}
 156
 157static void __init pnv_setup_arch(void)
 158{
 159        set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
 160
 161        pnv_setup_rfi_flush();
 162        setup_stf_barrier();
 163
 164        /* Initialize SMP */
 165        pnv_smp_init();
 166
 167        /* Setup RTC and NVRAM callbacks */
 168        if (firmware_has_feature(FW_FEATURE_OPAL))
 169                opal_nvram_init();
 170
 171        /* Enable NAP mode */
 172        powersave_nap = 1;
 173
 174        /* XXX PMCS */
 175}
 176
 177static void __init pnv_init(void)
 178{
 179        /*
 180         * Initialize the LPC bus now so that legacy serial
 181         * ports can be found on it
 182         */
 183        opal_lpc_init();
 184
 185#ifdef CONFIG_HVC_OPAL
 186        if (firmware_has_feature(FW_FEATURE_OPAL))
 187                hvc_opal_init_early();
 188        else
 189#endif
 190                add_preferred_console("hvc", 0, NULL);
 191}
 192
 193static void __init pnv_init_IRQ(void)
 194{
 195        /* Try using a XIVE if available, otherwise use a XICS */
 196        if (!xive_native_init())
 197                xics_init();
 198
 199        WARN_ON(!ppc_md.get_irq);
 200}
 201
 202static void pnv_show_cpuinfo(struct seq_file *m)
 203{
 204        struct device_node *root;
 205        const char *model = "";
 206
 207        root = of_find_node_by_path("/");
 208        if (root)
 209                model = of_get_property(root, "model", NULL);
 210        seq_printf(m, "machine\t\t: PowerNV %s\n", model);
 211        if (firmware_has_feature(FW_FEATURE_OPAL))
 212                seq_printf(m, "firmware\t: OPAL\n");
 213        else
 214                seq_printf(m, "firmware\t: BML\n");
 215        of_node_put(root);
 216        if (radix_enabled())
 217                seq_printf(m, "MMU\t\t: Radix\n");
 218        else
 219                seq_printf(m, "MMU\t\t: Hash\n");
 220}
 221
 /*
  * Common preparation shared by the restart and power-off paths. The order
  * of the steps below is deliberate and must be kept.
  */
 static void pnv_prepare_going_down(void)
 {
         /*
          * Interrupts can no longer be serviced from here on, so turn off
          * every notifier coming from OPAL.
          */
         opal_event_shutdown();

         /* If a flash update is scheduled, print the message about it. */
         opal_flash_update_print_message();

         smp_send_stop();

         hard_irq_disable();
 }
 237
 238static void  __noreturn pnv_restart(char *cmd)
 239{
 240        long rc;
 241
 242        pnv_prepare_going_down();
 243
 244        do {
 245                if (!cmd)
 246                        rc = opal_cec_reboot();
 247                else if (strcmp(cmd, "full") == 0)
 248                        rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL);
 249                else
 250                        rc = OPAL_UNSUPPORTED;
 251
 252                if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 253                        /* Opal is busy wait for some time and retry */
 254                        opal_poll_events(NULL);
 255                        mdelay(10);
 256
 257                } else  if (cmd && rc) {
 258                        /* Unknown error while issuing reboot */
 259                        if (rc == OPAL_UNSUPPORTED)
 260                                pr_err("Unsupported '%s' reboot.\n", cmd);
 261                        else
 262                                pr_err("Unable to issue '%s' reboot. Err=%ld\n",
 263                                       cmd, rc);
 264                        pr_info("Forcing a cec-reboot\n");
 265                        cmd = NULL;
 266                        rc = OPAL_BUSY;
 267
 268                } else if (rc != OPAL_SUCCESS) {
 269                        /* Unknown error while issuing cec-reboot */
 270                        pr_err("Unable to reboot. Err=%ld\n", rc);
 271                }
 272
 273        } while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT);
 274
 275        for (;;)
 276                opal_poll_events(NULL);
 277}
 278
 279static void __noreturn pnv_power_off(void)
 280{
 281        long rc = OPAL_BUSY;
 282
 283        pnv_prepare_going_down();
 284
 285        while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 286                rc = opal_cec_power_down(0);
 287                if (rc == OPAL_BUSY_EVENT)
 288                        opal_poll_events(NULL);
 289                else
 290                        mdelay(10);
 291        }
 292        for (;;)
 293                opal_poll_events(NULL);
 294}
 295
 296static void __noreturn pnv_halt(void)
 297{
 298        pnv_power_off();
 299}
 300
 /* Progress hook for the machine description; intentionally does nothing. */
 static void pnv_progress(char *s, unsigned short hex)
 {
 }
 304
 /* machine_shutdown hook: quiesce PCI first, then OPAL. */
 static void pnv_shutdown(void)
 {
         /* Give the PCI code the chance to clear up the IODA tables */
         pnv_pci_shutdown();

         /*
          * Quiesce OPAL: all OPAL interrupts are unregistered so that none
          * fires during kexec, and any operation that might still be doing
          * DMA (dump retrieval, for instance) is allowed to complete.
          */
         opal_shutdown();
 }
 317
 318#ifdef CONFIG_KEXEC_CORE
 319static void pnv_kexec_wait_secondaries_down(void)
 320{
 321        int my_cpu, i, notified = -1;
 322
 323        my_cpu = get_cpu();
 324
 325        for_each_online_cpu(i) {
 326                uint8_t status;
 327                int64_t rc, timeout = 1000;
 328
 329                if (i == my_cpu)
 330                        continue;
 331
 332                for (;;) {
 333                        rc = opal_query_cpu_status(get_hard_smp_processor_id(i),
 334                                                   &status);
 335                        if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED)
 336                                break;
 337                        barrier();
 338                        if (i != notified) {
 339                                printk(KERN_INFO "kexec: waiting for cpu %d "
 340                                       "(physical %d) to enter OPAL\n",
 341                                       i, paca_ptrs[i]->hw_cpu_id);
 342                                notified = i;
 343                        }
 344
 345                        /*
 346                         * On crash secondaries might be unreachable or hung,
 347                         * so timeout if we've waited too long
 348                         * */
 349                        mdelay(1);
 350                        if (timeout-- == 0) {
 351                                printk(KERN_ERR "kexec: timed out waiting for "
 352                                       "cpu %d (physical %d) to enter OPAL\n",
 353                                       i, paca_ptrs[i]->hw_cpu_id);
 354                                break;
 355                        }
 356                }
 357        }
 358}
 359
 360static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
 361{
 362        u64 reinit_flags;
 363
 364        if (xive_enabled())
 365                xive_kexec_teardown_cpu(secondary);
 366        else
 367                xics_kexec_teardown_cpu(secondary);
 368
 369        /* On OPAL, we return all CPUs to firmware */
 370        if (!firmware_has_feature(FW_FEATURE_OPAL))
 371                return;
 372
 373        if (secondary) {
 374                /* Return secondary CPUs to firmware on OPAL v3 */
 375                mb();
 376                get_paca()->kexec_state = KEXEC_STATE_REAL_MODE;
 377                mb();
 378
 379                /* Return the CPU to OPAL */
 380                opal_return_cpu();
 381        } else {
 382                /* Primary waits for the secondaries to have reached OPAL */
 383                pnv_kexec_wait_secondaries_down();
 384
 385                /* Switch XIVE back to emulation mode */
 386                if (xive_enabled())
 387                        xive_shutdown();
 388
 389                /*
 390                 * We might be running as little-endian - now that interrupts
 391                 * are disabled, reset the HILE bit to big-endian so we don't
 392                 * take interrupts in the wrong endian later
 393                 *
 394                 * We reinit to enable both radix and hash on P9 to ensure
 395                 * the mode used by the next kernel is always supported.
 396                 */
 397                reinit_flags = OPAL_REINIT_CPUS_HILE_BE;
 398                if (cpu_has_feature(CPU_FTR_ARCH_300))
 399                        reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX |
 400                                OPAL_REINIT_CPUS_MMU_HASH;
 401                opal_reinit_cpus(reinit_flags);
 402        }
 403}
 404#endif /* CONFIG_KEXEC_CORE */
 405
 406#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 407static unsigned long pnv_memory_block_size(void)
 408{
 409        /*
 410         * We map the kernel linear region with 1GB large pages on radix. For
 411         * memory hot unplug to work our memory block size must be at least
 412         * this size.
 413         */
 414        if (radix_enabled())
 415                return radix_mem_block_size;
 416        else
 417                return 256UL * 1024 * 1024;
 418}
 419#endif
 420
 421static void __init pnv_setup_machdep_opal(void)
 422{
 423        ppc_md.get_boot_time = opal_get_boot_time;
 424        ppc_md.restart = pnv_restart;
 425        pm_power_off = pnv_power_off;
 426        ppc_md.halt = pnv_halt;
 427        /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
 428        ppc_md.machine_check_exception = opal_machine_check;
 429        ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
 430        if (opal_check_token(OPAL_HANDLE_HMI2))
 431                ppc_md.hmi_exception_early = opal_hmi_exception_early2;
 432        else
 433                ppc_md.hmi_exception_early = opal_hmi_exception_early;
 434        ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
 435}
 436
 437static int __init pnv_probe(void)
 438{
 439        if (!of_machine_is_compatible("ibm,powernv"))
 440                return 0;
 441
 442        if (firmware_has_feature(FW_FEATURE_OPAL))
 443                pnv_setup_machdep_opal();
 444
 445        pr_debug("PowerNV detected !\n");
 446
 447        pnv_init();
 448
 449        return 1;
 450}
 451
 452#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 453void __init pnv_tm_init(void)
 454{
 455        if (!firmware_has_feature(FW_FEATURE_OPAL) ||
 456            !pvr_version_is(PVR_POWER9) ||
 457            early_cpu_has_feature(CPU_FTR_TM))
 458                return;
 459
 460        if (opal_reinit_cpus(OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) != OPAL_SUCCESS)
 461                return;
 462
 463        pr_info("Enabling TM (Transactional Memory) with Suspend Disabled\n");
 464        cur_cpu_spec->cpu_features |= CPU_FTR_TM;
 465        /* Make sure "normal" HTM is off (it should be) */
 466        cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_HTM;
 467        /* Turn on no suspend mode, and HTM no SC */
 468        cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NO_SUSPEND | \
 469                                            PPC_FEATURE2_HTM_NOSC;
 470        tm_suspend_disabled = true;
 471}
 472#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 473
 474/*
 475 * Returns the cpu frequency for 'cpu' in Hz. This is used by
 476 * /proc/cpuinfo
 477 */
 478static unsigned long pnv_get_proc_freq(unsigned int cpu)
 479{
 480        unsigned long ret_freq;
 481
 482        ret_freq = cpufreq_get(cpu) * 1000ul;
 483
 484        /*
 485         * If the backend cpufreq driver does not exist,
 486         * then fallback to old way of reporting the clockrate.
 487         */
 488        if (!ret_freq)
 489                ret_freq = ppc_proc_freq;
 490        return ret_freq;
 491}
 492
 493static long pnv_machine_check_early(struct pt_regs *regs)
 494{
 495        long handled = 0;
 496
 497        if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
 498                handled = cur_cpu_spec->machine_check_early(regs);
 499
 500        return handled;
 501}
 502
 503define_machine(powernv) {
 504        .name                   = "PowerNV",
 505        .probe                  = pnv_probe,
 506        .setup_arch             = pnv_setup_arch,
 507        .init_IRQ               = pnv_init_IRQ,
 508        .show_cpuinfo           = pnv_show_cpuinfo,
 509        .get_proc_freq          = pnv_get_proc_freq,
 510        .discover_phbs          = pnv_pci_init,
 511        .progress               = pnv_progress,
 512        .machine_shutdown       = pnv_shutdown,
 513        .power_save             = NULL,
 514        .calibrate_decr         = generic_calibrate_decr,
 515        .machine_check_early    = pnv_machine_check_early,
 516#ifdef CONFIG_KEXEC_CORE
 517        .kexec_cpu_down         = pnv_kexec_cpu_down,
 518#endif
 519#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 520        .memory_block_size      = pnv_memory_block_size,
 521#endif
 522};
 523