linux/arch/powerpc/platforms/powernv/setup.c
<<
>>
Prefs
   1/*
   2 * PowerNV setup code.
   3 *
   4 * Copyright 2011 IBM Corp.
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * as published by the Free Software Foundation; either version
   9 * 2 of the License, or (at your option) any later version.
  10 */
  11
  12#undef DEBUG
  13
  14#include <linux/cpu.h>
  15#include <linux/errno.h>
  16#include <linux/sched.h>
  17#include <linux/kernel.h>
  18#include <linux/tty.h>
  19#include <linux/reboot.h>
  20#include <linux/init.h>
  21#include <linux/console.h>
  22#include <linux/delay.h>
  23#include <linux/irq.h>
  24#include <linux/seq_file.h>
  25#include <linux/of.h>
  26#include <linux/of_fdt.h>
  27#include <linux/interrupt.h>
  28#include <linux/bug.h>
  29#include <linux/pci.h>
  30#include <linux/cpufreq.h>
  31
  32#include <asm/machdep.h>
  33#include <asm/firmware.h>
  34#include <asm/xics.h>
  35#include <asm/xive.h>
  36#include <asm/opal.h>
  37#include <asm/kexec.h>
  38#include <asm/smp.h>
  39#include <asm/tm.h>
  40#include <asm/setup.h>
  41
  42#include "powernv.h"
  43
  44static void pnv_setup_rfi_flush(void)
  45{
  46        struct device_node *np, *fw_features;
  47        enum l1d_flush_type type;
  48        int enable;
  49
  50        /* Default to fallback in case fw-features are not available */
  51        type = L1D_FLUSH_FALLBACK;
  52        enable = 1;
  53
  54        np = of_find_node_by_name(NULL, "ibm,opal");
  55        fw_features = of_get_child_by_name(np, "fw-features");
  56        of_node_put(np);
  57
  58        if (fw_features) {
  59                np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
  60                if (np && of_property_read_bool(np, "enabled"))
  61                        type = L1D_FLUSH_MTTRIG;
  62
  63                of_node_put(np);
  64
  65                np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
  66                if (np && of_property_read_bool(np, "enabled"))
  67                        type = L1D_FLUSH_ORI;
  68
  69                of_node_put(np);
  70
  71                /* Enable unless firmware says NOT to */
  72                enable = 2;
  73                np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
  74                if (np && of_property_read_bool(np, "disabled"))
  75                        enable--;
  76
  77                of_node_put(np);
  78
  79                np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
  80                if (np && of_property_read_bool(np, "disabled"))
  81                        enable--;
  82
  83                np = of_get_child_by_name(fw_features, "speculation-policy-favor-security");
  84                if (np && of_property_read_bool(np, "disabled"))
  85                        enable = 0;
  86
  87                of_node_put(np);
  88                of_node_put(fw_features);
  89        }
  90
  91        setup_rfi_flush(type, enable > 0);
  92}
  93
  94static void __init pnv_setup_arch(void)
  95{
  96        set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
  97
  98        pnv_setup_rfi_flush();
  99
 100        /* Initialize SMP */
 101        pnv_smp_init();
 102
 103        /* Setup PCI */
 104        pnv_pci_init();
 105
 106        /* Setup RTC and NVRAM callbacks */
 107        if (firmware_has_feature(FW_FEATURE_OPAL))
 108                opal_nvram_init();
 109
 110        /* Enable NAP mode */
 111        powersave_nap = 1;
 112
 113        /* XXX PMCS */
 114}
 115
 116static void __init pnv_init(void)
 117{
 118        /*
 119         * Initialize the LPC bus now so that legacy serial
 120         * ports can be found on it
 121         */
 122        opal_lpc_init();
 123
 124#ifdef CONFIG_HVC_OPAL
 125        if (firmware_has_feature(FW_FEATURE_OPAL))
 126                hvc_opal_init_early();
 127        else
 128#endif
 129                add_preferred_console("hvc", 0, NULL);
 130}
 131
 132static void __init pnv_init_IRQ(void)
 133{
 134        /* Try using a XIVE if available, otherwise use a XICS */
 135        if (!xive_native_init())
 136                xics_init();
 137
 138        WARN_ON(!ppc_md.get_irq);
 139}
 140
 141static void pnv_show_cpuinfo(struct seq_file *m)
 142{
 143        struct device_node *root;
 144        const char *model = "";
 145
 146        root = of_find_node_by_path("/");
 147        if (root)
 148                model = of_get_property(root, "model", NULL);
 149        seq_printf(m, "machine\t\t: PowerNV %s\n", model);
 150        if (firmware_has_feature(FW_FEATURE_OPAL))
 151                seq_printf(m, "firmware\t: OPAL\n");
 152        else
 153                seq_printf(m, "firmware\t: BML\n");
 154        of_node_put(root);
 155        if (radix_enabled())
 156                seq_printf(m, "MMU\t\t: Radix\n");
 157        else
 158                seq_printf(m, "MMU\t\t: Hash\n");
 159}
 160
 161static void pnv_prepare_going_down(void)
 162{
 163        /*
 164         * Disable all notifiers from OPAL, we can't
 165         * service interrupts anymore anyway
 166         */
 167        opal_event_shutdown();
 168
 169        /* Soft disable interrupts */
 170        local_irq_disable();
 171
 172        /*
 173         * Return secondary CPUs to firwmare if a flash update
 174         * is pending otherwise we will get all sort of error
 175         * messages about CPU being stuck etc.. This will also
 176         * have the side effect of hard disabling interrupts so
 177         * past this point, the kernel is effectively dead.
 178         */
 179        opal_flash_term_callback();
 180}
 181
 182static void  __noreturn pnv_restart(char *cmd)
 183{
 184        long rc = OPAL_BUSY;
 185
 186        pnv_prepare_going_down();
 187
 188        while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 189                rc = opal_cec_reboot();
 190                if (rc == OPAL_BUSY_EVENT)
 191                        opal_poll_events(NULL);
 192                else
 193                        mdelay(10);
 194        }
 195        for (;;)
 196                opal_poll_events(NULL);
 197}
 198
 199static void __noreturn pnv_power_off(void)
 200{
 201        long rc = OPAL_BUSY;
 202
 203        pnv_prepare_going_down();
 204
 205        while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 206                rc = opal_cec_power_down(0);
 207                if (rc == OPAL_BUSY_EVENT)
 208                        opal_poll_events(NULL);
 209                else
 210                        mdelay(10);
 211        }
 212        for (;;)
 213                opal_poll_events(NULL);
 214}
 215
 216static void __noreturn pnv_halt(void)
 217{
 218        pnv_power_off();
 219}
 220
 221static void pnv_progress(char *s, unsigned short hex)
 222{
 223}
 224
 225static void pnv_shutdown(void)
 226{
 227        /* Let the PCI code clear up IODA tables */
 228        pnv_pci_shutdown();
 229
 230        /*
 231         * Stop OPAL activity: Unregister all OPAL interrupts so they
 232         * don't fire up while we kexec and make sure all potentially
 233         * DMA'ing ops are complete (such as dump retrieval).
 234         */
 235        opal_shutdown();
 236}
 237
 238#ifdef CONFIG_KEXEC_CORE
 239static void pnv_kexec_wait_secondaries_down(void)
 240{
 241        int my_cpu, i, notified = -1;
 242
 243        my_cpu = get_cpu();
 244
 245        for_each_online_cpu(i) {
 246                uint8_t status;
 247                int64_t rc, timeout = 1000;
 248
 249                if (i == my_cpu)
 250                        continue;
 251
 252                for (;;) {
 253                        rc = opal_query_cpu_status(get_hard_smp_processor_id(i),
 254                                                   &status);
 255                        if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED)
 256                                break;
 257                        barrier();
 258                        if (i != notified) {
 259                                printk(KERN_INFO "kexec: waiting for cpu %d "
 260                                       "(physical %d) to enter OPAL\n",
 261                                       i, paca[i].hw_cpu_id);
 262                                notified = i;
 263                        }
 264
 265                        /*
 266                         * On crash secondaries might be unreachable or hung,
 267                         * so timeout if we've waited too long
 268                         * */
 269                        mdelay(1);
 270                        if (timeout-- == 0) {
 271                                printk(KERN_ERR "kexec: timed out waiting for "
 272                                       "cpu %d (physical %d) to enter OPAL\n",
 273                                       i, paca[i].hw_cpu_id);
 274                                break;
 275                        }
 276                }
 277        }
 278}
 279
 280static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
 281{
 282        u64 reinit_flags;
 283
 284        if (xive_enabled())
 285                xive_kexec_teardown_cpu(secondary);
 286        else
 287                xics_kexec_teardown_cpu(secondary);
 288
 289        /* On OPAL, we return all CPUs to firmware */
 290        if (!firmware_has_feature(FW_FEATURE_OPAL))
 291                return;
 292
 293        if (secondary) {
 294                /* Return secondary CPUs to firmware on OPAL v3 */
 295                mb();
 296                get_paca()->kexec_state = KEXEC_STATE_REAL_MODE;
 297                mb();
 298
 299                /* Return the CPU to OPAL */
 300                opal_return_cpu();
 301        } else {
 302                /* Primary waits for the secondaries to have reached OPAL */
 303                pnv_kexec_wait_secondaries_down();
 304
 305                /* Switch XIVE back to emulation mode */
 306                if (xive_enabled())
 307                        xive_shutdown();
 308
 309                /*
 310                 * We might be running as little-endian - now that interrupts
 311                 * are disabled, reset the HILE bit to big-endian so we don't
 312                 * take interrupts in the wrong endian later
 313                 *
 314                 * We reinit to enable both radix and hash on P9 to ensure
 315                 * the mode used by the next kernel is always supported.
 316                 */
 317                reinit_flags = OPAL_REINIT_CPUS_HILE_BE;
 318                if (cpu_has_feature(CPU_FTR_ARCH_300))
 319                        reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX |
 320                                OPAL_REINIT_CPUS_MMU_HASH;
 321                opal_reinit_cpus(reinit_flags);
 322        }
 323}
 324#endif /* CONFIG_KEXEC_CORE */
 325
 326#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 327static unsigned long pnv_memory_block_size(void)
 328{
 329        /*
 330         * We map the kernel linear region with 1GB large pages on radix. For
 331         * memory hot unplug to work our memory block size must be at least
 332         * this size.
 333         */
 334        if (radix_enabled())
 335                return 1UL * 1024 * 1024 * 1024;
 336        else
 337                return 256UL * 1024 * 1024;
 338}
 339#endif
 340
 341static void __init pnv_setup_machdep_opal(void)
 342{
 343        ppc_md.get_boot_time = opal_get_boot_time;
 344        ppc_md.restart = pnv_restart;
 345        pm_power_off = pnv_power_off;
 346        ppc_md.halt = pnv_halt;
 347        /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
 348        ppc_md.machine_check_exception = opal_machine_check;
 349        ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
 350        ppc_md.hmi_exception_early = opal_hmi_exception_early;
 351        ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
 352}
 353
 354static int __init pnv_probe(void)
 355{
 356        if (!of_machine_is_compatible("ibm,powernv"))
 357                return 0;
 358
 359        if (firmware_has_feature(FW_FEATURE_OPAL))
 360                pnv_setup_machdep_opal();
 361
 362        pr_debug("PowerNV detected !\n");
 363
 364        pnv_init();
 365
 366        return 1;
 367}
 368
 369#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 370void __init pnv_tm_init(void)
 371{
 372        if (!firmware_has_feature(FW_FEATURE_OPAL) ||
 373            !pvr_version_is(PVR_POWER9) ||
 374            early_cpu_has_feature(CPU_FTR_TM))
 375                return;
 376
 377        if (opal_reinit_cpus(OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) != OPAL_SUCCESS)
 378                return;
 379
 380        pr_info("Enabling TM (Transactional Memory) with Suspend Disabled\n");
 381        cur_cpu_spec->cpu_features |= CPU_FTR_TM;
 382        /* Make sure "normal" HTM is off (it should be) */
 383        cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_HTM;
 384        /* Turn on no suspend mode, and HTM no SC */
 385        cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NO_SUSPEND | \
 386                                            PPC_FEATURE2_HTM_NOSC;
 387        tm_suspend_disabled = true;
 388}
 389#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 390
 391/*
 392 * Returns the cpu frequency for 'cpu' in Hz. This is used by
 393 * /proc/cpuinfo
 394 */
 395static unsigned long pnv_get_proc_freq(unsigned int cpu)
 396{
 397        unsigned long ret_freq;
 398
 399        ret_freq = cpufreq_get(cpu) * 1000ul;
 400
 401        /*
 402         * If the backend cpufreq driver does not exist,
 403         * then fallback to old way of reporting the clockrate.
 404         */
 405        if (!ret_freq)
 406                ret_freq = ppc_proc_freq;
 407        return ret_freq;
 408}
 409
 410define_machine(powernv) {
 411        .name                   = "PowerNV",
 412        .probe                  = pnv_probe,
 413        .setup_arch             = pnv_setup_arch,
 414        .init_IRQ               = pnv_init_IRQ,
 415        .show_cpuinfo           = pnv_show_cpuinfo,
 416        .get_proc_freq          = pnv_get_proc_freq,
 417        .progress               = pnv_progress,
 418        .machine_shutdown       = pnv_shutdown,
 419        .power_save             = NULL,
 420        .calibrate_decr         = generic_calibrate_decr,
 421#ifdef CONFIG_KEXEC_CORE
 422        .kexec_cpu_down         = pnv_kexec_cpu_down,
 423#endif
 424#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 425        .memory_block_size      = pnv_memory_block_size,
 426#endif
 427};
 428