linux/arch/powerpc/platforms/powernv/opal.c
<<
>>
Prefs
   1/*
   2 * PowerNV OPAL high level interfaces
   3 *
   4 * Copyright 2011 IBM Corp.
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * as published by the Free Software Foundation; either version
   9 * 2 of the License, or (at your option) any later version.
  10 */
  11
  12#define pr_fmt(fmt)     "opal: " fmt
  13
  14#include <linux/printk.h>
  15#include <linux/types.h>
  16#include <linux/of.h>
  17#include <linux/of_fdt.h>
  18#include <linux/of_platform.h>
  19#include <linux/of_address.h>
  20#include <linux/interrupt.h>
  21#include <linux/notifier.h>
  22#include <linux/slab.h>
  23#include <linux/sched.h>
  24#include <linux/kobject.h>
  25#include <linux/delay.h>
  26#include <linux/memblock.h>
  27#include <linux/kthread.h>
  28#include <linux/freezer.h>
  29#include <linux/printk.h>
  30#include <linux/kmsg_dump.h>
  31#include <linux/console.h>
  32#include <linux/sched/debug.h>
  33
  34#include <asm/machdep.h>
  35#include <asm/opal.h>
  36#include <asm/firmware.h>
  37#include <asm/mce.h>
  38#include <asm/imc-pmu.h>
  39#include <asm/bug.h>
  40
  41#include "powernv.h"
  42
  43/* /sys/firmware/opal */
  44struct kobject *opal_kobj;
  45
  46struct opal {
  47        u64 base;
  48        u64 entry;
  49        u64 size;
  50} opal;
  51
  52struct mcheck_recoverable_range {
  53        u64 start_addr;
  54        u64 end_addr;
  55        u64 recover_addr;
  56};
  57
  58static struct mcheck_recoverable_range *mc_recoverable_range;
  59static int mc_recoverable_range_len;
  60
  61struct device_node *opal_node;
  62static DEFINE_SPINLOCK(opal_write_lock);
  63static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
  64static uint32_t opal_heartbeat;
  65static struct task_struct *kopald_tsk;
  66
  67void opal_configure_cores(void)
  68{
  69        u64 reinit_flags = 0;
  70
  71        /* Do the actual re-init, This will clobber all FPRs, VRs, etc...
  72         *
  73         * It will preserve non volatile GPRs and HSPRG0/1. It will
  74         * also restore HIDs and other SPRs to their original value
  75         * but it might clobber a bunch.
  76         */
  77#ifdef __BIG_ENDIAN__
  78        reinit_flags |= OPAL_REINIT_CPUS_HILE_BE;
  79#else
  80        reinit_flags |= OPAL_REINIT_CPUS_HILE_LE;
  81#endif
  82
  83        /*
  84         * POWER9 always support running hash:
  85         *  ie. Host hash  supports  hash guests
  86         *      Host radix supports  hash/radix guests
  87         */
  88        if (early_cpu_has_feature(CPU_FTR_ARCH_300)) {
  89                reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH;
  90                if (early_radix_enabled())
  91                        reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX;
  92        }
  93
  94        opal_reinit_cpus(reinit_flags);
  95
  96        /* Restore some bits */
  97        if (cur_cpu_spec->cpu_restore)
  98                cur_cpu_spec->cpu_restore();
  99}
 100
 101int __init early_init_dt_scan_opal(unsigned long node,
 102                                   const char *uname, int depth, void *data)
 103{
 104        const void *basep, *entryp, *sizep;
 105        int basesz, entrysz, runtimesz;
 106
 107        if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
 108                return 0;
 109
 110        basep  = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
 111        entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
 112        sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);
 113
 114        if (!basep || !entryp || !sizep)
 115                return 1;
 116
 117        opal.base = of_read_number(basep, basesz/4);
 118        opal.entry = of_read_number(entryp, entrysz/4);
 119        opal.size = of_read_number(sizep, runtimesz/4);
 120
 121        pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%d)\n",
 122                 opal.base, basep, basesz);
 123        pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%d)\n",
 124                 opal.entry, entryp, entrysz);
 125        pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n",
 126                 opal.size, sizep, runtimesz);
 127
 128        if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
 129                powerpc_firmware_features |= FW_FEATURE_OPAL;
 130                pr_info("OPAL detected !\n");
 131        } else {
 132                panic("OPAL != V3 detected, no longer supported.\n");
 133        }
 134
 135        return 1;
 136}
 137
 138int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
 139                                   const char *uname, int depth, void *data)
 140{
 141        int i, psize, size;
 142        const __be32 *prop;
 143
 144        if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
 145                return 0;
 146
 147        prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);
 148
 149        if (!prop)
 150                return 1;
 151
 152        pr_debug("Found machine check recoverable ranges.\n");
 153
 154        /*
 155         * Calculate number of available entries.
 156         *
 157         * Each recoverable address range entry is (start address, len,
 158         * recovery address), 2 cells each for start and recovery address,
 159         * 1 cell for len, totalling 5 cells per entry.
 160         */
 161        mc_recoverable_range_len = psize / (sizeof(*prop) * 5);
 162
 163        /* Sanity check */
 164        if (!mc_recoverable_range_len)
 165                return 1;
 166
 167        /* Size required to hold all the entries. */
 168        size = mc_recoverable_range_len *
 169                        sizeof(struct mcheck_recoverable_range);
 170
 171        /*
 172         * Allocate a buffer to hold the MC recoverable ranges.
 173         */
 174        mc_recoverable_range =__va(memblock_alloc(size, __alignof__(u64)));
 175        memset(mc_recoverable_range, 0, size);
 176
 177        for (i = 0; i < mc_recoverable_range_len; i++) {
 178                mc_recoverable_range[i].start_addr =
 179                                        of_read_number(prop + (i * 5) + 0, 2);
 180                mc_recoverable_range[i].end_addr =
 181                                        mc_recoverable_range[i].start_addr +
 182                                        of_read_number(prop + (i * 5) + 2, 1);
 183                mc_recoverable_range[i].recover_addr =
 184                                        of_read_number(prop + (i * 5) + 3, 2);
 185
 186                pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
 187                                mc_recoverable_range[i].start_addr,
 188                                mc_recoverable_range[i].end_addr,
 189                                mc_recoverable_range[i].recover_addr);
 190        }
 191        return 1;
 192}
 193
 194static int __init opal_register_exception_handlers(void)
 195{
 196#ifdef __BIG_ENDIAN__
 197        u64 glue;
 198
 199        if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
 200                return -ENODEV;
 201
 202        /* Hookup some exception handlers except machine check. We use the
 203         * fwnmi area at 0x7000 to provide the glue space to OPAL
 204         */
 205        glue = 0x7000;
 206
 207        /*
 208         * Check if we are running on newer firmware that exports
 209         * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch
 210         * the HMI interrupt and we catch it directly in Linux.
 211         *
 212         * For older firmware (i.e currently released POWER8 System Firmware
 213         * as of today <= SV810_087), we fallback to old behavior and let OPAL
 214         * patch the HMI vector and handle it inside OPAL firmware.
 215         *
 216         * For newer firmware (in development/yet to be released) we will
 217         * start catching/handling HMI directly in Linux.
 218         */
 219        if (!opal_check_token(OPAL_HANDLE_HMI)) {
 220                pr_info("Old firmware detected, OPAL handles HMIs.\n");
 221                opal_register_exception_handler(
 222                                OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
 223                                0, glue);
 224                glue += 128;
 225        }
 226
 227        opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
 228#endif
 229
 230        return 0;
 231}
 232machine_early_initcall(powernv, opal_register_exception_handlers);
 233
 234/*
 235 * Opal message notifier based on message type. Allow subscribers to get
 236 * notified for specific messgae type.
 237 */
 238int opal_message_notifier_register(enum opal_msg_type msg_type,
 239                                        struct notifier_block *nb)
 240{
 241        if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
 242                pr_warning("%s: Invalid arguments, msg_type:%d\n",
 243                           __func__, msg_type);
 244                return -EINVAL;
 245        }
 246
 247        return atomic_notifier_chain_register(
 248                                &opal_msg_notifier_head[msg_type], nb);
 249}
 250EXPORT_SYMBOL_GPL(opal_message_notifier_register);
 251
 252int opal_message_notifier_unregister(enum opal_msg_type msg_type,
 253                                     struct notifier_block *nb)
 254{
 255        return atomic_notifier_chain_unregister(
 256                        &opal_msg_notifier_head[msg_type], nb);
 257}
 258EXPORT_SYMBOL_GPL(opal_message_notifier_unregister);
 259
 260static void opal_message_do_notify(uint32_t msg_type, void *msg)
 261{
 262        /* notify subscribers */
 263        atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
 264                                        msg_type, msg);
 265}
 266
 267static void opal_handle_message(void)
 268{
 269        s64 ret;
 270        /*
 271         * TODO: pre-allocate a message buffer depending on opal-msg-size
 272         * value in /proc/device-tree.
 273         */
 274        static struct opal_msg msg;
 275        u32 type;
 276
 277        ret = opal_get_msg(__pa(&msg), sizeof(msg));
 278        /* No opal message pending. */
 279        if (ret == OPAL_RESOURCE)
 280                return;
 281
 282        /* check for errors. */
 283        if (ret) {
 284                pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
 285                                __func__, ret);
 286                return;
 287        }
 288
 289        type = be32_to_cpu(msg.msg_type);
 290
 291        /* Sanity check */
 292        if (type >= OPAL_MSG_TYPE_MAX) {
 293                pr_warn_once("%s: Unknown message type: %u\n", __func__, type);
 294                return;
 295        }
 296        opal_message_do_notify(type, (void *)&msg);
 297}
 298
 299static irqreturn_t opal_message_notify(int irq, void *data)
 300{
 301        opal_handle_message();
 302        return IRQ_HANDLED;
 303}
 304
 305static int __init opal_message_init(void)
 306{
 307        int ret, i, irq;
 308
 309        for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
 310                ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
 311
 312        irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING));
 313        if (!irq) {
 314                pr_err("%s: Can't register OPAL event irq (%d)\n",
 315                       __func__, irq);
 316                return irq;
 317        }
 318
 319        ret = request_irq(irq, opal_message_notify,
 320                        IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL);
 321        if (ret) {
 322                pr_err("%s: Can't request OPAL event irq (%d)\n",
 323                       __func__, ret);
 324                return ret;
 325        }
 326
 327        return 0;
 328}
 329
 330int opal_get_chars(uint32_t vtermno, char *buf, int count)
 331{
 332        s64 rc;
 333        __be64 evt, len;
 334
 335        if (!opal.entry)
 336                return -ENODEV;
 337        opal_poll_events(&evt);
 338        if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
 339                return 0;
 340        len = cpu_to_be64(count);
 341        rc = opal_console_read(vtermno, &len, buf);
 342        if (rc == OPAL_SUCCESS)
 343                return be64_to_cpu(len);
 344        return 0;
 345}
 346
 347int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
 348{
 349        int written = 0;
 350        __be64 olen;
 351        s64 len, rc;
 352        unsigned long flags;
 353        __be64 evt;
 354
 355        if (!opal.entry)
 356                return -ENODEV;
 357
 358        /* We want put_chars to be atomic to avoid mangling of hvsi
 359         * packets. To do that, we first test for room and return
 360         * -EAGAIN if there isn't enough.
 361         *
 362         * Unfortunately, opal_console_write_buffer_space() doesn't
 363         * appear to work on opal v1, so we just assume there is
 364         * enough room and be done with it
 365         */
 366        spin_lock_irqsave(&opal_write_lock, flags);
 367        rc = opal_console_write_buffer_space(vtermno, &olen);
 368        len = be64_to_cpu(olen);
 369        if (rc || len < total_len) {
 370                spin_unlock_irqrestore(&opal_write_lock, flags);
 371                /* Closed -> drop characters */
 372                if (rc)
 373                        return total_len;
 374                opal_poll_events(NULL);
 375                return -EAGAIN;
 376        }
 377
 378        /* We still try to handle partial completions, though they
 379         * should no longer happen.
 380         */
 381        rc = OPAL_BUSY;
 382        while(total_len > 0 && (rc == OPAL_BUSY ||
 383                                rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
 384                olen = cpu_to_be64(total_len);
 385                rc = opal_console_write(vtermno, &olen, data);
 386                len = be64_to_cpu(olen);
 387
 388                /* Closed or other error drop */
 389                if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
 390                    rc != OPAL_BUSY_EVENT) {
 391                        written = total_len;
 392                        break;
 393                }
 394                if (rc == OPAL_SUCCESS) {
 395                        total_len -= len;
 396                        data += len;
 397                        written += len;
 398                }
 399                /* This is a bit nasty but we need that for the console to
 400                 * flush when there aren't any interrupts. We will clean
 401                 * things a bit later to limit that to synchronous path
 402                 * such as the kernel console and xmon/udbg
 403                 */
 404                do
 405                        opal_poll_events(&evt);
 406                while(rc == OPAL_SUCCESS &&
 407                        (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
 408        }
 409        spin_unlock_irqrestore(&opal_write_lock, flags);
 410        return written;
 411}
 412
 413static int opal_recover_mce(struct pt_regs *regs,
 414                                        struct machine_check_event *evt)
 415{
 416        int recovered = 0;
 417
 418        if (!(regs->msr & MSR_RI)) {
 419                /* If MSR_RI isn't set, we cannot recover */
 420                pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
 421                recovered = 0;
 422        } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
 423                /* Platform corrected itself */
 424                recovered = 1;
 425        } else if (evt->severity == MCE_SEV_FATAL) {
 426                /* Fatal machine check */
 427                pr_err("Machine check interrupt is fatal\n");
 428                recovered = 0;
 429        }
 430
 431        if (!recovered && evt->severity == MCE_SEV_ERROR_SYNC) {
 432                /*
 433                 * Try to kill processes if we get a synchronous machine check
 434                 * (e.g., one caused by execution of this instruction). This
 435                 * will devolve into a panic if we try to kill init or are in
 436                 * an interrupt etc.
 437                 *
 438                 * TODO: Queue up this address for hwpoisioning later.
 439                 * TODO: This is not quite right for d-side machine
 440                 *       checks ->nip is not necessarily the important
 441                 *       address.
 442                 */
 443                if ((user_mode(regs))) {
 444                        _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
 445                        recovered = 1;
 446                } else if (die_will_crash()) {
 447                        /*
 448                         * die() would kill the kernel, so better to go via
 449                         * the platform reboot code that will log the
 450                         * machine check.
 451                         */
 452                        recovered = 0;
 453                } else {
 454                        die("Machine check", regs, SIGBUS);
 455                        recovered = 1;
 456                }
 457        }
 458
 459        return recovered;
 460}
 461
 462void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
 463{
 464        /*
 465         * This is mostly taken from kernel/panic.c, but tries to do
 466         * relatively minimal work. Don't use delay functions (TB may
 467         * be broken), don't crash dump (need to set a firmware log),
 468         * don't run notifiers. We do want to get some information to
 469         * Linux console.
 470         */
 471        console_verbose();
 472        bust_spinlocks(1);
 473        pr_emerg("Hardware platform error: %s\n", msg);
 474        if (regs)
 475                show_regs(regs);
 476        smp_send_stop();
 477        printk_safe_flush_on_panic();
 478        kmsg_dump(KMSG_DUMP_PANIC);
 479        bust_spinlocks(0);
 480        debug_locks_off();
 481        console_flush_on_panic();
 482
 483        /*
 484         * Don't bother to shut things down because this will
 485         * xstop the system.
 486         */
 487        if (opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, msg)
 488                                                == OPAL_UNSUPPORTED) {
 489                pr_emerg("Reboot type %d not supported for %s\n",
 490                                OPAL_REBOOT_PLATFORM_ERROR, msg);
 491        }
 492
 493        /*
 494         * We reached here. There can be three possibilities:
 495         * 1. We are running on a firmware level that do not support
 496         *    opal_cec_reboot2()
 497         * 2. We are running on a firmware level that do not support
 498         *    OPAL_REBOOT_PLATFORM_ERROR reboot type.
 499         * 3. We are running on FSP based system that does not need
 500         *    opal to trigger checkstop explicitly for error analysis.
 501         *    The FSP PRD component would have already got notified
 502         *    about this error through other channels.
 503         */
 504
 505        ppc_md.restart(NULL);
 506}
 507
 508int opal_machine_check(struct pt_regs *regs)
 509{
 510        struct machine_check_event evt;
 511
 512        if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
 513                return 0;
 514
 515        /* Print things out */
 516        if (evt.version != MCE_V1) {
 517                pr_err("Machine Check Exception, Unknown event version %d !\n",
 518                       evt.version);
 519                return 0;
 520        }
 521        machine_check_print_event_info(&evt, user_mode(regs));
 522
 523        if (opal_recover_mce(regs, &evt))
 524                return 1;
 525
 526        pnv_platform_error_reboot(regs, "Unrecoverable Machine Check exception");
 527}
 528
 529/* Early hmi handler called in real mode. */
 530int opal_hmi_exception_early(struct pt_regs *regs)
 531{
 532        s64 rc;
 533
 534        /*
 535         * call opal hmi handler. Pass paca address as token.
 536         * The return value OPAL_SUCCESS is an indication that there is
 537         * an HMI event generated waiting to pull by Linux.
 538         */
 539        rc = opal_handle_hmi();
 540        if (rc == OPAL_SUCCESS) {
 541                local_paca->hmi_event_available = 1;
 542                return 1;
 543        }
 544        return 0;
 545}
 546
 547/* HMI exception handler called in virtual mode during check_irq_replay. */
 548int opal_handle_hmi_exception(struct pt_regs *regs)
 549{
 550        s64 rc;
 551        __be64 evt = 0;
 552
 553        /*
 554         * Check if HMI event is available.
 555         * if Yes, then call opal_poll_events to pull opal messages and
 556         * process them.
 557         */
 558        if (!local_paca->hmi_event_available)
 559                return 0;
 560
 561        local_paca->hmi_event_available = 0;
 562        rc = opal_poll_events(&evt);
 563        if (rc == OPAL_SUCCESS && evt)
 564                opal_handle_events(be64_to_cpu(evt));
 565
 566        return 1;
 567}
 568
 569static uint64_t find_recovery_address(uint64_t nip)
 570{
 571        int i;
 572
 573        for (i = 0; i < mc_recoverable_range_len; i++)
 574                if ((nip >= mc_recoverable_range[i].start_addr) &&
 575                    (nip < mc_recoverable_range[i].end_addr))
 576                    return mc_recoverable_range[i].recover_addr;
 577        return 0;
 578}
 579
 580bool opal_mce_check_early_recovery(struct pt_regs *regs)
 581{
 582        uint64_t recover_addr = 0;
 583
 584        if (!opal.base || !opal.size)
 585                goto out;
 586
 587        if ((regs->nip >= opal.base) &&
 588                        (regs->nip < (opal.base + opal.size)))
 589                recover_addr = find_recovery_address(regs->nip);
 590
 591        /*
 592         * Setup regs->nip to rfi into fixup address.
 593         */
 594        if (recover_addr)
 595                regs->nip = recover_addr;
 596
 597out:
 598        return !!recover_addr;
 599}
 600
 601static int opal_sysfs_init(void)
 602{
 603        opal_kobj = kobject_create_and_add("opal", firmware_kobj);
 604        if (!opal_kobj) {
 605                pr_warn("kobject_create_and_add opal failed\n");
 606                return -ENOMEM;
 607        }
 608
 609        return 0;
 610}
 611
 612static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
 613                               struct bin_attribute *bin_attr,
 614                               char *buf, loff_t off, size_t count)
 615{
 616        return memory_read_from_buffer(buf, count, &off, bin_attr->private,
 617                                       bin_attr->size);
 618}
 619
 620static BIN_ATTR_RO(symbol_map, 0);
 621
 622static void opal_export_symmap(void)
 623{
 624        const __be64 *syms;
 625        unsigned int size;
 626        struct device_node *fw;
 627        int rc;
 628
 629        fw = of_find_node_by_path("/ibm,opal/firmware");
 630        if (!fw)
 631                return;
 632        syms = of_get_property(fw, "symbol-map", &size);
 633        if (!syms || size != 2 * sizeof(__be64))
 634                return;
 635
 636        /* Setup attributes */
 637        bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
 638        bin_attr_symbol_map.size = be64_to_cpu(syms[1]);
 639
 640        rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
 641        if (rc)
 642                pr_warn("Error %d creating OPAL symbols file\n", rc);
 643}
 644
 645static ssize_t export_attr_read(struct file *fp, struct kobject *kobj,
 646                                struct bin_attribute *bin_attr, char *buf,
 647                                loff_t off, size_t count)
 648{
 649        return memory_read_from_buffer(buf, count, &off, bin_attr->private,
 650                                       bin_attr->size);
 651}
 652
 653/*
 654 * opal_export_attrs: creates a sysfs node for each property listed in
 655 * the device-tree under /ibm,opal/firmware/exports/
 656 * All new sysfs nodes are created under /opal/exports/.
 657 * This allows for reserved memory regions (e.g. HDAT) to be read.
 658 * The new sysfs nodes are only readable by root.
 659 */
 660static void opal_export_attrs(void)
 661{
 662        struct bin_attribute *attr;
 663        struct device_node *np;
 664        struct property *prop;
 665        struct kobject *kobj;
 666        u64 vals[2];
 667        int rc;
 668
 669        np = of_find_node_by_path("/ibm,opal/firmware/exports");
 670        if (!np)
 671                return;
 672
 673        /* Create new 'exports' directory - /sys/firmware/opal/exports */
 674        kobj = kobject_create_and_add("exports", opal_kobj);
 675        if (!kobj) {
 676                pr_warn("kobject_create_and_add() of exports failed\n");
 677                return;
 678        }
 679
 680        for_each_property_of_node(np, prop) {
 681                if (!strcmp(prop->name, "name") || !strcmp(prop->name, "phandle"))
 682                        continue;
 683
 684                if (of_property_read_u64_array(np, prop->name, &vals[0], 2))
 685                        continue;
 686
 687                attr = kzalloc(sizeof(*attr), GFP_KERNEL);
 688
 689                if (attr == NULL) {
 690                        pr_warn("Failed kmalloc for bin_attribute!");
 691                        continue;
 692                }
 693
 694                sysfs_bin_attr_init(attr);
 695                attr->attr.name = kstrdup(prop->name, GFP_KERNEL);
 696                attr->attr.mode = 0400;
 697                attr->read = export_attr_read;
 698                attr->private = __va(vals[0]);
 699                attr->size = vals[1];
 700
 701                if (attr->attr.name == NULL) {
 702                        pr_warn("Failed kstrdup for bin_attribute attr.name");
 703                        kfree(attr);
 704                        continue;
 705                }
 706
 707                rc = sysfs_create_bin_file(kobj, attr);
 708                if (rc) {
 709                        pr_warn("Error %d creating OPAL sysfs exports/%s file\n",
 710                                 rc, prop->name);
 711                        kfree(attr->attr.name);
 712                        kfree(attr);
 713                }
 714        }
 715
 716        of_node_put(np);
 717}
 718
 719static void __init opal_dump_region_init(void)
 720{
 721        void *addr;
 722        uint64_t size;
 723        int rc;
 724
 725        if (!opal_check_token(OPAL_REGISTER_DUMP_REGION))
 726                return;
 727
 728        /* Register kernel log buffer */
 729        addr = log_buf_addr_get();
 730        if (addr == NULL)
 731                return;
 732
 733        size = log_buf_len_get();
 734        if (size == 0)
 735                return;
 736
 737        rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
 738                                       __pa(addr), size);
 739        /* Don't warn if this is just an older OPAL that doesn't
 740         * know about that call
 741         */
 742        if (rc && rc != OPAL_UNSUPPORTED)
 743                pr_warn("DUMP: Failed to register kernel log buffer. "
 744                        "rc = %d\n", rc);
 745}
 746
 747static void opal_pdev_init(const char *compatible)
 748{
 749        struct device_node *np;
 750
 751        for_each_compatible_node(np, NULL, compatible)
 752                of_platform_device_create(np, NULL, NULL);
 753}
 754
 755static void __init opal_imc_init_dev(void)
 756{
 757        struct device_node *np;
 758
 759        np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT);
 760        if (np)
 761                of_platform_device_create(np, NULL, NULL);
 762}
 763
 764static int kopald(void *unused)
 765{
 766        unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1;
 767        __be64 events;
 768
 769        set_freezable();
 770        do {
 771                try_to_freeze();
 772                opal_poll_events(&events);
 773                opal_handle_events(be64_to_cpu(events));
 774                schedule_timeout_interruptible(timeout);
 775        } while (!kthread_should_stop());
 776
 777        return 0;
 778}
 779
 780void opal_wake_poller(void)
 781{
 782        if (kopald_tsk)
 783                wake_up_process(kopald_tsk);
 784}
 785
 786static void opal_init_heartbeat(void)
 787{
 788        /* Old firwmware, we assume the HVC heartbeat is sufficient */
 789        if (of_property_read_u32(opal_node, "ibm,heartbeat-ms",
 790                                 &opal_heartbeat) != 0)
 791                opal_heartbeat = 0;
 792
 793        if (opal_heartbeat)
 794                kopald_tsk = kthread_run(kopald, NULL, "kopald");
 795}
 796
 797static int __init opal_init(void)
 798{
 799        struct device_node *np, *consoles, *leds;
 800        int rc;
 801
 802        opal_node = of_find_node_by_path("/ibm,opal");
 803        if (!opal_node) {
 804                pr_warn("Device node not found\n");
 805                return -ENODEV;
 806        }
 807
 808        /* Register OPAL consoles if any ports */
 809        consoles = of_find_node_by_path("/ibm,opal/consoles");
 810        if (consoles) {
 811                for_each_child_of_node(consoles, np) {
 812                        if (strcmp(np->name, "serial"))
 813                                continue;
 814                        of_platform_device_create(np, NULL, NULL);
 815                }
 816                of_node_put(consoles);
 817        }
 818
 819        /* Initialise OPAL messaging system */
 820        opal_message_init();
 821
 822        /* Initialise OPAL asynchronous completion interface */
 823        opal_async_comp_init();
 824
 825        /* Initialise OPAL sensor interface */
 826        opal_sensor_init();
 827
 828        /* Initialise OPAL hypervisor maintainence interrupt handling */
 829        opal_hmi_handler_init();
 830
 831        /* Create i2c platform devices */
 832        opal_pdev_init("ibm,opal-i2c");
 833
 834        /* Setup a heatbeat thread if requested by OPAL */
 835        opal_init_heartbeat();
 836
 837        /* Detect In-Memory Collection counters and create devices*/
 838        opal_imc_init_dev();
 839
 840        /* Create leds platform devices */
 841        leds = of_find_node_by_path("/ibm,opal/leds");
 842        if (leds) {
 843                of_platform_device_create(leds, "opal_leds", NULL);
 844                of_node_put(leds);
 845        }
 846
 847        /* Initialise OPAL message log interface */
 848        opal_msglog_init();
 849
 850        /* Create "opal" kobject under /sys/firmware */
 851        rc = opal_sysfs_init();
 852        if (rc == 0) {
 853                /* Export symbol map to userspace */
 854                opal_export_symmap();
 855                /* Setup dump region interface */
 856                opal_dump_region_init();
 857                /* Setup error log interface */
 858                rc = opal_elog_init();
 859                /* Setup code update interface */
 860                opal_flash_update_init();
 861                /* Setup platform dump extract interface */
 862                opal_platform_dump_init();
 863                /* Setup system parameters interface */
 864                opal_sys_param_init();
 865                /* Setup message log sysfs interface. */
 866                opal_msglog_sysfs_init();
 867        }
 868
 869        /* Export all properties */
 870        opal_export_attrs();
 871
 872        /* Initialize platform devices: IPMI backend, PRD & flash interface */
 873        opal_pdev_init("ibm,opal-ipmi");
 874        opal_pdev_init("ibm,opal-flash");
 875        opal_pdev_init("ibm,opal-prd");
 876
 877        /* Initialise platform device: oppanel interface */
 878        opal_pdev_init("ibm,opal-oppanel");
 879
 880        /* Initialise OPAL kmsg dumper for flushing console on panic */
 881        opal_kmsg_init();
 882
 883        /* Initialise OPAL powercap interface */
 884        opal_powercap_init();
 885
 886        /* Initialise OPAL Power-Shifting-Ratio interface */
 887        opal_psr_init();
 888
 889        /* Initialise OPAL sensor groups */
 890        opal_sensor_groups_init();
 891
 892        return 0;
 893}
 894machine_subsys_initcall(powernv, opal_init);
 895
 896void opal_shutdown(void)
 897{
 898        long rc = OPAL_BUSY;
 899
 900        opal_event_shutdown();
 901
 902        /*
 903         * Then sync with OPAL which ensure anything that can
 904         * potentially write to our memory has completed such
 905         * as an ongoing dump retrieval
 906         */
 907        while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 908                rc = opal_sync_host_reboot();
 909                if (rc == OPAL_BUSY)
 910                        opal_poll_events(NULL);
 911                else
 912                        mdelay(10);
 913        }
 914
 915        /* Unregister memory dump region */
 916        if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION))
 917                opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
 918}
 919
 920/* Export this so that test modules can use it */
 921EXPORT_SYMBOL_GPL(opal_invalid_call);
 922EXPORT_SYMBOL_GPL(opal_xscom_read);
 923EXPORT_SYMBOL_GPL(opal_xscom_write);
 924EXPORT_SYMBOL_GPL(opal_ipmi_send);
 925EXPORT_SYMBOL_GPL(opal_ipmi_recv);
 926EXPORT_SYMBOL_GPL(opal_flash_read);
 927EXPORT_SYMBOL_GPL(opal_flash_write);
 928EXPORT_SYMBOL_GPL(opal_flash_erase);
 929EXPORT_SYMBOL_GPL(opal_prd_msg);
 930
 931/* Convert a region of vmalloc memory to an opal sg list */
 932struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
 933                                             unsigned long vmalloc_size)
 934{
 935        struct opal_sg_list *sg, *first = NULL;
 936        unsigned long i = 0;
 937
 938        sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
 939        if (!sg)
 940                goto nomem;
 941
 942        first = sg;
 943
 944        while (vmalloc_size > 0) {
 945                uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
 946                uint64_t length = min(vmalloc_size, PAGE_SIZE);
 947
 948                sg->entry[i].data = cpu_to_be64(data);
 949                sg->entry[i].length = cpu_to_be64(length);
 950                i++;
 951
 952                if (i >= SG_ENTRIES_PER_NODE) {
 953                        struct opal_sg_list *next;
 954
 955                        next = kzalloc(PAGE_SIZE, GFP_KERNEL);
 956                        if (!next)
 957                                goto nomem;
 958
 959                        sg->length = cpu_to_be64(
 960                                        i * sizeof(struct opal_sg_entry) + 16);
 961                        i = 0;
 962                        sg->next = cpu_to_be64(__pa(next));
 963                        sg = next;
 964                }
 965
 966                vmalloc_addr += length;
 967                vmalloc_size -= length;
 968        }
 969
 970        sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);
 971
 972        return first;
 973
 974nomem:
 975        pr_err("%s : Failed to allocate memory\n", __func__);
 976        opal_free_sg_list(first);
 977        return NULL;
 978}
 979
 980void opal_free_sg_list(struct opal_sg_list *sg)
 981{
 982        while (sg) {
 983                uint64_t next = be64_to_cpu(sg->next);
 984
 985                kfree(sg);
 986
 987                if (next)
 988                        sg = __va(next);
 989                else
 990                        sg = NULL;
 991        }
 992}
 993
 994int opal_error_code(int rc)
 995{
 996        switch (rc) {
 997        case OPAL_SUCCESS:              return 0;
 998
 999        case OPAL_PARAMETER:            return -EINVAL;
1000        case OPAL_ASYNC_COMPLETION:     return -EINPROGRESS;
1001        case OPAL_BUSY:
1002        case OPAL_BUSY_EVENT:           return -EBUSY;
1003        case OPAL_NO_MEM:               return -ENOMEM;
1004        case OPAL_PERMISSION:           return -EPERM;
1005
1006        case OPAL_UNSUPPORTED:          return -EIO;
1007        case OPAL_HARDWARE:             return -EIO;
1008        case OPAL_INTERNAL_ERROR:       return -EIO;
1009        case OPAL_TIMEOUT:              return -ETIMEDOUT;
1010        default:
1011                pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
1012                return -EIO;
1013        }
1014}
1015
1016void powernv_set_nmmu_ptcr(unsigned long ptcr)
1017{
1018        int rc;
1019
1020        if (firmware_has_feature(FW_FEATURE_OPAL)) {
1021                rc = opal_nmmu_set_ptcr(-1UL, ptcr);
1022                if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED)
1023                        pr_warn("%s: Unable to set nest mmu ptcr\n", __func__);
1024        }
1025}
1026
1027EXPORT_SYMBOL_GPL(opal_poll_events);
1028EXPORT_SYMBOL_GPL(opal_rtc_read);
1029EXPORT_SYMBOL_GPL(opal_rtc_write);
1030EXPORT_SYMBOL_GPL(opal_tpo_read);
1031EXPORT_SYMBOL_GPL(opal_tpo_write);
1032EXPORT_SYMBOL_GPL(opal_i2c_request);
1033/* Export these symbols for PowerNV LED class driver */
1034EXPORT_SYMBOL_GPL(opal_leds_get_ind);
1035EXPORT_SYMBOL_GPL(opal_leds_set_ind);
1036/* Export this symbol for PowerNV Operator Panel class driver */
1037EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
1038/* Export this for KVM */
1039EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
1040EXPORT_SYMBOL_GPL(opal_int_eoi);
1041EXPORT_SYMBOL_GPL(opal_error_code);
1042