linux/arch/powerpc/platforms/powernv/opal.c
<<
>>
Prefs
   1/*
   2 * PowerNV OPAL high level interfaces
   3 *
   4 * Copyright 2011 IBM Corp.
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * as published by the Free Software Foundation; either version
   9 * 2 of the License, or (at your option) any later version.
  10 */
  11
/* Prefix that the pr_*() helpers put in front of every message from this file. */
#define pr_fmt(fmt)     "opal: " fmt
  13
#include <linux/printk.h>
#include <linux/types.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
#include <linux/of_address.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/kobject.h>
#include <linux/delay.h>
#include <linux/memblock.h>
#include <linux/kthread.h>
#include <linux/err.h>
#include <linux/freezer.h>
#include <linux/kmsg_dump.h>
#include <linux/console.h>
#include <linux/sched/debug.h>

#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/firmware.h>
#include <asm/mce.h>
#include <asm/imc-pmu.h>
#include <asm/bug.h>

#include "powernv.h"
  41
/*
 * kobject backing /sys/firmware/opal. It is created by opal_sysfs_init()
 * and stays NULL if that creation fails.
 */
struct kobject *opal_kobj;
  44
/*
 * Location of the OPAL firmware, filled in by early_init_dt_scan_opal()
 * from the "ibm,opal" node of the flattened device tree.
 */
struct opal {
        u64 base;       /* "opal-base-address" property */
        u64 entry;      /* "opal-entry-address" property; console helpers bail out while it is 0 */
        u64 size;       /* "opal-runtime-size" property */
} opal;
  50
/*
 * One decoded entry of the "mcheck-recoverable-ranges" device-tree property.
 * A machine check whose NIP satisfies start_addr <= nip < end_addr can be
 * redirected to recover_addr (see find_recovery_address()).
 */
struct mcheck_recoverable_range {
        u64 start_addr;         /* first address of the range */
        u64 end_addr;           /* start_addr + length, i.e. exclusive upper bound */
        u64 recover_addr;       /* address to resume at */
};
  56
/* Table of recoverable ranges and its entry count, set up during early DT scan. */
static struct mcheck_recoverable_range *mc_recoverable_range;
static int mc_recoverable_range_len;

/* The "/ibm,opal" device-tree node, looked up in opal_init(). */
struct device_node *opal_node;
/* Taken by __opal_put_chars() around the whole write when called in atomic mode. */
static DEFINE_SPINLOCK(opal_write_lock);
/* One notifier chain per OPAL message type, indexed by the message type. */
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
/* Value of the "ibm,heartbeat-ms" property, or 0 when it is absent. */
static uint32_t opal_heartbeat;
/* The kopald polling thread, if one was started by opal_init_heartbeat(). */
static struct task_struct *kopald_tsk;
  65
  66void opal_configure_cores(void)
  67{
  68        u64 reinit_flags = 0;
  69
  70        /* Do the actual re-init, This will clobber all FPRs, VRs, etc...
  71         *
  72         * It will preserve non volatile GPRs and HSPRG0/1. It will
  73         * also restore HIDs and other SPRs to their original value
  74         * but it might clobber a bunch.
  75         */
  76#ifdef __BIG_ENDIAN__
  77        reinit_flags |= OPAL_REINIT_CPUS_HILE_BE;
  78#else
  79        reinit_flags |= OPAL_REINIT_CPUS_HILE_LE;
  80#endif
  81
  82        /*
  83         * POWER9 always support running hash:
  84         *  ie. Host hash  supports  hash guests
  85         *      Host radix supports  hash/radix guests
  86         */
  87        if (early_cpu_has_feature(CPU_FTR_ARCH_300)) {
  88                reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH;
  89                if (early_radix_enabled())
  90                        reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX;
  91        }
  92
  93        opal_reinit_cpus(reinit_flags);
  94
  95        /* Restore some bits */
  96        if (cur_cpu_spec->cpu_restore)
  97                cur_cpu_spec->cpu_restore();
  98}
  99
 100int __init early_init_dt_scan_opal(unsigned long node,
 101                                   const char *uname, int depth, void *data)
 102{
 103        const void *basep, *entryp, *sizep;
 104        int basesz, entrysz, runtimesz;
 105
 106        if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
 107                return 0;
 108
 109        basep  = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
 110        entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
 111        sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);
 112
 113        if (!basep || !entryp || !sizep)
 114                return 1;
 115
 116        opal.base = of_read_number(basep, basesz/4);
 117        opal.entry = of_read_number(entryp, entrysz/4);
 118        opal.size = of_read_number(sizep, runtimesz/4);
 119
 120        pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%d)\n",
 121                 opal.base, basep, basesz);
 122        pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%d)\n",
 123                 opal.entry, entryp, entrysz);
 124        pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n",
 125                 opal.size, sizep, runtimesz);
 126
 127        if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
 128                powerpc_firmware_features |= FW_FEATURE_OPAL;
 129                pr_debug("OPAL detected !\n");
 130        } else {
 131                panic("OPAL != V3 detected, no longer supported.\n");
 132        }
 133
 134        return 1;
 135}
 136
 137int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
 138                                   const char *uname, int depth, void *data)
 139{
 140        int i, psize, size;
 141        const __be32 *prop;
 142
 143        if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
 144                return 0;
 145
 146        prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);
 147
 148        if (!prop)
 149                return 1;
 150
 151        pr_debug("Found machine check recoverable ranges.\n");
 152
 153        /*
 154         * Calculate number of available entries.
 155         *
 156         * Each recoverable address range entry is (start address, len,
 157         * recovery address), 2 cells each for start and recovery address,
 158         * 1 cell for len, totalling 5 cells per entry.
 159         */
 160        mc_recoverable_range_len = psize / (sizeof(*prop) * 5);
 161
 162        /* Sanity check */
 163        if (!mc_recoverable_range_len)
 164                return 1;
 165
 166        /* Size required to hold all the entries. */
 167        size = mc_recoverable_range_len *
 168                        sizeof(struct mcheck_recoverable_range);
 169
 170        /*
 171         * Allocate a buffer to hold the MC recoverable ranges.
 172         */
 173        mc_recoverable_range = memblock_alloc(size, __alignof__(u64));
 174        if (!mc_recoverable_range)
 175                panic("%s: Failed to allocate %u bytes align=0x%lx\n",
 176                      __func__, size, __alignof__(u64));
 177
 178        for (i = 0; i < mc_recoverable_range_len; i++) {
 179                mc_recoverable_range[i].start_addr =
 180                                        of_read_number(prop + (i * 5) + 0, 2);
 181                mc_recoverable_range[i].end_addr =
 182                                        mc_recoverable_range[i].start_addr +
 183                                        of_read_number(prop + (i * 5) + 2, 1);
 184                mc_recoverable_range[i].recover_addr =
 185                                        of_read_number(prop + (i * 5) + 3, 2);
 186
 187                pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
 188                                mc_recoverable_range[i].start_addr,
 189                                mc_recoverable_range[i].end_addr,
 190                                mc_recoverable_range[i].recover_addr);
 191        }
 192        return 1;
 193}
 194
 195static int __init opal_register_exception_handlers(void)
 196{
 197#ifdef __BIG_ENDIAN__
 198        u64 glue;
 199
 200        if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
 201                return -ENODEV;
 202
 203        /* Hookup some exception handlers except machine check. We use the
 204         * fwnmi area at 0x7000 to provide the glue space to OPAL
 205         */
 206        glue = 0x7000;
 207
 208        /*
 209         * Check if we are running on newer firmware that exports
 210         * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch
 211         * the HMI interrupt and we catch it directly in Linux.
 212         *
 213         * For older firmware (i.e currently released POWER8 System Firmware
 214         * as of today <= SV810_087), we fallback to old behavior and let OPAL
 215         * patch the HMI vector and handle it inside OPAL firmware.
 216         *
 217         * For newer firmware (in development/yet to be released) we will
 218         * start catching/handling HMI directly in Linux.
 219         */
 220        if (!opal_check_token(OPAL_HANDLE_HMI)) {
 221                pr_info("Old firmware detected, OPAL handles HMIs.\n");
 222                opal_register_exception_handler(
 223                                OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
 224                                0, glue);
 225                glue += 128;
 226        }
 227
 228        opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
 229#endif
 230
 231        return 0;
 232}
 233machine_early_initcall(powernv, opal_register_exception_handlers);
 234
 235/*
 236 * Opal message notifier based on message type. Allow subscribers to get
 237 * notified for specific messgae type.
 238 */
 239int opal_message_notifier_register(enum opal_msg_type msg_type,
 240                                        struct notifier_block *nb)
 241{
 242        if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
 243                pr_warn("%s: Invalid arguments, msg_type:%d\n",
 244                        __func__, msg_type);
 245                return -EINVAL;
 246        }
 247
 248        return atomic_notifier_chain_register(
 249                                &opal_msg_notifier_head[msg_type], nb);
 250}
 251EXPORT_SYMBOL_GPL(opal_message_notifier_register);
 252
 253int opal_message_notifier_unregister(enum opal_msg_type msg_type,
 254                                     struct notifier_block *nb)
 255{
 256        return atomic_notifier_chain_unregister(
 257                        &opal_msg_notifier_head[msg_type], nb);
 258}
 259EXPORT_SYMBOL_GPL(opal_message_notifier_unregister);
 260
 261static void opal_message_do_notify(uint32_t msg_type, void *msg)
 262{
 263        /* notify subscribers */
 264        atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
 265                                        msg_type, msg);
 266}
 267
 268static void opal_handle_message(void)
 269{
 270        s64 ret;
 271        /*
 272         * TODO: pre-allocate a message buffer depending on opal-msg-size
 273         * value in /proc/device-tree.
 274         */
 275        static struct opal_msg msg;
 276        u32 type;
 277
 278        ret = opal_get_msg(__pa(&msg), sizeof(msg));
 279        /* No opal message pending. */
 280        if (ret == OPAL_RESOURCE)
 281                return;
 282
 283        /* check for errors. */
 284        if (ret) {
 285                pr_warn("%s: Failed to retrieve opal message, err=%lld\n",
 286                        __func__, ret);
 287                return;
 288        }
 289
 290        type = be32_to_cpu(msg.msg_type);
 291
 292        /* Sanity check */
 293        if (type >= OPAL_MSG_TYPE_MAX) {
 294                pr_warn_once("%s: Unknown message type: %u\n", __func__, type);
 295                return;
 296        }
 297        opal_message_do_notify(type, (void *)&msg);
 298}
 299
 300static irqreturn_t opal_message_notify(int irq, void *data)
 301{
 302        opal_handle_message();
 303        return IRQ_HANDLED;
 304}
 305
 306static int __init opal_message_init(void)
 307{
 308        int ret, i, irq;
 309
 310        for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
 311                ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
 312
 313        irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING));
 314        if (!irq) {
 315                pr_err("%s: Can't register OPAL event irq (%d)\n",
 316                       __func__, irq);
 317                return irq;
 318        }
 319
 320        ret = request_irq(irq, opal_message_notify,
 321                        IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL);
 322        if (ret) {
 323                pr_err("%s: Can't request OPAL event irq (%d)\n",
 324                       __func__, ret);
 325                return ret;
 326        }
 327
 328        return 0;
 329}
 330
 331int opal_get_chars(uint32_t vtermno, char *buf, int count)
 332{
 333        s64 rc;
 334        __be64 evt, len;
 335
 336        if (!opal.entry)
 337                return -ENODEV;
 338        opal_poll_events(&evt);
 339        if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
 340                return 0;
 341        len = cpu_to_be64(count);
 342        rc = opal_console_read(vtermno, &len, buf);
 343        if (rc == OPAL_SUCCESS)
 344                return be64_to_cpu(len);
 345        return 0;
 346}
 347
/*
 * Write @total_len bytes from @data to OPAL console @vtermno.
 *
 * When @atomic is true the whole operation runs under opal_write_lock with
 * interrupts disabled, and a partial write is reported with a warning.
 *
 * Returns -ENODEV when no OPAL entry point is known; @total_len when the
 * buffer-space query fails (the characters are dropped); -EAGAIN when there
 * is not enough space, OPAL is busy, or nothing was written; the result of
 * opal_error_code() for any other OPAL failure; otherwise the number of
 * bytes OPAL reports as written.
 */
static int __opal_put_chars(uint32_t vtermno, const char *data, int total_len, bool atomic)
{
        unsigned long flags = 0 /* shut up gcc */;
        int written;
        __be64 olen;
        s64 rc;

        if (!opal.entry)
                return -ENODEV;

        if (atomic)
                spin_lock_irqsave(&opal_write_lock, flags);
        /* Ask how much room the console has before attempting the write. */
        rc = opal_console_write_buffer_space(vtermno, &olen);
        if (rc || be64_to_cpu(olen) < total_len) {
                /* Closed -> drop characters */
                if (rc)
                        written = total_len;
                else
                        written = -EAGAIN;      /* not enough room yet */
                goto out;
        }

        /* Should not get a partial write here because space is available. */
        olen = cpu_to_be64(total_len);
        rc = opal_console_write(vtermno, &olen, data);
        if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
                /* On OPAL_BUSY_EVENT, poll OPAL before telling the caller to retry. */
                if (rc == OPAL_BUSY_EVENT)
                        opal_poll_events(NULL);
                written = -EAGAIN;
                goto out;
        }

        /* Closed or other error drop */
        if (rc != OPAL_SUCCESS) {
                written = opal_error_code(rc);
                goto out;
        }

        /* olen now holds the number of bytes OPAL actually accepted. */
        written = be64_to_cpu(olen);
        if (written < total_len) {
                if (atomic) {
                        /* Should not happen */
                        pr_warn("atomic console write returned partial "
                                "len=%d written=%d\n", total_len, written);
                }
                /* A zero-length write is reported as "try again", not as 0. */
                if (!written)
                        written = -EAGAIN;
        }

out:
        if (atomic)
                spin_unlock_irqrestore(&opal_write_lock, flags);

        return written;
}
 403
 404int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
 405{
 406        return __opal_put_chars(vtermno, data, total_len, false);
 407}
 408
 409/*
 410 * opal_put_chars_atomic will not perform partial-writes. Data will be
 411 * atomically written to the terminal or not at all. This is not strictly
 412 * true at the moment because console space can race with OPAL's console
 413 * writes.
 414 */
 415int opal_put_chars_atomic(uint32_t vtermno, const char *data, int total_len)
 416{
 417        return __opal_put_chars(vtermno, data, total_len, true);
 418}
 419
 420static s64 __opal_flush_console(uint32_t vtermno)
 421{
 422        s64 rc;
 423
 424        if (!opal_check_token(OPAL_CONSOLE_FLUSH)) {
 425                __be64 evt;
 426
 427                /*
 428                 * If OPAL_CONSOLE_FLUSH is not implemented in the firmware,
 429                 * the console can still be flushed by calling the polling
 430                 * function while it has OPAL_EVENT_CONSOLE_OUTPUT events.
 431                 */
 432                WARN_ONCE(1, "opal: OPAL_CONSOLE_FLUSH missing.\n");
 433
 434                opal_poll_events(&evt);
 435                if (!(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT))
 436                        return OPAL_SUCCESS;
 437                return OPAL_BUSY;
 438
 439        } else {
 440                rc = opal_console_flush(vtermno);
 441                if (rc == OPAL_BUSY_EVENT) {
 442                        opal_poll_events(NULL);
 443                        rc = OPAL_BUSY;
 444                }
 445                return rc;
 446        }
 447
 448}
 449
 450/*
 451 * opal_flush_console spins until the console is flushed
 452 */
 453int opal_flush_console(uint32_t vtermno)
 454{
 455        for (;;) {
 456                s64 rc = __opal_flush_console(vtermno);
 457
 458                if (rc == OPAL_BUSY || rc == OPAL_PARTIAL) {
 459                        mdelay(1);
 460                        continue;
 461                }
 462
 463                return opal_error_code(rc);
 464        }
 465}
 466
 467/*
 468 * opal_flush_chars is an hvc interface that sleeps until the console is
 469 * flushed if wait, otherwise it will return -EBUSY if the console has data,
 470 * -EAGAIN if it has data and some of it was flushed.
 471 */
 472int opal_flush_chars(uint32_t vtermno, bool wait)
 473{
 474        for (;;) {
 475                s64 rc = __opal_flush_console(vtermno);
 476
 477                if (rc == OPAL_BUSY || rc == OPAL_PARTIAL) {
 478                        if (wait) {
 479                                msleep(OPAL_BUSY_DELAY_MS);
 480                                continue;
 481                        }
 482                        if (rc == OPAL_PARTIAL)
 483                                return -EAGAIN;
 484                }
 485
 486                return opal_error_code(rc);
 487        }
 488}
 489
/*
 * Decide whether the machine check described by @evt, taken with register
 * state @regs, can be survived.
 *
 * Returns 1 if the event is considered handled (the platform recovered, a
 * SIGBUS was raised for a user-mode task, or die() was called for the
 * kernel context) and 0 if the caller has to treat it as unrecoverable.
 */
static int opal_recover_mce(struct pt_regs *regs,
                                        struct machine_check_event *evt)
{
        int recovered = 0;

        if (!(regs->msr & MSR_RI)) {
                /* If MSR_RI isn't set, we cannot recover */
                pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
                recovered = 0;
        } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
                /* Platform corrected itself */
                recovered = 1;
        } else if (evt->severity == MCE_SEV_FATAL) {
                /* Fatal machine check */
                pr_err("Machine check interrupt is fatal\n");
                recovered = 0;
        }

        /* Second chance: only for synchronous errors that are still unrecovered. */
        if (!recovered && evt->severity == MCE_SEV_ERROR_SYNC) {
                /*
                 * Try to kill processes if we get a synchronous machine check
                 * (e.g., one caused by execution of this instruction). This
                 * will devolve into a panic if we try to kill init or are in
                 * an interrupt etc.
                 *
                 * TODO: Queue up this address for hwpoisioning later.
                 * TODO: This is not quite right for d-side machine
                 *       checks ->nip is not necessarily the important
                 *       address.
                 */
                if ((user_mode(regs))) {
                        /* User-mode fault: deliver SIGBUS with the faulting NIP. */
                        _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
                        recovered = 1;
                } else if (die_will_crash()) {
                        /*
                         * die() would kill the kernel, so better to go via
                         * the platform reboot code that will log the
                         * machine check.
                         */
                        recovered = 0;
                } else {
                        die("Machine check", regs, SIGBUS);
                        recovered = 1;
                }
        }

        return recovered;
}
 538
 539void __noreturn pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
 540{
 541        panic_flush_kmsg_start();
 542
 543        pr_emerg("Hardware platform error: %s\n", msg);
 544        if (regs)
 545                show_regs(regs);
 546        smp_send_stop();
 547
 548        panic_flush_kmsg_end();
 549
 550        /*
 551         * Don't bother to shut things down because this will
 552         * xstop the system.
 553         */
 554        if (opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, msg)
 555                                                == OPAL_UNSUPPORTED) {
 556                pr_emerg("Reboot type %d not supported for %s\n",
 557                                OPAL_REBOOT_PLATFORM_ERROR, msg);
 558        }
 559
 560        /*
 561         * We reached here. There can be three possibilities:
 562         * 1. We are running on a firmware level that do not support
 563         *    opal_cec_reboot2()
 564         * 2. We are running on a firmware level that do not support
 565         *    OPAL_REBOOT_PLATFORM_ERROR reboot type.
 566         * 3. We are running on FSP based system that does not need
 567         *    opal to trigger checkstop explicitly for error analysis.
 568         *    The FSP PRD component would have already got notified
 569         *    about this error through other channels.
 570         * 4. We are running on a newer skiboot that by default does
 571         *    not cause a checkstop, drops us back to the kernel to
 572         *    extract context and state at the time of the error.
 573         */
 574
 575        panic(msg);
 576}
 577
 578int opal_machine_check(struct pt_regs *regs)
 579{
 580        struct machine_check_event evt;
 581
 582        if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
 583                return 0;
 584
 585        /* Print things out */
 586        if (evt.version != MCE_V1) {
 587                pr_err("Machine Check Exception, Unknown event version %d !\n",
 588                       evt.version);
 589                return 0;
 590        }
 591        machine_check_print_event_info(&evt, user_mode(regs), false);
 592
 593        if (opal_recover_mce(regs, &evt))
 594                return 1;
 595
 596        pnv_platform_error_reboot(regs, "Unrecoverable Machine Check exception");
 597}
 598
 599/* Early hmi handler called in real mode. */
 600int opal_hmi_exception_early(struct pt_regs *regs)
 601{
 602        s64 rc;
 603
 604        /*
 605         * call opal hmi handler. Pass paca address as token.
 606         * The return value OPAL_SUCCESS is an indication that there is
 607         * an HMI event generated waiting to pull by Linux.
 608         */
 609        rc = opal_handle_hmi();
 610        if (rc == OPAL_SUCCESS) {
 611                local_paca->hmi_event_available = 1;
 612                return 1;
 613        }
 614        return 0;
 615}
 616
 617/* HMI exception handler called in virtual mode during check_irq_replay. */
 618int opal_handle_hmi_exception(struct pt_regs *regs)
 619{
 620        /*
 621         * Check if HMI event is available.
 622         * if Yes, then wake kopald to process them.
 623         */
 624        if (!local_paca->hmi_event_available)
 625                return 0;
 626
 627        local_paca->hmi_event_available = 0;
 628        opal_wake_poller();
 629
 630        return 1;
 631}
 632
 633static uint64_t find_recovery_address(uint64_t nip)
 634{
 635        int i;
 636
 637        for (i = 0; i < mc_recoverable_range_len; i++)
 638                if ((nip >= mc_recoverable_range[i].start_addr) &&
 639                    (nip < mc_recoverable_range[i].end_addr))
 640                    return mc_recoverable_range[i].recover_addr;
 641        return 0;
 642}
 643
 644bool opal_mce_check_early_recovery(struct pt_regs *regs)
 645{
 646        uint64_t recover_addr = 0;
 647
 648        if (!opal.base || !opal.size)
 649                goto out;
 650
 651        if ((regs->nip >= opal.base) &&
 652                        (regs->nip < (opal.base + opal.size)))
 653                recover_addr = find_recovery_address(regs->nip);
 654
 655        /*
 656         * Setup regs->nip to rfi into fixup address.
 657         */
 658        if (recover_addr)
 659                regs->nip = recover_addr;
 660
 661out:
 662        return !!recover_addr;
 663}
 664
 665static int opal_sysfs_init(void)
 666{
 667        opal_kobj = kobject_create_and_add("opal", firmware_kobj);
 668        if (!opal_kobj) {
 669                pr_warn("kobject_create_and_add opal failed\n");
 670                return -ENOMEM;
 671        }
 672
 673        return 0;
 674}
 675
 676static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
 677                               struct bin_attribute *bin_attr,
 678                               char *buf, loff_t off, size_t count)
 679{
 680        return memory_read_from_buffer(buf, count, &off, bin_attr->private,
 681                                       bin_attr->size);
 682}
 683
 684static BIN_ATTR_RO(symbol_map, 0);
 685
 686static void opal_export_symmap(void)
 687{
 688        const __be64 *syms;
 689        unsigned int size;
 690        struct device_node *fw;
 691        int rc;
 692
 693        fw = of_find_node_by_path("/ibm,opal/firmware");
 694        if (!fw)
 695                return;
 696        syms = of_get_property(fw, "symbol-map", &size);
 697        if (!syms || size != 2 * sizeof(__be64))
 698                return;
 699
 700        /* Setup attributes */
 701        bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
 702        bin_attr_symbol_map.size = be64_to_cpu(syms[1]);
 703
 704        rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
 705        if (rc)
 706                pr_warn("Error %d creating OPAL symbols file\n", rc);
 707}
 708
 709static ssize_t export_attr_read(struct file *fp, struct kobject *kobj,
 710                                struct bin_attribute *bin_attr, char *buf,
 711                                loff_t off, size_t count)
 712{
 713        return memory_read_from_buffer(buf, count, &off, bin_attr->private,
 714                                       bin_attr->size);
 715}
 716
 717/*
 718 * opal_export_attrs: creates a sysfs node for each property listed in
 719 * the device-tree under /ibm,opal/firmware/exports/
 720 * All new sysfs nodes are created under /opal/exports/.
 721 * This allows for reserved memory regions (e.g. HDAT) to be read.
 722 * The new sysfs nodes are only readable by root.
 723 */
 724static void opal_export_attrs(void)
 725{
 726        struct bin_attribute *attr;
 727        struct device_node *np;
 728        struct property *prop;
 729        struct kobject *kobj;
 730        u64 vals[2];
 731        int rc;
 732
 733        np = of_find_node_by_path("/ibm,opal/firmware/exports");
 734        if (!np)
 735                return;
 736
 737        /* Create new 'exports' directory - /sys/firmware/opal/exports */
 738        kobj = kobject_create_and_add("exports", opal_kobj);
 739        if (!kobj) {
 740                pr_warn("kobject_create_and_add() of exports failed\n");
 741                return;
 742        }
 743
 744        for_each_property_of_node(np, prop) {
 745                if (!strcmp(prop->name, "name") || !strcmp(prop->name, "phandle"))
 746                        continue;
 747
 748                if (of_property_read_u64_array(np, prop->name, &vals[0], 2))
 749                        continue;
 750
 751                attr = kzalloc(sizeof(*attr), GFP_KERNEL);
 752
 753                if (attr == NULL) {
 754                        pr_warn("Failed kmalloc for bin_attribute!");
 755                        continue;
 756                }
 757
 758                sysfs_bin_attr_init(attr);
 759                attr->attr.name = kstrdup(prop->name, GFP_KERNEL);
 760                attr->attr.mode = 0400;
 761                attr->read = export_attr_read;
 762                attr->private = __va(vals[0]);
 763                attr->size = vals[1];
 764
 765                if (attr->attr.name == NULL) {
 766                        pr_warn("Failed kstrdup for bin_attribute attr.name");
 767                        kfree(attr);
 768                        continue;
 769                }
 770
 771                rc = sysfs_create_bin_file(kobj, attr);
 772                if (rc) {
 773                        pr_warn("Error %d creating OPAL sysfs exports/%s file\n",
 774                                 rc, prop->name);
 775                        kfree(attr->attr.name);
 776                        kfree(attr);
 777                }
 778        }
 779
 780        of_node_put(np);
 781}
 782
 783static void __init opal_dump_region_init(void)
 784{
 785        void *addr;
 786        uint64_t size;
 787        int rc;
 788
 789        if (!opal_check_token(OPAL_REGISTER_DUMP_REGION))
 790                return;
 791
 792        /* Register kernel log buffer */
 793        addr = log_buf_addr_get();
 794        if (addr == NULL)
 795                return;
 796
 797        size = log_buf_len_get();
 798        if (size == 0)
 799                return;
 800
 801        rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
 802                                       __pa(addr), size);
 803        /* Don't warn if this is just an older OPAL that doesn't
 804         * know about that call
 805         */
 806        if (rc && rc != OPAL_UNSUPPORTED)
 807                pr_warn("DUMP: Failed to register kernel log buffer. "
 808                        "rc = %d\n", rc);
 809}
 810
 811static void opal_pdev_init(const char *compatible)
 812{
 813        struct device_node *np;
 814
 815        for_each_compatible_node(np, NULL, compatible)
 816                of_platform_device_create(np, NULL, NULL);
 817}
 818
 819static void __init opal_imc_init_dev(void)
 820{
 821        struct device_node *np;
 822
 823        np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT);
 824        if (np)
 825                of_platform_device_create(np, NULL, NULL);
 826}
 827
/*
 * Body of the "kopald" kernel thread: repeatedly handle OPAL events, then
 * sleep for the heartbeat interval unless more events are already pending.
 * Runs until kthread_should_stop() reports a stop request; always returns 0.
 */
static int kopald(void *unused)
{
        /* Heartbeat period converted to jiffies, plus one. */
        unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1;

        set_freezable();
        do {
                try_to_freeze();

                opal_handle_events();

                /*
                 * Mark the task as sleeping before checking for pending
                 * events; presumably so that a wake-up arriving between the
                 * check and schedule_timeout() is not lost. TODO confirm.
                 */
                set_current_state(TASK_INTERRUPTIBLE);
                if (opal_have_pending_events())
                        __set_current_state(TASK_RUNNING);
                else
                        schedule_timeout(timeout);

        } while (!kthread_should_stop());

        return 0;
}
 848
 849void opal_wake_poller(void)
 850{
 851        if (kopald_tsk)
 852                wake_up_process(kopald_tsk);
 853}
 854
 855static void opal_init_heartbeat(void)
 856{
 857        /* Old firwmware, we assume the HVC heartbeat is sufficient */
 858        if (of_property_read_u32(opal_node, "ibm,heartbeat-ms",
 859                                 &opal_heartbeat) != 0)
 860                opal_heartbeat = 0;
 861
 862        if (opal_heartbeat)
 863                kopald_tsk = kthread_run(kopald, NULL, "kopald");
 864}
 865
 866static int __init opal_init(void)
 867{
 868        struct device_node *np, *consoles, *leds;
 869        int rc;
 870
 871        opal_node = of_find_node_by_path("/ibm,opal");
 872        if (!opal_node) {
 873                pr_warn("Device node not found\n");
 874                return -ENODEV;
 875        }
 876
 877        /* Register OPAL consoles if any ports */
 878        consoles = of_find_node_by_path("/ibm,opal/consoles");
 879        if (consoles) {
 880                for_each_child_of_node(consoles, np) {
 881                        if (!of_node_name_eq(np, "serial"))
 882                                continue;
 883                        of_platform_device_create(np, NULL, NULL);
 884                }
 885                of_node_put(consoles);
 886        }
 887
 888        /* Initialise OPAL messaging system */
 889        opal_message_init();
 890
 891        /* Initialise OPAL asynchronous completion interface */
 892        opal_async_comp_init();
 893
 894        /* Initialise OPAL sensor interface */
 895        opal_sensor_init();
 896
 897        /* Initialise OPAL hypervisor maintainence interrupt handling */
 898        opal_hmi_handler_init();
 899
 900        /* Create i2c platform devices */
 901        opal_pdev_init("ibm,opal-i2c");
 902
 903        /* Handle non-volatile memory devices */
 904        opal_pdev_init("pmem-region");
 905
 906        /* Setup a heatbeat thread if requested by OPAL */
 907        opal_init_heartbeat();
 908
 909        /* Detect In-Memory Collection counters and create devices*/
 910        opal_imc_init_dev();
 911
 912        /* Create leds platform devices */
 913        leds = of_find_node_by_path("/ibm,opal/leds");
 914        if (leds) {
 915                of_platform_device_create(leds, "opal_leds", NULL);
 916                of_node_put(leds);
 917        }
 918
 919        /* Initialise OPAL message log interface */
 920        opal_msglog_init();
 921
 922        /* Create "opal" kobject under /sys/firmware */
 923        rc = opal_sysfs_init();
 924        if (rc == 0) {
 925                /* Export symbol map to userspace */
 926                opal_export_symmap();
 927                /* Setup dump region interface */
 928                opal_dump_region_init();
 929                /* Setup error log interface */
 930                rc = opal_elog_init();
 931                /* Setup code update interface */
 932                opal_flash_update_init();
 933                /* Setup platform dump extract interface */
 934                opal_platform_dump_init();
 935                /* Setup system parameters interface */
 936                opal_sys_param_init();
 937                /* Setup message log sysfs interface. */
 938                opal_msglog_sysfs_init();
 939        }
 940
 941        /* Export all properties */
 942        opal_export_attrs();
 943
 944        /* Initialize platform devices: IPMI backend, PRD & flash interface */
 945        opal_pdev_init("ibm,opal-ipmi");
 946        opal_pdev_init("ibm,opal-flash");
 947        opal_pdev_init("ibm,opal-prd");
 948
 949        /* Initialise platform device: oppanel interface */
 950        opal_pdev_init("ibm,opal-oppanel");
 951
 952        /* Initialise OPAL kmsg dumper for flushing console on panic */
 953        opal_kmsg_init();
 954
 955        /* Initialise OPAL powercap interface */
 956        opal_powercap_init();
 957
 958        /* Initialise OPAL Power-Shifting-Ratio interface */
 959        opal_psr_init();
 960
 961        /* Initialise OPAL sensor groups */
 962        opal_sensor_groups_init();
 963
 964        /* Initialise OPAL Power control interface */
 965        opal_power_control_init();
 966
 967        return 0;
 968}
 969machine_subsys_initcall(powernv, opal_init);
 970
 971void opal_shutdown(void)
 972{
 973        long rc = OPAL_BUSY;
 974
 975        opal_event_shutdown();
 976
 977        /*
 978         * Then sync with OPAL which ensure anything that can
 979         * potentially write to our memory has completed such
 980         * as an ongoing dump retrieval
 981         */
 982        while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 983                rc = opal_sync_host_reboot();
 984                if (rc == OPAL_BUSY)
 985                        opal_poll_events(NULL);
 986                else
 987                        mdelay(10);
 988        }
 989
 990        /* Unregister memory dump region */
 991        if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION))
 992                opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
 993}
 994
/*
 * OPAL calls made available to GPL-licensed modules.
 *
 * opal_invalid_call is exported so that test modules can use it; the
 * remaining symbols are presumably needed by modular xscom, IPMI, flash
 * and PRD users (confirm against the callers).
 */
EXPORT_SYMBOL_GPL(opal_invalid_call);
EXPORT_SYMBOL_GPL(opal_xscom_read);
EXPORT_SYMBOL_GPL(opal_xscom_write);
EXPORT_SYMBOL_GPL(opal_ipmi_send);
EXPORT_SYMBOL_GPL(opal_ipmi_recv);
EXPORT_SYMBOL_GPL(opal_flash_read);
EXPORT_SYMBOL_GPL(opal_flash_write);
EXPORT_SYMBOL_GPL(opal_flash_erase);
EXPORT_SYMBOL_GPL(opal_prd_msg);
EXPORT_SYMBOL_GPL(opal_check_token);
1006
1007/* Convert a region of vmalloc memory to an opal sg list */
1008struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
1009                                             unsigned long vmalloc_size)
1010{
1011        struct opal_sg_list *sg, *first = NULL;
1012        unsigned long i = 0;
1013
1014        sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
1015        if (!sg)
1016                goto nomem;
1017
1018        first = sg;
1019
1020        while (vmalloc_size > 0) {
1021                uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
1022                uint64_t length = min(vmalloc_size, PAGE_SIZE);
1023
1024                sg->entry[i].data = cpu_to_be64(data);
1025                sg->entry[i].length = cpu_to_be64(length);
1026                i++;
1027
1028                if (i >= SG_ENTRIES_PER_NODE) {
1029                        struct opal_sg_list *next;
1030
1031                        next = kzalloc(PAGE_SIZE, GFP_KERNEL);
1032                        if (!next)
1033                                goto nomem;
1034
1035                        sg->length = cpu_to_be64(
1036                                        i * sizeof(struct opal_sg_entry) + 16);
1037                        i = 0;
1038                        sg->next = cpu_to_be64(__pa(next));
1039                        sg = next;
1040                }
1041
1042                vmalloc_addr += length;
1043                vmalloc_size -= length;
1044        }
1045
1046        sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);
1047
1048        return first;
1049
1050nomem:
1051        pr_err("%s : Failed to allocate memory\n", __func__);
1052        opal_free_sg_list(first);
1053        return NULL;
1054}
1055
1056void opal_free_sg_list(struct opal_sg_list *sg)
1057{
1058        while (sg) {
1059                uint64_t next = be64_to_cpu(sg->next);
1060
1061                kfree(sg);
1062
1063                if (next)
1064                        sg = __va(next);
1065                else
1066                        sg = NULL;
1067        }
1068}
1069
1070int opal_error_code(int rc)
1071{
1072        switch (rc) {
1073        case OPAL_SUCCESS:              return 0;
1074
1075        case OPAL_PARAMETER:            return -EINVAL;
1076        case OPAL_ASYNC_COMPLETION:     return -EINPROGRESS;
1077        case OPAL_BUSY:
1078        case OPAL_BUSY_EVENT:           return -EBUSY;
1079        case OPAL_NO_MEM:               return -ENOMEM;
1080        case OPAL_PERMISSION:           return -EPERM;
1081
1082        case OPAL_UNSUPPORTED:          return -EIO;
1083        case OPAL_HARDWARE:             return -EIO;
1084        case OPAL_INTERNAL_ERROR:       return -EIO;
1085        case OPAL_TIMEOUT:              return -ETIMEDOUT;
1086        default:
1087                pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
1088                return -EIO;
1089        }
1090}
1091
1092void powernv_set_nmmu_ptcr(unsigned long ptcr)
1093{
1094        int rc;
1095
1096        if (firmware_has_feature(FW_FEATURE_OPAL)) {
1097                rc = opal_nmmu_set_ptcr(-1UL, ptcr);
1098                if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED)
1099                        pr_warn("%s: Unable to set nest mmu ptcr\n", __func__);
1100        }
1101}
1102
/* Event polling, RTC, timed power-on (TPO) and i2c calls for modules */
EXPORT_SYMBOL_GPL(opal_poll_events);
EXPORT_SYMBOL_GPL(opal_rtc_read);
EXPORT_SYMBOL_GPL(opal_rtc_write);
EXPORT_SYMBOL_GPL(opal_tpo_read);
EXPORT_SYMBOL_GPL(opal_tpo_write);
EXPORT_SYMBOL_GPL(opal_i2c_request);
/* Export these symbols for PowerNV LED class driver */
EXPORT_SYMBOL_GPL(opal_leds_get_ind);
EXPORT_SYMBOL_GPL(opal_leds_set_ind);
/* Export this symbol for PowerNV Operator Panel class driver */
EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
/* Export this for KVM */
EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
EXPORT_SYMBOL_GPL(opal_int_eoi);
EXPORT_SYMBOL_GPL(opal_error_code);
/*
 * Export the below symbol for NX compression.
 * NOTE(review): this is the only plain EXPORT_SYMBOL here, all others are
 * EXPORT_SYMBOL_GPL - confirm that the difference is intended.
 */
EXPORT_SYMBOL(opal_nx_coproc_init);
1120