linux/arch/powerpc/platforms/powernv/opal.c
/*
 * PowerNV OPAL high level interfaces
 *
 * Copyright 2011 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt)     "opal: " fmt

#include <linux/printk.h>
#include <linux/types.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
#include <linux/of_address.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/kobject.h>
#include <linux/delay.h>
#include <linux/memblock.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/kmsg_dump.h>
#include <linux/console.h>
#include <linux/sched/debug.h>

#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/firmware.h>
#include <asm/mce.h>
#include <asm/imc-pmu.h>
#include <asm/bug.h>

#include "powernv.h"

/* /sys/firmware/opal */
struct kobject *opal_kobj;

struct opal {
        u64 base;
        u64 entry;
        u64 size;
} opal;

struct mcheck_recoverable_range {
        u64 start_addr;
        u64 end_addr;
        u64 recover_addr;
};

static struct mcheck_recoverable_range *mc_recoverable_range;
static int mc_recoverable_range_len;

struct device_node *opal_node;
static DEFINE_SPINLOCK(opal_write_lock);
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
static uint32_t opal_heartbeat;
static struct task_struct *kopald_tsk;

void opal_configure_cores(void)
{
        u64 reinit_flags = 0;

        /* Do the actual re-init. This will clobber all FPRs, VRs, etc...
         *
         * It will preserve non-volatile GPRs and HSPRG0/1. It will
         * also restore HIDs and other SPRs to their original value
         * but it might clobber a bunch.
         */
#ifdef __BIG_ENDIAN__
        reinit_flags |= OPAL_REINIT_CPUS_HILE_BE;
#else
        reinit_flags |= OPAL_REINIT_CPUS_HILE_LE;
#endif

        /*
         * POWER9 always supports running hash:
         *  i.e. Host hash  supports  hash guests
         *       Host radix supports  hash/radix guests
         */
        if (early_cpu_has_feature(CPU_FTR_ARCH_300)) {
                reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH;
                if (early_radix_enabled())
                        reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX;
        }

        opal_reinit_cpus(reinit_flags);

        /* Restore some bits */
        if (cur_cpu_spec->cpu_restore)
                cur_cpu_spec->cpu_restore();
}

int __init early_init_dt_scan_opal(unsigned long node,
                                   const char *uname, int depth, void *data)
{
        const void *basep, *entryp, *sizep;
        int basesz, entrysz, runtimesz;

        if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
                return 0;

        basep  = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
        entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
        sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);

        if (!basep || !entryp || !sizep)
                return 1;

        opal.base = of_read_number(basep, basesz/4);
        opal.entry = of_read_number(entryp, entrysz/4);
        opal.size = of_read_number(sizep, runtimesz/4);

        pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%d)\n",
                 opal.base, basep, basesz);
        pr_debug("OPAL Entry = 0x%llx (entryp=%p entrysz=%d)\n",
                 opal.entry, entryp, entrysz);
        pr_debug("OPAL Size  = 0x%llx (sizep=%p runtimesz=%d)\n",
                 opal.size, sizep, runtimesz);

        if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
                powerpc_firmware_features |= FW_FEATURE_OPAL;
                pr_debug("OPAL detected !\n");
        } else {
                panic("OPAL != V3 detected, no longer supported.\n");
        }

        return 1;
}

int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
                                   const char *uname, int depth, void *data)
{
        int i, psize, size;
        const __be32 *prop;

        if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
                return 0;

        prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);

        if (!prop)
                return 1;

        pr_debug("Found machine check recoverable ranges.\n");

        /*
         * Calculate number of available entries.
         *
         * Each recoverable address range entry is (start address, len,
         * recovery address), 2 cells each for start and recovery address,
         * 1 cell for len, totalling 5 cells per entry.
         */
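        /*
         * Illustrative example only (made-up values): a property entry of
         *   < 0x0 0x30002000  0x200  0x0 0x30003000 >
         * would describe the recoverable range [0x30002000, 0x30002200)
         * with recovery (fixup) address 0x30003000.
         */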
        mc_recoverable_range_len = psize / (sizeof(*prop) * 5);

        /* Sanity check */
        if (!mc_recoverable_range_len)
                return 1;

        /* Size required to hold all the entries. */
        size = mc_recoverable_range_len *
                        sizeof(struct mcheck_recoverable_range);

        /*
         * Allocate a buffer to hold the MC recoverable ranges.
         */
        mc_recoverable_range = __va(memblock_phys_alloc(size, __alignof__(u64)));
        memset(mc_recoverable_range, 0, size);

        for (i = 0; i < mc_recoverable_range_len; i++) {
                mc_recoverable_range[i].start_addr =
                                        of_read_number(prop + (i * 5) + 0, 2);
                mc_recoverable_range[i].end_addr =
                                        mc_recoverable_range[i].start_addr +
                                        of_read_number(prop + (i * 5) + 2, 1);
                mc_recoverable_range[i].recover_addr =
                                        of_read_number(prop + (i * 5) + 3, 2);

                pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
                                mc_recoverable_range[i].start_addr,
                                mc_recoverable_range[i].end_addr,
                                mc_recoverable_range[i].recover_addr);
        }
        return 1;
}

static int __init opal_register_exception_handlers(void)
{
#ifdef __BIG_ENDIAN__
        u64 glue;

        if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
                return -ENODEV;

        /* Hook up some exception handlers, except machine check. We use the
         * fwnmi area at 0x7000 to provide the glue space to OPAL.
         */
        glue = 0x7000;

        /*
         * Check if we are running on newer firmware that exports
         * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch
         * the HMI interrupt; we catch it directly in Linux.
         *
         * For older firmware (i.e. currently released POWER8 System Firmware
         * as of today <= SV810_087), we fall back to the old behavior and let
         * OPAL patch the HMI vector and handle it inside OPAL firmware.
         *
         * For newer firmware (in development/yet to be released) we will
         * start catching/handling HMIs directly in Linux.
         */
        if (!opal_check_token(OPAL_HANDLE_HMI)) {
                pr_info("Old firmware detected, OPAL handles HMIs.\n");
                opal_register_exception_handler(
                                OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
                                0, glue);
                glue += 128;
        }

        opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
#endif

        return 0;
}
machine_early_initcall(powernv, opal_register_exception_handlers);

/*
 * OPAL message notifier based on message type. Allow subscribers to get
 * notified for a specific message type.
 */
int opal_message_notifier_register(enum opal_msg_type msg_type,
                                        struct notifier_block *nb)
{
        if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
                pr_warn("%s: Invalid arguments, msg_type:%d\n",
                        __func__, msg_type);
                return -EINVAL;
        }

        return atomic_notifier_chain_register(
                                &opal_msg_notifier_head[msg_type], nb);
}
EXPORT_SYMBOL_GPL(opal_message_notifier_register);

int opal_message_notifier_unregister(enum opal_msg_type msg_type,
                                     struct notifier_block *nb)
{
        return atomic_notifier_chain_unregister(
                        &opal_msg_notifier_head[msg_type], nb);
}
EXPORT_SYMBOL_GPL(opal_message_notifier_unregister);
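
/*
 * Usage sketch (illustrative only, not part of this file): a hypothetical
 * consumer interested in, say, EPOW messages could subscribe roughly like
 * this; "my_epow_notify" and "my_epow_nb" are made-up names:
 *
 *      static int my_epow_notify(struct notifier_block *nb,
 *                                unsigned long msg_type, void *msg)
 *      {
 *              struct opal_msg *m = msg;
 *
 *              pr_info("EPOW message, first param 0x%llx\n",
 *                      be64_to_cpu(m->params[0]));
 *              return NOTIFY_OK;
 *      }
 *
 *      static struct notifier_block my_epow_nb = {
 *              .notifier_call = my_epow_notify,
 *      };
 *
 *      opal_message_notifier_register(OPAL_MSG_EPOW, &my_epow_nb);
 */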

static void opal_message_do_notify(uint32_t msg_type, void *msg)
{
        /* notify subscribers */
        atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
                                        msg_type, msg);
}

static void opal_handle_message(void)
{
        s64 ret;
        /*
         * TODO: pre-allocate a message buffer depending on opal-msg-size
         * value in /proc/device-tree.
         */
        static struct opal_msg msg;
        u32 type;

        ret = opal_get_msg(__pa(&msg), sizeof(msg));
        /* No opal message pending. */
        if (ret == OPAL_RESOURCE)
                return;

        /* check for errors. */
        if (ret) {
                pr_warn("%s: Failed to retrieve opal message, err=%lld\n",
                        __func__, ret);
                return;
        }

        type = be32_to_cpu(msg.msg_type);

        /* Sanity check */
        if (type >= OPAL_MSG_TYPE_MAX) {
                pr_warn_once("%s: Unknown message type: %u\n", __func__, type);
                return;
        }
        opal_message_do_notify(type, (void *)&msg);
}

static irqreturn_t opal_message_notify(int irq, void *data)
{
        opal_handle_message();
        return IRQ_HANDLED;
}

static int __init opal_message_init(void)
{
        int ret, i, irq;

        for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
                ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);

        irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING));
        if (!irq) {
                pr_err("%s: Can't register OPAL event irq (%d)\n",
                       __func__, irq);
                return irq;
        }

        ret = request_irq(irq, opal_message_notify,
                        IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL);
        if (ret) {
                pr_err("%s: Can't request OPAL event irq (%d)\n",
                       __func__, ret);
                return ret;
        }

        return 0;
}

int opal_get_chars(uint32_t vtermno, char *buf, int count)
{
        s64 rc;
        __be64 evt, len;

        if (!opal.entry)
                return -ENODEV;
        opal_poll_events(&evt);
        if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
                return 0;
        len = cpu_to_be64(count);
        rc = opal_console_read(vtermno, &len, buf);
        if (rc == OPAL_SUCCESS)
                return be64_to_cpu(len);
        return 0;
}

static int __opal_put_chars(uint32_t vtermno, const char *data, int total_len, bool atomic)
{
        unsigned long flags = 0 /* shut up gcc */;
        int written;
        __be64 olen;
        s64 rc;

        if (!opal.entry)
                return -ENODEV;

        if (atomic)
                spin_lock_irqsave(&opal_write_lock, flags);
        rc = opal_console_write_buffer_space(vtermno, &olen);
        if (rc || be64_to_cpu(olen) < total_len) {
                /* Closed -> drop characters */
                if (rc)
                        written = total_len;
                else
                        written = -EAGAIN;
                goto out;
        }

        /* Should not get a partial write here because space is available. */
        olen = cpu_to_be64(total_len);
        rc = opal_console_write(vtermno, &olen, data);
        if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
                if (rc == OPAL_BUSY_EVENT)
                        opal_poll_events(NULL);
                written = -EAGAIN;
                goto out;
        }

        /* Closed or other error drop */
        if (rc != OPAL_SUCCESS) {
                written = opal_error_code(rc);
                goto out;
        }

        written = be64_to_cpu(olen);
        if (written < total_len) {
                if (atomic) {
                        /* Should not happen */
                        pr_warn("atomic console write returned partial len=%d written=%d\n",
                                total_len, written);
                }
                if (!written)
                        written = -EAGAIN;
        }

out:
        if (atomic)
                spin_unlock_irqrestore(&opal_write_lock, flags);

        return written;
}

int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
{
        return __opal_put_chars(vtermno, data, total_len, false);
}

/*
 * opal_put_chars_atomic will not perform partial-writes. Data will be
 * atomically written to the terminal or not at all. This is not strictly
 * true at the moment because console space can race with OPAL's console
 * writes.
 */
int opal_put_chars_atomic(uint32_t vtermno, const char *data, int total_len)
{
        return __opal_put_chars(vtermno, data, total_len, true);
}

static s64 __opal_flush_console(uint32_t vtermno)
{
        s64 rc;

        if (!opal_check_token(OPAL_CONSOLE_FLUSH)) {
                __be64 evt;

                /*
                 * If OPAL_CONSOLE_FLUSH is not implemented in the firmware,
                 * the console can still be flushed by calling the polling
                 * function while it has OPAL_EVENT_CONSOLE_OUTPUT events.
                 */
                WARN_ONCE(1, "opal: OPAL_CONSOLE_FLUSH missing.\n");

                opal_poll_events(&evt);
                if (!(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT))
                        return OPAL_SUCCESS;
                return OPAL_BUSY;

        } else {
                rc = opal_console_flush(vtermno);
                if (rc == OPAL_BUSY_EVENT) {
                        opal_poll_events(NULL);
                        rc = OPAL_BUSY;
                }
                return rc;
        }

}

/*
 * opal_flush_console spins until the console is flushed
 */
int opal_flush_console(uint32_t vtermno)
{
        for (;;) {
                s64 rc = __opal_flush_console(vtermno);

                if (rc == OPAL_BUSY || rc == OPAL_PARTIAL) {
                        mdelay(1);
                        continue;
                }

                return opal_error_code(rc);
        }
}

/*
 * opal_flush_chars is an hvc interface that sleeps until the console is
 * flushed if 'wait' is set; otherwise it returns -EBUSY if the console has
 * data, or -EAGAIN if it has data and some of it was flushed.
 */
int opal_flush_chars(uint32_t vtermno, bool wait)
{
        for (;;) {
                s64 rc = __opal_flush_console(vtermno);

                if (rc == OPAL_BUSY || rc == OPAL_PARTIAL) {
                        if (wait) {
                                msleep(OPAL_BUSY_DELAY_MS);
                                continue;
                        }
                        if (rc == OPAL_PARTIAL)
                                return -EAGAIN;
                }

                return opal_error_code(rc);
        }
}

static int opal_recover_mce(struct pt_regs *regs,
                                        struct machine_check_event *evt)
{
        int recovered = 0;

        if (!(regs->msr & MSR_RI)) {
                /* If MSR_RI isn't set, we cannot recover */
                pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
                recovered = 0;
        } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
                /* Platform corrected itself */
                recovered = 1;
        } else if (evt->severity == MCE_SEV_FATAL) {
                /* Fatal machine check */
                pr_err("Machine check interrupt is fatal\n");
                recovered = 0;
        }

        if (!recovered && evt->severity == MCE_SEV_ERROR_SYNC) {
                /*
                 * Try to kill processes if we get a synchronous machine check
                 * (e.g., one caused by execution of this instruction). This
                 * will devolve into a panic if we try to kill init or are in
                 * an interrupt etc.
                 *
                 * TODO: Queue up this address for hwpoisoning later.
                 * TODO: This is not quite right for d-side machine
                 *       checks, ->nip is not necessarily the important
                 *       address.
                 */
                if ((user_mode(regs))) {
                        _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
                        recovered = 1;
                } else if (die_will_crash()) {
                        /*
                         * die() would kill the kernel, so better to go via
                         * the platform reboot code that will log the
                         * machine check.
                         */
                        recovered = 0;
                } else {
                        die("Machine check", regs, SIGBUS);
                        recovered = 1;
                }
        }

        return recovered;
}

void __noreturn pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
{
        panic_flush_kmsg_start();

        pr_emerg("Hardware platform error: %s\n", msg);
        if (regs)
                show_regs(regs);
        smp_send_stop();

        panic_flush_kmsg_end();

        /*
         * Don't bother to shut things down because this will
         * xstop the system.
         */
        if (opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, msg)
                                                == OPAL_UNSUPPORTED) {
                pr_emerg("Reboot type %d not supported for %s\n",
                                OPAL_REBOOT_PLATFORM_ERROR, msg);
        }

        /*
         * We reached here. There are four possibilities:
         * 1. We are running on a firmware level that does not support
         *    opal_cec_reboot2().
         * 2. We are running on a firmware level that does not support
         *    the OPAL_REBOOT_PLATFORM_ERROR reboot type.
         * 3. We are running on an FSP based system that does not need
         *    opal to trigger checkstop explicitly for error analysis.
         *    The FSP PRD component would have already got notified
         *    about this error through other channels.
         * 4. We are running on a newer skiboot that by default does
         *    not cause a checkstop, drops us back to the kernel to
         *    extract context and state at the time of the error.
         */

        panic(msg);
}

int opal_machine_check(struct pt_regs *regs)
{
        struct machine_check_event evt;

        if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
                return 0;

        /* Print things out */
        if (evt.version != MCE_V1) {
                pr_err("Machine Check Exception, Unknown event version %d !\n",
                       evt.version);
                return 0;
        }
        machine_check_print_event_info(&evt, user_mode(regs));

        if (opal_recover_mce(regs, &evt))
                return 1;

        pnv_platform_error_reboot(regs, "Unrecoverable Machine Check exception");
}

/* Early HMI handler called in real mode. */
int opal_hmi_exception_early(struct pt_regs *regs)
{
        s64 rc;

        /*
         * Call the OPAL HMI handler. The return value OPAL_SUCCESS
         * indicates that there is an HMI event generated and waiting
         * to be pulled by Linux.
         */
        rc = opal_handle_hmi();
        if (rc == OPAL_SUCCESS) {
                local_paca->hmi_event_available = 1;
                return 1;
        }
        return 0;
}

/* HMI exception handler called in virtual mode during check_irq_replay. */
int opal_handle_hmi_exception(struct pt_regs *regs)
{
        /*
         * Check if an HMI event is available.
         * If yes, then wake kopald to process it.
         */
        if (!local_paca->hmi_event_available)
                return 0;

        local_paca->hmi_event_available = 0;
        opal_wake_poller();

        return 1;
}

static uint64_t find_recovery_address(uint64_t nip)
{
        int i;

        for (i = 0; i < mc_recoverable_range_len; i++)
                if ((nip >= mc_recoverable_range[i].start_addr) &&
                    (nip < mc_recoverable_range[i].end_addr))
                    return mc_recoverable_range[i].recover_addr;
        return 0;
}

bool opal_mce_check_early_recovery(struct pt_regs *regs)
{
        uint64_t recover_addr = 0;

        if (!opal.base || !opal.size)
                goto out;

        if ((regs->nip >= opal.base) &&
                        (regs->nip < (opal.base + opal.size)))
                recover_addr = find_recovery_address(regs->nip);

        /*
         * Set up regs->nip to rfi into the fixup address.
         */
        if (recover_addr)
                regs->nip = recover_addr;

out:
        return !!recover_addr;
}

static int opal_sysfs_init(void)
{
        opal_kobj = kobject_create_and_add("opal", firmware_kobj);
        if (!opal_kobj) {
                pr_warn("kobject_create_and_add opal failed\n");
                return -ENOMEM;
        }

        return 0;
}

static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
                               struct bin_attribute *bin_attr,
                               char *buf, loff_t off, size_t count)
{
        return memory_read_from_buffer(buf, count, &off, bin_attr->private,
                                       bin_attr->size);
}

static BIN_ATTR_RO(symbol_map, 0);

static void opal_export_symmap(void)
{
        const __be64 *syms;
        unsigned int size;
        struct device_node *fw;
        int rc;

        fw = of_find_node_by_path("/ibm,opal/firmware");
        if (!fw)
                return;
        syms = of_get_property(fw, "symbol-map", &size);
        if (!syms || size != 2 * sizeof(__be64))
                return;

        /* Setup attributes */
        bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
        bin_attr_symbol_map.size = be64_to_cpu(syms[1]);

        rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
        if (rc)
                pr_warn("Error %d creating OPAL symbols file\n", rc);
}

static ssize_t export_attr_read(struct file *fp, struct kobject *kobj,
                                struct bin_attribute *bin_attr, char *buf,
                                loff_t off, size_t count)
{
        return memory_read_from_buffer(buf, count, &off, bin_attr->private,
                                       bin_attr->size);
}

/*
 * opal_export_attrs: creates a sysfs node for each property listed in
 * the device-tree under /ibm,opal/firmware/exports/.
 * All new sysfs nodes are created under /sys/firmware/opal/exports/.
 * This allows reserved memory regions (e.g. HDAT) to be read.
 * The new sysfs nodes are only readable by root.
 */
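/*
 * For example (hypothetical property name), an exports/ property "foo"
 * whose value is the pair <address size> would show up as the root-only
 * binary file /sys/firmware/opal/exports/foo, backed by that memory range.
 */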
static void opal_export_attrs(void)
{
        struct bin_attribute *attr;
        struct device_node *np;
        struct property *prop;
        struct kobject *kobj;
        u64 vals[2];
        int rc;

        np = of_find_node_by_path("/ibm,opal/firmware/exports");
        if (!np)
                return;

        /* Create new 'exports' directory - /sys/firmware/opal/exports */
        kobj = kobject_create_and_add("exports", opal_kobj);
        if (!kobj) {
                pr_warn("kobject_create_and_add() of exports failed\n");
                return;
        }

        for_each_property_of_node(np, prop) {
                if (!strcmp(prop->name, "name") || !strcmp(prop->name, "phandle"))
                        continue;

                if (of_property_read_u64_array(np, prop->name, &vals[0], 2))
                        continue;

                attr = kzalloc(sizeof(*attr), GFP_KERNEL);

                if (attr == NULL) {
                        pr_warn("Failed kmalloc for bin_attribute!");
                        continue;
                }

                sysfs_bin_attr_init(attr);
                attr->attr.name = kstrdup(prop->name, GFP_KERNEL);
                attr->attr.mode = 0400;
                attr->read = export_attr_read;
                attr->private = __va(vals[0]);
                attr->size = vals[1];

                if (attr->attr.name == NULL) {
                        pr_warn("Failed kstrdup for bin_attribute attr.name");
                        kfree(attr);
                        continue;
                }

                rc = sysfs_create_bin_file(kobj, attr);
                if (rc) {
                        pr_warn("Error %d creating OPAL sysfs exports/%s file\n",
                                 rc, prop->name);
                        kfree(attr->attr.name);
                        kfree(attr);
                }
        }

        of_node_put(np);
}

static void __init opal_dump_region_init(void)
{
        void *addr;
        uint64_t size;
        int rc;

        if (!opal_check_token(OPAL_REGISTER_DUMP_REGION))
                return;

        /* Register kernel log buffer */
        addr = log_buf_addr_get();
        if (addr == NULL)
                return;

        size = log_buf_len_get();
        if (size == 0)
                return;

        rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
                                       __pa(addr), size);
        /* Don't warn if this is just an older OPAL that doesn't
         * know about that call
         */
        if (rc && rc != OPAL_UNSUPPORTED)
                pr_warn("DUMP: Failed to register kernel log buffer. rc = %d\n",
                        rc);
}

static void opal_pdev_init(const char *compatible)
{
        struct device_node *np;

        for_each_compatible_node(np, NULL, compatible)
                of_platform_device_create(np, NULL, NULL);
}

static void __init opal_imc_init_dev(void)
{
        struct device_node *np;

        np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT);
        if (np)
                of_platform_device_create(np, NULL, NULL);
}

static int kopald(void *unused)
{
        unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1;

        set_freezable();
        do {
                try_to_freeze();

                opal_handle_events();

                set_current_state(TASK_INTERRUPTIBLE);
                if (opal_have_pending_events())
                        __set_current_state(TASK_RUNNING);
                else
                        schedule_timeout(timeout);

        } while (!kthread_should_stop());

        return 0;
}

void opal_wake_poller(void)
{
        if (kopald_tsk)
                wake_up_process(kopald_tsk);
}

static void opal_init_heartbeat(void)
{
        /* Old firmware, we assume the HVC heartbeat is sufficient */
        if (of_property_read_u32(opal_node, "ibm,heartbeat-ms",
                                 &opal_heartbeat) != 0)
                opal_heartbeat = 0;

        if (opal_heartbeat)
                kopald_tsk = kthread_run(kopald, NULL, "kopald");
}

static int __init opal_init(void)
{
        struct device_node *np, *consoles, *leds;
        int rc;

        opal_node = of_find_node_by_path("/ibm,opal");
        if (!opal_node) {
                pr_warn("Device node not found\n");
                return -ENODEV;
        }

        /* Register OPAL consoles if there are any ports */
        consoles = of_find_node_by_path("/ibm,opal/consoles");
        if (consoles) {
                for_each_child_of_node(consoles, np) {
                        if (strcmp(np->name, "serial"))
                                continue;
                        of_platform_device_create(np, NULL, NULL);
                }
                of_node_put(consoles);
        }

        /* Initialise OPAL messaging system */
        opal_message_init();

        /* Initialise OPAL asynchronous completion interface */
        opal_async_comp_init();

        /* Initialise OPAL sensor interface */
        opal_sensor_init();

        /* Initialise OPAL hypervisor maintenance interrupt handling */
        opal_hmi_handler_init();

        /* Create i2c platform devices */
        opal_pdev_init("ibm,opal-i2c");

        /* Handle non-volatile memory devices */
        opal_pdev_init("pmem-region");

        /* Setup a heartbeat thread if requested by OPAL */
        opal_init_heartbeat();

        /* Detect In-Memory Collection counters and create devices */
        opal_imc_init_dev();

        /* Create leds platform devices */
        leds = of_find_node_by_path("/ibm,opal/leds");
        if (leds) {
                of_platform_device_create(leds, "opal_leds", NULL);
                of_node_put(leds);
        }

        /* Initialise OPAL message log interface */
        opal_msglog_init();

        /* Create "opal" kobject under /sys/firmware */
        rc = opal_sysfs_init();
        if (rc == 0) {
                /* Export symbol map to userspace */
                opal_export_symmap();
                /* Setup dump region interface */
                opal_dump_region_init();
                /* Setup error log interface */
                rc = opal_elog_init();
                /* Setup code update interface */
                opal_flash_update_init();
                /* Setup platform dump extract interface */
                opal_platform_dump_init();
                /* Setup system parameters interface */
                opal_sys_param_init();
                /* Setup message log sysfs interface. */
                opal_msglog_sysfs_init();
        }

        /* Export all properties */
        opal_export_attrs();

        /* Initialize platform devices: IPMI backend, PRD & flash interface */
        opal_pdev_init("ibm,opal-ipmi");
        opal_pdev_init("ibm,opal-flash");
        opal_pdev_init("ibm,opal-prd");

        /* Initialise platform device: oppanel interface */
        opal_pdev_init("ibm,opal-oppanel");

        /* Initialise OPAL kmsg dumper for flushing console on panic */
        opal_kmsg_init();

        /* Initialise OPAL powercap interface */
        opal_powercap_init();

        /* Initialise OPAL Power-Shifting-Ratio interface */
        opal_psr_init();

        /* Initialise OPAL sensor groups */
        opal_sensor_groups_init();

        return 0;
}
machine_subsys_initcall(powernv, opal_init);

void opal_shutdown(void)
{
        long rc = OPAL_BUSY;

        opal_event_shutdown();

        /*
         * Then sync with OPAL, which ensures that anything that can
         * potentially write to our memory has completed, such as an
         * ongoing dump retrieval.
         */
        while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
                rc = opal_sync_host_reboot();
                if (rc == OPAL_BUSY)
                        opal_poll_events(NULL);
                else
                        mdelay(10);
        }

        /* Unregister memory dump region */
        if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION))
                opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
}

/* Export this so that test modules can use it */
EXPORT_SYMBOL_GPL(opal_invalid_call);
EXPORT_SYMBOL_GPL(opal_xscom_read);
EXPORT_SYMBOL_GPL(opal_xscom_write);
EXPORT_SYMBOL_GPL(opal_ipmi_send);
EXPORT_SYMBOL_GPL(opal_ipmi_recv);
EXPORT_SYMBOL_GPL(opal_flash_read);
EXPORT_SYMBOL_GPL(opal_flash_write);
EXPORT_SYMBOL_GPL(opal_flash_erase);
EXPORT_SYMBOL_GPL(opal_prd_msg);
EXPORT_SYMBOL_GPL(opal_check_token);

/* Convert a region of vmalloc memory to an opal sg list */
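/*
 * Each scatter-gather node built below occupies one page: a 16-byte header
 * (the node's length and the physical address of the next node) followed by
 * up to SG_ENTRIES_PER_NODE (data, length) entries, with nodes chained
 * through their physical addresses so that firmware can walk the list.
 */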
struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
                                             unsigned long vmalloc_size)
{
        struct opal_sg_list *sg, *first = NULL;
        unsigned long i = 0;

        sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
        if (!sg)
                goto nomem;

        first = sg;

        while (vmalloc_size > 0) {
                uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
                uint64_t length = min(vmalloc_size, PAGE_SIZE);

                sg->entry[i].data = cpu_to_be64(data);
                sg->entry[i].length = cpu_to_be64(length);
                i++;

                if (i >= SG_ENTRIES_PER_NODE) {
                        struct opal_sg_list *next;

                        next = kzalloc(PAGE_SIZE, GFP_KERNEL);
                        if (!next)
                                goto nomem;

                        sg->length = cpu_to_be64(
                                        i * sizeof(struct opal_sg_entry) + 16);
                        i = 0;
                        sg->next = cpu_to_be64(__pa(next));
                        sg = next;
                }

                vmalloc_addr += length;
                vmalloc_size -= length;
        }

        sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);

        return first;

nomem:
        pr_err("%s : Failed to allocate memory\n", __func__);
        opal_free_sg_list(first);
        return NULL;
}

void opal_free_sg_list(struct opal_sg_list *sg)
{
        while (sg) {
                uint64_t next = be64_to_cpu(sg->next);

                kfree(sg);

                if (next)
                        sg = __va(next);
                else
                        sg = NULL;
        }
}

int opal_error_code(int rc)
{
        switch (rc) {
        case OPAL_SUCCESS:              return 0;

        case OPAL_PARAMETER:            return -EINVAL;
        case OPAL_ASYNC_COMPLETION:     return -EINPROGRESS;
        case OPAL_BUSY:
        case OPAL_BUSY_EVENT:           return -EBUSY;
        case OPAL_NO_MEM:               return -ENOMEM;
        case OPAL_PERMISSION:           return -EPERM;

        case OPAL_UNSUPPORTED:          return -EIO;
        case OPAL_HARDWARE:             return -EIO;
        case OPAL_INTERNAL_ERROR:       return -EIO;
        case OPAL_TIMEOUT:              return -ETIMEDOUT;
        default:
                pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
                return -EIO;
        }
}

void powernv_set_nmmu_ptcr(unsigned long ptcr)
{
        int rc;

        if (firmware_has_feature(FW_FEATURE_OPAL)) {
                rc = opal_nmmu_set_ptcr(-1UL, ptcr);
                if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED)
                        pr_warn("%s: Unable to set nest mmu ptcr\n", __func__);
        }
}

EXPORT_SYMBOL_GPL(opal_poll_events);
EXPORT_SYMBOL_GPL(opal_rtc_read);
EXPORT_SYMBOL_GPL(opal_rtc_write);
EXPORT_SYMBOL_GPL(opal_tpo_read);
EXPORT_SYMBOL_GPL(opal_tpo_write);
EXPORT_SYMBOL_GPL(opal_i2c_request);
/* Export these symbols for PowerNV LED class driver */
EXPORT_SYMBOL_GPL(opal_leds_get_ind);
EXPORT_SYMBOL_GPL(opal_leds_set_ind);
/* Export this symbol for PowerNV Operator Panel class driver */
EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
/* Export this for KVM */
EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
EXPORT_SYMBOL_GPL(opal_int_eoi);
EXPORT_SYMBOL_GPL(opal_error_code);
/* Export the below symbol for NX compression */
EXPORT_SYMBOL(opal_nx_coproc_init);
