qemu/hw/acpi/ghes.c
<<
>>
Prefs
   1/*
   2 * Support for generating APEI tables and recording CPER for Guests
   3 *
   4 * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO., LTD.
   5 *
   6 * Author: Dongjiu Geng <gengdongjiu@huawei.com>
   7 *
   8 * This program is free software; you can redistribute it and/or modify
   9 * it under the terms of the GNU General Public License as published by
  10 * the Free Software Foundation; either version 2 of the License, or
  11 * (at your option) any later version.
  12
  13 * This program is distributed in the hope that it will be useful,
  14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 * GNU General Public License for more details.
  17
  18 * You should have received a copy of the GNU General Public License along
  19 * with this program; if not, see <http://www.gnu.org/licenses/>.
  20 */
  21
  22#include "qemu/osdep.h"
  23#include "qemu/units.h"
  24#include "hw/acpi/ghes.h"
  25#include "hw/acpi/aml-build.h"
  26#include "qemu/error-report.h"
  27#include "hw/acpi/generic_event_device.h"
  28#include "hw/nvram/fw_cfg.h"
  29#include "qemu/uuid.h"
  30
  31#define ACPI_GHES_ERRORS_FW_CFG_FILE        "etc/hardware_errors"
  32#define ACPI_GHES_DATA_ADDR_FW_CFG_FILE     "etc/hardware_errors_addr"
  33
  34/* The max size in bytes for one error block */
  35#define ACPI_GHES_MAX_RAW_DATA_LENGTH   (1 * KiB)
  36
  37/* Now only support ARMv8 SEA notification type error source */
  38#define ACPI_GHES_ERROR_SOURCE_COUNT        1
  39
  40/* Generic Hardware Error Source version 2 */
  41#define ACPI_GHES_SOURCE_GENERIC_ERROR_V2   10
  42
  43/* Address offset in Generic Address Structure(GAS) */
  44#define GAS_ADDR_OFFSET 4
  45
  46/*
  47 * The total size of Generic Error Data Entry
  48 * ACPI 6.1/6.2: 18.3.2.7.1 Generic Error Data,
  49 * Table 18-343 Generic Error Data Entry
  50 */
  51#define ACPI_GHES_DATA_LENGTH               72
  52
  53/* The memory section CPER size, UEFI 2.6: N.2.5 Memory Error Section */
  54#define ACPI_GHES_MEM_CPER_LENGTH           80
  55
  56/* Masks for block_status flags */
  57#define ACPI_GEBS_UNCORRECTABLE         1
  58
  59/*
  60 * Total size for Generic Error Status Block except Generic Error Data Entries
  61 * ACPI 6.2: 18.3.2.7.1 Generic Error Data,
  62 * Table 18-380 Generic Error Status Block
  63 */
  64#define ACPI_GHES_GESB_SIZE                 20
  65
  66/*
  67 * Values for error_severity field
  68 */
  69enum AcpiGenericErrorSeverity {
  70    ACPI_CPER_SEV_RECOVERABLE = 0,
  71    ACPI_CPER_SEV_FATAL = 1,
  72    ACPI_CPER_SEV_CORRECTED = 2,
  73    ACPI_CPER_SEV_NONE = 3,
  74};
  75
  76/*
  77 * Hardware Error Notification
  78 * ACPI 4.0: 17.3.2.7 Hardware Error Notification
  79 * Composes dummy Hardware Error Notification descriptor of specified type
  80 */
  81static void build_ghes_hw_error_notification(GArray *table, const uint8_t type)
  82{
  83    /* Type */
  84    build_append_int_noprefix(table, type, 1);
  85    /*
  86     * Length:
  87     * Total length of the structure in bytes
  88     */
  89    build_append_int_noprefix(table, 28, 1);
  90    /* Configuration Write Enable */
  91    build_append_int_noprefix(table, 0, 2);
  92    /* Poll Interval */
  93    build_append_int_noprefix(table, 0, 4);
  94    /* Vector */
  95    build_append_int_noprefix(table, 0, 4);
  96    /* Switch To Polling Threshold Value */
  97    build_append_int_noprefix(table, 0, 4);
  98    /* Switch To Polling Threshold Window */
  99    build_append_int_noprefix(table, 0, 4);
 100    /* Error Threshold Value */
 101    build_append_int_noprefix(table, 0, 4);
 102    /* Error Threshold Window */
 103    build_append_int_noprefix(table, 0, 4);
 104}
 105
 106/*
 107 * Generic Error Data Entry
 108 * ACPI 6.1: 18.3.2.7.1 Generic Error Data
 109 */
 110static void acpi_ghes_generic_error_data(GArray *table,
 111                const uint8_t *section_type, uint32_t error_severity,
 112                uint8_t validation_bits, uint8_t flags,
 113                uint32_t error_data_length, QemuUUID fru_id,
 114                uint64_t time_stamp)
 115{
 116    const uint8_t fru_text[20] = {0};
 117
 118    /* Section Type */
 119    g_array_append_vals(table, section_type, 16);
 120
 121    /* Error Severity */
 122    build_append_int_noprefix(table, error_severity, 4);
 123    /* Revision */
 124    build_append_int_noprefix(table, 0x300, 2);
 125    /* Validation Bits */
 126    build_append_int_noprefix(table, validation_bits, 1);
 127    /* Flags */
 128    build_append_int_noprefix(table, flags, 1);
 129    /* Error Data Length */
 130    build_append_int_noprefix(table, error_data_length, 4);
 131
 132    /* FRU Id */
 133    g_array_append_vals(table, fru_id.data, ARRAY_SIZE(fru_id.data));
 134
 135    /* FRU Text */
 136    g_array_append_vals(table, fru_text, sizeof(fru_text));
 137
 138    /* Timestamp */
 139    build_append_int_noprefix(table, time_stamp, 8);
 140}
 141
 142/*
 143 * Generic Error Status Block
 144 * ACPI 6.1: 18.3.2.7.1 Generic Error Data
 145 */
 146static void acpi_ghes_generic_error_status(GArray *table, uint32_t block_status,
 147                uint32_t raw_data_offset, uint32_t raw_data_length,
 148                uint32_t data_length, uint32_t error_severity)
 149{
 150    /* Block Status */
 151    build_append_int_noprefix(table, block_status, 4);
 152    /* Raw Data Offset */
 153    build_append_int_noprefix(table, raw_data_offset, 4);
 154    /* Raw Data Length */
 155    build_append_int_noprefix(table, raw_data_length, 4);
 156    /* Data Length */
 157    build_append_int_noprefix(table, data_length, 4);
 158    /* Error Severity */
 159    build_append_int_noprefix(table, error_severity, 4);
 160}
 161
 162/* UEFI 2.6: N.2.5 Memory Error Section */
 163static void acpi_ghes_build_append_mem_cper(GArray *table,
 164                                            uint64_t error_physical_addr)
 165{
 166    /*
 167     * Memory Error Record
 168     */
 169
 170    /* Validation Bits */
 171    build_append_int_noprefix(table,
 172                              (1ULL << 14) | /* Type Valid */
 173                              (1ULL << 1) /* Physical Address Valid */,
 174                              8);
 175    /* Error Status */
 176    build_append_int_noprefix(table, 0, 8);
 177    /* Physical Address */
 178    build_append_int_noprefix(table, error_physical_addr, 8);
 179    /* Skip all the detailed information normally found in such a record */
 180    build_append_int_noprefix(table, 0, 48);
 181    /* Memory Error Type */
 182    build_append_int_noprefix(table, 0 /* Unknown error */, 1);
 183    /* Skip all the detailed information normally found in such a record */
 184    build_append_int_noprefix(table, 0, 7);
 185}
 186
 187static int acpi_ghes_record_mem_error(uint64_t error_block_address,
 188                                      uint64_t error_physical_addr)
 189{
 190    GArray *block;
 191
 192    /* Memory Error Section Type */
 193    const uint8_t uefi_cper_mem_sec[] =
 194          UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \
 195                  0xED, 0x7C, 0x83, 0xB1);
 196
 197    /* invalid fru id: ACPI 4.0: 17.3.2.6.1 Generic Error Data,
 198     * Table 17-13 Generic Error Data Entry
 199     */
 200    QemuUUID fru_id = {};
 201    uint32_t data_length;
 202
 203    block = g_array_new(false, true /* clear */, 1);
 204
 205    /* This is the length if adding a new generic error data entry*/
 206    data_length = ACPI_GHES_DATA_LENGTH + ACPI_GHES_MEM_CPER_LENGTH;
 207    /*
 208     * It should not run out of the preallocated memory if adding a new generic
 209     * error data entry
 210     */
 211    assert((data_length + ACPI_GHES_GESB_SIZE) <=
 212            ACPI_GHES_MAX_RAW_DATA_LENGTH);
 213
 214    /* Build the new generic error status block header */
 215    acpi_ghes_generic_error_status(block, ACPI_GEBS_UNCORRECTABLE,
 216        0, 0, data_length, ACPI_CPER_SEV_RECOVERABLE);
 217
 218    /* Build this new generic error data entry header */
 219    acpi_ghes_generic_error_data(block, uefi_cper_mem_sec,
 220        ACPI_CPER_SEV_RECOVERABLE, 0, 0,
 221        ACPI_GHES_MEM_CPER_LENGTH, fru_id, 0);
 222
 223    /* Build the memory section CPER for above new generic error data entry */
 224    acpi_ghes_build_append_mem_cper(block, error_physical_addr);
 225
 226    /* Write the generic error data entry into guest memory */
 227    cpu_physical_memory_write(error_block_address, block->data, block->len);
 228
 229    g_array_free(block, true);
 230
 231    return 0;
 232}
 233
 234/*
 235 * Build table for the hardware error fw_cfg blob.
 236 * Initialize "etc/hardware_errors" and "etc/hardware_errors_addr" fw_cfg blobs.
 237 * See docs/specs/acpi_hest_ghes.rst for blobs format.
 238 */
 239void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker)
 240{
 241    int i, error_status_block_offset;
 242
 243    /* Build error_block_address */
 244    for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
 245        build_append_int_noprefix(hardware_errors, 0, sizeof(uint64_t));
 246    }
 247
 248    /* Build read_ack_register */
 249    for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
 250        /*
 251         * Initialize the value of read_ack_register to 1, so GHES can be
 252         * writable after (re)boot.
 253         * ACPI 6.2: 18.3.2.8 Generic Hardware Error Source version 2
 254         * (GHESv2 - Type 10)
 255         */
 256        build_append_int_noprefix(hardware_errors, 1, sizeof(uint64_t));
 257    }
 258
 259    /* Generic Error Status Block offset in the hardware error fw_cfg blob */
 260    error_status_block_offset = hardware_errors->len;
 261
 262    /* Reserve space for Error Status Data Block */
 263    acpi_data_push(hardware_errors,
 264        ACPI_GHES_MAX_RAW_DATA_LENGTH * ACPI_GHES_ERROR_SOURCE_COUNT);
 265
 266    /* Tell guest firmware to place hardware_errors blob into RAM */
 267    bios_linker_loader_alloc(linker, ACPI_GHES_ERRORS_FW_CFG_FILE,
 268                             hardware_errors, sizeof(uint64_t), false);
 269
 270    for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
 271        /*
 272         * Tell firmware to patch error_block_address entries to point to
 273         * corresponding "Generic Error Status Block"
 274         */
 275        bios_linker_loader_add_pointer(linker,
 276            ACPI_GHES_ERRORS_FW_CFG_FILE, sizeof(uint64_t) * i,
 277            sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE,
 278            error_status_block_offset + i * ACPI_GHES_MAX_RAW_DATA_LENGTH);
 279    }
 280
 281    /*
 282     * tell firmware to write hardware_errors GPA into
 283     * hardware_errors_addr fw_cfg, once the former has been initialized.
 284     */
 285    bios_linker_loader_write_pointer(linker, ACPI_GHES_DATA_ADDR_FW_CFG_FILE,
 286        0, sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE, 0);
 287}
 288
 289/* Build Generic Hardware Error Source version 2 (GHESv2) */
 290static void build_ghes_v2(GArray *table_data, int source_id, BIOSLinker *linker)
 291{
 292    uint64_t address_offset;
 293    /*
 294     * Type:
 295     * Generic Hardware Error Source version 2(GHESv2 - Type 10)
 296     */
 297    build_append_int_noprefix(table_data, ACPI_GHES_SOURCE_GENERIC_ERROR_V2, 2);
 298    /* Source Id */
 299    build_append_int_noprefix(table_data, source_id, 2);
 300    /* Related Source Id */
 301    build_append_int_noprefix(table_data, 0xffff, 2);
 302    /* Flags */
 303    build_append_int_noprefix(table_data, 0, 1);
 304    /* Enabled */
 305    build_append_int_noprefix(table_data, 1, 1);
 306
 307    /* Number of Records To Pre-allocate */
 308    build_append_int_noprefix(table_data, 1, 4);
 309    /* Max Sections Per Record */
 310    build_append_int_noprefix(table_data, 1, 4);
 311    /* Max Raw Data Length */
 312    build_append_int_noprefix(table_data, ACPI_GHES_MAX_RAW_DATA_LENGTH, 4);
 313
 314    address_offset = table_data->len;
 315    /* Error Status Address */
 316    build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 0x40, 0,
 317                     4 /* QWord access */, 0);
 318    bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE,
 319        address_offset + GAS_ADDR_OFFSET, sizeof(uint64_t),
 320        ACPI_GHES_ERRORS_FW_CFG_FILE, source_id * sizeof(uint64_t));
 321
 322    switch (source_id) {
 323    case ACPI_HEST_SRC_ID_SEA:
 324        /*
 325         * Notification Structure
 326         * Now only enable ARMv8 SEA notification type
 327         */
 328        build_ghes_hw_error_notification(table_data, ACPI_GHES_NOTIFY_SEA);
 329        break;
 330    default:
 331        error_report("Not support this error source");
 332        abort();
 333    }
 334
 335    /* Error Status Block Length */
 336    build_append_int_noprefix(table_data, ACPI_GHES_MAX_RAW_DATA_LENGTH, 4);
 337
 338    /*
 339     * Read Ack Register
 340     * ACPI 6.1: 18.3.2.8 Generic Hardware Error Source
 341     * version 2 (GHESv2 - Type 10)
 342     */
 343    address_offset = table_data->len;
 344    build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 0x40, 0,
 345                     4 /* QWord access */, 0);
 346    bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE,
 347        address_offset + GAS_ADDR_OFFSET,
 348        sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE,
 349        (ACPI_GHES_ERROR_SOURCE_COUNT + source_id) * sizeof(uint64_t));
 350
 351    /*
 352     * Read Ack Preserve field
 353     * We only provide the first bit in Read Ack Register to OSPM to write
 354     * while the other bits are preserved.
 355     */
 356    build_append_int_noprefix(table_data, ~0x1ULL, 8);
 357    /* Read Ack Write */
 358    build_append_int_noprefix(table_data, 0x1, 8);
 359}
 360
 361/* Build Hardware Error Source Table */
 362void acpi_build_hest(GArray *table_data, BIOSLinker *linker,
 363                     const char *oem_id, const char *oem_table_id)
 364{
 365    AcpiTable table = { .sig = "HEST", .rev = 1,
 366                        .oem_id = oem_id, .oem_table_id = oem_table_id };
 367
 368    acpi_table_begin(&table, table_data);
 369
 370    /* Error Source Count */
 371    build_append_int_noprefix(table_data, ACPI_GHES_ERROR_SOURCE_COUNT, 4);
 372    build_ghes_v2(table_data, ACPI_HEST_SRC_ID_SEA, linker);
 373
 374    acpi_table_end(linker, &table);
 375}
 376
 377void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
 378                          GArray *hardware_error)
 379{
 380    /* Create a read-only fw_cfg file for GHES */
 381    fw_cfg_add_file(s, ACPI_GHES_ERRORS_FW_CFG_FILE, hardware_error->data,
 382                    hardware_error->len);
 383
 384    /* Create a read-write fw_cfg file for Address */
 385    fw_cfg_add_file_callback(s, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL,
 386        NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false);
 387
 388    ags->present = true;
 389}
 390
 391int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address)
 392{
 393    uint64_t error_block_addr, read_ack_register_addr, read_ack_register = 0;
 394    uint64_t start_addr;
 395    bool ret = -1;
 396    AcpiGedState *acpi_ged_state;
 397    AcpiGhesState *ags;
 398
 399    assert(source_id < ACPI_HEST_SRC_ID_RESERVED);
 400
 401    acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
 402                                                       NULL));
 403    g_assert(acpi_ged_state);
 404    ags = &acpi_ged_state->ghes_state;
 405
 406    start_addr = le64_to_cpu(ags->ghes_addr_le);
 407
 408    if (physical_address) {
 409
 410        if (source_id < ACPI_HEST_SRC_ID_RESERVED) {
 411            start_addr += source_id * sizeof(uint64_t);
 412        }
 413
 414        cpu_physical_memory_read(start_addr, &error_block_addr,
 415                                 sizeof(error_block_addr));
 416
 417        error_block_addr = le64_to_cpu(error_block_addr);
 418
 419        read_ack_register_addr = start_addr +
 420            ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t);
 421
 422        cpu_physical_memory_read(read_ack_register_addr,
 423                                 &read_ack_register, sizeof(read_ack_register));
 424
 425        /* zero means OSPM does not acknowledge the error */
 426        if (!read_ack_register) {
 427            error_report("OSPM does not acknowledge previous error,"
 428                " so can not record CPER for current error anymore");
 429        } else if (error_block_addr) {
 430            read_ack_register = cpu_to_le64(0);
 431            /*
 432             * Clear the Read Ack Register, OSPM will write it to 1 when
 433             * it acknowledges this error.
 434             */
 435            cpu_physical_memory_write(read_ack_register_addr,
 436                &read_ack_register, sizeof(uint64_t));
 437
 438            ret = acpi_ghes_record_mem_error(error_block_addr,
 439                                             physical_address);
 440        } else
 441            error_report("can not find Generic Error Status Block");
 442    }
 443
 444    return ret;
 445}
 446
 447bool acpi_ghes_present(void)
 448{
 449    AcpiGedState *acpi_ged_state;
 450    AcpiGhesState *ags;
 451
 452    acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
 453                                                       NULL));
 454
 455    if (!acpi_ged_state) {
 456        return false;
 457    }
 458    ags = &acpi_ged_state->ghes_state;
 459    return ags->present;
 460}
 461