linux/drivers/firmware/efi/cper.c
<<
>>
Prefs
   1/*
   2 * UEFI Common Platform Error Record (CPER) support
   3 *
   4 * Copyright (C) 2010, Intel Corp.
   5 *      Author: Huang Ying <ying.huang@intel.com>
   6 *
   7 * CPER is the format used to describe platform hardware error by
   8 * various tables, such as ERST, BERT and HEST etc.
   9 *
  10 * For more information about CPER, please refer to Appendix N of UEFI
  11 * Specification version 2.4.
  12 *
  13 * This program is free software; you can redistribute it and/or
  14 * modify it under the terms of the GNU General Public License version
  15 * 2 as published by the Free Software Foundation.
  16 *
  17 * This program is distributed in the hope that it will be useful,
  18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 * GNU General Public License for more details.
  21 *
  22 * You should have received a copy of the GNU General Public License
  23 * along with this program; if not, write to the Free Software
  24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  25 */
  26
  27#include <linux/kernel.h>
  28#include <linux/module.h>
  29#include <linux/time.h>
  30#include <linux/cper.h>
  31#include <linux/dmi.h>
  32#include <linux/acpi.h>
  33#include <linux/pci.h>
  34#include <linux/aer.h>
  35#include <linux/printk.h>
  36#include <linux/bcd.h>
  37#include <acpi/ghes.h>
  38#include <ras/ras_event.h>
  39
/* Indentation unit appended to a prefix string for each nested level of output. */
   40#define INDENT_SP       " "
   41
/*
 * File-scope scratch buffer handed to cper_mem_err_location() and
 * cper_dimm_err_location() as the string they format into.
 * NOTE(review): it is shared by every caller in this file and no locking
 * is visible here - confirm that callers are serialized.
 */
   42static char rcd_decode_str[CPER_REC_LEN];
  43
  44/*
  45 * CPER record ID need to be unique even after reboot, because record
  46 * ID is used as index for ERST storage, while CPER records from
  47 * multiple boot may co-exist in ERST.
  48 */
  49u64 cper_next_record_id(void)
  50{
  51        static atomic64_t seq;
  52
  53        if (!atomic64_read(&seq))
  54                atomic64_set(&seq, ((u64)get_seconds()) << 32);
  55
  56        return atomic64_inc_return(&seq);
  57}
  58EXPORT_SYMBOL_GPL(cper_next_record_id);
  59
  60static const char * const severity_strs[] = {
  61        "recoverable",
  62        "fatal",
  63        "corrected",
  64        "info",
  65};
  66
  67const char *cper_severity_str(unsigned int severity)
  68{
  69        return severity < ARRAY_SIZE(severity_strs) ?
  70                severity_strs[severity] : "unknown";
  71}
  72EXPORT_SYMBOL_GPL(cper_severity_str);
  73
  74/*
  75 * cper_print_bits - print strings for set bits
  76 * @pfx: prefix for each line, including log level and prefix string
  77 * @bits: bit mask
  78 * @strs: string array, indexed by bit position
  79 * @strs_size: size of the string array: @strs
  80 *
  81 * For each set bit in @bits, print the corresponding string in @strs.
  82 * If the output length is longer than 80, multiple line will be
  83 * printed, with @pfx is printed at the beginning of each line.
  84 */
  85void cper_print_bits(const char *pfx, unsigned int bits,
  86                     const char * const strs[], unsigned int strs_size)
  87{
  88        int i, len = 0;
  89        const char *str;
  90        char buf[84];
  91
  92        for (i = 0; i < strs_size; i++) {
  93                if (!(bits & (1U << i)))
  94                        continue;
  95                str = strs[i];
  96                if (!str)
  97                        continue;
  98                if (len && len + strlen(str) + 2 > 80) {
  99                        printk("%s\n", buf);
 100                        len = 0;
 101                }
 102                if (!len)
 103                        len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
 104                else
 105                        len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
 106        }
 107        if (len)
 108                printk("%s\n", buf);
 109}
 110
 111static const char * const proc_type_strs[] = {
 112        "IA32/X64",
 113        "IA64",
 114        "ARM",
 115};
 116
 117static const char * const proc_isa_strs[] = {
 118        "IA32",
 119        "IA64",
 120        "X64",
 121        "ARM A32/T32",
 122        "ARM A64",
 123};
 124
 125const char * const cper_proc_error_type_strs[] = {
 126        "cache error",
 127        "TLB error",
 128        "bus error",
 129        "micro-architectural error",
 130};
 131
 132static const char * const proc_op_strs[] = {
 133        "unknown or generic",
 134        "data read",
 135        "data write",
 136        "instruction execution",
 137};
 138
 139static const char * const proc_flag_strs[] = {
 140        "restartable",
 141        "precise IP",
 142        "overflow",
 143        "corrected",
 144};
 145
 146static void cper_print_proc_generic(const char *pfx,
 147                                    const struct cper_sec_proc_generic *proc)
 148{
 149        if (proc->validation_bits & CPER_PROC_VALID_TYPE)
 150                printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
 151                       proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
 152                       proc_type_strs[proc->proc_type] : "unknown");
 153        if (proc->validation_bits & CPER_PROC_VALID_ISA)
 154                printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
 155                       proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
 156                       proc_isa_strs[proc->proc_isa] : "unknown");
 157        if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
 158                printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
 159                cper_print_bits(pfx, proc->proc_error_type,
 160                                cper_proc_error_type_strs,
 161                                ARRAY_SIZE(cper_proc_error_type_strs));
 162        }
 163        if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
 164                printk("%s""operation: %d, %s\n", pfx, proc->operation,
 165                       proc->operation < ARRAY_SIZE(proc_op_strs) ?
 166                       proc_op_strs[proc->operation] : "unknown");
 167        if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
 168                printk("%s""flags: 0x%02x\n", pfx, proc->flags);
 169                cper_print_bits(pfx, proc->flags, proc_flag_strs,
 170                                ARRAY_SIZE(proc_flag_strs));
 171        }
 172        if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
 173                printk("%s""level: %d\n", pfx, proc->level);
 174        if (proc->validation_bits & CPER_PROC_VALID_VERSION)
 175                printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
 176        if (proc->validation_bits & CPER_PROC_VALID_ID)
 177                printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
 178        if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
 179                printk("%s""target_address: 0x%016llx\n",
 180                       pfx, proc->target_addr);
 181        if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
 182                printk("%s""requestor_id: 0x%016llx\n",
 183                       pfx, proc->requestor_id);
 184        if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
 185                printk("%s""responder_id: 0x%016llx\n",
 186                       pfx, proc->responder_id);
 187        if (proc->validation_bits & CPER_PROC_VALID_IP)
 188                printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
 189}
 190
 191static const char * const mem_err_type_strs[] = {
 192        "unknown",
 193        "no error",
 194        "single-bit ECC",
 195        "multi-bit ECC",
 196        "single-symbol chipkill ECC",
 197        "multi-symbol chipkill ECC",
 198        "master abort",
 199        "target abort",
 200        "parity error",
 201        "watchdog timeout",
 202        "invalid address",
 203        "mirror Broken",
 204        "memory sparing",
 205        "scrub corrected error",
 206        "scrub uncorrected error",
 207        "physical memory map-out event",
 208};
 209
 210const char *cper_mem_err_type_str(unsigned int etype)
 211{
 212        return etype < ARRAY_SIZE(mem_err_type_strs) ?
 213                mem_err_type_strs[etype] : "unknown";
 214}
 215EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
 216
 217static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
 218{
 219        u32 len, n;
 220
 221        if (!msg)
 222                return 0;
 223
 224        n = 0;
 225        len = CPER_REC_LEN - 1;
 226        if (mem->validation_bits & CPER_MEM_VALID_NODE)
 227                n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
 228        if (mem->validation_bits & CPER_MEM_VALID_CARD)
 229                n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
 230        if (mem->validation_bits & CPER_MEM_VALID_MODULE)
 231                n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
 232        if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
 233                n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
 234        if (mem->validation_bits & CPER_MEM_VALID_BANK)
 235                n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
 236        if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
 237                n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
 238        if (mem->validation_bits & CPER_MEM_VALID_ROW)
 239                n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
 240        if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
 241                n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
 242        if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
 243                n += scnprintf(msg + n, len - n, "bit_position: %d ",
 244                               mem->bit_pos);
 245        if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
 246                n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
 247                               mem->requestor_id);
 248        if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
 249                n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
 250                               mem->responder_id);
 251        if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
 252                scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
 253                          mem->target_id);
 254
 255        msg[n] = '\0';
 256        return n;
 257}
 258
 259static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
 260{
 261        u32 len, n;
 262        const char *bank = NULL, *device = NULL;
 263
 264        if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
 265                return 0;
 266
 267        n = 0;
 268        len = CPER_REC_LEN - 1;
 269        dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
 270        if (bank && device)
 271                n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
 272        else
 273                n = snprintf(msg, len,
 274                             "DIMM location: not present. DMI handle: 0x%.4x ",
 275                             mem->mem_dev_handle);
 276
 277        msg[n] = '\0';
 278        return n;
 279}
 280
 281void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
 282                       struct cper_mem_err_compact *cmem)
 283{
 284        cmem->validation_bits = mem->validation_bits;
 285        cmem->node = mem->node;
 286        cmem->card = mem->card;
 287        cmem->module = mem->module;
 288        cmem->bank = mem->bank;
 289        cmem->device = mem->device;
 290        cmem->row = mem->row;
 291        cmem->column = mem->column;
 292        cmem->bit_pos = mem->bit_pos;
 293        cmem->requestor_id = mem->requestor_id;
 294        cmem->responder_id = mem->responder_id;
 295        cmem->target_id = mem->target_id;
 296        cmem->rank = mem->rank;
 297        cmem->mem_array_handle = mem->mem_array_handle;
 298        cmem->mem_dev_handle = mem->mem_dev_handle;
 299}
 300
 301const char *cper_mem_err_unpack(struct trace_seq *p,
 302                                struct cper_mem_err_compact *cmem)
 303{
 304        const char *ret = trace_seq_buffer_ptr(p);
 305
 306        if (cper_mem_err_location(cmem, rcd_decode_str))
 307                trace_seq_printf(p, "%s", rcd_decode_str);
 308        if (cper_dimm_err_location(cmem, rcd_decode_str))
 309                trace_seq_printf(p, "%s", rcd_decode_str);
 310        trace_seq_putc(p, '\0');
 311
 312        return ret;
 313}
 314
 315static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
 316        int len)
 317{
 318        struct cper_mem_err_compact cmem;
 319
 320        /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
 321        if (len == sizeof(struct cper_sec_mem_err_old) &&
 322            (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) {
 323                pr_err(FW_WARN "valid bits set for fields beyond structure\n");
 324                return;
 325        }
 326        if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
 327                printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
 328        if (mem->validation_bits & CPER_MEM_VALID_PA)
 329                printk("%s""physical_address: 0x%016llx\n",
 330                       pfx, mem->physical_addr);
 331        if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
 332                printk("%s""physical_address_mask: 0x%016llx\n",
 333                       pfx, mem->physical_addr_mask);
 334        cper_mem_err_pack(mem, &cmem);
 335        if (cper_mem_err_location(&cmem, rcd_decode_str))
 336                printk("%s%s\n", pfx, rcd_decode_str);
 337        if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
 338                u8 etype = mem->error_type;
 339                printk("%s""error_type: %d, %s\n", pfx, etype,
 340                       cper_mem_err_type_str(etype));
 341        }
 342        if (cper_dimm_err_location(&cmem, rcd_decode_str))
 343                printk("%s%s\n", pfx, rcd_decode_str);
 344}
 345
 346static const char * const pcie_port_type_strs[] = {
 347        "PCIe end point",
 348        "legacy PCI end point",
 349        "unknown",
 350        "unknown",
 351        "root port",
 352        "upstream switch port",
 353        "downstream switch port",
 354        "PCIe to PCI/PCI-X bridge",
 355        "PCI/PCI-X to PCIe bridge",
 356        "root complex integrated endpoint device",
 357        "root complex event collector",
 358};
 359
 360static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
 361                            const struct acpi_hest_generic_data *gdata)
 362{
 363        if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
 364                printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
 365                       pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
 366                       pcie_port_type_strs[pcie->port_type] : "unknown");
 367        if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
 368                printk("%s""version: %d.%d\n", pfx,
 369                       pcie->version.major, pcie->version.minor);
 370        if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
 371                printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
 372                       pcie->command, pcie->status);
 373        if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
 374                const __u8 *p;
 375                printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
 376                       pcie->device_id.segment, pcie->device_id.bus,
 377                       pcie->device_id.device, pcie->device_id.function);
 378                printk("%s""slot: %d\n", pfx,
 379                       pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
 380                printk("%s""secondary_bus: 0x%02x\n", pfx,
 381                       pcie->device_id.secondary_bus);
 382                printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
 383                       pcie->device_id.vendor_id, pcie->device_id.device_id);
 384                p = pcie->device_id.class_code;
 385                printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
 386        }
 387        if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
 388                printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
 389                       pcie->serial_number.lower, pcie->serial_number.upper);
 390        if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
 391                printk(
 392        "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
 393        pfx, pcie->bridge.secondary_status, pcie->bridge.control);
 394}
 395
 396static void cper_print_tstamp(const char *pfx,
 397                                   struct acpi_hest_generic_data_v300 *gdata)
 398{
 399        __u8 hour, min, sec, day, mon, year, century, *timestamp;
 400
 401        if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
 402                timestamp = (__u8 *)&(gdata->time_stamp);
 403                sec       = bcd2bin(timestamp[0]);
 404                min       = bcd2bin(timestamp[1]);
 405                hour      = bcd2bin(timestamp[2]);
 406                day       = bcd2bin(timestamp[4]);
 407                mon       = bcd2bin(timestamp[5]);
 408                year      = bcd2bin(timestamp[6]);
 409                century   = bcd2bin(timestamp[7]);
 410
 411                printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx,
 412                       (timestamp[3] & 0x1 ? "precise " : "imprecise "),
 413                       century, year, mon, day, hour, min, sec);
 414        }
 415}
 416
 417static void
 418cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata,
 419                           int sec_no)
 420{
 421        guid_t *sec_type = (guid_t *)gdata->section_type;
 422        __u16 severity;
 423        char newpfx[64];
 424
 425        if (acpi_hest_get_version(gdata) >= 3)
 426                cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata);
 427
 428        severity = gdata->error_severity;
 429        printk("%s""Error %d, type: %s\n", pfx, sec_no,
 430               cper_severity_str(severity));
 431        if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
 432                printk("%s""fru_id: %pUl\n", pfx, gdata->fru_id);
 433        if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
 434                printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
 435
 436        snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
 437        if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
 438                struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
 439
 440                printk("%s""section_type: general processor error\n", newpfx);
 441                if (gdata->error_data_length >= sizeof(*proc_err))
 442                        cper_print_proc_generic(newpfx, proc_err);
 443                else
 444                        goto err_section_too_small;
 445        } else if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
 446                struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
 447
 448                printk("%s""section_type: memory error\n", newpfx);
 449                if (gdata->error_data_length >=
 450                    sizeof(struct cper_sec_mem_err_old))
 451                        cper_print_mem(newpfx, mem_err,
 452                                       gdata->error_data_length);
 453                else
 454                        goto err_section_too_small;
 455        } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
 456                struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata);
 457
 458                printk("%s""section_type: PCIe error\n", newpfx);
 459                if (gdata->error_data_length >= sizeof(*pcie))
 460                        cper_print_pcie(newpfx, pcie, gdata);
 461                else
 462                        goto err_section_too_small;
 463#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
 464        } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_ARM)) {
 465                struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata);
 466
 467                printk("%ssection_type: ARM processor error\n", newpfx);
 468                if (gdata->error_data_length >= sizeof(*arm_err))
 469                        cper_print_proc_arm(newpfx, arm_err);
 470                else
 471                        goto err_section_too_small;
 472#endif
 473        } else {
 474                const void *err = acpi_hest_get_payload(gdata);
 475
 476                printk("%ssection type: unknown, %pUl\n", newpfx, sec_type);
 477                printk("%ssection length: %#x\n", newpfx,
 478                       gdata->error_data_length);
 479                print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err,
 480                               gdata->error_data_length, true);
 481        }
 482
 483        return;
 484
 485err_section_too_small:
 486        pr_err(FW_WARN "error section length is too small\n");
 487}
 488
 489void cper_estatus_print(const char *pfx,
 490                        const struct acpi_hest_generic_status *estatus)
 491{
 492        struct acpi_hest_generic_data *gdata;
 493        int sec_no = 0;
 494        char newpfx[64];
 495        __u16 severity;
 496
 497        severity = estatus->error_severity;
 498        if (severity == CPER_SEV_CORRECTED)
 499                printk("%s%s\n", pfx,
 500                       "It has been corrected by h/w "
 501                       "and requires no further action");
 502        printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
 503        snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
 504
 505        apei_estatus_for_each_section(estatus, gdata) {
 506                cper_estatus_print_section(newpfx, gdata, sec_no);
 507                sec_no++;
 508        }
 509}
 510EXPORT_SYMBOL_GPL(cper_estatus_print);
 511
 512int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus)
 513{
 514        if (estatus->data_length &&
 515            estatus->data_length < sizeof(struct acpi_hest_generic_data))
 516                return -EINVAL;
 517        if (estatus->raw_data_length &&
 518            estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
 519                return -EINVAL;
 520
 521        return 0;
 522}
 523EXPORT_SYMBOL_GPL(cper_estatus_check_header);
 524
 525int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
 526{
 527        struct acpi_hest_generic_data *gdata;
 528        unsigned int data_len, gedata_len;
 529        int rc;
 530
 531        rc = cper_estatus_check_header(estatus);
 532        if (rc)
 533                return rc;
 534        data_len = estatus->data_length;
 535
 536        apei_estatus_for_each_section(estatus, gdata) {
 537                gedata_len = acpi_hest_get_error_length(gdata);
 538                if (gedata_len > data_len - acpi_hest_get_size(gdata))
 539                        return -EINVAL;
 540                data_len -= acpi_hest_get_record_size(gdata);
 541        }
 542        if (data_len)
 543                return -EINVAL;
 544
 545        return 0;
 546}
 547EXPORT_SYMBOL_GPL(cper_estatus_check);
 548