linux/drivers/acpi/apei/cper.c
<<
>>
Prefs
   1/*
   2 * UEFI Common Platform Error Record (CPER) support
   3 *
   4 * Copyright (C) 2010, Intel Corp.
   5 *      Author: Huang Ying <ying.huang@intel.com>
   6 *
   7 * CPER is the format used to describe platform hardware error by
   8 * various APEI tables, such as ERST, BERT and HEST etc.
   9 *
  10 * For more information about CPER, please refer to Appendix N of UEFI
  11 * Specification version 2.3.
  12 *
  13 * This program is free software; you can redistribute it and/or
  14 * modify it under the terms of the GNU General Public License version
  15 * 2 as published by the Free Software Foundation.
  16 *
  17 * This program is distributed in the hope that it will be useful,
  18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 * GNU General Public License for more details.
  21 *
  22 * You should have received a copy of the GNU General Public License
  23 * along with this program; if not, write to the Free Software
  24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  25 */
  26
  27#include <linux/kernel.h>
  28#include <linux/module.h>
  29#include <linux/time.h>
  30#include <linux/cper.h>
  31#include <linux/acpi.h>
  32#include <linux/aer.h>
  33
  34/*
  35 * CPER record ID need to be unique even after reboot, because record
  36 * ID is used as index for ERST storage, while CPER records from
  37 * multiple boot may co-exist in ERST.
  38 */
  39u64 cper_next_record_id(void)
  40{
  41        static atomic64_t seq;
  42
  43        if (!atomic64_read(&seq))
  44                atomic64_set(&seq, ((u64)get_seconds()) << 32);
  45
  46        return atomic64_inc_return(&seq);
  47}
  48EXPORT_SYMBOL_GPL(cper_next_record_id);
  49
  50static const char *cper_severity_strs[] = {
  51        "recoverable",
  52        "fatal",
  53        "corrected",
  54        "info",
  55};
  56
  57static const char *cper_severity_str(unsigned int severity)
  58{
  59        return severity < ARRAY_SIZE(cper_severity_strs) ?
  60                cper_severity_strs[severity] : "unknown";
  61}
  62
  63/*
  64 * cper_print_bits - print strings for set bits
  65 * @pfx: prefix for each line, including log level and prefix string
  66 * @bits: bit mask
  67 * @strs: string array, indexed by bit position
  68 * @strs_size: size of the string array: @strs
  69 *
  70 * For each set bit in @bits, print the corresponding string in @strs.
  71 * If the output length is longer than 80, multiple line will be
  72 * printed, with @pfx is printed at the beginning of each line.
  73 */
  74void cper_print_bits(const char *pfx, unsigned int bits,
  75                     const char *strs[], unsigned int strs_size)
  76{
  77        int i, len = 0;
  78        const char *str;
  79        char buf[84];
  80
  81        for (i = 0; i < strs_size; i++) {
  82                if (!(bits & (1U << i)))
  83                        continue;
  84                str = strs[i];
  85                if (!str)
  86                        continue;
  87                if (len && len + strlen(str) + 2 > 80) {
  88                        printk("%s\n", buf);
  89                        len = 0;
  90                }
  91                if (!len)
  92                        len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
  93                else
  94                        len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
  95        }
  96        if (len)
  97                printk("%s\n", buf);
  98}
  99
 100static const char *cper_proc_type_strs[] = {
 101        "IA32/X64",
 102        "IA64",
 103};
 104
 105static const char *cper_proc_isa_strs[] = {
 106        "IA32",
 107        "IA64",
 108        "X64",
 109};
 110
 111static const char *cper_proc_error_type_strs[] = {
 112        "cache error",
 113        "TLB error",
 114        "bus error",
 115        "micro-architectural error",
 116};
 117
 118static const char *cper_proc_op_strs[] = {
 119        "unknown or generic",
 120        "data read",
 121        "data write",
 122        "instruction execution",
 123};
 124
 125static const char *cper_proc_flag_strs[] = {
 126        "restartable",
 127        "precise IP",
 128        "overflow",
 129        "corrected",
 130};
 131
 132static void cper_print_proc_generic(const char *pfx,
 133                                    const struct cper_sec_proc_generic *proc)
 134{
 135        if (proc->validation_bits & CPER_PROC_VALID_TYPE)
 136                printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
 137                       proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
 138                       cper_proc_type_strs[proc->proc_type] : "unknown");
 139        if (proc->validation_bits & CPER_PROC_VALID_ISA)
 140                printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
 141                       proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
 142                       cper_proc_isa_strs[proc->proc_isa] : "unknown");
 143        if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
 144                printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
 145                cper_print_bits(pfx, proc->proc_error_type,
 146                                cper_proc_error_type_strs,
 147                                ARRAY_SIZE(cper_proc_error_type_strs));
 148        }
 149        if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
 150                printk("%s""operation: %d, %s\n", pfx, proc->operation,
 151                       proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
 152                       cper_proc_op_strs[proc->operation] : "unknown");
 153        if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
 154                printk("%s""flags: 0x%02x\n", pfx, proc->flags);
 155                cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
 156                                ARRAY_SIZE(cper_proc_flag_strs));
 157        }
 158        if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
 159                printk("%s""level: %d\n", pfx, proc->level);
 160        if (proc->validation_bits & CPER_PROC_VALID_VERSION)
 161                printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
 162        if (proc->validation_bits & CPER_PROC_VALID_ID)
 163                printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
 164        if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
 165                printk("%s""target_address: 0x%016llx\n",
 166                       pfx, proc->target_addr);
 167        if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
 168                printk("%s""requestor_id: 0x%016llx\n",
 169                       pfx, proc->requestor_id);
 170        if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
 171                printk("%s""responder_id: 0x%016llx\n",
 172                       pfx, proc->responder_id);
 173        if (proc->validation_bits & CPER_PROC_VALID_IP)
 174                printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
 175}
 176
 177static const char *cper_mem_err_type_strs[] = {
 178        "unknown",
 179        "no error",
 180        "single-bit ECC",
 181        "multi-bit ECC",
 182        "single-symbol chipkill ECC",
 183        "multi-symbol chipkill ECC",
 184        "master abort",
 185        "target abort",
 186        "parity error",
 187        "watchdog timeout",
 188        "invalid address",
 189        "mirror Broken",
 190        "memory sparing",
 191        "scrub corrected error",
 192        "scrub uncorrected error",
 193};
 194
 195static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
 196{
 197        if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
 198                printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
 199        if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)
 200                printk("%s""physical_address: 0x%016llx\n",
 201                       pfx, mem->physical_addr);
 202        if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK)
 203                printk("%s""physical_address_mask: 0x%016llx\n",
 204                       pfx, mem->physical_addr_mask);
 205        if (mem->validation_bits & CPER_MEM_VALID_NODE)
 206                printk("%s""node: %d\n", pfx, mem->node);
 207        if (mem->validation_bits & CPER_MEM_VALID_CARD)
 208                printk("%s""card: %d\n", pfx, mem->card);
 209        if (mem->validation_bits & CPER_MEM_VALID_MODULE)
 210                printk("%s""module: %d\n", pfx, mem->module);
 211        if (mem->validation_bits & CPER_MEM_VALID_BANK)
 212                printk("%s""bank: %d\n", pfx, mem->bank);
 213        if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
 214                printk("%s""device: %d\n", pfx, mem->device);
 215        if (mem->validation_bits & CPER_MEM_VALID_ROW)
 216                printk("%s""row: %d\n", pfx, mem->row);
 217        if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
 218                printk("%s""column: %d\n", pfx, mem->column);
 219        if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
 220                printk("%s""bit_position: %d\n", pfx, mem->bit_pos);
 221        if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
 222                printk("%s""requestor_id: 0x%016llx\n", pfx, mem->requestor_id);
 223        if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
 224                printk("%s""responder_id: 0x%016llx\n", pfx, mem->responder_id);
 225        if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
 226                printk("%s""target_id: 0x%016llx\n", pfx, mem->target_id);
 227        if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
 228                u8 etype = mem->error_type;
 229                printk("%s""error_type: %d, %s\n", pfx, etype,
 230                       etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
 231                       cper_mem_err_type_strs[etype] : "unknown");
 232        }
 233}
 234
 235static const char *cper_pcie_port_type_strs[] = {
 236        "PCIe end point",
 237        "legacy PCI end point",
 238        "unknown",
 239        "unknown",
 240        "root port",
 241        "upstream switch port",
 242        "downstream switch port",
 243        "PCIe to PCI/PCI-X bridge",
 244        "PCI/PCI-X to PCIe bridge",
 245        "root complex integrated endpoint device",
 246        "root complex event collector",
 247};
 248
 249static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
 250                            const struct acpi_hest_generic_data *gdata)
 251{
 252        if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
 253                printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
 254                       pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
 255                       cper_pcie_port_type_strs[pcie->port_type] : "unknown");
 256        if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
 257                printk("%s""version: %d.%d\n", pfx,
 258                       pcie->version.major, pcie->version.minor);
 259        if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
 260                printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
 261                       pcie->command, pcie->status);
 262        if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
 263                const __u8 *p;
 264                printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
 265                       pcie->device_id.segment, pcie->device_id.bus,
 266                       pcie->device_id.device, pcie->device_id.function);
 267                printk("%s""slot: %d\n", pfx,
 268                       pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
 269                printk("%s""secondary_bus: 0x%02x\n", pfx,
 270                       pcie->device_id.secondary_bus);
 271                printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
 272                       pcie->device_id.vendor_id, pcie->device_id.device_id);
 273                p = pcie->device_id.class_code;
 274                printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
 275        }
 276        if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
 277                printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
 278                       pcie->serial_number.lower, pcie->serial_number.upper);
 279        if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
 280                printk(
 281        "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
 282        pfx, pcie->bridge.secondary_status, pcie->bridge.control);
 283#ifdef CONFIG_ACPI_APEI_PCIEAER
 284        if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) {
 285                struct aer_capability_regs *aer_regs = (void *)pcie->aer_info;
 286                cper_print_aer(pfx, gdata->error_severity, aer_regs);
 287        }
 288#endif
 289}
 290
 291static const char *apei_estatus_section_flag_strs[] = {
 292        "primary",
 293        "containment warning",
 294        "reset",
 295        "threshold exceeded",
 296        "resource not accessible",
 297        "latent error",
 298};
 299
 300static void apei_estatus_print_section(
 301        const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
 302{
 303        uuid_le *sec_type = (uuid_le *)gdata->section_type;
 304        __u16 severity;
 305
 306        severity = gdata->error_severity;
 307        printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity,
 308               cper_severity_str(severity));
 309        printk("%s""flags: 0x%02x\n", pfx, gdata->flags);
 310        cper_print_bits(pfx, gdata->flags, apei_estatus_section_flag_strs,
 311                        ARRAY_SIZE(apei_estatus_section_flag_strs));
 312        if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
 313                printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
 314        if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
 315                printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
 316
 317        if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
 318                struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
 319                printk("%s""section_type: general processor error\n", pfx);
 320                if (gdata->error_data_length >= sizeof(*proc_err))
 321                        cper_print_proc_generic(pfx, proc_err);
 322                else
 323                        goto err_section_too_small;
 324        } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
 325                struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
 326                printk("%s""section_type: memory error\n", pfx);
 327                if (gdata->error_data_length >= sizeof(*mem_err))
 328                        cper_print_mem(pfx, mem_err);
 329                else
 330                        goto err_section_too_small;
 331        } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
 332                struct cper_sec_pcie *pcie = (void *)(gdata + 1);
 333                printk("%s""section_type: PCIe error\n", pfx);
 334                if (gdata->error_data_length >= sizeof(*pcie))
 335                        cper_print_pcie(pfx, pcie, gdata);
 336                else
 337                        goto err_section_too_small;
 338        } else
 339                printk("%s""section type: unknown, %pUl\n", pfx, sec_type);
 340
 341        return;
 342
 343err_section_too_small:
 344        pr_err(FW_WARN "error section length is too small\n");
 345}
 346
 347void apei_estatus_print(const char *pfx,
 348                        const struct acpi_hest_generic_status *estatus)
 349{
 350        struct acpi_hest_generic_data *gdata;
 351        unsigned int data_len, gedata_len;
 352        int sec_no = 0;
 353        __u16 severity;
 354
 355        printk("%s""APEI generic hardware error status\n", pfx);
 356        severity = estatus->error_severity;
 357        printk("%s""severity: %d, %s\n", pfx, severity,
 358               cper_severity_str(severity));
 359        data_len = estatus->data_length;
 360        gdata = (struct acpi_hest_generic_data *)(estatus + 1);
 361        while (data_len > sizeof(*gdata)) {
 362                gedata_len = gdata->error_data_length;
 363                apei_estatus_print_section(pfx, gdata, sec_no);
 364                data_len -= gedata_len + sizeof(*gdata);
 365                sec_no++;
 366        }
 367}
 368EXPORT_SYMBOL_GPL(apei_estatus_print);
 369
 370int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus)
 371{
 372        if (estatus->data_length &&
 373            estatus->data_length < sizeof(struct acpi_hest_generic_data))
 374                return -EINVAL;
 375        if (estatus->raw_data_length &&
 376            estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
 377                return -EINVAL;
 378
 379        return 0;
 380}
 381EXPORT_SYMBOL_GPL(apei_estatus_check_header);
 382
 383int apei_estatus_check(const struct acpi_hest_generic_status *estatus)
 384{
 385        struct acpi_hest_generic_data *gdata;
 386        unsigned int data_len, gedata_len;
 387        int rc;
 388
 389        rc = apei_estatus_check_header(estatus);
 390        if (rc)
 391                return rc;
 392        data_len = estatus->data_length;
 393        gdata = (struct acpi_hest_generic_data *)(estatus + 1);
 394        while (data_len > sizeof(*gdata)) {
 395                gedata_len = gdata->error_data_length;
 396                if (gedata_len > data_len - sizeof(*gdata))
 397                        return -EINVAL;
 398                data_len -= gedata_len + sizeof(*gdata);
 399        }
 400        if (data_len)
 401                return -EINVAL;
 402
 403        return 0;
 404}
 405EXPORT_SYMBOL_GPL(apei_estatus_check);
 406