linux/drivers/acpi/apei/cper.c
<<
>>
Prefs
   1/*
   2 * UEFI Common Platform Error Record (CPER) support
   3 *
   4 * Copyright (C) 2010, Intel Corp.
   5 *      Author: Huang Ying <ying.huang@intel.com>
   6 *
   7 * CPER is the format used to describe platform hardware error by
   8 * various APEI tables, such as ERST, BERT and HEST etc.
   9 *
  10 * For more information about CPER, please refer to Appendix N of UEFI
  11 * Specification version 2.3.
  12 *
  13 * This program is free software; you can redistribute it and/or
  14 * modify it under the terms of the GNU General Public License version
  15 * 2 as published by the Free Software Foundation.
  16 *
  17 * This program is distributed in the hope that it will be useful,
  18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 * GNU General Public License for more details.
  21 *
  22 * You should have received a copy of the GNU General Public License
  23 * along with this program; if not, write to the Free Software
  24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  25 */
  26
  27#include <linux/kernel.h>
  28#include <linux/module.h>
  29#include <linux/time.h>
  30#include <linux/cper.h>
  31#include <linux/acpi.h>
  32#include <linux/pci.h>
  33#include <linux/aer.h>
  34
  35/*
  36 * CPER record ID need to be unique even after reboot, because record
  37 * ID is used as index for ERST storage, while CPER records from
  38 * multiple boot may co-exist in ERST.
  39 */
  40u64 cper_next_record_id(void)
  41{
  42        static atomic64_t seq;
  43
  44        if (!atomic64_read(&seq))
  45                atomic64_set(&seq, ((u64)get_seconds()) << 32);
  46
  47        return atomic64_inc_return(&seq);
  48}
  49EXPORT_SYMBOL_GPL(cper_next_record_id);
  50
  51static const char *cper_severity_strs[] = {
  52        "recoverable",
  53        "fatal",
  54        "corrected",
  55        "info",
  56};
  57
  58static const char *cper_severity_str(unsigned int severity)
  59{
  60        return severity < ARRAY_SIZE(cper_severity_strs) ?
  61                cper_severity_strs[severity] : "unknown";
  62}
  63
  64/*
  65 * cper_print_bits - print strings for set bits
  66 * @pfx: prefix for each line, including log level and prefix string
  67 * @bits: bit mask
  68 * @strs: string array, indexed by bit position
  69 * @strs_size: size of the string array: @strs
  70 *
  71 * For each set bit in @bits, print the corresponding string in @strs.
  72 * If the output length is longer than 80, multiple line will be
  73 * printed, with @pfx is printed at the beginning of each line.
  74 */
  75void cper_print_bits(const char *pfx, unsigned int bits,
  76                     const char *strs[], unsigned int strs_size)
  77{
  78        int i, len = 0;
  79        const char *str;
  80        char buf[84];
  81
  82        for (i = 0; i < strs_size; i++) {
  83                if (!(bits & (1U << i)))
  84                        continue;
  85                str = strs[i];
  86                if (!str)
  87                        continue;
  88                if (len && len + strlen(str) + 2 > 80) {
  89                        printk("%s\n", buf);
  90                        len = 0;
  91                }
  92                if (!len)
  93                        len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
  94                else
  95                        len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
  96        }
  97        if (len)
  98                printk("%s\n", buf);
  99}
 100
 101static const char *cper_proc_type_strs[] = {
 102        "IA32/X64",
 103        "IA64",
 104};
 105
 106static const char *cper_proc_isa_strs[] = {
 107        "IA32",
 108        "IA64",
 109        "X64",
 110};
 111
 112static const char *cper_proc_error_type_strs[] = {
 113        "cache error",
 114        "TLB error",
 115        "bus error",
 116        "micro-architectural error",
 117};
 118
 119static const char *cper_proc_op_strs[] = {
 120        "unknown or generic",
 121        "data read",
 122        "data write",
 123        "instruction execution",
 124};
 125
 126static const char *cper_proc_flag_strs[] = {
 127        "restartable",
 128        "precise IP",
 129        "overflow",
 130        "corrected",
 131};
 132
 133static void cper_print_proc_generic(const char *pfx,
 134                                    const struct cper_sec_proc_generic *proc)
 135{
 136        if (proc->validation_bits & CPER_PROC_VALID_TYPE)
 137                printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
 138                       proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
 139                       cper_proc_type_strs[proc->proc_type] : "unknown");
 140        if (proc->validation_bits & CPER_PROC_VALID_ISA)
 141                printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
 142                       proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
 143                       cper_proc_isa_strs[proc->proc_isa] : "unknown");
 144        if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
 145                printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
 146                cper_print_bits(pfx, proc->proc_error_type,
 147                                cper_proc_error_type_strs,
 148                                ARRAY_SIZE(cper_proc_error_type_strs));
 149        }
 150        if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
 151                printk("%s""operation: %d, %s\n", pfx, proc->operation,
 152                       proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
 153                       cper_proc_op_strs[proc->operation] : "unknown");
 154        if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
 155                printk("%s""flags: 0x%02x\n", pfx, proc->flags);
 156                cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
 157                                ARRAY_SIZE(cper_proc_flag_strs));
 158        }
 159        if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
 160                printk("%s""level: %d\n", pfx, proc->level);
 161        if (proc->validation_bits & CPER_PROC_VALID_VERSION)
 162                printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
 163        if (proc->validation_bits & CPER_PROC_VALID_ID)
 164                printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
 165        if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
 166                printk("%s""target_address: 0x%016llx\n",
 167                       pfx, proc->target_addr);
 168        if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
 169                printk("%s""requestor_id: 0x%016llx\n",
 170                       pfx, proc->requestor_id);
 171        if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
 172                printk("%s""responder_id: 0x%016llx\n",
 173                       pfx, proc->responder_id);
 174        if (proc->validation_bits & CPER_PROC_VALID_IP)
 175                printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
 176}
 177
 178static const char *cper_mem_err_type_strs[] = {
 179        "unknown",
 180        "no error",
 181        "single-bit ECC",
 182        "multi-bit ECC",
 183        "single-symbol chipkill ECC",
 184        "multi-symbol chipkill ECC",
 185        "master abort",
 186        "target abort",
 187        "parity error",
 188        "watchdog timeout",
 189        "invalid address",
 190        "mirror Broken",
 191        "memory sparing",
 192        "scrub corrected error",
 193        "scrub uncorrected error",
 194};
 195
 196static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
 197{
 198        if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
 199                printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
 200        if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)
 201                printk("%s""physical_address: 0x%016llx\n",
 202                       pfx, mem->physical_addr);
 203        if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK)
 204                printk("%s""physical_address_mask: 0x%016llx\n",
 205                       pfx, mem->physical_addr_mask);
 206        if (mem->validation_bits & CPER_MEM_VALID_NODE)
 207                printk("%s""node: %d\n", pfx, mem->node);
 208        if (mem->validation_bits & CPER_MEM_VALID_CARD)
 209                printk("%s""card: %d\n", pfx, mem->card);
 210        if (mem->validation_bits & CPER_MEM_VALID_MODULE)
 211                printk("%s""module: %d\n", pfx, mem->module);
 212        if (mem->validation_bits & CPER_MEM_VALID_BANK)
 213                printk("%s""bank: %d\n", pfx, mem->bank);
 214        if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
 215                printk("%s""device: %d\n", pfx, mem->device);
 216        if (mem->validation_bits & CPER_MEM_VALID_ROW)
 217                printk("%s""row: %d\n", pfx, mem->row);
 218        if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
 219                printk("%s""column: %d\n", pfx, mem->column);
 220        if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
 221                printk("%s""bit_position: %d\n", pfx, mem->bit_pos);
 222        if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
 223                printk("%s""requestor_id: 0x%016llx\n", pfx, mem->requestor_id);
 224        if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
 225                printk("%s""responder_id: 0x%016llx\n", pfx, mem->responder_id);
 226        if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
 227                printk("%s""target_id: 0x%016llx\n", pfx, mem->target_id);
 228        if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
 229                u8 etype = mem->error_type;
 230                printk("%s""error_type: %d, %s\n", pfx, etype,
 231                       etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
 232                       cper_mem_err_type_strs[etype] : "unknown");
 233        }
 234}
 235
 236static const char *cper_pcie_port_type_strs[] = {
 237        "PCIe end point",
 238        "legacy PCI end point",
 239        "unknown",
 240        "unknown",
 241        "root port",
 242        "upstream switch port",
 243        "downstream switch port",
 244        "PCIe to PCI/PCI-X bridge",
 245        "PCI/PCI-X to PCIe bridge",
 246        "root complex integrated endpoint device",
 247        "root complex event collector",
 248};
 249
 250static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
 251                            const struct acpi_hest_generic_data *gdata)
 252{
 253        if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
 254                printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
 255                       pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
 256                       cper_pcie_port_type_strs[pcie->port_type] : "unknown");
 257        if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
 258                printk("%s""version: %d.%d\n", pfx,
 259                       pcie->version.major, pcie->version.minor);
 260        if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
 261                printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
 262                       pcie->command, pcie->status);
 263        if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
 264                const __u8 *p;
 265                printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
 266                       pcie->device_id.segment, pcie->device_id.bus,
 267                       pcie->device_id.device, pcie->device_id.function);
 268                printk("%s""slot: %d\n", pfx,
 269                       pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
 270                printk("%s""secondary_bus: 0x%02x\n", pfx,
 271                       pcie->device_id.secondary_bus);
 272                printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
 273                       pcie->device_id.vendor_id, pcie->device_id.device_id);
 274                p = pcie->device_id.class_code;
 275                printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
 276        }
 277        if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
 278                printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
 279                       pcie->serial_number.lower, pcie->serial_number.upper);
 280        if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
 281                printk(
 282        "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
 283        pfx, pcie->bridge.secondary_status, pcie->bridge.control);
 284}
 285
 286static const char *apei_estatus_section_flag_strs[] = {
 287        "primary",
 288        "containment warning",
 289        "reset",
 290        "threshold exceeded",
 291        "resource not accessible",
 292        "latent error",
 293};
 294
 295static void apei_estatus_print_section(
 296        const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
 297{
 298        uuid_le *sec_type = (uuid_le *)gdata->section_type;
 299        __u16 severity;
 300
 301        severity = gdata->error_severity;
 302        printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity,
 303               cper_severity_str(severity));
 304        printk("%s""flags: 0x%02x\n", pfx, gdata->flags);
 305        cper_print_bits(pfx, gdata->flags, apei_estatus_section_flag_strs,
 306                        ARRAY_SIZE(apei_estatus_section_flag_strs));
 307        if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
 308                printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
 309        if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
 310                printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
 311
 312        if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
 313                struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
 314                printk("%s""section_type: general processor error\n", pfx);
 315                if (gdata->error_data_length >= sizeof(*proc_err))
 316                        cper_print_proc_generic(pfx, proc_err);
 317                else
 318                        goto err_section_too_small;
 319        } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
 320                struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
 321                printk("%s""section_type: memory error\n", pfx);
 322                if (gdata->error_data_length >= sizeof(*mem_err))
 323                        cper_print_mem(pfx, mem_err);
 324                else
 325                        goto err_section_too_small;
 326        } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
 327                struct cper_sec_pcie *pcie = (void *)(gdata + 1);
 328                printk("%s""section_type: PCIe error\n", pfx);
 329                if (gdata->error_data_length >= sizeof(*pcie))
 330                        cper_print_pcie(pfx, pcie, gdata);
 331                else
 332                        goto err_section_too_small;
 333        } else
 334                printk("%s""section type: unknown, %pUl\n", pfx, sec_type);
 335
 336        return;
 337
 338err_section_too_small:
 339        pr_err(FW_WARN "error section length is too small\n");
 340}
 341
 342void apei_estatus_print(const char *pfx,
 343                        const struct acpi_hest_generic_status *estatus)
 344{
 345        struct acpi_hest_generic_data *gdata;
 346        unsigned int data_len, gedata_len;
 347        int sec_no = 0;
 348        __u16 severity;
 349
 350        printk("%s""APEI generic hardware error status\n", pfx);
 351        severity = estatus->error_severity;
 352        printk("%s""severity: %d, %s\n", pfx, severity,
 353               cper_severity_str(severity));
 354        data_len = estatus->data_length;
 355        gdata = (struct acpi_hest_generic_data *)(estatus + 1);
 356        while (data_len > sizeof(*gdata)) {
 357                gedata_len = gdata->error_data_length;
 358                apei_estatus_print_section(pfx, gdata, sec_no);
 359                data_len -= gedata_len + sizeof(*gdata);
 360                gdata = (void *)(gdata + 1) + gedata_len;
 361                sec_no++;
 362        }
 363}
 364EXPORT_SYMBOL_GPL(apei_estatus_print);
 365
 366int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus)
 367{
 368        if (estatus->data_length &&
 369            estatus->data_length < sizeof(struct acpi_hest_generic_data))
 370                return -EINVAL;
 371        if (estatus->raw_data_length &&
 372            estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
 373                return -EINVAL;
 374
 375        return 0;
 376}
 377EXPORT_SYMBOL_GPL(apei_estatus_check_header);
 378
 379int apei_estatus_check(const struct acpi_hest_generic_status *estatus)
 380{
 381        struct acpi_hest_generic_data *gdata;
 382        unsigned int data_len, gedata_len;
 383        int rc;
 384
 385        rc = apei_estatus_check_header(estatus);
 386        if (rc)
 387                return rc;
 388        data_len = estatus->data_length;
 389        gdata = (struct acpi_hest_generic_data *)(estatus + 1);
 390        while (data_len >= sizeof(*gdata)) {
 391                gedata_len = gdata->error_data_length;
 392                if (gedata_len > data_len - sizeof(*gdata))
 393                        return -EINVAL;
 394                data_len -= gedata_len + sizeof(*gdata);
 395                gdata = (void *)(gdata + 1) + gedata_len;
 396        }
 397        if (data_len)
 398                return -EINVAL;
 399
 400        return 0;
 401}
 402EXPORT_SYMBOL_GPL(apei_estatus_check);
 403