linux/drivers/acpi/apei/ghes.c
<<
>>
Prefs
   1/*
   2 * APEI Generic Hardware Error Source support
   3 *
   4 * Generic Hardware Error Source provides a way to report platform
   5 * hardware errors (such as that from chipset). It works in so called
   6 * "Firmware First" mode, that is, hardware errors are reported to
   7 * firmware firstly, then reported to Linux by firmware. This way,
   8 * some non-standard hardware error registers or non-standard hardware
   9 * link can be checked by firmware to produce more hardware error
  10 * information for Linux.
  11 *
  12 * For more information about Generic Hardware Error Source, please
  13 * refer to ACPI Specification version 4.0, section 17.3.2.6
  14 *
  15 * Copyright 2010,2011 Intel Corp.
  16 *   Author: Huang Ying <ying.huang@intel.com>
  17 *
  18 * This program is free software; you can redistribute it and/or
  19 * modify it under the terms of the GNU General Public License version
  20 * 2 as published by the Free Software Foundation;
  21 *
  22 * This program is distributed in the hope that it will be useful,
  23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  25 * GNU General Public License for more details.
  26 */
  27
  28#include <linux/kernel.h>
  29#include <linux/moduleparam.h>
  30#include <linux/init.h>
  31#include <linux/acpi.h>
  32#include <linux/io.h>
  33#include <linux/interrupt.h>
  34#include <linux/timer.h>
  35#include <linux/cper.h>
  36#include <linux/kdebug.h>
  37#include <linux/platform_device.h>
  38#include <linux/mutex.h>
  39#include <linux/ratelimit.h>
  40#include <linux/vmalloc.h>
  41#include <linux/irq_work.h>
  42#include <linux/llist.h>
  43#include <linux/genalloc.h>
  44#include <linux/pci.h>
  45#include <linux/aer.h>
  46#include <linux/nmi.h>
  47#include <linux/sched/clock.h>
  48#include <linux/uuid.h>
  49#include <linux/ras.h>
  50
  51#include <acpi/actbl1.h>
  52#include <acpi/ghes.h>
  53#include <acpi/apei.h>
  54#include <asm/fixmap.h>
  55#include <asm/tlbflush.h>
  56#include <ras/ras_event.h>
  57
  58#include "apei-internal.h"
  59
  60#define GHES_PFX        "GHES: "
  61
  62#define GHES_ESTATUS_MAX_SIZE           65536
  63#define GHES_ESOURCE_PREALLOC_MAX_SIZE  65536
  64
  65#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3
  66
  67/* This is just an estimation for memory pool allocation */
  68#define GHES_ESTATUS_CACHE_AVG_SIZE     512
  69
  70#define GHES_ESTATUS_CACHES_SIZE        4
  71
  72#define GHES_ESTATUS_IN_CACHE_MAX_NSEC  10000000000ULL
   73/* Prevent too many caches from being allocated because of RCU */
  74#define GHES_ESTATUS_CACHE_ALLOCED_MAX  (GHES_ESTATUS_CACHES_SIZE * 3 / 2)
  75
   /*
    * A cache entry is a struct ghes_estatus_cache header immediately
    * followed by the error status block itself.  _LEN gives the total
    * allocation size for a status block of estatus_len bytes; _FROM_CACHE
    * yields a pointer to the status block stored right after the header.
    */
   76#define GHES_ESTATUS_CACHE_LEN(estatus_len)                     \
   77        (sizeof(struct ghes_estatus_cache) + (estatus_len))
   78#define GHES_ESTATUS_FROM_CACHE(estatus_cache)                  \
   79        ((struct acpi_hest_generic_status *)                            \
   80         ((struct ghes_estatus_cache *)(estatus_cache) + 1))
   81
   /*
    * Same layout for queued nodes: a struct ghes_estatus_node header
    * immediately followed by the error status block.
    */
   82#define GHES_ESTATUS_NODE_LEN(estatus_len)                      \
   83        (sizeof(struct ghes_estatus_node) + (estatus_len))
   84#define GHES_ESTATUS_FROM_NODE(estatus_node)                    \
   85        ((struct acpi_hest_generic_status *)                            \
   86         ((struct ghes_estatus_node *)(estatus_node) + 1))
  87
  88static inline bool is_hest_type_generic_v2(struct ghes *ghes)
  89{
  90        return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
  91}
  92
  93/*
  94 * This driver isn't really modular, however for the time being,
  95 * continuing to use module_param is the easiest way to remain
  96 * compatible with existing boot arg use cases.
  97 */
  98bool ghes_disable;
  99module_param_named(disable, ghes_disable, bool, 0);
 100
 101/*
 102 * All error sources notified with HED (Hardware Error Device) share a
 103 * single notifier callback, so they need to be linked and checked one
 104 * by one. This holds true for NMI too.
 105 *
 106 * RCU is used for these lists, so ghes_list_mutex is only used for
 107 * list changing, not for traversing.
 108 */
 109static LIST_HEAD(ghes_hed);
 110static DEFINE_MUTEX(ghes_list_mutex);
 111
 112/*
 113 * Because the memory area used to transfer hardware error information
 114 * from BIOS to Linux can be determined only in NMI, IRQ or timer
 115 * handler, but general ioremap can not be used in atomic context, so
 116 * the fixmap is used instead.
 117 *
 118 * These 2 spinlocks are used to prevent the fixmap entries from being used
 119 * simultaneously.
 120 */
 121static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
 122static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
 123
  /*
   * ghes_estatus_pool is the gen_pool created by ghes_estatus_pool_init();
   * ghes_estatus_pool_size_request accumulates the page-aligned sizes
   * passed to ghes_estatus_pool_expand().
   */
  124static struct gen_pool *ghes_estatus_pool;
  125static unsigned long ghes_estatus_pool_size_request;
  126
  /*
   * Slots of recently reported error status blocks, read with
   * rcu_dereference() and replaced with cmpxchg().
   * ghes_estatus_cache_alloced counts live cache entries and is capped at
   * GHES_ESTATUS_CACHE_ALLOCED_MAX by ghes_estatus_cache_alloc().
   */
  127static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
  128static atomic_t ghes_estatus_cache_alloced;
  129
  /*
   * Value copied into panic_timeout by __ghes_panic() when panic_timeout
   * is zero (presumably seconds -- confirm against panic_timeout's users).
   */
  130static int ghes_panic_timeout __read_mostly = 30;
 131
 132static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
 133{
 134        phys_addr_t paddr;
 135        pgprot_t prot;
 136
 137        paddr = pfn << PAGE_SHIFT;
 138        prot = arch_apei_get_mem_attribute(paddr);
 139        __set_fixmap(FIX_APEI_GHES_NMI, paddr, prot);
 140
 141        return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI);
 142}
 143
 144static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
 145{
 146        phys_addr_t paddr;
 147        pgprot_t prot;
 148
 149        paddr = pfn << PAGE_SHIFT;
 150        prot = arch_apei_get_mem_attribute(paddr);
 151        __set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot);
 152
 153        return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ);
 154}
 155
 156static void ghes_iounmap_nmi(void)
 157{
 158        clear_fixmap(FIX_APEI_GHES_NMI);
 159}
 160
 161static void ghes_iounmap_irq(void)
 162{
 163        clear_fixmap(FIX_APEI_GHES_IRQ);
 164}
 165
 166static int ghes_estatus_pool_init(void)
 167{
 168        ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
 169        if (!ghes_estatus_pool)
 170                return -ENOMEM;
 171        return 0;
 172}
 173
 174static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool,
 175                                              struct gen_pool_chunk *chunk,
 176                                              void *data)
 177{
 178        free_page(chunk->start_addr);
 179}
 180
 181static void ghes_estatus_pool_exit(void)
 182{
 183        gen_pool_for_each_chunk(ghes_estatus_pool,
 184                                ghes_estatus_pool_free_chunk_page, NULL);
 185        gen_pool_destroy(ghes_estatus_pool);
 186}
 187
 188static int ghes_estatus_pool_expand(unsigned long len)
 189{
 190        unsigned long i, pages, size, addr;
 191        int ret;
 192
 193        ghes_estatus_pool_size_request += PAGE_ALIGN(len);
 194        size = gen_pool_size(ghes_estatus_pool);
 195        if (size >= ghes_estatus_pool_size_request)
 196                return 0;
 197        pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE;
 198        for (i = 0; i < pages; i++) {
 199                addr = __get_free_page(GFP_KERNEL);
 200                if (!addr)
 201                        return -ENOMEM;
 202                ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1);
 203                if (ret)
 204                        return ret;
 205        }
 206
 207        return 0;
 208}
 209
 210static int map_gen_v2(struct ghes *ghes)
 211{
 212        return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
 213}
 214
 215static void unmap_gen_v2(struct ghes *ghes)
 216{
 217        apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
 218}
 219
 220static struct ghes *ghes_new(struct acpi_hest_generic *generic)
 221{
 222        struct ghes *ghes;
 223        unsigned int error_block_length;
 224        int rc;
 225
 226        ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
 227        if (!ghes)
 228                return ERR_PTR(-ENOMEM);
 229
 230        ghes->generic = generic;
 231        if (is_hest_type_generic_v2(ghes)) {
 232                rc = map_gen_v2(ghes);
 233                if (rc)
 234                        goto err_free;
 235        }
 236
 237        rc = apei_map_generic_address(&generic->error_status_address);
 238        if (rc)
 239                goto err_unmap_read_ack_addr;
 240        error_block_length = generic->error_block_length;
 241        if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
 242                pr_warning(FW_WARN GHES_PFX
 243                           "Error status block length is too long: %u for "
 244                           "generic hardware error source: %d.\n",
 245                           error_block_length, generic->header.source_id);
 246                error_block_length = GHES_ESTATUS_MAX_SIZE;
 247        }
 248        ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
 249        if (!ghes->estatus) {
 250                rc = -ENOMEM;
 251                goto err_unmap_status_addr;
 252        }
 253
 254        return ghes;
 255
 256err_unmap_status_addr:
 257        apei_unmap_generic_address(&generic->error_status_address);
 258err_unmap_read_ack_addr:
 259        if (is_hest_type_generic_v2(ghes))
 260                unmap_gen_v2(ghes);
 261err_free:
 262        kfree(ghes);
 263        return ERR_PTR(rc);
 264}
 265
 266static void ghes_fini(struct ghes *ghes)
 267{
 268        kfree(ghes->estatus);
 269        apei_unmap_generic_address(&ghes->generic->error_status_address);
 270        if (is_hest_type_generic_v2(ghes))
 271                unmap_gen_v2(ghes);
 272}
 273
 274static inline int ghes_severity(int severity)
 275{
 276        switch (severity) {
 277        case CPER_SEV_INFORMATIONAL:
 278                return GHES_SEV_NO;
 279        case CPER_SEV_CORRECTED:
 280                return GHES_SEV_CORRECTED;
 281        case CPER_SEV_RECOVERABLE:
 282                return GHES_SEV_RECOVERABLE;
 283        case CPER_SEV_FATAL:
 284                return GHES_SEV_PANIC;
 285        default:
 286                /* Unknown, go panic */
 287                return GHES_SEV_PANIC;
 288        }
 289}
 290
 291static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
 292                                  int from_phys)
 293{
 294        void __iomem *vaddr;
 295        unsigned long flags = 0;
 296        int in_nmi = in_nmi();
 297        u64 offset;
 298        u32 trunk;
 299
 300        while (len > 0) {
 301                offset = paddr - (paddr & PAGE_MASK);
 302                if (in_nmi) {
 303                        raw_spin_lock(&ghes_ioremap_lock_nmi);
 304                        vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT);
 305                } else {
 306                        spin_lock_irqsave(&ghes_ioremap_lock_irq, flags);
 307                        vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT);
 308                }
 309                trunk = PAGE_SIZE - offset;
 310                trunk = min(trunk, len);
 311                if (from_phys)
 312                        memcpy_fromio(buffer, vaddr + offset, trunk);
 313                else
 314                        memcpy_toio(vaddr + offset, buffer, trunk);
 315                len -= trunk;
 316                paddr += trunk;
 317                buffer += trunk;
 318                if (in_nmi) {
 319                        ghes_iounmap_nmi();
 320                        raw_spin_unlock(&ghes_ioremap_lock_nmi);
 321                } else {
 322                        ghes_iounmap_irq();
 323                        spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
 324                }
 325        }
 326}
 327
 328static int ghes_read_estatus(struct ghes *ghes, int silent)
 329{
 330        struct acpi_hest_generic *g = ghes->generic;
 331        u64 buf_paddr;
 332        u32 len;
 333        int rc;
 334
 335        rc = apei_read(&buf_paddr, &g->error_status_address);
 336        if (rc) {
 337                if (!silent && printk_ratelimit())
 338                        pr_warning(FW_WARN GHES_PFX
 339"Failed to read error status block address for hardware error source: %d.\n",
 340                                   g->header.source_id);
 341                return -EIO;
 342        }
 343        if (!buf_paddr)
 344                return -ENOENT;
 345
 346        ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
 347                              sizeof(*ghes->estatus), 1);
 348        if (!ghes->estatus->block_status)
 349                return -ENOENT;
 350
 351        ghes->buffer_paddr = buf_paddr;
 352        ghes->flags |= GHES_TO_CLEAR;
 353
 354        rc = -EIO;
 355        len = cper_estatus_len(ghes->estatus);
 356        if (len < sizeof(*ghes->estatus))
 357                goto err_read_block;
 358        if (len > ghes->generic->error_block_length)
 359                goto err_read_block;
 360        if (cper_estatus_check_header(ghes->estatus))
 361                goto err_read_block;
 362        ghes_copy_tofrom_phys(ghes->estatus + 1,
 363                              buf_paddr + sizeof(*ghes->estatus),
 364                              len - sizeof(*ghes->estatus), 1);
 365        if (cper_estatus_check(ghes->estatus))
 366                goto err_read_block;
 367        rc = 0;
 368
 369err_read_block:
 370        if (rc && !silent && printk_ratelimit())
 371                pr_warning(FW_WARN GHES_PFX
 372                           "Failed to read error status block!\n");
 373        return rc;
 374}
 375
 376static void ghes_clear_estatus(struct ghes *ghes)
 377{
 378        ghes->estatus->block_status = 0;
 379        if (!(ghes->flags & GHES_TO_CLEAR))
 380                return;
 381        ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr,
 382                              sizeof(ghes->estatus->block_status), 0);
 383        ghes->flags &= ~GHES_TO_CLEAR;
 384}
 385
 386static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
 387{
 388#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
 389        unsigned long pfn;
 390        int flags = -1;
 391        int sec_sev = ghes_severity(gdata->error_severity);
 392        struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
 393
 394        if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
 395                return;
 396
 397        pfn = mem_err->physical_addr >> PAGE_SHIFT;
 398        if (!pfn_valid(pfn)) {
 399                pr_warn_ratelimited(FW_WARN GHES_PFX
 400                "Invalid address in generic error data: %#llx\n",
 401                mem_err->physical_addr);
 402                return;
 403        }
 404
 405        /* iff following two events can be handled properly by now */
 406        if (sec_sev == GHES_SEV_CORRECTED &&
 407            (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
 408                flags = MF_SOFT_OFFLINE;
 409        if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
 410                flags = 0;
 411
 412        if (flags != -1)
 413                memory_failure_queue(pfn, flags);
 414#endif
 415}
 416
 417/*
 418 * PCIe AER errors need to be sent to the AER driver for reporting and
 419 * recovery. The GHES severities map to the following AER severities and
 420 * require the following handling:
 421 *
  422 * GHES_SEV_CORRECTED -> AER_CORRECTABLE
 423 *     These need to be reported by the AER driver but no recovery is
 424 *     necessary.
 425 * GHES_SEV_RECOVERABLE -> AER_NONFATAL
 426 * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL
 427 *     These both need to be reported and recovered from by the AER driver.
 428 * GHES_SEV_PANIC does not make it to this handling since the kernel must
 429 *     panic.
 430 */
 431static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
 432{
 433#ifdef CONFIG_ACPI_APEI_PCIEAER
 434        struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
 435
 436        if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
 437            pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
 438                unsigned int devfn;
 439                int aer_severity;
 440
 441                devfn = PCI_DEVFN(pcie_err->device_id.device,
 442                                  pcie_err->device_id.function);
 443                aer_severity = cper_severity_to_aer(gdata->error_severity);
 444
 445                /*
 446                 * If firmware reset the component to contain
 447                 * the error, we must reinitialize it before
 448                 * use, so treat it as a fatal AER error.
 449                 */
 450                if (gdata->flags & CPER_SEC_RESET)
 451                        aer_severity = AER_FATAL;
 452
 453                aer_recover_queue(pcie_err->device_id.segment,
 454                                  pcie_err->device_id.bus,
 455                                  devfn, aer_severity,
 456                                  (struct aer_capability_regs *)
 457                                  pcie_err->aer_info);
 458        }
 459#endif
 460}
 461
 462static void ghes_do_proc(struct ghes *ghes,
 463                         const struct acpi_hest_generic_status *estatus)
 464{
 465        int sev, sec_sev;
 466        struct acpi_hest_generic_data *gdata;
 467        guid_t *sec_type;
 468        guid_t *fru_id = &NULL_UUID_LE;
 469        char *fru_text = "";
 470
 471        sev = ghes_severity(estatus->error_severity);
 472        apei_estatus_for_each_section(estatus, gdata) {
 473                sec_type = (guid_t *)gdata->section_type;
 474                sec_sev = ghes_severity(gdata->error_severity);
 475                if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
 476                        fru_id = (guid_t *)gdata->fru_id;
 477
 478                if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
 479                        fru_text = gdata->fru_text;
 480
 481                if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
 482                        struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
 483
 484                        ghes_edac_report_mem_error(ghes, sev, mem_err);
 485
 486                        arch_apei_report_mem_error(sev, mem_err);
 487                        ghes_handle_memory_failure(gdata, sev);
 488                }
 489                else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
 490                        ghes_handle_aer(gdata);
 491                }
 492                else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
 493                        struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
 494
 495                        log_arm_hw_error(err);
 496                } else {
 497                        void *err = acpi_hest_get_payload(gdata);
 498
 499                        log_non_standard_event(sec_type, fru_id, fru_text,
 500                                               sec_sev, err,
 501                                               gdata->error_data_length);
 502                }
 503        }
 504}
 505
 506static void __ghes_print_estatus(const char *pfx,
 507                                 const struct acpi_hest_generic *generic,
 508                                 const struct acpi_hest_generic_status *estatus)
 509{
 510        static atomic_t seqno;
 511        unsigned int curr_seqno;
 512        char pfx_seq[64];
 513
 514        if (pfx == NULL) {
 515                if (ghes_severity(estatus->error_severity) <=
 516                    GHES_SEV_CORRECTED)
 517                        pfx = KERN_WARNING;
 518                else
 519                        pfx = KERN_ERR;
 520        }
 521        curr_seqno = atomic_inc_return(&seqno);
 522        snprintf(pfx_seq, sizeof(pfx_seq), "%s{%u}" HW_ERR, pfx, curr_seqno);
 523        printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
 524               pfx_seq, generic->header.source_id);
 525        cper_estatus_print(pfx_seq, estatus);
 526}
 527
 528static int ghes_print_estatus(const char *pfx,
 529                              const struct acpi_hest_generic *generic,
 530                              const struct acpi_hest_generic_status *estatus)
 531{
 532        /* Not more than 2 messages every 5 seconds */
 533        static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
 534        static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
 535        struct ratelimit_state *ratelimit;
 536
 537        if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
 538                ratelimit = &ratelimit_corrected;
 539        else
 540                ratelimit = &ratelimit_uncorrected;
 541        if (__ratelimit(ratelimit)) {
 542                __ghes_print_estatus(pfx, generic, estatus);
 543                return 1;
 544        }
 545        return 0;
 546}
 547
 548/*
 549 * GHES error status reporting throttle, to report more kinds of
 550 * errors, instead of just most frequently occurred errors.
 551 */
 552static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
 553{
 554        u32 len;
 555        int i, cached = 0;
 556        unsigned long long now;
 557        struct ghes_estatus_cache *cache;
 558        struct acpi_hest_generic_status *cache_estatus;
 559
 560        len = cper_estatus_len(estatus);
 561        rcu_read_lock();
 562        for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
 563                cache = rcu_dereference(ghes_estatus_caches[i]);
 564                if (cache == NULL)
 565                        continue;
 566                if (len != cache->estatus_len)
 567                        continue;
 568                cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
 569                if (memcmp(estatus, cache_estatus, len))
 570                        continue;
 571                atomic_inc(&cache->count);
 572                now = sched_clock();
 573                if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
 574                        cached = 1;
 575                break;
 576        }
 577        rcu_read_unlock();
 578        return cached;
 579}
 580
 581static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
 582        struct acpi_hest_generic *generic,
 583        struct acpi_hest_generic_status *estatus)
 584{
 585        int alloced;
 586        u32 len, cache_len;
 587        struct ghes_estatus_cache *cache;
 588        struct acpi_hest_generic_status *cache_estatus;
 589
 590        alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
 591        if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
 592                atomic_dec(&ghes_estatus_cache_alloced);
 593                return NULL;
 594        }
 595        len = cper_estatus_len(estatus);
 596        cache_len = GHES_ESTATUS_CACHE_LEN(len);
 597        cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
 598        if (!cache) {
 599                atomic_dec(&ghes_estatus_cache_alloced);
 600                return NULL;
 601        }
 602        cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
 603        memcpy(cache_estatus, estatus, len);
 604        cache->estatus_len = len;
 605        atomic_set(&cache->count, 0);
 606        cache->generic = generic;
 607        cache->time_in = sched_clock();
 608        return cache;
 609}
 610
 611static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
 612{
 613        u32 len;
 614
 615        len = cper_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
 616        len = GHES_ESTATUS_CACHE_LEN(len);
 617        gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
 618        atomic_dec(&ghes_estatus_cache_alloced);
 619}
 620
 621static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
 622{
 623        struct ghes_estatus_cache *cache;
 624
 625        cache = container_of(head, struct ghes_estatus_cache, rcu);
 626        ghes_estatus_cache_free(cache);
 627}
 628
 629static void ghes_estatus_cache_add(
 630        struct acpi_hest_generic *generic,
 631        struct acpi_hest_generic_status *estatus)
 632{
 633        int i, slot = -1, count;
 634        unsigned long long now, duration, period, max_period = 0;
 635        struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;
 636
 637        new_cache = ghes_estatus_cache_alloc(generic, estatus);
 638        if (new_cache == NULL)
 639                return;
 640        rcu_read_lock();
 641        now = sched_clock();
 642        for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
 643                cache = rcu_dereference(ghes_estatus_caches[i]);
 644                if (cache == NULL) {
 645                        slot = i;
 646                        slot_cache = NULL;
 647                        break;
 648                }
 649                duration = now - cache->time_in;
 650                if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
 651                        slot = i;
 652                        slot_cache = cache;
 653                        break;
 654                }
 655                count = atomic_read(&cache->count);
 656                period = duration;
 657                do_div(period, (count + 1));
 658                if (period > max_period) {
 659                        max_period = period;
 660                        slot = i;
 661                        slot_cache = cache;
 662                }
 663        }
 664        /* new_cache must be put into array after its contents are written */
 665        smp_wmb();
 666        if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
 667                                  slot_cache, new_cache) == slot_cache) {
 668                if (slot_cache)
 669                        call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
 670        } else
 671                ghes_estatus_cache_free(new_cache);
 672        rcu_read_unlock();
 673}
 674
 675static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
 676{
 677        int rc;
 678        u64 val = 0;
 679
 680        rc = apei_read(&val, &gv2->read_ack_register);
 681        if (rc)
 682                return rc;
 683
 684        val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
 685        val |= gv2->read_ack_write    << gv2->read_ack_register.bit_offset;
 686
 687        return apei_write(val, &gv2->read_ack_register);
 688}
 689
 690static void __ghes_panic(struct ghes *ghes)
 691{
 692        __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);
 693
 694        /* reboot to log the error! */
 695        if (!panic_timeout)
 696                panic_timeout = ghes_panic_timeout;
 697        panic("Fatal hardware error!");
 698}
 699
 700static int ghes_proc(struct ghes *ghes)
 701{
 702        int rc;
 703
 704        rc = ghes_read_estatus(ghes, 0);
 705        if (rc)
 706                goto out;
 707
 708        if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) {
 709                __ghes_panic(ghes);
 710        }
 711
 712        if (!ghes_estatus_cached(ghes->estatus)) {
 713                if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
 714                        ghes_estatus_cache_add(ghes->generic, ghes->estatus);
 715        }
 716        ghes_do_proc(ghes, ghes->estatus);
 717
 718out:
 719        ghes_clear_estatus(ghes);
 720
 721        if (rc == -ENOENT)
 722                return rc;
 723
 724        /*
 725         * GHESv2 type HEST entries introduce support for error acknowledgment,
 726         * so only acknowledge the error if this support is present.
 727         */
 728        if (is_hest_type_generic_v2(ghes))
 729                return ghes_ack_error(ghes->generic_v2);
 730
 731        return rc;
 732}
 733
 734static void ghes_add_timer(struct ghes *ghes)
 735{
 736        struct acpi_hest_generic *g = ghes->generic;
 737        unsigned long expire;
 738
 739        if (!g->notify.poll_interval) {
 740                pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
 741                           g->header.source_id);
 742                return;
 743        }
 744        expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
 745        ghes->timer.expires = round_jiffies_relative(expire);
 746        add_timer(&ghes->timer);
 747}
 748
 749static void ghes_poll_func(struct timer_list *t)
 750{
 751        struct ghes *ghes = from_timer(ghes, t, timer);
 752
 753        ghes_proc(ghes);
 754        if (!(ghes->flags & GHES_EXITING))
 755                ghes_add_timer(ghes);
 756}
 757
 758static irqreturn_t ghes_irq_func(int irq, void *data)
 759{
 760        struct ghes *ghes = data;
 761        int rc;
 762
 763        rc = ghes_proc(ghes);
 764        if (rc)
 765                return IRQ_NONE;
 766
 767        return IRQ_HANDLED;
 768}
 769
 770static int ghes_notify_hed(struct notifier_block *this, unsigned long event,
 771                           void *data)
 772{
 773        struct ghes *ghes;
 774        int ret = NOTIFY_DONE;
 775
 776        rcu_read_lock();
 777        list_for_each_entry_rcu(ghes, &ghes_hed, list) {
 778                if (!ghes_proc(ghes))
 779                        ret = NOTIFY_OK;
 780        }
 781        rcu_read_unlock();
 782
 783        return ret;
 784}
 785
  /* Notifier block whose callback is ghes_notify_hed(). */
  786static struct notifier_block ghes_notifier_hed = {
  787        .notifier_call = ghes_notify_hed,
  788};
 789
 790#ifdef CONFIG_ACPI_APEI_SEA
 791static LIST_HEAD(ghes_sea);
 792
 793/*
 794 * Return 0 only if one of the SEA error sources successfully reported an error
 795 * record sent from the firmware.
 796 */
 797int ghes_notify_sea(void)
 798{
 799        struct ghes *ghes;
 800        int ret = -ENOENT;
 801
 802        rcu_read_lock();
 803        list_for_each_entry_rcu(ghes, &ghes_sea, list) {
 804                if (!ghes_proc(ghes))
 805                        ret = 0;
 806        }
 807        rcu_read_unlock();
 808        return ret;
 809}
 810
 811static void ghes_sea_add(struct ghes *ghes)
 812{
 813        mutex_lock(&ghes_list_mutex);
 814        list_add_rcu(&ghes->list, &ghes_sea);
 815        mutex_unlock(&ghes_list_mutex);
 816}
 817
 818static void ghes_sea_remove(struct ghes *ghes)
 819{
 820        mutex_lock(&ghes_list_mutex);
 821        list_del_rcu(&ghes->list);
 822        mutex_unlock(&ghes_list_mutex);
 823        synchronize_rcu();
 824}
 825#else /* CONFIG_ACPI_APEI_SEA */
 826static inline void ghes_sea_add(struct ghes *ghes) { }
 827static inline void ghes_sea_remove(struct ghes *ghes) { }
 828#endif /* CONFIG_ACPI_APEI_SEA */
 829
 830#ifdef CONFIG_HAVE_ACPI_APEI_NMI
 831/*
 832 * printk is not safe in NMI context.  So in NMI handler, we allocate
 833 * required memory from lock-less memory allocator
 834 * (ghes_estatus_pool), save estatus into it, put them into lock-less
 835 * list (ghes_estatus_llist), then delay printk into IRQ context via
  836 * irq_work (ghes_proc_irq_work).  ghes_estatus_pool_size_request
  837 * records the pool size required by all NMI error sources.
 838 */
 839static struct llist_head ghes_estatus_llist;
 840static struct irq_work ghes_proc_irq_work;
 841
 842/*
 843 * NMI may be triggered on any CPU, so ghes_in_nmi is used for
 844 * having only one concurrent reader.
 845 */
 846static atomic_t ghes_in_nmi = ATOMIC_INIT(0);
 847
 848static LIST_HEAD(ghes_nmi);
 849
 850static void ghes_proc_in_irq(struct irq_work *irq_work)
 851{
 852        struct llist_node *llnode, *next;
 853        struct ghes_estatus_node *estatus_node;
 854        struct acpi_hest_generic *generic;
 855        struct acpi_hest_generic_status *estatus;
 856        u32 len, node_len;
 857
 858        llnode = llist_del_all(&ghes_estatus_llist);
 859        /*
 860         * Because the time order of estatus in list is reversed,
 861         * revert it back to proper order.
 862         */
 863        llnode = llist_reverse_order(llnode);
 864        while (llnode) {
 865                next = llnode->next;
 866                estatus_node = llist_entry(llnode, struct ghes_estatus_node,
 867                                           llnode);
 868                estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
 869                len = cper_estatus_len(estatus);
 870                node_len = GHES_ESTATUS_NODE_LEN(len);
 871                ghes_do_proc(estatus_node->ghes, estatus);
 872                if (!ghes_estatus_cached(estatus)) {
 873                        generic = estatus_node->generic;
 874                        if (ghes_print_estatus(NULL, generic, estatus))
 875                                ghes_estatus_cache_add(generic, estatus);
 876                }
 877                gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
 878                              node_len);
 879                llnode = next;
 880        }
 881}
 882
 883static void ghes_print_queued_estatus(void)
 884{
 885        struct llist_node *llnode;
 886        struct ghes_estatus_node *estatus_node;
 887        struct acpi_hest_generic *generic;
 888        struct acpi_hest_generic_status *estatus;
 889
 890        llnode = llist_del_all(&ghes_estatus_llist);
 891        /*
 892         * Because the time order of estatus in list is reversed,
 893         * revert it back to proper order.
 894         */
 895        llnode = llist_reverse_order(llnode);
 896        while (llnode) {
 897                estatus_node = llist_entry(llnode, struct ghes_estatus_node,
 898                                           llnode);
 899                estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
 900                generic = estatus_node->generic;
 901                ghes_print_estatus(NULL, generic, estatus);
 902                llnode = llnode->next;
 903        }
 904}
 905
 906/* Save estatus for further processing in IRQ context */
 /*
  * Save the estatus of @ghes for further processing in IRQ context.
  *
  * A copy of the error status block is placed in a node taken from
  * ghes_estatus_pool and pushed onto ghes_estatus_llist.  Nothing is
  * queued when the record is already cached or when the pool has no room.
  * Without CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG the function does nothing.
  */
 static void __process_error(struct ghes *ghes)
 {
 #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
         struct ghes_estatus_node *node;
         struct acpi_hest_generic_status *copy;
         u32 estatus_len, alloc_len;

         if (ghes_estatus_cached(ghes->estatus))
                 return;

         estatus_len = cper_estatus_len(ghes->estatus);
         alloc_len = GHES_ESTATUS_NODE_LEN(estatus_len);

         node = (void *)gen_pool_alloc(ghes_estatus_pool, alloc_len);
         if (!node)
                 return;

         node->ghes = ghes;
         node->generic = ghes->generic;

         copy = GHES_ESTATUS_FROM_NODE(node);
         memcpy(copy, ghes->estatus, estatus_len);

         llist_add(&node->llnode, &ghes_estatus_llist);
 #endif
 }
 931
 932static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
 933{
 934        struct ghes *ghes;
 935        int sev, ret = NMI_DONE;
 936
 937        if (!atomic_add_unless(&ghes_in_nmi, 1, 1))
 938                return ret;
 939
 940        list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
 941                if (ghes_read_estatus(ghes, 1)) {
 942                        ghes_clear_estatus(ghes);
 943                        continue;
 944                } else {
 945                        ret = NMI_HANDLED;
 946                }
 947
 948                sev = ghes_severity(ghes->estatus->error_severity);
 949                if (sev >= GHES_SEV_PANIC) {
 950                        oops_begin();
 951                        ghes_print_queued_estatus();
 952                        __ghes_panic(ghes);
 953                }
 954
 955                if (!(ghes->flags & GHES_TO_CLEAR))
 956                        continue;
 957
 958                __process_error(ghes);
 959                ghes_clear_estatus(ghes);
 960        }
 961
 962#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
 963        if (ret == NMI_HANDLED)
 964                irq_work_queue(&ghes_proc_irq_work);
 965#endif
 966        atomic_dec(&ghes_in_nmi);
 967        return ret;
 968}
 969
 970static unsigned long ghes_esource_prealloc_size(
 971        const struct acpi_hest_generic *generic)
 972{
 973        unsigned long block_length, prealloc_records, prealloc_size;
 974
 975        block_length = min_t(unsigned long, generic->error_block_length,
 976                             GHES_ESTATUS_MAX_SIZE);
 977        prealloc_records = max_t(unsigned long,
 978                                 generic->records_to_preallocate, 1);
 979        prealloc_size = min_t(unsigned long, block_length * prealloc_records,
 980                              GHES_ESOURCE_PREALLOC_MAX_SIZE);
 981
 982        return prealloc_size;
 983}
 984
 985static void ghes_estatus_pool_shrink(unsigned long len)
 986{
 987        ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
 988}
 989
 990static void ghes_nmi_add(struct ghes *ghes)
 991{
 992        unsigned long len;
 993
 994        len = ghes_esource_prealloc_size(ghes->generic);
 995        ghes_estatus_pool_expand(len);
 996        mutex_lock(&ghes_list_mutex);
 997        if (list_empty(&ghes_nmi))
 998                register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
 999        list_add_rcu(&ghes->list, &ghes_nmi);
1000        mutex_unlock(&ghes_list_mutex);
1001}
1002
1003static void ghes_nmi_remove(struct ghes *ghes)
1004{
1005        unsigned long len;
1006
1007        mutex_lock(&ghes_list_mutex);
1008        list_del_rcu(&ghes->list);
1009        if (list_empty(&ghes_nmi))
1010                unregister_nmi_handler(NMI_LOCAL, "ghes");
1011        mutex_unlock(&ghes_list_mutex);
1012        /*
1013         * To synchronize with NMI handler, ghes can only be
1014         * freed after NMI handler finishes.
1015         */
1016        synchronize_rcu();
1017        len = ghes_esource_prealloc_size(ghes->generic);
1018        ghes_estatus_pool_shrink(len);
1019}
1020
1021static void ghes_nmi_init_cxt(void)
1022{
1023        init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
1024}
1025#else /* CONFIG_HAVE_ACPI_APEI_NMI */
/* Empty stand-ins used when CONFIG_HAVE_ACPI_APEI_NMI is not set */
static inline void ghes_nmi_add(struct ghes *ghes)
{
}

static inline void ghes_nmi_remove(struct ghes *ghes)
{
}

static inline void ghes_nmi_init_cxt(void)
{
}
1029#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
1030
1031static int ghes_probe(struct platform_device *ghes_dev)
1032{
1033        struct acpi_hest_generic *generic;
1034        struct ghes *ghes = NULL;
1035
1036        int rc = -EINVAL;
1037
1038        generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
1039        if (!generic->enabled)
1040                return -ENODEV;
1041
1042        switch (generic->notify.type) {
1043        case ACPI_HEST_NOTIFY_POLLED:
1044        case ACPI_HEST_NOTIFY_EXTERNAL:
1045        case ACPI_HEST_NOTIFY_SCI:
1046        case ACPI_HEST_NOTIFY_GSIV:
1047        case ACPI_HEST_NOTIFY_GPIO:
1048                break;
1049
1050        case ACPI_HEST_NOTIFY_SEA:
1051                if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
1052                        pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n",
1053                                generic->header.source_id);
1054                        rc = -ENOTSUPP;
1055                        goto err;
1056                }
1057                break;
1058        case ACPI_HEST_NOTIFY_NMI:
1059                if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
1060                        pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
1061                                generic->header.source_id);
1062                        goto err;
1063                }
1064                break;
1065        case ACPI_HEST_NOTIFY_LOCAL:
1066                pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
1067                           generic->header.source_id);
1068                goto err;
1069        default:
1070                pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
1071                           generic->notify.type, generic->header.source_id);
1072                goto err;
1073        }
1074
1075        rc = -EIO;
1076        if (generic->error_block_length <
1077            sizeof(struct acpi_hest_generic_status)) {
1078                pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
1079                           generic->error_block_length,
1080                           generic->header.source_id);
1081                goto err;
1082        }
1083        ghes = ghes_new(generic);
1084        if (IS_ERR(ghes)) {
1085                rc = PTR_ERR(ghes);
1086                ghes = NULL;
1087                goto err;
1088        }
1089
1090        rc = ghes_edac_register(ghes, &ghes_dev->dev);
1091        if (rc < 0)
1092                goto err;
1093
1094        switch (generic->notify.type) {
1095        case ACPI_HEST_NOTIFY_POLLED:
1096                timer_setup(&ghes->timer, ghes_poll_func, TIMER_DEFERRABLE);
1097                ghes_add_timer(ghes);
1098                break;
1099        case ACPI_HEST_NOTIFY_EXTERNAL:
1100                /* External interrupt vector is GSI */
1101                rc = acpi_gsi_to_irq(generic->notify.vector, &ghes->irq);
1102                if (rc) {
1103                        pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
1104                               generic->header.source_id);
1105                        goto err_edac_unreg;
1106                }
1107                rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED,
1108                                 "GHES IRQ", ghes);
1109                if (rc) {
1110                        pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
1111                               generic->header.source_id);
1112                        goto err_edac_unreg;
1113                }
1114                break;
1115
1116        case ACPI_HEST_NOTIFY_SCI:
1117        case ACPI_HEST_NOTIFY_GSIV:
1118        case ACPI_HEST_NOTIFY_GPIO:
1119                mutex_lock(&ghes_list_mutex);
1120                if (list_empty(&ghes_hed))
1121                        register_acpi_hed_notifier(&ghes_notifier_hed);
1122                list_add_rcu(&ghes->list, &ghes_hed);
1123                mutex_unlock(&ghes_list_mutex);
1124                break;
1125
1126        case ACPI_HEST_NOTIFY_SEA:
1127                ghes_sea_add(ghes);
1128                break;
1129        case ACPI_HEST_NOTIFY_NMI:
1130                ghes_nmi_add(ghes);
1131                break;
1132        default:
1133                BUG();
1134        }
1135        platform_set_drvdata(ghes_dev, ghes);
1136
1137        /* Handle any pending errors right away */
1138        ghes_proc(ghes);
1139
1140        return 0;
1141err_edac_unreg:
1142        ghes_edac_unregister(ghes);
1143err:
1144        if (ghes) {
1145                ghes_fini(ghes);
1146                kfree(ghes);
1147        }
1148        return rc;
1149}
1150
1151static int ghes_remove(struct platform_device *ghes_dev)
1152{
1153        struct ghes *ghes;
1154        struct acpi_hest_generic *generic;
1155
1156        ghes = platform_get_drvdata(ghes_dev);
1157        generic = ghes->generic;
1158
1159        ghes->flags |= GHES_EXITING;
1160        switch (generic->notify.type) {
1161        case ACPI_HEST_NOTIFY_POLLED:
1162                del_timer_sync(&ghes->timer);
1163                break;
1164        case ACPI_HEST_NOTIFY_EXTERNAL:
1165                free_irq(ghes->irq, ghes);
1166                break;
1167
1168        case ACPI_HEST_NOTIFY_SCI:
1169        case ACPI_HEST_NOTIFY_GSIV:
1170        case ACPI_HEST_NOTIFY_GPIO:
1171                mutex_lock(&ghes_list_mutex);
1172                list_del_rcu(&ghes->list);
1173                if (list_empty(&ghes_hed))
1174                        unregister_acpi_hed_notifier(&ghes_notifier_hed);
1175                mutex_unlock(&ghes_list_mutex);
1176                synchronize_rcu();
1177                break;
1178
1179        case ACPI_HEST_NOTIFY_SEA:
1180                ghes_sea_remove(ghes);
1181                break;
1182        case ACPI_HEST_NOTIFY_NMI:
1183                ghes_nmi_remove(ghes);
1184                break;
1185        default:
1186                BUG();
1187                break;
1188        }
1189
1190        ghes_fini(ghes);
1191
1192        ghes_edac_unregister(ghes);
1193
1194        kfree(ghes);
1195
1196        platform_set_drvdata(ghes_dev, NULL);
1197
1198        return 0;
1199}
1200
1201static struct platform_driver ghes_platform_driver = {
1202        .driver         = {
1203                .name   = "GHES",
1204        },
1205        .probe          = ghes_probe,
1206        .remove         = ghes_remove,
1207};
1208
1209static int __init ghes_init(void)
1210{
1211        int rc;
1212
1213        if (acpi_disabled)
1214                return -ENODEV;
1215
1216        switch (hest_disable) {
1217        case HEST_NOT_FOUND:
1218                return -ENODEV;
1219        case HEST_DISABLED:
1220                pr_info(GHES_PFX "HEST is not enabled!\n");
1221                return -EINVAL;
1222        default:
1223                break;
1224        }
1225
1226        if (ghes_disable) {
1227                pr_info(GHES_PFX "GHES is not enabled!\n");
1228                return -EINVAL;
1229        }
1230
1231        ghes_nmi_init_cxt();
1232
1233        rc = ghes_estatus_pool_init();
1234        if (rc)
1235                goto err;
1236
1237        rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
1238                                      GHES_ESTATUS_CACHE_ALLOCED_MAX);
1239        if (rc)
1240                goto err_pool_exit;
1241
1242        rc = platform_driver_register(&ghes_platform_driver);
1243        if (rc)
1244                goto err_pool_exit;
1245
1246        rc = apei_osc_setup();
1247        if (rc == 0 && osc_sb_apei_support_acked)
1248                pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
1249        else if (rc == 0 && !osc_sb_apei_support_acked)
1250                pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
1251        else if (rc && osc_sb_apei_support_acked)
1252                pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
1253        else
1254                pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
1255
1256        return 0;
1257err_pool_exit:
1258        ghes_estatus_pool_exit();
1259err:
1260        return rc;
1261}
1262device_initcall(ghes_init);
1263