linux/arch/x86/kernel/cpu/mce/amd.c
   1/*
   2 *  (c) 2005-2016 Advanced Micro Devices, Inc.
   3 *  Your use of this code is subject to the terms and conditions of the
   4 *  GNU general public license version 2. See "COPYING" or
   5 *  http://www.gnu.org/licenses/gpl.html
   6 *
   7 *  Written by Jacob Shin - AMD, Inc.
   8 *  Maintained by: Borislav Petkov <bp@alien8.de>
   9 *
  10 *  All MC4_MISCi registers are shared between cores on a node.
  11 */
  12#include <linux/interrupt.h>
  13#include <linux/notifier.h>
  14#include <linux/kobject.h>
  15#include <linux/percpu.h>
  16#include <linux/errno.h>
  17#include <linux/sched.h>
  18#include <linux/sysfs.h>
  19#include <linux/slab.h>
  20#include <linux/init.h>
  21#include <linux/cpu.h>
  22#include <linux/smp.h>
  23#include <linux/string.h>
  24
  25#include <asm/amd_nb.h>
  26#include <asm/traps.h>
  27#include <asm/apic.h>
  28#include <asm/mce.h>
  29#include <asm/msr.h>
  30#include <asm/trace/irq_vectors.h>
  31
  32#include "internal.h"
  33
  34#define NR_BLOCKS         5
  35#define THRESHOLD_MAX     0xFFF
  36#define INT_TYPE_APIC     0x00020000
  37#define MASK_VALID_HI     0x80000000
  38#define MASK_CNTP_HI      0x40000000
  39#define MASK_LOCKED_HI    0x20000000
  40#define MASK_LVTOFF_HI    0x00F00000
  41#define MASK_COUNT_EN_HI  0x00080000
  42#define MASK_INT_TYPE_HI  0x00060000
  43#define MASK_OVERFLOW_HI  0x00010000
  44#define MASK_ERR_COUNT_HI 0x00000FFF
  45#define MASK_BLKPTR_LO    0xFF000000
  46#define MCG_XBLK_ADDR     0xC0000400
  47
  48/* Deferred error settings */
  49#define MSR_CU_DEF_ERR          0xC0000410
  50#define MASK_DEF_LVTOFF         0x000000F0
  51#define MASK_DEF_INT_TYPE       0x00000006
  52#define DEF_LVT_OFF             0x2
  53#define DEF_INT_TYPE_APIC       0x2
  54
  55/* Scalable MCA: */
  56
  57/* Threshold LVT offset is at MSR0xC0000410[15:12] */
  58#define SMCA_THR_LVT_OFF        0xF000
  59
  60static bool thresholding_irq_en;
  61
  62static const char * const th_names[] = {
  63        "load_store",
  64        "insn_fetch",
  65        "combined_unit",
  66        "decode_unit",
  67        "northbridge",
  68        "execution_unit",
  69};
  70
  71static const char * const smca_umc_block_names[] = {
  72        "dram_ecc",
  73        "misc_umc"
  74};
  75
  76#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype))
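    /* Example: HWID_MCATYPE(0xB0, 0x10) == 0x00B00010, the SMCA_LS_V2 tuple below. */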
  77
  78struct smca_hwid {
  79        unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
  80        u32 hwid_mcatype;       /* (hwid,mcatype) tuple */
  81};
  82
  83struct smca_bank {
  84        const struct smca_hwid *hwid;
  85        u32 id;                 /* Value of MCA_IPID[InstanceId]. */
  86        u8 sysfs_id;            /* Value used for sysfs name. */
  87};
  88
  89static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks);
  90static DEFINE_PER_CPU_READ_MOSTLY(u8[N_SMCA_BANK_TYPES], smca_bank_counts);
  91
  92struct smca_bank_name {
  93        const char *name;       /* Short name for sysfs */
  94        const char *long_name;  /* Long name for pretty-printing */
  95};
  96
  97static struct smca_bank_name smca_names[] = {
  98        [SMCA_LS ... SMCA_LS_V2]        = { "load_store",       "Load Store Unit" },
  99        [SMCA_IF]                       = { "insn_fetch",       "Instruction Fetch Unit" },
 100        [SMCA_L2_CACHE]                 = { "l2_cache",         "L2 Cache" },
 101        [SMCA_DE]                       = { "decode_unit",      "Decode Unit" },
 102        [SMCA_RESERVED]                 = { "reserved",         "Reserved" },
 103        [SMCA_EX]                       = { "execution_unit",   "Execution Unit" },
 104        [SMCA_FP]                       = { "floating_point",   "Floating Point Unit" },
 105        [SMCA_L3_CACHE]                 = { "l3_cache",         "L3 Cache" },
 106        [SMCA_CS ... SMCA_CS_V2]        = { "coherent_slave",   "Coherent Slave" },
 107        [SMCA_PIE]                      = { "pie",              "Power, Interrupts, etc." },
 108
 109        /* UMC v2 is separate because both of them can exist in a single system. */
 110        [SMCA_UMC]                      = { "umc",              "Unified Memory Controller" },
 111        [SMCA_UMC_V2]                   = { "umc_v2",           "Unified Memory Controller v2" },
 112        [SMCA_PB]                       = { "param_block",      "Parameter Block" },
 113        [SMCA_PSP ... SMCA_PSP_V2]      = { "psp",              "Platform Security Processor" },
 114        [SMCA_SMU ... SMCA_SMU_V2]      = { "smu",              "System Management Unit" },
 115        [SMCA_MP5]                      = { "mp5",              "Microprocessor 5 Unit" },
 116        [SMCA_MPDMA]                    = { "mpdma",            "MPDMA Unit" },
 117        [SMCA_NBIO]                     = { "nbio",             "Northbridge IO Unit" },
 118        [SMCA_PCIE ... SMCA_PCIE_V2]    = { "pcie",             "PCI Express Unit" },
 119        [SMCA_XGMI_PCS]                 = { "xgmi_pcs",         "Ext Global Memory Interconnect PCS Unit" },
 120        [SMCA_NBIF]                     = { "nbif",             "NBIF Unit" },
 121        [SMCA_SHUB]                     = { "shub",             "System Hub Unit" },
 122        [SMCA_SATA]                     = { "sata",             "SATA Unit" },
 123        [SMCA_USB]                      = { "usb",              "USB Unit" },
 124        [SMCA_GMI_PCS]                  = { "gmi_pcs",          "Global Memory Interconnect PCS Unit" },
 125        [SMCA_XGMI_PHY]                 = { "xgmi_phy",         "Ext Global Memory Interconnect PHY Unit" },
 126        [SMCA_WAFL_PHY]                 = { "wafl_phy",         "WAFL PHY Unit" },
 127        [SMCA_GMI_PHY]                  = { "gmi_phy",          "Global Memory Interconnect PHY Unit" },
 128};
 129
 130static const char *smca_get_name(enum smca_bank_types t)
 131{
 132        if (t >= N_SMCA_BANK_TYPES)
 133                return NULL;
 134
 135        return smca_names[t].name;
 136}
 137
 138const char *smca_get_long_name(enum smca_bank_types t)
 139{
 140        if (t >= N_SMCA_BANK_TYPES)
 141                return NULL;
 142
 143        return smca_names[t].long_name;
 144}
 145EXPORT_SYMBOL_GPL(smca_get_long_name);
 146
 147enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank)
 148{
 149        struct smca_bank *b;
 150
 151        if (bank >= MAX_NR_BANKS)
 152                return N_SMCA_BANK_TYPES;
 153
 154        b = &per_cpu(smca_banks, cpu)[bank];
 155        if (!b->hwid)
 156                return N_SMCA_BANK_TYPES;
 157
 158        return b->hwid->bank_type;
 159}
 160EXPORT_SYMBOL_GPL(smca_get_bank_type);
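
    /*
     * Illustrative use (see amd_mce_is_memory_error() below): callers key off
     * the returned bank type, e.g.:
     *
     *	if (smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC)
     *		... treat it as a memory (DRAM ECC) error ...
     */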
 161
 162static const struct smca_hwid smca_hwid_mcatypes[] = {
 163        /* { bank_type, hwid_mcatype } */
 164
 165        /* Reserved type */
 166        { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0)        },
 167
 168        /* ZN Core (HWID=0xB0) MCA types */
 169        { SMCA_LS,       HWID_MCATYPE(0xB0, 0x0)        },
 170        { SMCA_LS_V2,    HWID_MCATYPE(0xB0, 0x10)       },
 171        { SMCA_IF,       HWID_MCATYPE(0xB0, 0x1)        },
 172        { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2)        },
 173        { SMCA_DE,       HWID_MCATYPE(0xB0, 0x3)        },
 174        /* HWID 0xB0 MCATYPE 0x4 is Reserved */
 175        { SMCA_EX,       HWID_MCATYPE(0xB0, 0x5)        },
 176        { SMCA_FP,       HWID_MCATYPE(0xB0, 0x6)        },
 177        { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7)        },
 178
 179        /* Data Fabric MCA types */
 180        { SMCA_CS,       HWID_MCATYPE(0x2E, 0x0)        },
 181        { SMCA_PIE,      HWID_MCATYPE(0x2E, 0x1)        },
 182        { SMCA_CS_V2,    HWID_MCATYPE(0x2E, 0x2)        },
 183
 184        /* Unified Memory Controller MCA type */
 185        { SMCA_UMC,      HWID_MCATYPE(0x96, 0x0)        },
 186        { SMCA_UMC_V2,   HWID_MCATYPE(0x96, 0x1)        },
 187
 188        /* Parameter Block MCA type */
 189        { SMCA_PB,       HWID_MCATYPE(0x05, 0x0)        },
 190
 191        /* Platform Security Processor MCA type */
 192        { SMCA_PSP,      HWID_MCATYPE(0xFF, 0x0)        },
 193        { SMCA_PSP_V2,   HWID_MCATYPE(0xFF, 0x1)        },
 194
 195        /* System Management Unit MCA type */
 196        { SMCA_SMU,      HWID_MCATYPE(0x01, 0x0)        },
 197        { SMCA_SMU_V2,   HWID_MCATYPE(0x01, 0x1)        },
 198
 199        /* Microprocessor 5 Unit MCA type */
 200        { SMCA_MP5,      HWID_MCATYPE(0x01, 0x2)        },
 201
 202        /* MPDMA MCA type */
 203        { SMCA_MPDMA,    HWID_MCATYPE(0x01, 0x3)        },
 204
 205        /* Northbridge IO Unit MCA type */
 206        { SMCA_NBIO,     HWID_MCATYPE(0x18, 0x0)        },
 207
 208        /* PCI Express Unit MCA type */
 209        { SMCA_PCIE,     HWID_MCATYPE(0x46, 0x0)        },
 210        { SMCA_PCIE_V2,  HWID_MCATYPE(0x46, 0x1)        },
 211
 212        { SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0)        },
 213        { SMCA_NBIF,     HWID_MCATYPE(0x6C, 0x0)        },
 214        { SMCA_SHUB,     HWID_MCATYPE(0x80, 0x0)        },
 215        { SMCA_SATA,     HWID_MCATYPE(0xA8, 0x0)        },
 216        { SMCA_USB,      HWID_MCATYPE(0xAA, 0x0)        },
 217        { SMCA_GMI_PCS,  HWID_MCATYPE(0x241, 0x0)       },
 218        { SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0)       },
 219        { SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0)       },
 220        { SMCA_GMI_PHY,  HWID_MCATYPE(0x269, 0x0)       },
 221};
 222
 223/*
 224 * In SMCA enabled processors, we can have multiple banks for a given IP type.
 225 * So to define a unique name for each bank, we use a temp c-string to append
 226 * the MCA_IPID[InstanceId] to the type's name in get_name().
 227 *
 228 * InstanceId is 32 bits which is 8 characters. Make sure MAX_MCATYPE_NAME_LEN
 229 * is greater than 8 plus 1 (for underscore) plus length of longest type name.
 230 */
 231#define MAX_MCATYPE_NAME_LEN    30
 232static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
 233
 234static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
 235
 236/*
 237 * A list of the banks enabled on each logical CPU. Controls which respective
 238 * descriptors to initialize later in mce_threshold_create_device().
 239 */
 240static DEFINE_PER_CPU(unsigned int, bank_map);
 241
 242/* Map of banks that have more than MCA_MISC0 available. */
 243static DEFINE_PER_CPU(u32, smca_misc_banks_map);
 244
 245static void amd_threshold_interrupt(void);
 246static void amd_deferred_error_interrupt(void);
 247
 248static void default_deferred_error_interrupt(void)
 249{
 250        pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
 251}
 252void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
 253
 254static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
 255{
 256        u32 low, high;
 257
 258        /*
 259         * For SMCA enabled processors, BLKPTR field of the first MISC register
 260         * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
 261         */
 262        if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
 263                return;
 264
 265        if (!(low & MCI_CONFIG_MCAX))
 266                return;
 267
 268        if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high))
 269                return;
 270
 271        if (low & MASK_BLKPTR_LO)
 272                per_cpu(smca_misc_banks_map, cpu) |= BIT(bank);
 273
 274}
 275
 276static void smca_configure(unsigned int bank, unsigned int cpu)
 277{
 278        u8 *bank_counts = this_cpu_ptr(smca_bank_counts);
 279        const struct smca_hwid *s_hwid;
 280        unsigned int i, hwid_mcatype;
 281        u32 high, low;
 282        u32 smca_config = MSR_AMD64_SMCA_MCx_CONFIG(bank);
 283
 284        /* Set appropriate bits in MCA_CONFIG */
 285        if (!rdmsr_safe(smca_config, &low, &high)) {
 286                /*
 287                 * OS is required to set the MCAX bit to acknowledge that it is
 288                 * now using the new MSR ranges and new registers under each
 289                 * bank. It also means that the OS will configure deferred
 290                 * errors in the new MCx_CONFIG register. If the bit is not set,
 291                 * uncorrectable errors will cause a system panic.
 292                 *
 293                 * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.)
 294                 */
 295                high |= BIT(0);
 296
 297                /*
 298                 * SMCA sets the Deferred Error Interrupt type per bank.
 299                 *
 300                 * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us
 301                 * if the DeferredIntType bit field is available.
 302                 *
 303                 * MCA_CONFIG[DeferredIntType] is bits [38:37] ([6:5] in the
 304                 * high portion of the MSR). OS should set this to 0x1 to enable
 305                 * APIC based interrupt. First, check that no interrupt has been
 306                 * set.
 307                 */
 308                if ((low & BIT(5)) && !((high >> 5) & 0x3))
 309                        high |= BIT(5);
 310
 311                wrmsr(smca_config, low, high);
 312        }
 313
 314        smca_set_misc_banks_map(bank, cpu);
 315
 316        if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
 317                pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
 318                return;
 319        }
 320
 321        hwid_mcatype = HWID_MCATYPE(high & MCI_IPID_HWID,
 322                                    (high & MCI_IPID_MCATYPE) >> 16);
 323
 324        for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
 325                s_hwid = &smca_hwid_mcatypes[i];
 326
 327                if (hwid_mcatype == s_hwid->hwid_mcatype) {
 328                        this_cpu_ptr(smca_banks)[bank].hwid = s_hwid;
 329                        this_cpu_ptr(smca_banks)[bank].id = low;
 330                        this_cpu_ptr(smca_banks)[bank].sysfs_id = bank_counts[s_hwid->bank_type]++;
 331                        break;
 332                }
 333        }
 334}
 335
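    /* Arguments for threshold_restart_bank(), called directly or via smp_call_function_single(). */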
 336struct thresh_restart {
 337        struct threshold_block  *b;
 338        int                     reset;
 339        int                     set_lvt_off;
 340        int                     lvt_off;
 341        u16                     old_limit;
 342};
 343
 344static inline bool is_shared_bank(int bank)
 345{
 346        /*
 347         * Scalable MCA provides for only one core to have access to the MSRs of
 348         * a shared bank.
 349         */
 350        if (mce_flags.smca)
 351                return false;
 352
 353        /* Bank 4 is for northbridge reporting and is thus shared */
 354        return (bank == 4);
 355}
 356
 357static const char *bank4_names(const struct threshold_block *b)
 358{
 359        switch (b->address) {
 360        /* MSR4_MISC0 */
 361        case 0x00000413:
 362                return "dram";
 363
 364        case 0xc0000408:
 365                return "ht_links";
 366
 367        case 0xc0000409:
 368                return "l3_cache";
 369
 370        default:
 371                WARN(1, "Funny MSR: 0x%08x\n", b->address);
 372                return "";
 373        }
 374};
 375
 376
 377static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
 378{
 379        /*
 380         * bank 4 supports APIC LVT interrupts implicitly since forever.
 381         */
 382        if (bank == 4)
 383                return true;
 384
 385        /*
 386         * IntP: interrupt present; if this bit is set, the thresholding
 387         * bank can generate APIC LVT interrupts
 388         */
 389        return msr_high_bits & BIT(28);
 390}
 391
 392static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
 393{
 394        int msr = (hi & MASK_LVTOFF_HI) >> 20;
 395
 396        if (apic < 0) {
 397                pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
 398                       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
 399                       b->bank, b->block, b->address, hi, lo);
 400                return 0;
 401        }
 402
 403        if (apic != msr) {
 404                /*
 405                 * On SMCA CPUs, LVT offset is programmed at a different MSR, and
 406                 * the BIOS provides the value. The original field where LVT offset
 407                 * was set is reserved. Return early here:
 408                 */
 409                if (mce_flags.smca)
 410                        return 0;
 411
 412                pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
 413                       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
 414                       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
 415                return 0;
 416        }
 417
 418        return 1;
 419};
 420
 421/* Reprogram MCx_MISC MSR behind this threshold bank. */
 422static void threshold_restart_bank(void *_tr)
 423{
 424        struct thresh_restart *tr = _tr;
 425        u32 hi, lo;
 426
 427        /* sysfs write might race against an offline operation */
 428        if (!this_cpu_read(threshold_banks) && !tr->set_lvt_off)
 429                return;
 430
 431        rdmsr(tr->b->address, lo, hi);
 432
 433        if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
 434                tr->reset = 1;  /* limit cannot be lower than err count */
 435
 436        if (tr->reset) {                /* reset err count and overflow bit */
 437                hi =
 438                    (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
 439                    (THRESHOLD_MAX - tr->b->threshold_limit);
 440        } else if (tr->old_limit) {     /* change limit w/o reset */
 441                int new_count = (hi & THRESHOLD_MAX) +
 442                    (tr->old_limit - tr->b->threshold_limit);
 443
 444                hi = (hi & ~MASK_ERR_COUNT_HI) |
 445                    (new_count & THRESHOLD_MAX);
 446        }
 447
 448        /* clear IntType */
 449        hi &= ~MASK_INT_TYPE_HI;
 450
 451        if (!tr->b->interrupt_capable)
 452                goto done;
 453
 454        if (tr->set_lvt_off) {
 455                if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
 456                        /* set new lvt offset */
 457                        hi &= ~MASK_LVTOFF_HI;
 458                        hi |= tr->lvt_off << 20;
 459                }
 460        }
 461
 462        if (tr->b->interrupt_enable)
 463                hi |= INT_TYPE_APIC;
 464
 465 done:
 466
 467        hi |= MASK_COUNT_EN_HI;
 468        wrmsr(tr->b->address, lo, hi);
 469}
 470
 471static void mce_threshold_block_init(struct threshold_block *b, int offset)
 472{
 473        struct thresh_restart tr = {
 474                .b                      = b,
 475                .set_lvt_off            = 1,
 476                .lvt_off                = offset,
 477        };
 478
 479        b->threshold_limit              = THRESHOLD_MAX;
 480        threshold_restart_bank(&tr);
 481};
 482
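    /*
     * Reserve LVT offset @new for the threshold APIC interrupt if nothing has
     * been reserved yet (@reserved < 0); otherwise return @reserved unchanged.
     */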
 483static int setup_APIC_mce_threshold(int reserved, int new)
 484{
 485        if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
 486                                              APIC_EILVT_MSG_FIX, 0))
 487                return new;
 488
 489        return reserved;
 490}
 491
 492static int setup_APIC_deferred_error(int reserved, int new)
 493{
 494        if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
 495                                              APIC_EILVT_MSG_FIX, 0))
 496                return new;
 497
 498        return reserved;
 499}
 500
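    /* Set up the APIC LVT interrupt used to report deferred errors (SUCCOR). */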
 501static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
 502{
 503        u32 low = 0, high = 0;
 504        int def_offset = -1, def_new;
 505
 506        if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
 507                return;
 508
 509        def_new = (low & MASK_DEF_LVTOFF) >> 4;
 510        if (!(low & MASK_DEF_LVTOFF)) {
 511                pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
 512                def_new = DEF_LVT_OFF;
 513                low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
 514        }
 515
 516        def_offset = setup_APIC_deferred_error(def_offset, def_new);
 517        if ((def_offset == def_new) &&
 518            (deferred_error_int_vector != amd_deferred_error_interrupt))
 519                deferred_error_int_vector = amd_deferred_error_interrupt;
 520
 521        if (!mce_flags.smca)
 522                low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
 523
 524        wrmsr(MSR_CU_DEF_ERR, low, high);
 525}
 526
 527static u32 smca_get_block_address(unsigned int bank, unsigned int block,
 528                                  unsigned int cpu)
 529{
 530        if (!block)
 531                return MSR_AMD64_SMCA_MCx_MISC(bank);
 532
 533        if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank)))
 534                return 0;
 535
 536        return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
 537}
 538
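    /*
     * Return the MSR address of MCA_MISC[@block] for @bank, or 0 if that
     * block does not exist.
     */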
 539static u32 get_block_address(u32 current_addr, u32 low, u32 high,
 540                             unsigned int bank, unsigned int block,
 541                             unsigned int cpu)
 542{
 543        u32 addr = 0, offset = 0;
 544
 545        if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS))
 546                return addr;
 547
 548        if (mce_flags.smca)
 549                return smca_get_block_address(bank, block, cpu);
 550
 551        /* Fall back to method we used for older processors: */
 552        switch (block) {
 553        case 0:
 554                addr = msr_ops.misc(bank);
 555                break;
 556        case 1:
 557                offset = ((low & MASK_BLKPTR_LO) >> 21);
 558                if (offset)
 559                        addr = MCG_XBLK_ADDR + offset;
 560                break;
 561        default:
 562                addr = ++current_addr;
 563        }
 564        return addr;
 565}
 566
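    /*
     * Record @bank in bank_map (on its first block), pick the LVT offset for
     * the thresholding interrupt and program the block's initial state.
     */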
 567static int
 568prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
 569                        int offset, u32 misc_high)
 570{
 571        unsigned int cpu = smp_processor_id();
 572        u32 smca_low, smca_high;
 573        struct threshold_block b;
 574        int new;
 575
 576        if (!block)
 577                per_cpu(bank_map, cpu) |= (1 << bank);
 578
 579        memset(&b, 0, sizeof(b));
 580        b.cpu                   = cpu;
 581        b.bank                  = bank;
 582        b.block                 = block;
 583        b.address               = addr;
 584        b.interrupt_capable     = lvt_interrupt_supported(bank, misc_high);
 585
 586        if (!b.interrupt_capable)
 587                goto done;
 588
 589        b.interrupt_enable = 1;
 590
 591        if (!mce_flags.smca) {
 592                new = (misc_high & MASK_LVTOFF_HI) >> 20;
 593                goto set_offset;
 594        }
 595
 596        /* Gather LVT offset for thresholding: */
 597        if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
 598                goto out;
 599
 600        new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
 601
 602set_offset:
 603        offset = setup_APIC_mce_threshold(offset, new);
 604        if (offset == new)
 605                thresholding_irq_en = true;
 606
 607done:
 608        mce_threshold_block_init(&b, offset);
 609
 610out:
 611        return offset;
 612}
 613
 614bool amd_filter_mce(struct mce *m)
 615{
 616        enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank);
 617        struct cpuinfo_x86 *c = &boot_cpu_data;
 618
 619        /* See Family 17h Models 10h-2Fh Erratum #1114. */
 620        if (c->x86 == 0x17 &&
 621            c->x86_model >= 0x10 && c->x86_model <= 0x2F &&
 622            bank_type == SMCA_IF && XEC(m->status, 0x3f) == 10)
 623                return true;
 624
 625        /* NB GART TLB error reporting is disabled by default. */
 626        if (c->x86 < 0x17) {
 627                if (m->bank == 4 && XEC(m->status, 0x1f) == 0x5)
 628                        return true;
 629        }
 630
 631        return false;
 632}
 633
 634/*
 635 * Turn off thresholding banks for the following conditions:
 636 * - MC4_MISC thresholding is not supported on Family 0x15.
 637 * - Prevent possible spurious interrupts from the IF bank on Family 0x17
 638 *   Models 0x10-0x2F due to Erratum #1114.
 639 */
 640static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
 641{
 642        int i, num_msrs;
 643        u64 hwcr;
 644        bool need_toggle;
 645        u32 msrs[NR_BLOCKS];
 646
 647        if (c->x86 == 0x15 && bank == 4) {
 648                msrs[0] = 0x00000413; /* MC4_MISC0 */
 649                msrs[1] = 0xc0000408; /* MC4_MISC1 */
 650                num_msrs = 2;
 651        } else if (c->x86 == 0x17 &&
 652                   (c->x86_model >= 0x10 && c->x86_model <= 0x2F)) {
 653
 654                if (smca_get_bank_type(smp_processor_id(), bank) != SMCA_IF)
 655                        return;
 656
 657                msrs[0] = MSR_AMD64_SMCA_MCx_MISC(bank);
 658                num_msrs = 1;
 659        } else {
 660                return;
 661        }
 662
 663        rdmsrl(MSR_K7_HWCR, hwcr);
 664
 665        /* McStatusWrEn has to be set */
 666        need_toggle = !(hwcr & BIT(18));
 667        if (need_toggle)
 668                wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
 669
 670        /* Clear CntP bit safely */
 671        for (i = 0; i < num_msrs; i++)
 672                msr_clear_bit(msrs[i], 62);
 673
 674        /* restore old settings */
 675        if (need_toggle)
 676                wrmsrl(MSR_K7_HWCR, hwcr);
 677}
 678
 679/* cpu init entry point, called from mce.c with preempt off */
 680void mce_amd_feature_init(struct cpuinfo_x86 *c)
 681{
 682        unsigned int bank, block, cpu = smp_processor_id();
 683        u32 low = 0, high = 0, address = 0;
 684        int offset = -1;
 685
 686
 687        for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
 688                if (mce_flags.smca)
 689                        smca_configure(bank, cpu);
 690
 691                disable_err_thresholding(c, bank);
 692
 693                for (block = 0; block < NR_BLOCKS; ++block) {
 694                        address = get_block_address(address, low, high, bank, block, cpu);
 695                        if (!address)
 696                                break;
 697
 698                        if (rdmsr_safe(address, &low, &high))
 699                                break;
 700
 701                        if (!(high & MASK_VALID_HI))
 702                                continue;
 703
 704                        if (!(high & MASK_CNTP_HI)  ||
 705                             (high & MASK_LOCKED_HI))
 706                                continue;
 707
 708                        offset = prepare_threshold_block(bank, block, address, offset, high);
 709                }
 710        }
 711
 712        if (mce_flags.succor)
 713                deferred_error_interrupt_enable(c);
 714}
 715
 716int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
 717{
 718        u64 dram_base_addr, dram_limit_addr, dram_hole_base;
 719        /* We start from the normalized address */
 720        u64 ret_addr = norm_addr;
 721
 722        u32 tmp;
 723
 724        u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask;
 725        u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets;
 726        u8 intlv_addr_sel, intlv_addr_bit;
 727        u8 num_intlv_bits, hashed_bit;
 728        u8 lgcy_mmio_hole_en, base = 0;
 729        u8 cs_mask, cs_id = 0;
 730        bool hash_enabled = false;
 731
 732        /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */
 733        if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &tmp))
 734                goto out_err;
 735
 736        /* Remove HiAddrOffset from normalized address, if enabled: */
 737        if (tmp & BIT(0)) {
 738                u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8;
 739
 740                if (norm_addr >= hi_addr_offset) {
 741                        ret_addr -= hi_addr_offset;
 742                        base = 1;
 743                }
 744        }
 745
 746        /* Read D18F0x110 (DramBaseAddress). */
 747        if (amd_df_indirect_read(nid, 0, 0x110 + (8 * base), umc, &tmp))
 748                goto out_err;
 749
 750        /* Check if address range is valid. */
 751        if (!(tmp & BIT(0))) {
 752                pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n",
 753                        __func__, tmp);
 754                goto out_err;
 755        }
 756
 757        lgcy_mmio_hole_en = tmp & BIT(1);
 758        intlv_num_chan    = (tmp >> 4) & 0xF;
 759        intlv_addr_sel    = (tmp >> 8) & 0x7;
 760        dram_base_addr    = (tmp & GENMASK_ULL(31, 12)) << 16;
 761
 762        /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */
 763        if (intlv_addr_sel > 3) {
 764                pr_err("%s: Invalid interleave address select %d.\n",
 765                        __func__, intlv_addr_sel);
 766                goto out_err;
 767        }
 768
 769        /* Read D18F0x114 (DramLimitAddress). */
 770        if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp))
 771                goto out_err;
 772
 773        intlv_num_sockets = (tmp >> 8) & 0x1;
 774        intlv_num_dies    = (tmp >> 10) & 0x3;
 775        dram_limit_addr   = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0);
 776
 777        intlv_addr_bit = intlv_addr_sel + 8;
 778
 779        /* Re-use intlv_num_chan by setting it equal to log2(#channels) */
 780        switch (intlv_num_chan) {
 781        case 0: intlv_num_chan = 0; break;
 782        case 1: intlv_num_chan = 1; break;
 783        case 3: intlv_num_chan = 2; break;
 784        case 5: intlv_num_chan = 3; break;
 785        case 7: intlv_num_chan = 4; break;
 786
 787        case 8: intlv_num_chan = 1;
 788                hash_enabled = true;
 789                break;
 790        default:
 791                pr_err("%s: Invalid number of interleaved channels %d.\n",
 792                        __func__, intlv_num_chan);
 793                goto out_err;
 794        }
 795
 796        num_intlv_bits = intlv_num_chan;
 797
 798        if (intlv_num_dies > 2) {
 799                pr_err("%s: Invalid number of interleaved nodes/dies %d.\n",
 800                        __func__, intlv_num_dies);
 801                goto out_err;
 802        }
 803
 804        num_intlv_bits += intlv_num_dies;
 805
 806        /* Add a bit if sockets are interleaved. */
 807        num_intlv_bits += intlv_num_sockets;
 808
 809        /* Assert num_intlv_bits <= 4 */
 810        if (num_intlv_bits > 4) {
 811                pr_err("%s: Invalid interleave bits %d.\n",
 812                        __func__, num_intlv_bits);
 813                goto out_err;
 814        }
 815
 816        if (num_intlv_bits > 0) {
 817                u64 temp_addr_x, temp_addr_i, temp_addr_y;
 818                u8 die_id_bit, sock_id_bit, cs_fabric_id;
 819
 820                /*
 821                 * Read FabricBlockInstanceInformation3_CS[BlockFabricID].
 822                 * This is the fabric id for this coherent slave. Use
 823                 * umc/channel# as instance id of the coherent slave
 824                 * for FICAA.
 825                 */
 826                if (amd_df_indirect_read(nid, 0, 0x50, umc, &tmp))
 827                        goto out_err;
 828
 829                cs_fabric_id = (tmp >> 8) & 0xFF;
 830                die_id_bit   = 0;
 831
 832                /* If interleaved over more than 1 channel: */
 833                if (intlv_num_chan) {
 834                        die_id_bit = intlv_num_chan;
 835                        cs_mask    = (1 << die_id_bit) - 1;
 836                        cs_id      = cs_fabric_id & cs_mask;
 837                }
 838
 839                sock_id_bit = die_id_bit;
 840
 841                /* Read D18F1x208 (SystemFabricIdMask). */
 842                if (intlv_num_dies || intlv_num_sockets)
 843                        if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp))
 844                                goto out_err;
 845
 846                /* If interleaved over more than 1 die. */
 847                if (intlv_num_dies) {
 848                        sock_id_bit  = die_id_bit + intlv_num_dies;
 849                        die_id_shift = (tmp >> 24) & 0xF;
 850                        die_id_mask  = (tmp >> 8) & 0xFF;
 851
 852                        cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit;
 853                }
 854
 855                /* If interleaved over more than 1 socket. */
 856                if (intlv_num_sockets) {
 857                        socket_id_shift = (tmp >> 28) & 0xF;
 858                        socket_id_mask  = (tmp >> 16) & 0xFF;
 859
 860                        cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit;
 861                }
 862
 863                /*
 864                 * The pre-interleaved address consists of XXXXXXIIIYYYYY
 865                 * where III is the ID for this CS, and XXXXXXYYYYY are the
 866                 * address bits from the post-interleaved address.
 867                 * "num_intlv_bits" has been calculated to tell us how many "I"
 868                 * bits there are. "intlv_addr_bit" tells us how many "Y" bits
 869                 * there are (where "I" starts).
 870                 */
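                    /*
                     * Illustrative example: with num_intlv_bits == 1, cs_id == 1
                     * and intlv_addr_bit == 8, a normalized 0x1234 is rebuilt as
                     * (0x1200 << 1) | (1 << 8) | 0x34 == 0x2534.
                     */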
 871                temp_addr_y = ret_addr & GENMASK_ULL(intlv_addr_bit-1, 0);
 872                temp_addr_i = (cs_id << intlv_addr_bit);
 873                temp_addr_x = (ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits;
 874                ret_addr    = temp_addr_x | temp_addr_i | temp_addr_y;
 875        }
 876
 877        /* Add dram base address */
 878        ret_addr += dram_base_addr;
 879
 880        /* If legacy MMIO hole enabled */
 881        if (lgcy_mmio_hole_en) {
 882                if (amd_df_indirect_read(nid, 0, 0x104, umc, &tmp))
 883                        goto out_err;
 884
 885                dram_hole_base = tmp & GENMASK(31, 24);
 886                if (ret_addr >= dram_hole_base)
 887                        ret_addr += (BIT_ULL(32) - dram_hole_base);
 888        }
 889
 890        if (hash_enabled) {
 891                /* Save some parentheses and grab ls-bit at the end. */
 892                hashed_bit =    (ret_addr >> 12) ^
 893                                (ret_addr >> 18) ^
 894                                (ret_addr >> 21) ^
 895                                (ret_addr >> 30) ^
 896                                cs_id;
 897
 898                hashed_bit &= BIT(0);
 899
 900                if (hashed_bit != ((ret_addr >> intlv_addr_bit) & BIT(0)))
 901                        ret_addr ^= BIT(intlv_addr_bit);
 902        }
 903
 904        /* Is the calculated system address above the DRAM limit address? */
 905        if (ret_addr > dram_limit_addr)
 906                goto out_err;
 907
 908        *sys_addr = ret_addr;
 909        return 0;
 910
 911out_err:
 912        return -EINVAL;
 913}
 914EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);
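
    /*
     * Sketch of a caller (illustrative only): an EDAC driver translating the
     * UMC-normalized address reported in MCA_ADDR, where @nid and @umc
     * identify the node and memory channel that reported the error:
     *
     *	u64 sys_addr;
     *
     *	if (umc_normaddr_to_sysaddr(m->addr, nid, umc, &sys_addr))
     *		return;		(translation failed, keep the raw address)
     *
     *	sys_addr now holds the system physical address.
     */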
 915
 916bool amd_mce_is_memory_error(struct mce *m)
 917{
 918        /* ErrCodeExt[20:16] */
 919        u8 xec = (m->status >> 16) & 0x1f;
 920
 921        if (mce_flags.smca)
 922                return smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC && xec == 0x0;
 923
 924        return m->bank == 4 && xec == 0x8;
 925}
 926
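    /* Assemble a struct mce from the raw register values and hand it to mce_log(). */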
 927static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
 928{
 929        struct mce m;
 930
 931        mce_setup(&m);
 932
 933        m.status = status;
 934        m.misc   = misc;
 935        m.bank   = bank;
 936        m.tsc    = rdtsc();
 937
 938        if (m.status & MCI_STATUS_ADDRV) {
 939                m.addr = addr;
 940
 941                /*
 942                 * Extract [55:<lsb>] where lsb is the least significant
 943                 * *valid* bit of the address bits.
 944                 */
 945                if (mce_flags.smca) {
 946                        u8 lsb = (m.addr >> 56) & 0x3f;
 947
 948                        m.addr &= GENMASK_ULL(55, lsb);
 949                }
 950        }
 951
 952        if (mce_flags.smca) {
 953                rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);
 954
 955                if (m.status & MCI_STATUS_SYNDV)
 956                        rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
 957        }
 958
 959        mce_log(&m);
 960}
 961
 962asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(struct pt_regs *regs)
 963{
 964        entering_irq();
 965        trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
 966        inc_irq_stat(irq_deferred_error_count);
 967        deferred_error_int_vector();
 968        trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
 969        exiting_ack_irq();
 970}
 971
 972/*
 973 * Returns true if the logged error is deferred. False, otherwise.
 974 */
 975static inline bool
 976_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
 977{
 978        u64 status, addr = 0;
 979
 980        rdmsrl(msr_stat, status);
 981        if (!(status & MCI_STATUS_VAL))
 982                return false;
 983
 984        if (status & MCI_STATUS_ADDRV)
 985                rdmsrl(msr_addr, addr);
 986
 987        __log_error(bank, status, addr, misc);
 988
 989        wrmsrl(msr_stat, 0);
 990
 991        return status & MCI_STATUS_DEFERRED;
 992}
 993
 994/*
 995 * We have three scenarios for checking for Deferred errors:
 996 *
 997 * 1) Non-SMCA systems check MCA_STATUS and log error if found.
 998 * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
 999 *    clear MCA_DESTAT.
1000 * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
1001 *    log it.
1002 */
1003static void log_error_deferred(unsigned int bank)
1004{
1005        bool defrd;
1006
1007        defrd = _log_error_bank(bank, msr_ops.status(bank),
1008                                        msr_ops.addr(bank), 0);
1009
1010        if (!mce_flags.smca)
1011                return;
1012
1013        /* Clear MCA_DESTAT if we logged the deferred error from MCA_STATUS. */
1014        if (defrd) {
1015                wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
1016                return;
1017        }
1018
1019        /*
1020         * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
1021         * for a valid error.
1022         */
1023        _log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank),
1024                              MSR_AMD64_SMCA_MCx_DEADDR(bank), 0);
1025}
1026
1027/* APIC interrupt handler for deferred errors */
1028static void amd_deferred_error_interrupt(void)
1029{
1030        unsigned int bank;
1031
1032        for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank)
1033                log_error_deferred(bank);
1034}
1035
1036static void log_error_thresholding(unsigned int bank, u64 misc)
1037{
1038        _log_error_bank(bank, msr_ops.status(bank), msr_ops.addr(bank), misc);
1039}
1040
1041static void log_and_reset_block(struct threshold_block *block)
1042{
1043        struct thresh_restart tr;
1044        u32 low = 0, high = 0;
1045
1046        if (!block)
1047                return;
1048
1049        if (rdmsr_safe(block->address, &low, &high))
1050                return;
1051
1052        if (!(high & MASK_OVERFLOW_HI))
1053                return;
1054
1055        /* Log the MCE which caused the threshold event. */
1056        log_error_thresholding(block->bank, ((u64)high << 32) | low);
1057
1058        /* Reset threshold block after logging error. */
1059        memset(&tr, 0, sizeof(tr));
1060        tr.b = block;
1061        threshold_restart_bank(&tr);
1062}
1063
1064/*
1065 * Threshold interrupt handler will service THRESHOLD_APIC_VECTOR. The interrupt
1066 * goes off when error_count reaches threshold_limit.
1067 */
1068static void amd_threshold_interrupt(void)
1069{
1070        struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
1071        struct threshold_bank **bp = this_cpu_read(threshold_banks);
1072        unsigned int bank, cpu = smp_processor_id();
1073
1074        /*
1075         * Validate that the threshold bank has been initialized already. The
1076         * handler is installed at boot time, but on a hotplug event the
1077         * interrupt might fire before the data has been initialized.
1078         */
1079        if (!bp)
1080                return;
1081
1082        for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
1083                if (!(per_cpu(bank_map, cpu) & (1 << bank)))
1084                        continue;
1085
1086                first_block = bp[bank]->blocks;
1087                if (!first_block)
1088                        continue;
1089
1090                /*
1091                 * The first block is also the head of the list. Check it first
1092                 * before iterating over the rest.
1093                 */
1094                log_and_reset_block(first_block);
1095                list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj)
1096                        log_and_reset_block(block);
1097        }
1098}
1099
1100/*
1101 * Sysfs Interface
1102 */
1103
1104struct threshold_attr {
1105        struct attribute attr;
1106        ssize_t (*show) (struct threshold_block *, char *);
1107        ssize_t (*store) (struct threshold_block *, const char *, size_t count);
1108};
1109
1110#define SHOW_FIELDS(name)                                               \
1111static ssize_t show_ ## name(struct threshold_block *b, char *buf)      \
1112{                                                                       \
1113        return sprintf(buf, "%lu\n", (unsigned long) b->name);          \
1114}
1115SHOW_FIELDS(interrupt_enable)
1116SHOW_FIELDS(threshold_limit)
1117
1118static ssize_t
1119store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
1120{
1121        struct thresh_restart tr;
1122        unsigned long new;
1123
1124        if (!b->interrupt_capable)
1125                return -EINVAL;
1126
1127        if (kstrtoul(buf, 0, &new) < 0)
1128                return -EINVAL;
1129
1130        b->interrupt_enable = !!new;
1131
1132        memset(&tr, 0, sizeof(tr));
1133        tr.b            = b;
1134
1135        if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1))
1136                return -ENODEV;
1137
1138        return size;
1139}
1140
1141static ssize_t
1142store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
1143{
1144        struct thresh_restart tr;
1145        unsigned long new;
1146
1147        if (kstrtoul(buf, 0, &new) < 0)
1148                return -EINVAL;
1149
1150        if (new > THRESHOLD_MAX)
1151                new = THRESHOLD_MAX;
1152        if (new < 1)
1153                new = 1;
1154
1155        memset(&tr, 0, sizeof(tr));
1156        tr.old_limit = b->threshold_limit;
1157        b->threshold_limit = new;
1158        tr.b = b;
1159
1160        if (smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1))
1161                return -ENODEV;
1162
1163        return size;
1164}
1165
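    /*
     * The hardware counter is preloaded with THRESHOLD_MAX - threshold_limit
     * so that it overflows after threshold_limit errors; subtract that bias
     * to report the number of errors actually seen.
     */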
1166static ssize_t show_error_count(struct threshold_block *b, char *buf)
1167{
1168        u32 lo, hi;
1169
1170        /* CPU might be offline by now */
1171        if (rdmsr_on_cpu(b->cpu, b->address, &lo, &hi))
1172                return -ENODEV;
1173
1174        return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
1175                                     (THRESHOLD_MAX - b->threshold_limit)));
1176}
1177
1178static struct threshold_attr error_count = {
1179        .attr = {.name = __stringify(error_count), .mode = 0444 },
1180        .show = show_error_count,
1181};
1182
1183#define RW_ATTR(val)                                                    \
1184static struct threshold_attr val = {                                    \
1185        .attr   = {.name = __stringify(val), .mode = 0644 },            \
1186        .show   = show_## val,                                          \
1187        .store  = store_## val,                                         \
1188};
1189
1190RW_ATTR(interrupt_enable);
1191RW_ATTR(threshold_limit);
1192
1193static struct attribute *default_attrs[] = {
1194        &threshold_limit.attr,
1195        &error_count.attr,
1196        NULL,   /* possibly interrupt_enable if supported, see below */
1197        NULL,
1198};
1199
1200#define to_block(k)     container_of(k, struct threshold_block, kobj)
1201#define to_attr(a)      container_of(a, struct threshold_attr, attr)
1202
1203static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
1204{
1205        struct threshold_block *b = to_block(kobj);
1206        struct threshold_attr *a = to_attr(attr);
1207        ssize_t ret;
1208
1209        ret = a->show ? a->show(b, buf) : -EIO;
1210
1211        return ret;
1212}
1213
1214static ssize_t store(struct kobject *kobj, struct attribute *attr,
1215                     const char *buf, size_t count)
1216{
1217        struct threshold_block *b = to_block(kobj);
1218        struct threshold_attr *a = to_attr(attr);
1219        ssize_t ret;
1220
1221        ret = a->store ? a->store(b, buf, count) : -EIO;
1222
1223        return ret;
1224}
1225
1226static const struct sysfs_ops threshold_ops = {
1227        .show                   = show,
1228        .store                  = store,
1229};
1230
1231static void threshold_block_release(struct kobject *kobj);
1232
1233static struct kobj_type threshold_ktype = {
1234        .sysfs_ops              = &threshold_ops,
1235        .default_attrs          = default_attrs,
1236        .release                = threshold_block_release,
1237};
1238
1239static const char *get_name(unsigned int cpu, unsigned int bank, struct threshold_block *b)
1240{
1241        enum smca_bank_types bank_type;
1242
1243        if (!mce_flags.smca) {
1244                if (b && bank == 4)
1245                        return bank4_names(b);
1246
1247                return th_names[bank];
1248        }
1249
1250        bank_type = smca_get_bank_type(cpu, bank);
1251        if (bank_type >= N_SMCA_BANK_TYPES)
1252                return NULL;
1253
1254        if (b && bank_type == SMCA_UMC) {
1255                if (b->block < ARRAY_SIZE(smca_umc_block_names))
1256                        return smca_umc_block_names[b->block];
1257                return NULL;
1258        }
1259
1260        if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1)
1261                return smca_get_name(bank_type);
1262
1263        snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN,
1264                 "%s_%u", smca_get_name(bank_type),
1265                          per_cpu(smca_banks, cpu)[bank].sysfs_id);
1266        return buf_mcatype;
1267}
1268
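    /*
     * Allocate and initialize the sysfs object for one threshold block, then
     * recurse over the remaining blocks of @bank.
     */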
1269static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb,
1270                                     unsigned int bank, unsigned int block,
1271                                     u32 address)
1272{
1273        struct threshold_block *b = NULL;
1274        u32 low, high;
1275        int err;
1276
1277        if ((bank >= this_cpu_read(mce_num_banks)) || (block >= NR_BLOCKS))
1278                return 0;
1279
1280        if (rdmsr_safe(address, &low, &high))
1281                return 0;
1282
1283        if (!(high & MASK_VALID_HI)) {
1284                if (block)
1285                        goto recurse;
1286                else
1287                        return 0;
1288        }
1289
1290        if (!(high & MASK_CNTP_HI)  ||
1291             (high & MASK_LOCKED_HI))
1292                goto recurse;
1293
1294        b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
1295        if (!b)
1296                return -ENOMEM;
1297
1298        b->block                = block;
1299        b->bank                 = bank;
1300        b->cpu                  = cpu;
1301        b->address              = address;
1302        b->interrupt_enable     = 0;
1303        b->interrupt_capable    = lvt_interrupt_supported(bank, high);
1304        b->threshold_limit      = THRESHOLD_MAX;
1305
1306        if (b->interrupt_capable) {
1307                threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
1308                b->interrupt_enable = 1;
1309        } else {
1310                threshold_ktype.default_attrs[2] = NULL;
1311        }
1312
1313        INIT_LIST_HEAD(&b->miscj);
1314
1315        /* This is safe as @tb is not visible yet */
1316        if (tb->blocks)
1317                list_add(&b->miscj, &tb->blocks->miscj);
1318        else
1319                tb->blocks = b;
1320
1321        err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b));
1322        if (err)
1323                goto out_free;
1324recurse:
1325        address = get_block_address(address, low, high, bank, ++block, cpu);
1326        if (!address)
1327                return 0;
1328
1329        err = allocate_threshold_blocks(cpu, tb, bank, block, address);
1330        if (err)
1331                goto out_free;
1332
1333        if (b)
1334                kobject_uevent(&b->kobj, KOBJ_ADD);
1335
1336        return 0;
1337
1338out_free:
1339        if (b) {
1340                list_del(&b->miscj);
1341                kobject_put(&b->kobj);
1342        }
1343        return err;
1344}
1345
1346static int __threshold_add_blocks(struct threshold_bank *b)
1347{
1348        struct list_head *head = &b->blocks->miscj;
1349        struct threshold_block *pos = NULL;
1350        struct threshold_block *tmp = NULL;
1351        int err = 0;
1352
1353        err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
1354        if (err)
1355                return err;
1356
1357        list_for_each_entry_safe(pos, tmp, head, miscj) {
1358
1359                err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
1360                if (err) {
1361                        list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
1362                                kobject_del(&pos->kobj);
1363
1364                        return err;
1365                }
1366        }
1367        return err;
1368}
1369
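    /*
     * Create the kobject hierarchy for one bank: reuse the node-shared bank4
     * descriptor where appropriate, otherwise allocate a fresh one and
     * populate its blocks.
     */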
1370static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
1371                                 unsigned int bank)
1372{
1373        struct device *dev = this_cpu_read(mce_device);
1374        struct amd_northbridge *nb = NULL;
1375        struct threshold_bank *b = NULL;
1376        const char *name = get_name(cpu, bank, NULL);
1377        int err = 0;
1378
1379        if (!dev)
1380                return -ENODEV;
1381
1382        if (is_shared_bank(bank)) {
1383                nb = node_to_amd_nb(topology_die_id(cpu));
1384
1385                /* threshold descriptor already initialized on this node? */
1386                if (nb && nb->bank4) {
1387                        /* yes, use it */
1388                        b = nb->bank4;
1389                        err = kobject_add(b->kobj, &dev->kobj, name);
1390                        if (err)
1391                                goto out;
1392
1393                        bp[bank] = b;
1394                        refcount_inc(&b->cpus);
1395
1396                        err = __threshold_add_blocks(b);
1397
1398                        goto out;
1399                }
1400        }
1401
1402        b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
1403        if (!b) {
1404                err = -ENOMEM;
1405                goto out;
1406        }
1407
1408        /* Associate the bank with the per-CPU MCE device */
1409        b->kobj = kobject_create_and_add(name, &dev->kobj);
1410        if (!b->kobj) {
1411                err = -EINVAL;
1412                goto out_free;
1413        }
1414
1415        if (is_shared_bank(bank)) {
1416                b->shared = 1;
1417                refcount_set(&b->cpus, 1);
1418
1419                /* nb is already initialized, see above */
1420                if (nb) {
1421                        WARN_ON(nb->bank4);
1422                        nb->bank4 = b;
1423                }
1424        }
1425
1426        err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank));
1427        if (err)
1428                goto out_kobj;
1429
1430        bp[bank] = b;
1431        return 0;
1432
1433out_kobj:
1434        kobject_put(b->kobj);
1435out_free:
1436        kfree(b);
1437out:
1438        return err;
1439}
1440
1441static void threshold_block_release(struct kobject *kobj)
1442{
1443        kfree(to_block(kobj));
1444}
1445
1446static void deallocate_threshold_blocks(struct threshold_bank *bank)
1447{
1448        struct threshold_block *pos, *tmp;
1449
1450        list_for_each_entry_safe(pos, tmp, &bank->blocks->miscj, miscj) {
1451                list_del(&pos->miscj);
1452                kobject_put(&pos->kobj);
1453        }
1454
1455        kobject_put(&bank->blocks->kobj);
1456}
1457
1458static void __threshold_remove_blocks(struct threshold_bank *b)
1459{
1460        struct threshold_block *pos = NULL;
1461        struct threshold_block *tmp = NULL;
1462
1463        kobject_del(b->kobj);
1464
1465        list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
1466                kobject_del(&pos->kobj);
1467}
1468
1469static void threshold_remove_bank(struct threshold_bank *bank)
1470{
1471        struct amd_northbridge *nb;
1472
1473        if (!bank->blocks)
1474                goto out_free;
1475
1476        if (!bank->shared)
1477                goto out_dealloc;
1478
1479        if (!refcount_dec_and_test(&bank->cpus)) {
1480                __threshold_remove_blocks(bank);
1481                return;
1482        } else {
1483                /*
1484                 * The last CPU on this node using the shared bank is going
1485                 * away, remove that bank now.
1486                 */
1487                nb = node_to_amd_nb(topology_die_id(smp_processor_id()));
1488                nb->bank4 = NULL;
1489        }
1490
1491out_dealloc:
1492        deallocate_threshold_blocks(bank);
1493
1494out_free:
1495        kobject_put(bank->kobj);
1496        kfree(bank);
1497}
1498
1499int mce_threshold_remove_device(unsigned int cpu)
1500{
1501        struct threshold_bank **bp = this_cpu_read(threshold_banks);
1502        unsigned int bank, numbanks = this_cpu_read(mce_num_banks);
1503
1504        if (!bp)
1505                return 0;
1506
1507        /*
1508         * Clear the pointer before cleaning up, so that the interrupt won't
1509         * touch anything of this.
1510         */
1511        this_cpu_write(threshold_banks, NULL);
1512
1513        for (bank = 0; bank < numbanks; bank++) {
1514                if (bp[bank]) {
1515                        threshold_remove_bank(bp[bank]);
1516                        bp[bank] = NULL;
1517                }
1518        }
1519        kfree(bp);
1520        return 0;
1521}
1522
1523/**
1524 * mce_threshold_create_device - Create the per-CPU MCE threshold device
1525 * @cpu:        The plugged in CPU
1526 *
1527 * Create directories and files for all valid threshold banks.
1528 *
1529 * This is invoked from the CPU hotplug callback which was installed in
1530 * mcheck_init_device(). The invocation happens in context of the hotplug
1531 * thread running on @cpu.  The callback is invoked on all CPUs which are
1532 * online when the callback is installed or during a real hotplug event.
1533 */
1534int mce_threshold_create_device(unsigned int cpu)
1535{
1536        unsigned int numbanks, bank;
1537        struct threshold_bank **bp;
1538        int err;
1539
1540        if (!mce_flags.amd_threshold)
1541                return 0;
1542
1543        bp = this_cpu_read(threshold_banks);
1544        if (bp)
1545                return 0;
1546
1547        numbanks = this_cpu_read(mce_num_banks);
1548        bp = kcalloc(numbanks, sizeof(*bp), GFP_KERNEL);
1549        if (!bp)
1550                return -ENOMEM;
1551
1552        for (bank = 0; bank < numbanks; ++bank) {
1553                if (!(this_cpu_read(bank_map) & (1 << bank)))
1554                        continue;
1555                err = threshold_create_bank(bp, cpu, bank);
1556                if (err)
1557                        goto out_err;
1558        }
1559        this_cpu_write(threshold_banks, bp);
1560
1561        if (thresholding_irq_en)
1562                mce_threshold_vector = amd_threshold_interrupt;
1563        return 0;
1564out_err:
1565        mce_threshold_remove_device(cpu);
1566        return err;
1567}
1568