linux/arch/x86/kernel/cpu/mcheck/mce_amd.c
/*
 *  (c) 2005-2016 Advanced Micro Devices, Inc.
 *  Your use of this code is subject to the terms and conditions of the
 *  GNU general public license version 2. See "COPYING" or
 *  http://www.gnu.org/licenses/gpl.html
 *
 *  Written by Jacob Shin - AMD, Inc.
 *  Maintained by: Borislav Petkov <bp@alien8.de>
 *
 *  All MC4_MISCi registers are shared between cores on a node.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/kobject.h>
#include <linux/percpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/smp.h>

#include <asm/amd_nb.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>

#define NR_BLOCKS         5
#define THRESHOLD_MAX     0xFFF
#define INT_TYPE_APIC     0x00020000
#define MASK_VALID_HI     0x80000000
#define MASK_CNTP_HI      0x40000000
#define MASK_LOCKED_HI    0x20000000
#define MASK_LVTOFF_HI    0x00F00000
#define MASK_COUNT_EN_HI  0x00080000
#define MASK_INT_TYPE_HI  0x00060000
#define MASK_OVERFLOW_HI  0x00010000
#define MASK_ERR_COUNT_HI 0x00000FFF
#define MASK_BLKPTR_LO    0xFF000000
#define MCG_XBLK_ADDR     0xC0000400

/* Deferred error settings */
#define MSR_CU_DEF_ERR          0xC0000410
#define MASK_DEF_LVTOFF         0x000000F0
#define MASK_DEF_INT_TYPE       0x00000006
#define DEF_LVT_OFF             0x2
#define DEF_INT_TYPE_APIC       0x2

/* Scalable MCA: */

/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF        0xF000

static const char * const th_names[] = {
        "load_store",
        "insn_fetch",
        "combined_unit",
        "",
        "northbridge",
        "execution_unit",
};

/* Define HWID to IP type mappings for Scalable MCA */
struct amd_hwid amd_hwids[] = {
        [SMCA_F17H_CORE]        = { "f17h_core",        0xB0 },
        [SMCA_DF]               = { "data_fabric",      0x2E },
        [SMCA_UMC]              = { "umc",              0x96 },
        [SMCA_PB]               = { "param_block",      0x5 },
        [SMCA_PSP]              = { "psp",              0xFF },
        [SMCA_SMU]              = { "smu",              0x1 },
};
EXPORT_SYMBOL_GPL(amd_hwids);

const char * const amd_core_mcablock_names[] = {
        [SMCA_LS]               = "load_store",
        [SMCA_IF]               = "insn_fetch",
        [SMCA_L2_CACHE]         = "l2_cache",
        [SMCA_DE]               = "decode_unit",
        [RES]                   = "",
        [SMCA_EX]               = "execution_unit",
        [SMCA_FP]               = "floating_point",
        [SMCA_L3_CACHE]         = "l3_cache",
};
EXPORT_SYMBOL_GPL(amd_core_mcablock_names);

const char * const amd_df_mcablock_names[] = {
        [SMCA_CS]               = "coherent_slave",
        [SMCA_PIE]              = "pie",
};
EXPORT_SYMBOL_GPL(amd_df_mcablock_names);

static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned int, bank_map);  /* see which banks are on */

static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);

static void default_deferred_error_interrupt(void)
{
        pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;

/*
 * CPU Initialization
 */

struct thresh_restart {
        struct threshold_block  *b;
        int                     reset;
        int                     set_lvt_off;
        int                     lvt_off;
        u16                     old_limit;
};

static inline bool is_shared_bank(int bank)
{
        /*
         * Scalable MCA provides for only one core to have access to the MSRs of
         * a shared bank.
         */
        if (mce_flags.smca)
                return false;

        /* Bank 4 is for northbridge reporting and is thus shared */
        return (bank == 4);
}

static const char *bank4_names(const struct threshold_block *b)
{
        switch (b->address) {
        /* MSR4_MISC0 */
        case 0x00000413:
                return "dram";

        case 0xc0000408:
                return "ht_links";

        case 0xc0000409:
                return "l3_cache";

        default:
                WARN(1, "Funny MSR: 0x%08x\n", b->address);
                return "";
        }
};


static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
{
        /*
         * bank 4 supports APIC LVT interrupts implicitly since forever.
         */
        if (bank == 4)
                return true;

        /*
         * IntP: interrupt present; if this bit is set, the thresholding
         * bank can generate APIC LVT interrupts
         */
        return msr_high_bits & BIT(28);
}

static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
{
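        /* LVT offset currently programmed in MCi_MISC[LvtOff], bits 23:20 of the high half: */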
        int msr = (hi & MASK_LVTOFF_HI) >> 20;

        if (apic < 0) {
                pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
                       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
                       b->bank, b->block, b->address, hi, lo);
                return 0;
        }

        if (apic != msr) {
                /*
                 * On SMCA CPUs, LVT offset is programmed at a different MSR, and
                 * the BIOS provides the value. The original field where LVT offset
                 * was set is reserved. Return early here:
                 */
                if (mce_flags.smca)
                        return 0;

                pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
                       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
                       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
                return 0;
        }

        return 1;
};

/* Reprogram MCx_MISC MSR behind this threshold bank. */
static void threshold_restart_bank(void *_tr)
{
        struct thresh_restart *tr = _tr;
        u32 hi, lo;

        rdmsr(tr->b->address, lo, hi);

        if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
                tr->reset = 1;  /* limit cannot be lower than err count */

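        /*
         * The error counter counts up from (THRESHOLD_MAX - threshold_limit),
         * so the overflow interrupt fires after threshold_limit more errors.
         */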
        if (tr->reset) {                /* reset err count and overflow bit */
                hi =
                    (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
                    (THRESHOLD_MAX - tr->b->threshold_limit);
        } else if (tr->old_limit) {     /* change limit w/o reset */
                int new_count = (hi & THRESHOLD_MAX) +
                    (tr->old_limit - tr->b->threshold_limit);

                hi = (hi & ~MASK_ERR_COUNT_HI) |
                    (new_count & THRESHOLD_MAX);
        }

        /* clear IntType */
        hi &= ~MASK_INT_TYPE_HI;

        if (!tr->b->interrupt_capable)
                goto done;

        if (tr->set_lvt_off) {
                if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
                        /* set new lvt offset */
                        hi &= ~MASK_LVTOFF_HI;
                        hi |= tr->lvt_off << 20;
                }
        }

        if (tr->b->interrupt_enable)
                hi |= INT_TYPE_APIC;

 done:

        hi |= MASK_COUNT_EN_HI;
        wrmsr(tr->b->address, lo, hi);
}

static void mce_threshold_block_init(struct threshold_block *b, int offset)
{
        struct thresh_restart tr = {
                .b                      = b,
                .set_lvt_off            = 1,
                .lvt_off                = offset,
        };

        b->threshold_limit              = THRESHOLD_MAX;
        threshold_restart_bank(&tr);
};

static int setup_APIC_mce_threshold(int reserved, int new)
{
        if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
                                              APIC_EILVT_MSG_FIX, 0))
                return new;

        return reserved;
}

static int setup_APIC_deferred_error(int reserved, int new)
{
        if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
                                              APIC_EILVT_MSG_FIX, 0))
                return new;

        return reserved;
}

static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
{
        u32 low = 0, high = 0;
        int def_offset = -1, def_new;

        if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
                return;

        def_new = (low & MASK_DEF_LVTOFF) >> 4;
        if (!(low & MASK_DEF_LVTOFF)) {
                pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
                def_new = DEF_LVT_OFF;
                low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
        }

        def_offset = setup_APIC_deferred_error(def_offset, def_new);
        if ((def_offset == def_new) &&
            (deferred_error_int_vector != amd_deferred_error_interrupt))
                deferred_error_int_vector = amd_deferred_error_interrupt;

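        /* Deliver deferred error interrupts via the APIC: */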
        low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
        wrmsr(MSR_CU_DEF_ERR, low, high);
}

static u32 get_block_address(u32 current_addr, u32 low, u32 high,
                             unsigned int bank, unsigned int block)
{
        u32 addr = 0, offset = 0;

        if (mce_flags.smca) {
                if (!block) {
                        addr = MSR_AMD64_SMCA_MCx_MISC(bank);
                } else {
                        /*
                         * For SMCA enabled processors, BLKPTR field of the
                         * first MISC register (MCx_MISC0) indicates presence of
                         * additional MISC register set (MISC1-4).
                         */
                        u32 low, high;

                        if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
                                return addr;

                        if (!(low & MCI_CONFIG_MCAX))
                                return addr;

                        if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
                            (low & MASK_BLKPTR_LO))
                                addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
                }
                return addr;
        }

        /* Fall back to method we used for older processors: */
        switch (block) {
        case 0:
                addr = msr_ops.misc(bank);
                break;
        case 1:
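                /* BLKPTR lives in bits 31:24; shifting by 21 yields BLKPTR * 8, the MSR offset from MCG_XBLK_ADDR. */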
                offset = ((low & MASK_BLKPTR_LO) >> 21);
                if (offset)
                        addr = MCG_XBLK_ADDR + offset;
                break;
        default:
                addr = ++current_addr;
        }
        return addr;
}

static int
prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
                        int offset, u32 misc_high)
{
        unsigned int cpu = smp_processor_id();
        u32 smca_low, smca_high, smca_addr;
        struct threshold_block b;
        int new;

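        /* Record that this bank has usable thresholding blocks on this CPU: */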
        if (!block)
                per_cpu(bank_map, cpu) |= (1 << bank);

        memset(&b, 0, sizeof(b));
        b.cpu                   = cpu;
        b.bank                  = bank;
        b.block                 = block;
        b.address               = addr;
        b.interrupt_capable     = lvt_interrupt_supported(bank, misc_high);

        if (!b.interrupt_capable)
                goto done;

        b.interrupt_enable = 1;

        if (!mce_flags.smca) {
                new = (misc_high & MASK_LVTOFF_HI) >> 20;
                goto set_offset;
        }

        smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);

        if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
                /*
                 * OS is required to set the MCAX bit to acknowledge that it is
                 * now using the new MSR ranges and new registers under each
                 * bank. It also means that the OS will configure deferred
                 * errors in the new MCx_CONFIG register. If the bit is not set,
                 * uncorrectable errors will cause a system panic.
                 *
                 * MCA_CONFIG[MCAX] is bit 32 (bit 0 in the high portion of the MSR).
                 */
                smca_high |= BIT(0);

                /*
                 * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
                 * registers with the option of additionally logging to
                 * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
                 *
                 * This bit is usually set by BIOS to retain the old behavior
                 * for OSes that don't use the new registers. Linux supports the
                 * new registers so let's disable that additional logging here.
                 *
                 * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
                 * portion of the MSR).
                 */
                smca_high &= ~BIT(2);

                wrmsr(smca_addr, smca_low, smca_high);
        }

        /* Gather LVT offset for thresholding: */
        if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
                goto out;

        new = (smca_low & SMCA_THR_LVT_OFF) >> 12;

set_offset:
        offset = setup_APIC_mce_threshold(offset, new);

        if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
                mce_threshold_vector = amd_threshold_interrupt;

done:
        mce_threshold_block_init(&b, offset);

out:
        return offset;
}

/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
        u32 low = 0, high = 0, address = 0;
        unsigned int bank, block;
        int offset = -1;

        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                for (block = 0; block < NR_BLOCKS; ++block) {
                        address = get_block_address(address, low, high, bank, block);
                        if (!address)
                                break;

                        if (rdmsr_safe(address, &low, &high))
                                break;

                        if (!(high & MASK_VALID_HI))
                                continue;

                        if (!(high & MASK_CNTP_HI)  ||
                             (high & MASK_LOCKED_HI))
                                continue;

                        offset = prepare_threshold_block(bank, block, address, offset, high);
                }
        }

        if (mce_flags.succor)
                deferred_error_interrupt_enable(c);
}

static void
__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
{
        u32 msr_status = msr_ops.status(bank);
        u32 msr_addr = msr_ops.addr(bank);
        struct mce m;
        u64 status;

        WARN_ON_ONCE(deferred_err && threshold_err);

        if (deferred_err && mce_flags.smca) {
                msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
                msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
        }

        rdmsrl(msr_status, status);

        if (!(status & MCI_STATUS_VAL))
                return;

        mce_setup(&m);

        m.status = status;
        m.bank = bank;

        if (threshold_err)
                m.misc = misc;

        if (m.status & MCI_STATUS_ADDRV)
                rdmsrl(msr_addr, m.addr);

        mce_log(&m);

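        /* Clear the status register so the error is not logged again: */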
        wrmsrl(msr_status, 0);
}

static inline void __smp_deferred_error_interrupt(void)
{
        inc_irq_stat(irq_deferred_error_count);
        deferred_error_int_vector();
}

asmlinkage __visible void smp_deferred_error_interrupt(void)
{
        entering_irq();
        __smp_deferred_error_interrupt();
        exiting_ack_irq();
}

asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
{
        entering_irq();
        trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
        __smp_deferred_error_interrupt();
        trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
        exiting_ack_irq();
}

/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
        unsigned int bank;
        u32 msr_status;
        u64 status;

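        /* SMCA CPUs report deferred errors in MCA_DESTAT; older CPUs use MCA_STATUS: */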
        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
                                              : msr_ops.status(bank);

                rdmsrl(msr_status, status);

                if (!(status & MCI_STATUS_VAL) ||
                    !(status & MCI_STATUS_DEFERRED))
                        continue;

                __log_error(bank, true, false, 0);
                break;
        }
}

/*
 * APIC Interrupt Handler
 */

/*
 * The threshold interrupt handler services THRESHOLD_APIC_VECTOR.
 * The interrupt fires when error_count reaches threshold_limit.
 * The handler simply logs the event to mcelog with a software-defined
 * bank number.
 */

static void amd_threshold_interrupt(void)
{
        u32 low = 0, high = 0, address = 0;
        int cpu = smp_processor_id();
        unsigned int bank, block;

        /* assume first bank caused it */
        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                        continue;
                for (block = 0; block < NR_BLOCKS; ++block) {
                        address = get_block_address(address, low, high, bank, block);
                        if (!address)
                                break;

                        if (rdmsr_safe(address, &low, &high))
                                break;

                        if (!(high & MASK_VALID_HI)) {
                                if (block)
                                        continue;
                                else
                                        break;
                        }

                        if (!(high & MASK_CNTP_HI)  ||
                             (high & MASK_LOCKED_HI))
                                continue;

                        /*
                         * Log the machine check that caused the threshold
                         * event.
                         */
                        if (high & MASK_OVERFLOW_HI)
                                goto log;
                }
        }
        return;

log:
        __log_error(bank, false, true, ((u64)high << 32) | low);
}

/*
 * Sysfs Interface
 */

struct threshold_attr {
        struct attribute attr;
        ssize_t (*show) (struct threshold_block *, char *);
        ssize_t (*store) (struct threshold_block *, const char *, size_t count);
};

#define SHOW_FIELDS(name)                                               \
static ssize_t show_ ## name(struct threshold_block *b, char *buf)      \
{                                                                       \
        return sprintf(buf, "%lu\n", (unsigned long) b->name);          \
}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)

static ssize_t
store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
{
        struct thresh_restart tr;
        unsigned long new;

        if (!b->interrupt_capable)
                return -EINVAL;

        if (kstrtoul(buf, 0, &new) < 0)
                return -EINVAL;

        b->interrupt_enable = !!new;

        memset(&tr, 0, sizeof(tr));
        tr.b            = b;

        smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

        return size;
}

static ssize_t
store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
{
        struct thresh_restart tr;
        unsigned long new;

        if (kstrtoul(buf, 0, &new) < 0)
                return -EINVAL;

        if (new > THRESHOLD_MAX)
                new = THRESHOLD_MAX;
        if (new < 1)
                new = 1;

        memset(&tr, 0, sizeof(tr));
        tr.old_limit = b->threshold_limit;
        b->threshold_limit = new;
        tr.b = b;

        smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

        return size;
}

static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
        u32 lo, hi;

        rdmsr_on_cpu(b->cpu, b->address, &lo, &hi);

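        /*
         * The hardware counter starts at (THRESHOLD_MAX - threshold_limit);
         * subtract that bias back out to report the actual error count.
         */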
        return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
                                     (THRESHOLD_MAX - b->threshold_limit)));
}

static struct threshold_attr error_count = {
        .attr = {.name = __stringify(error_count), .mode = 0444 },
        .show = show_error_count,
};

#define RW_ATTR(val)                                                    \
static struct threshold_attr val = {                                    \
        .attr   = {.name = __stringify(val), .mode = 0644 },            \
        .show   = show_## val,                                          \
        .store  = store_## val,                                         \
};

RW_ATTR(interrupt_enable);
RW_ATTR(threshold_limit);

static struct attribute *default_attrs[] = {
        &threshold_limit.attr,
        &error_count.attr,
        NULL,   /* possibly interrupt_enable if supported, see below */
        NULL,
};

#define to_block(k)     container_of(k, struct threshold_block, kobj)
#define to_attr(a)      container_of(a, struct threshold_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
        struct threshold_block *b = to_block(kobj);
        struct threshold_attr *a = to_attr(attr);
        ssize_t ret;

        ret = a->show ? a->show(b, buf) : -EIO;

        return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
                     const char *buf, size_t count)
{
        struct threshold_block *b = to_block(kobj);
        struct threshold_attr *a = to_attr(attr);
        ssize_t ret;

        ret = a->store ? a->store(b, buf, count) : -EIO;

        return ret;
}

static const struct sysfs_ops threshold_ops = {
        .show                   = show,
        .store                  = store,
};

static struct kobj_type threshold_ktype = {
        .sysfs_ops              = &threshold_ops,
        .default_attrs          = default_attrs,
};

static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
                                     unsigned int block, u32 address)
{
        struct threshold_block *b = NULL;
        u32 low, high;
        int err;

        if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
                return 0;

        if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
                return 0;

        if (!(high & MASK_VALID_HI)) {
                if (block)
                        goto recurse;
                else
                        return 0;
        }

        if (!(high & MASK_CNTP_HI)  ||
             (high & MASK_LOCKED_HI))
                goto recurse;

        b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
        if (!b)
                return -ENOMEM;

        b->block                = block;
        b->bank                 = bank;
        b->cpu                  = cpu;
        b->address              = address;
        b->interrupt_enable     = 0;
        b->interrupt_capable    = lvt_interrupt_supported(bank, high);
        b->threshold_limit      = THRESHOLD_MAX;

        if (b->interrupt_capable) {
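                /* Expose interrupt_enable only for interrupt-capable blocks: */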
                threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
                b->interrupt_enable = 1;
        } else {
                threshold_ktype.default_attrs[2] = NULL;
        }

        INIT_LIST_HEAD(&b->miscj);

        if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
                list_add(&b->miscj,
                         &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
        } else {
                per_cpu(threshold_banks, cpu)[bank]->blocks = b;
        }

        err = kobject_init_and_add(&b->kobj, &threshold_ktype,
                                   per_cpu(threshold_banks, cpu)[bank]->kobj,
                                   (bank == 4 ? bank4_names(b) : th_names[bank]));
        if (err)
                goto out_free;
recurse:
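        /* Walk any remaining blocks in this bank: */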
        address = get_block_address(address, low, high, bank, ++block);
        if (!address)
                return 0;

        err = allocate_threshold_blocks(cpu, bank, block, address);
        if (err)
                goto out_free;

        if (b)
                kobject_uevent(&b->kobj, KOBJ_ADD);

        return err;

out_free:
        if (b) {
                kobject_put(&b->kobj);
                list_del(&b->miscj);
                kfree(b);
        }
        return err;
}

static int __threshold_add_blocks(struct threshold_bank *b)
{
        struct list_head *head = &b->blocks->miscj;
        struct threshold_block *pos = NULL;
        struct threshold_block *tmp = NULL;
        int err = 0;

        err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
        if (err)
                return err;

        list_for_each_entry_safe(pos, tmp, head, miscj) {

                err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
                if (err) {
                        list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
                                kobject_del(&pos->kobj);

                        return err;
                }
        }
        return err;
}

static int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
        struct device *dev = per_cpu(mce_device, cpu);
        struct amd_northbridge *nb = NULL;
        struct threshold_bank *b = NULL;
        const char *name = th_names[bank];
        int err = 0;

        if (is_shared_bank(bank)) {
                nb = node_to_amd_nb(amd_get_nb_id(cpu));

                /* threshold descriptor already initialized on this node? */
                if (nb && nb->bank4) {
                        /* yes, use it */
                        b = nb->bank4;
                        err = kobject_add(b->kobj, &dev->kobj, name);
                        if (err)
                                goto out;

                        per_cpu(threshold_banks, cpu)[bank] = b;
                        atomic_inc(&b->cpus);

                        err = __threshold_add_blocks(b);

                        goto out;
                }
        }

        b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
        if (!b) {
                err = -ENOMEM;
                goto out;
        }

        b->kobj = kobject_create_and_add(name, &dev->kobj);
        if (!b->kobj) {
                err = -EINVAL;
                goto out_free;
        }

        per_cpu(threshold_banks, cpu)[bank] = b;

        if (is_shared_bank(bank)) {
                atomic_set(&b->cpus, 1);

                /* nb is already initialized, see above */
                if (nb) {
                        WARN_ON(nb->bank4);
                        nb->bank4 = b;
                }
        }

        err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MCx_MISC(bank));
        if (!err)
                goto out;

 out_free:
        kfree(b);

 out:
        return err;
}

/* create dir/files for all valid threshold banks */
static int threshold_create_device(unsigned int cpu)
{
        unsigned int bank;
        struct threshold_bank **bp;
        int err = 0;

        bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks,
                     GFP_KERNEL);
        if (!bp)
                return -ENOMEM;

        per_cpu(threshold_banks, cpu) = bp;

        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                        continue;
                err = threshold_create_bank(cpu, bank);
                if (err)
                        return err;
        }

        return err;
}

static void deallocate_threshold_block(unsigned int cpu,
                                                 unsigned int bank)
{
        struct threshold_block *pos = NULL;
        struct threshold_block *tmp = NULL;
        struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];

        if (!head)
                return;

        list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
                kobject_put(&pos->kobj);
                list_del(&pos->miscj);
                kfree(pos);
        }

        kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
        per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
}

static void __threshold_remove_blocks(struct threshold_bank *b)
{
        struct threshold_block *pos = NULL;
        struct threshold_block *tmp = NULL;

        kobject_del(b->kobj);

        list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
                kobject_del(&pos->kobj);
}

static void threshold_remove_bank(unsigned int cpu, int bank)
{
        struct amd_northbridge *nb;
        struct threshold_bank *b;

        b = per_cpu(threshold_banks, cpu)[bank];
        if (!b)
                return;

        if (!b->blocks)
                goto free_out;

        if (is_shared_bank(bank)) {
                if (!atomic_dec_and_test(&b->cpus)) {
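                        /* Other CPUs on this node still use the shared bank. */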
                        __threshold_remove_blocks(b);
                        per_cpu(threshold_banks, cpu)[bank] = NULL;
                        return;
                } else {
                        /*
                         * the last CPU on this node using the shared bank is
                         * going away, remove that bank now.
                         */
                        nb = node_to_amd_nb(amd_get_nb_id(cpu));
                        nb->bank4 = NULL;
                }
        }

        deallocate_threshold_block(cpu, bank);

free_out:
        kobject_del(b->kobj);
        kobject_put(b->kobj);
        kfree(b);
        per_cpu(threshold_banks, cpu)[bank] = NULL;
}

static void threshold_remove_device(unsigned int cpu)
{
        unsigned int bank;

        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                        continue;
                threshold_remove_bank(cpu, bank);
        }
        kfree(per_cpu(threshold_banks, cpu));
}

/* get notified when a cpu comes on/off */
static void
amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
{
        switch (action) {
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                threshold_create_device(cpu);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                threshold_remove_device(cpu);
                break;
        default:
                break;
        }
}

static __init int threshold_init_device(void)
{
        unsigned lcpu = 0;

        /* to hit CPUs online before the notifier is up */
        for_each_online_cpu(lcpu) {
                int err = threshold_create_device(lcpu);

                if (err)
                        return err;
        }
        threshold_cpu_callback = amd_64_threshold_cpu_callback;

        return 0;
}
/*
 * There are three functions which need to be _initcalled in a logical
 * sequence:
 * 1. xen_late_init_mcelog
 * 2. mcheck_init_device
 * 3. threshold_init_device
 *
 * xen_late_init_mcelog must register xen_mce_chrdev_device before the
 * native mce_chrdev_device registration when running on the Xen platform.
 *
 * mcheck_init_device must run before threshold_init_device to initialize
 * mce_device; otherwise a NULL pointer dereference will cause a panic.
 *
 * Therefore we use the following _initcalls:
 * 1. device_initcall(xen_late_init_mcelog);
 * 2. device_initcall_sync(mcheck_init_device);
 * 3. late_initcall(threshold_init_device);
 *
 * When running under Xen, the initcall order is 1, 2, 3;
 * on bare metal we skip 1 and do only 2 and 3.
 */
late_initcall(threshold_init_device);