linux/arch/x86/kernel/cpu/mcheck/mce_amd.c
/*
 *  (c) 2005-2015 Advanced Micro Devices, Inc.
 *  Your use of this code is subject to the terms and conditions of the
 *  GNU general public license version 2. See "COPYING" or
 *  http://www.gnu.org/licenses/gpl.html
 *
 *  Written by Jacob Shin - AMD, Inc.
 *  Maintained by: Borislav Petkov <bp@alien8.de>
 *
 *  All MC4_MISCi registers are shared between cores on a node.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/kobject.h>
#include <linux/percpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/smp.h>

#include <asm/amd_nb.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>

#define NR_BLOCKS         9
#define THRESHOLD_MAX     0xFFF
#define INT_TYPE_APIC     0x00020000
#define MASK_VALID_HI     0x80000000
#define MASK_CNTP_HI      0x40000000
#define MASK_LOCKED_HI    0x20000000
#define MASK_LVTOFF_HI    0x00F00000
#define MASK_COUNT_EN_HI  0x00080000
#define MASK_INT_TYPE_HI  0x00060000
#define MASK_OVERFLOW_HI  0x00010000
#define MASK_ERR_COUNT_HI 0x00000FFF
#define MASK_BLKPTR_LO    0xFF000000
#define MCG_XBLK_ADDR     0xC0000400

/* Deferred error settings */
#define MSR_CU_DEF_ERR          0xC0000410
#define MASK_DEF_LVTOFF         0x000000F0
#define MASK_DEF_INT_TYPE       0x00000006
#define DEF_LVT_OFF             0x2
#define DEF_INT_TYPE_APIC       0x2

static const char * const th_names[] = {
        "load_store",
        "insn_fetch",
        "combined_unit",
        "",
        "northbridge",
        "execution_unit",
};

static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */

static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);

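/*
 * Deferred error interrupts land on this function pointer. It points at a
 * stub until deferred_error_interrupt_enable() has successfully programmed
 * the APIC LVT offset, at which point it is switched over to
 * amd_deferred_error_interrupt().
 */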
static void default_deferred_error_interrupt(void)
{
        pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;

/*
 * CPU Initialization
 */

struct thresh_restart {
        struct threshold_block  *b;
        int                     reset;
        int                     set_lvt_off;
        int                     lvt_off;
        u16                     old_limit;
};

static inline bool is_shared_bank(int bank)
{
        /* Bank 4 is for northbridge reporting and is thus shared */
        return (bank == 4);
}

static const char *bank4_names(const struct threshold_block *b)
{
        switch (b->address) {
        /* MSR4_MISC0 */
        case 0x00000413:
                return "dram";

        case 0xc0000408:
                return "ht_links";

        case 0xc0000409:
                return "l3_cache";

        default:
                WARN(1, "Funny MSR: 0x%08x\n", b->address);
                return "";
        }
};


static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
{
        /*
         * bank 4 supports APIC LVT interrupts implicitly since forever.
         */
        if (bank == 4)
                return true;

        /*
         * IntP: interrupt present; if this bit is set, the thresholding
         * bank can generate APIC LVT interrupts
         */
        return msr_high_bits & BIT(28);
}

static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
{
        int msr = (hi & MASK_LVTOFF_HI) >> 20;

        if (apic < 0) {
                pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
                       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
                       b->bank, b->block, b->address, hi, lo);
                return 0;
        }

        if (apic != msr) {
                pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
                       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
                       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
                return 0;
        }

        return 1;
};

/*
 * Called via smp_call_function_single(), must be called with correct
 * cpu affinity.
 */
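/*
 * The error counter in the high word of MCi_MISC counts up and raises the
 * threshold interrupt when it overflows past THRESHOLD_MAX. A limit of N is
 * therefore programmed by seeding the counter with (THRESHOLD_MAX - N), so
 * the overflow happens after N further errors; the same bias is subtracted
 * again when the current error count is read back (see show_error_count()).
 */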
static void threshold_restart_bank(void *_tr)
{
        struct thresh_restart *tr = _tr;
        u32 hi, lo;

        rdmsr(tr->b->address, lo, hi);

        if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
                tr->reset = 1;  /* limit cannot be lower than err count */

        if (tr->reset) {                /* reset err count and overflow bit */
                hi =
                    (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
                    (THRESHOLD_MAX - tr->b->threshold_limit);
        } else if (tr->old_limit) {     /* change limit w/o reset */
                int new_count = (hi & THRESHOLD_MAX) +
                    (tr->old_limit - tr->b->threshold_limit);

                hi = (hi & ~MASK_ERR_COUNT_HI) |
                    (new_count & THRESHOLD_MAX);
        }

        /* clear IntType */
        hi &= ~MASK_INT_TYPE_HI;

        if (!tr->b->interrupt_capable)
                goto done;

        if (tr->set_lvt_off) {
                if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
                        /* set new lvt offset */
                        hi &= ~MASK_LVTOFF_HI;
                        hi |= tr->lvt_off << 20;
                }
        }

        if (tr->b->interrupt_enable)
                hi |= INT_TYPE_APIC;

 done:

        hi |= MASK_COUNT_EN_HI;
        wrmsr(tr->b->address, lo, hi);
}

static void mce_threshold_block_init(struct threshold_block *b, int offset)
{
        struct thresh_restart tr = {
                .b                      = b,
                .set_lvt_off            = 1,
                .lvt_off                = offset,
        };

        b->threshold_limit              = THRESHOLD_MAX;
        threshold_restart_bank(&tr);
};

static int setup_APIC_mce_threshold(int reserved, int new)
{
        if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
                                              APIC_EILVT_MSG_FIX, 0))
                return new;

        return reserved;
}

static int setup_APIC_deferred_error(int reserved, int new)
{
        if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
                                              APIC_EILVT_MSG_FIX, 0))
                return new;

        return reserved;
}

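/*
 * MSR_CU_DEF_ERR holds the deferred error interrupt configuration: per the
 * mask definitions above, bits 7:4 carry the APIC LVT offset and bits 2:1
 * the interrupt type. If the BIOS left the LVT offset field at zero, we
 * force it to DEF_LVT_OFF (0x2) and complain about the firmware.
 */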
static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
{
        u32 low = 0, high = 0;
        int def_offset = -1, def_new;

        if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
                return;

        def_new = (low & MASK_DEF_LVTOFF) >> 4;
        if (!(low & MASK_DEF_LVTOFF)) {
                pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
                def_new = DEF_LVT_OFF;
                low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
        }

        def_offset = setup_APIC_deferred_error(def_offset, def_new);
        if ((def_offset == def_new) &&
            (deferred_error_int_vector != amd_deferred_error_interrupt))
                deferred_error_int_vector = amd_deferred_error_interrupt;

        low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
        wrmsr(MSR_CU_DEF_ERR, low, high);
}

/* cpu init entry point, called from mce.c with preempt off */
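/*
 * Each bank consists of up to NR_BLOCKS thresholding blocks: block 0 lives
 * in MCi_MISC itself, block 1 is located via the BLKPTR field (the masked
 * value shifted right by 21 yields an offset of 8 MSRs per pointer value
 * from MCG_XBLK_ADDR), and any further blocks occupy consecutive MSRs after
 * that.
 */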
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
        struct threshold_block b;
        unsigned int cpu = smp_processor_id();
        u32 low = 0, high = 0, address = 0;
        unsigned int bank, block;
        int offset = -1, new;

        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                for (block = 0; block < NR_BLOCKS; ++block) {
                        if (block == 0)
                                address = MSR_IA32_MCx_MISC(bank);
                        else if (block == 1) {
                                address = (low & MASK_BLKPTR_LO) >> 21;
                                if (!address)
                                        break;

                                address += MCG_XBLK_ADDR;
                        } else
                                ++address;

                        if (rdmsr_safe(address, &low, &high))
                                break;

                        if (!(high & MASK_VALID_HI))
                                continue;

                        if (!(high & MASK_CNTP_HI)  ||
                             (high & MASK_LOCKED_HI))
                                continue;

                        if (!block)
                                per_cpu(bank_map, cpu) |= (1 << bank);

                        memset(&b, 0, sizeof(b));
                        b.cpu                   = cpu;
                        b.bank                  = bank;
                        b.block                 = block;
                        b.address               = address;
                        b.interrupt_capable     = lvt_interrupt_supported(bank, high);

                        if (!b.interrupt_capable)
                                goto init;

                        b.interrupt_enable = 1;
                        new     = (high & MASK_LVTOFF_HI) >> 20;
                        offset  = setup_APIC_mce_threshold(offset, new);

                        if ((offset == new) &&
                            (mce_threshold_vector != amd_threshold_interrupt))
                                mce_threshold_vector = amd_threshold_interrupt;

init:
                        mce_threshold_block_init(&b, offset);
                }
        }

        if (mce_flags.succor)
                deferred_error_interrupt_enable(c);
}

static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
{
        struct mce m;
        u64 status;

        rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
        if (!(status & MCI_STATUS_VAL))
                return;

        mce_setup(&m);

        m.status = status;
        m.bank = bank;

        if (threshold_err)
                m.misc = misc;

        if (m.status & MCI_STATUS_ADDRV)
                rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);

        mce_log(&m);
        wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
}

static inline void __smp_deferred_error_interrupt(void)
{
        inc_irq_stat(irq_deferred_error_count);
        deferred_error_int_vector();
}

asmlinkage __visible void smp_deferred_error_interrupt(void)
{
        entering_irq();
        __smp_deferred_error_interrupt();
        exiting_ack_irq();
}

asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
{
        entering_irq();
        trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
        __smp_deferred_error_interrupt();
        trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
        exiting_ack_irq();
}

/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
        u64 status;
        unsigned int bank;

        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                rdmsrl(MSR_IA32_MCx_STATUS(bank), status);

                if (!(status & MCI_STATUS_VAL) ||
                    !(status & MCI_STATUS_DEFERRED))
                        continue;

                __log_error(bank, false, 0);
                break;
        }
}

/*
 * APIC Interrupt Handler
 */

/*
 * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
 * the interrupt goes off when error_count reaches threshold_limit.
 * the handler will simply log mcelog w/ software defined bank number.
 */

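/*
 * The handler walks every bank marked in this CPU's bank_map, re-reads each
 * block's MISC register and logs the first block it finds with the overflow
 * bit set.
 */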
static void amd_threshold_interrupt(void)
{
        u32 low = 0, high = 0, address = 0;
        int cpu = smp_processor_id();
        unsigned int bank, block;

        /* assume first bank caused it */
        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                        continue;
                for (block = 0; block < NR_BLOCKS; ++block) {
                        if (block == 0) {
                                address = MSR_IA32_MCx_MISC(bank);
                        } else if (block == 1) {
                                address = (low & MASK_BLKPTR_LO) >> 21;
                                if (!address)
                                        break;
                                address += MCG_XBLK_ADDR;
                        } else {
                                ++address;
                        }

                        if (rdmsr_safe(address, &low, &high))
                                break;

                        if (!(high & MASK_VALID_HI)) {
                                if (block)
                                        continue;
                                else
                                        break;
                        }

                        if (!(high & MASK_CNTP_HI)  ||
                             (high & MASK_LOCKED_HI))
                                continue;

                        /*
                         * Log the machine check that caused the threshold
                         * event.
                         */
                        if (high & MASK_OVERFLOW_HI)
                                goto log;
                }
        }
        return;

log:
        __log_error(bank, true, ((u64)high << 32) | low);
}

/*
 * Sysfs Interface
 */

struct threshold_attr {
        struct attribute attr;
        ssize_t (*show) (struct threshold_block *, char *);
        ssize_t (*store) (struct threshold_block *, const char *, size_t count);
};

#define SHOW_FIELDS(name)                                               \
static ssize_t show_ ## name(struct threshold_block *b, char *buf)     \
{                                                                       \
        return sprintf(buf, "%lu\n", (unsigned long) b->name);         \
}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)

static ssize_t
store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
{
        struct thresh_restart tr;
        unsigned long new;

        if (!b->interrupt_capable)
                return -EINVAL;

        if (kstrtoul(buf, 0, &new) < 0)
                return -EINVAL;

        b->interrupt_enable = !!new;

        memset(&tr, 0, sizeof(tr));
        tr.b            = b;

        smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

        return size;
}

static ssize_t
store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
{
        struct thresh_restart tr;
        unsigned long new;

        if (kstrtoul(buf, 0, &new) < 0)
                return -EINVAL;

        if (new > THRESHOLD_MAX)
                new = THRESHOLD_MAX;
        if (new < 1)
                new = 1;

        memset(&tr, 0, sizeof(tr));
        tr.old_limit = b->threshold_limit;
        b->threshold_limit = new;
        tr.b = b;

        smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

        return size;
}

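/*
 * The raw counter includes the (THRESHOLD_MAX - threshold_limit) bias
 * written by threshold_restart_bank(); subtract it to report the number of
 * errors actually seen since the counter was last (re)armed.
 */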
static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
        u32 lo, hi;

        rdmsr_on_cpu(b->cpu, b->address, &lo, &hi);

        return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
                                     (THRESHOLD_MAX - b->threshold_limit)));
}

static struct threshold_attr error_count = {
        .attr = {.name = __stringify(error_count), .mode = 0444 },
        .show = show_error_count,
};

#define RW_ATTR(val)                                                    \
static struct threshold_attr val = {                                    \
        .attr   = {.name = __stringify(val), .mode = 0644 },           \
        .show   = show_## val,                                          \
        .store  = store_## val,                                         \
};

RW_ATTR(interrupt_enable);
RW_ATTR(threshold_limit);

static struct attribute *default_attrs[] = {
        &threshold_limit.attr,
        &error_count.attr,
        NULL,   /* possibly interrupt_enable if supported, see below */
        NULL,
};

#define to_block(k)     container_of(k, struct threshold_block, kobj)
#define to_attr(a)      container_of(a, struct threshold_attr, attr)

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
        struct threshold_block *b = to_block(kobj);
        struct threshold_attr *a = to_attr(attr);
        ssize_t ret;

        ret = a->show ? a->show(b, buf) : -EIO;

        return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
                     const char *buf, size_t count)
{
        struct threshold_block *b = to_block(kobj);
        struct threshold_attr *a = to_attr(attr);
        ssize_t ret;

        ret = a->store ? a->store(b, buf, count) : -EIO;

        return ret;
}

static const struct sysfs_ops threshold_ops = {
        .show                   = show,
        .store                  = store,
};

static struct kobj_type threshold_ktype = {
        .sysfs_ops              = &threshold_ops,
        .default_attrs          = default_attrs,
};

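/*
 * Recursively discover the blocks of one bank: block 0 is MCi_MISC, block 1
 * is located via the BLKPTR field, and further blocks follow in consecutive
 * MSRs. Every valid block gets a threshold_block, is linked into the bank's
 * ->blocks list and receives a kobject beneath the bank directory in sysfs.
 */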
static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
                                     unsigned int block, u32 address)
{
        struct threshold_block *b = NULL;
        u32 low, high;
        int err;

        if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
                return 0;

        if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
                return 0;

        if (!(high & MASK_VALID_HI)) {
                if (block)
                        goto recurse;
                else
                        return 0;
        }

        if (!(high & MASK_CNTP_HI)  ||
             (high & MASK_LOCKED_HI))
                goto recurse;

        b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
        if (!b)
                return -ENOMEM;

        b->block                = block;
        b->bank                 = bank;
        b->cpu                  = cpu;
        b->address              = address;
        b->interrupt_enable     = 0;
        b->interrupt_capable    = lvt_interrupt_supported(bank, high);
        b->threshold_limit      = THRESHOLD_MAX;

        if (b->interrupt_capable) {
                threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
                b->interrupt_enable = 1;
        } else {
                threshold_ktype.default_attrs[2] = NULL;
        }

        INIT_LIST_HEAD(&b->miscj);

        if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
                list_add(&b->miscj,
                         &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
        } else {
                per_cpu(threshold_banks, cpu)[bank]->blocks = b;
        }

        err = kobject_init_and_add(&b->kobj, &threshold_ktype,
                                   per_cpu(threshold_banks, cpu)[bank]->kobj,
                                   (bank == 4 ? bank4_names(b) : th_names[bank]));
        if (err)
                goto out_free;
recurse:
        if (!block) {
                address = (low & MASK_BLKPTR_LO) >> 21;
                if (!address)
                        return 0;
                address += MCG_XBLK_ADDR;
        } else {
                ++address;
        }

        err = allocate_threshold_blocks(cpu, bank, ++block, address);
        if (err)
                goto out_free;

        if (b)
                kobject_uevent(&b->kobj, KOBJ_ADD);

        return err;

out_free:
        if (b) {
                kobject_put(&b->kobj);
                list_del(&b->miscj);
                kfree(b);
        }
        return err;
}

static int __threshold_add_blocks(struct threshold_bank *b)
{
        struct list_head *head = &b->blocks->miscj;
        struct threshold_block *pos = NULL;
        struct threshold_block *tmp = NULL;
        int err = 0;

        err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
        if (err)
                return err;

        list_for_each_entry_safe(pos, tmp, head, miscj) {

                err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
                if (err) {
                        list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
                                kobject_del(&pos->kobj);

                        return err;
                }
        }
        return err;
}

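/*
 * Bank 4 (the northbridge bank) is shared by all cores on a node: the first
 * CPU to get here allocates the threshold_bank, caches it in nb->bank4 and
 * sets the ->cpus refcount to 1; every further CPU on the node only adds its
 * own sysfs links via __threshold_add_blocks() and bumps the refcount.
 */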
static int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
        struct device *dev = per_cpu(mce_device, cpu);
        struct amd_northbridge *nb = NULL;
        struct threshold_bank *b = NULL;
        const char *name = th_names[bank];
        int err = 0;

        if (is_shared_bank(bank)) {
                nb = node_to_amd_nb(amd_get_nb_id(cpu));

                /* threshold descriptor already initialized on this node? */
                if (nb && nb->bank4) {
                        /* yes, use it */
                        b = nb->bank4;
                        err = kobject_add(b->kobj, &dev->kobj, name);
                        if (err)
                                goto out;

                        per_cpu(threshold_banks, cpu)[bank] = b;
                        atomic_inc(&b->cpus);

                        err = __threshold_add_blocks(b);

                        goto out;
                }
        }

        b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
        if (!b) {
                err = -ENOMEM;
                goto out;
        }

        b->kobj = kobject_create_and_add(name, &dev->kobj);
        if (!b->kobj) {
                err = -EINVAL;
                goto out_free;
        }

        per_cpu(threshold_banks, cpu)[bank] = b;

        if (is_shared_bank(bank)) {
                atomic_set(&b->cpus, 1);

                /* nb is already initialized, see above */
                if (nb) {
                        WARN_ON(nb->bank4);
                        nb->bank4 = b;
                }
        }

        err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MCx_MISC(bank));
        if (!err)
                goto out;

 out_free:
        kfree(b);

 out:
        return err;
}

/* create dir/files for all valid threshold banks */
static int threshold_create_device(unsigned int cpu)
{
        unsigned int bank;
        struct threshold_bank **bp;
        int err = 0;

        bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks,
                     GFP_KERNEL);
        if (!bp)
                return -ENOMEM;

        per_cpu(threshold_banks, cpu) = bp;

        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                        continue;
                err = threshold_create_bank(cpu, bank);
                if (err)
                        return err;
        }

        return err;
}

static void deallocate_threshold_block(unsigned int cpu,
                                                 unsigned int bank)
{
        struct threshold_block *pos = NULL;
        struct threshold_block *tmp = NULL;
        struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];

        if (!head)
                return;

        list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
                kobject_put(&pos->kobj);
                list_del(&pos->miscj);
                kfree(pos);
        }

        kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
        per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
}

static void __threshold_remove_blocks(struct threshold_bank *b)
{
        struct threshold_block *pos = NULL;
        struct threshold_block *tmp = NULL;

        kobject_del(b->kobj);

        list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
                kobject_del(&pos->kobj);
}

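/*
 * Teardown mirrors creation: for the shared bank, a CPU that is not the last
 * user on its node only removes its own sysfs links and drops the refcount;
 * the last user clears nb->bank4 and frees the block list and the bank.
 */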
static void threshold_remove_bank(unsigned int cpu, int bank)
{
        struct amd_northbridge *nb;
        struct threshold_bank *b;

        b = per_cpu(threshold_banks, cpu)[bank];
        if (!b)
                return;

        if (!b->blocks)
                goto free_out;

        if (is_shared_bank(bank)) {
                if (!atomic_dec_and_test(&b->cpus)) {
                        __threshold_remove_blocks(b);
                        per_cpu(threshold_banks, cpu)[bank] = NULL;
                        return;
                } else {
                        /*
                         * the last CPU on this node using the shared bank is
                         * going away, remove that bank now.
                         */
                        nb = node_to_amd_nb(amd_get_nb_id(cpu));
                        nb->bank4 = NULL;
                }
        }

        deallocate_threshold_block(cpu, bank);

free_out:
        kobject_del(b->kobj);
        kobject_put(b->kobj);
        kfree(b);
        per_cpu(threshold_banks, cpu)[bank] = NULL;
}

static void threshold_remove_device(unsigned int cpu)
{
        unsigned int bank;

        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                        continue;
                threshold_remove_bank(cpu, bank);
        }
        kfree(per_cpu(threshold_banks, cpu));
}

/* get notified when a cpu comes on/off */
static void
amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
{
        switch (action) {
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                threshold_create_device(cpu);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                threshold_remove_device(cpu);
                break;
        default:
                break;
        }
}

static __init int threshold_init_device(void)
{
        unsigned lcpu = 0;

        /* to hit CPUs online before the notifier is up */
        for_each_online_cpu(lcpu) {
                int err = threshold_create_device(lcpu);

                if (err)
                        return err;
        }
        threshold_cpu_callback = amd_64_threshold_cpu_callback;

        return 0;
}
/*
 * There are three functions which need to be _initcalled in a logical
 * sequence:
 * 1. xen_late_init_mcelog
 * 2. mcheck_init_device
 * 3. threshold_init_device
 *
 * xen_late_init_mcelog must register xen_mce_chrdev_device before the
 * native mce_chrdev_device registration when running on the Xen platform.
 *
 * mcheck_init_device must run before threshold_init_device so that
 * mce_device is initialized; otherwise a NULL pointer dereference will
 * cause a panic.
 *
 * So we use the following _initcalls:
 * 1. device_initcall(xen_late_init_mcelog);
 * 2. device_initcall_sync(mcheck_init_device);
 * 3. late_initcall(threshold_init_device);
 *
 * When running under Xen, the initcall order is 1, 2, 3;
 * on bare metal we skip 1 and do only 2 and 3.
 */
late_initcall(threshold_init_device);