linux/arch/x86/kernel/cpu/perfctr-watchdog.c
/* local apic based NMI watchdog for various CPUs.
   This file also handles reservation of performance counters for coordination
   with other users (like oprofile).

   Note that these events normally don't tick when the CPU idles. This means
   the frequency varies with CPU load.

   Original code for K7/P6 written by Keith Owens */

#include <linux/percpu.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/smp.h>
#include <linux/nmi.h>
#include <asm/apic.h>
#include <asm/intel_arch_perfmon.h>

struct nmi_watchdog_ctlblk {
        unsigned int cccr_msr;
        unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
        unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};

/* Interface defining a CPU specific perfctr watchdog */
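/*
 * reserve()/unreserve() claim or release the perfctr and evntsel MSRs in
 * the ownership bitmaps below, setup() programs the counter on the current
 * CPU for the requested NMI frequency, rearm() restarts the count from the
 * NMI path and stop() disables the event again.  perfctr/evntsel name the
 * base MSRs of the counter used, and checkbit is the highest implemented
 * counter bit (e.g. bit 47 on K7, bit 39 on P6/P4): lapic_wd_event() tests
 * it to decide whether the counter has overflowed yet.
 */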
struct wd_ops {
        int (*reserve)(void);
        void (*unreserve)(void);
        int (*setup)(unsigned nmi_hz);
        void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
        void (*stop)(void);
        unsigned perfctr;
        unsigned evntsel;
        u64 checkbit;
};

static const struct wd_ops *wd_ops;

/* This number is the offset of Intel's MSR_P4_CRU_ESCR5 register from
 * MSR_P4_BSU_ESCR0.  It is the maximum needed for any platform (for now).
 */
#define NMI_MAX_COUNTER_BITS 66

/* perfctr_nmi_owner tracks the ownership of the perfctr registers,
 * evntsel_nmi_owner tracks the ownership of the event selection registers.
 * Different performance counters / event selection registers may be
 * reserved by different subsystems; this reservation system just tries to
 * coordinate things a little.
 */
static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);

static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
        /* returns the bit offset of the performance counter register */
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                return (msr - MSR_K7_PERFCTR0);
        case X86_VENDOR_INTEL:
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                        return (msr - MSR_ARCH_PERFMON_PERFCTR0);

                switch (boot_cpu_data.x86) {
                case 6:
                        return (msr - MSR_P6_PERFCTR0);
                case 15:
                        return (msr - MSR_P4_BPU_PERFCTR0);
                }
        }
        return 0;
}
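/*
 * E.g. MSR_K7_PERFCTR2 maps to bit 2 and MSR_P4_BPU_PERFCTR3 maps to
 * bit 3; an MSR that is not recognized here falls through to bit 0.
 */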

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
        /* returns the bit offset of the event selection register */
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                return (msr - MSR_K7_EVNTSEL0);
        case X86_VENDOR_INTEL:
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                        return (msr - MSR_ARCH_PERFMON_EVENTSEL0);

                switch (boot_cpu_data.x86) {
                case 6:
                        return (msr - MSR_P6_EVNTSEL0);
                case 15:
                        return (msr - MSR_P4_BSU_ESCR0);
                }
        }
        return 0;
}

/* checks whether a reservation bit is available (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);

        return (!test_bit(counter, perfctr_nmi_owner));
}

/* checks an msr for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_perfctr_msr_to_bit(msr);
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);

        return (!test_bit(counter, perfctr_nmi_owner));
}

int reserve_perfctr_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_perfctr_msr_to_bit(msr);
        /* register not managed by the allocator? */
        if (counter > NMI_MAX_COUNTER_BITS)
                return 1;

        if (!test_and_set_bit(counter, perfctr_nmi_owner))
                return 1;
        return 0;
}

void release_perfctr_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_perfctr_msr_to_bit(msr);
        /* register not managed by the allocator? */
        if (counter > NMI_MAX_COUNTER_BITS)
                return;

        clear_bit(counter, perfctr_nmi_owner);
}

int reserve_evntsel_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_evntsel_msr_to_bit(msr);
        /* register not managed by the allocator? */
        if (counter > NMI_MAX_COUNTER_BITS)
                return 1;

        if (!test_and_set_bit(counter, evntsel_nmi_owner))
                return 1;
        return 0;
}

void release_evntsel_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_evntsel_msr_to_bit(msr);
        /* register not managed by the allocator? */
        if (counter > NMI_MAX_COUNTER_BITS)
                return;

        clear_bit(counter, evntsel_nmi_owner);
}

EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);

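/*
 * The reservation interface above is also what external perfctr users
 * like oprofile are expected to go through.  A rough usage sketch:
 *
 *        if (!reserve_perfctr_nmi(perfctr_msr))
 *                return -EBUSY;
 *        if (!reserve_evntsel_nmi(evntsel_msr)) {
 *                release_perfctr_nmi(perfctr_msr);
 *                return -EBUSY;
 *        }
 *        ... program and use the counter ...
 *        release_evntsel_nmi(evntsel_msr);
 *        release_perfctr_nmi(perfctr_msr);
 *
 * (perfctr_msr/evntsel_msr and the -EBUSY returns are illustrative only;
 * see single_msr_reserve()/single_msr_unreserve() below for the in-file
 * equivalent.)
 */
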
void disable_lapic_nmi_watchdog(void)
{
        BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

        if (atomic_read(&nmi_active) <= 0)
                return;

        on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
        wd_ops->unreserve();

        BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_lapic_nmi_watchdog(void)
{
        BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

        /* are we already enabled */
        if (atomic_read(&nmi_active) != 0)
                return;

        /* are we lapic aware */
        if (!wd_ops)
                return;
        if (!wd_ops->reserve()) {
                printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
                return;
        }

        on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
        touch_nmi_watchdog();
}

/*
 * Activate the NMI watchdog via the local APIC.
 */

static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
        u64 counter_val;
        unsigned int retval = hz;

        /*
         * On Intel CPUs with P6/ARCH_PERFMON only the low 32 bits of the
         * counter are writable, with the higher bits sign extending from
         * bit 31.  So the counter can only be programmed with a negative
         * value whose magnitude fits in 31 bits, i.e. at most 0x7fffffff.
         * Find an nmi_hz that keeps the period below that.
         */
        counter_val = (u64)cpu_khz * 1000;
        do_div(counter_val, retval);
        if (counter_val > 0x7fffffffULL) {
                u64 count = (u64)cpu_khz * 1000;
                do_div(count, 0x7fffffffUL);
                retval = count + 1;
        }
        return retval;
}
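
/*
 * For example, a 3 GHz CPU with nmi_hz == 1 would need a period of
 * 3,000,000,000 cycles, which does not fit in 31 bits, so nmi_hz gets
 * bumped to 3,000,000,000 / 0x7fffffff + 1 == 2 and each period becomes
 * roughly 1,500,000,000 cycles instead.
 *
 * The write_watchdog_counter*() helpers below program the counter with
 * -(cpu_khz * 1000 / nmi_hz), so it overflows (and the overflow NMI
 * fires) about nmi_hz times per second.
 */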

static void
write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz)
{
        u64 count = (u64)cpu_khz * 1000;

        do_div(count, nmi_hz);
        if (descr)
                Dprintk("setting %s to -0x%08Lx\n", descr, count);
        wrmsrl(perfctr_msr, 0 - count);
}

static void write_watchdog_counter32(unsigned int perfctr_msr,
                const char *descr, unsigned nmi_hz)
{
        u64 count = (u64)cpu_khz * 1000;

        do_div(count, nmi_hz);
        if (descr)
                Dprintk("setting %s to -0x%08Lx\n", descr, count);
        wrmsr(perfctr_msr, (u32)(-count), 0);
}

/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface
   nicely stable so there is not much variety */

#define K7_EVNTSEL_ENABLE       (1 << 22)
#define K7_EVNTSEL_INT          (1 << 20)
#define K7_EVNTSEL_OS           (1 << 17)
#define K7_EVNTSEL_USR          (1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING    0x76
#define K7_NMI_EVENT            K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

static int setup_k7_watchdog(unsigned nmi_hz)
{
        unsigned int perfctr_msr, evntsel_msr;
        unsigned int evntsel;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        perfctr_msr = wd_ops->perfctr;
        evntsel_msr = wd_ops->evntsel;

        wrmsrl(perfctr_msr, 0UL);

        evntsel = K7_EVNTSEL_INT
                | K7_EVNTSEL_OS
                | K7_EVNTSEL_USR
                | K7_NMI_EVENT;

        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= K7_EVNTSEL_ENABLE;
        wrmsr(evntsel_msr, evntsel, 0);

        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */
        return 1;
}

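/*
 * Helpers shared by all watchdog flavours that use a single
 * perfctr/evntsel MSR pair (K7, P6 and the architectural
 * perfmon/Core Duo variants).
 */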
static void single_msr_stop_watchdog(void)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        wrmsr(wd->evntsel_msr, 0, 0);
}

static int single_msr_reserve(void)
{
        if (!reserve_perfctr_nmi(wd_ops->perfctr))
                return 0;

        if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
                release_perfctr_nmi(wd_ops->perfctr);
                return 0;
        }
        return 1;
}

static void single_msr_unreserve(void)
{
        release_evntsel_nmi(wd_ops->evntsel);
        release_perfctr_nmi(wd_ops->perfctr);
}

static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
        /* start the cycle over again */
        write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops k7_wd_ops = {
        .reserve = single_msr_reserve,
        .unreserve = single_msr_unreserve,
        .setup = setup_k7_watchdog,
        .rearm = single_msr_rearm,
        .stop = single_msr_stop_watchdog,
        .perfctr = MSR_K7_PERFCTR0,
        .evntsel = MSR_K7_EVNTSEL0,
        .checkbit = 1ULL<<47,
};

/* Intel family 6 (PPro+, P2, P3, P-M, Core1) */

#define P6_EVNTSEL0_ENABLE      (1 << 22)
#define P6_EVNTSEL_INT          (1 << 20)
#define P6_EVNTSEL_OS           (1 << 17)
#define P6_EVNTSEL_USR          (1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED  0x79
#define P6_NMI_EVENT            P6_EVENT_CPU_CLOCKS_NOT_HALTED

static int setup_p6_watchdog(unsigned nmi_hz)
{
        unsigned int perfctr_msr, evntsel_msr;
        unsigned int evntsel;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        perfctr_msr = wd_ops->perfctr;
        evntsel_msr = wd_ops->evntsel;

        /* KVM doesn't implement this MSR */
        if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
                return 0;

        evntsel = P6_EVNTSEL_INT
                | P6_EVNTSEL_OS
                | P6_EVNTSEL_USR
                | P6_NMI_EVENT;

        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        nmi_hz = adjust_for_32bit_ctr(nmi_hz);
        write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= P6_EVNTSEL0_ENABLE;
        wrmsr(evntsel_msr, evntsel, 0);

        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */
        return 1;
}

static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
        /* A P6 based Pentium M needs to re-unmask
         * the apic vector, but it doesn't hurt
         * other P6 variants.
         * ArchPerfmon/Core Duo also needs this */
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        /* P6/ARCH_PERFMON has 32 bit counter write */
        write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p6_wd_ops = {
        .reserve = single_msr_reserve,
        .unreserve = single_msr_unreserve,
        .setup = setup_p6_watchdog,
        .rearm = p6_rearm,
        .stop = single_msr_stop_watchdog,
        .perfctr = MSR_P6_PERFCTR0,
        .evntsel = MSR_P6_EVNTSEL0,
        .checkbit = 1ULL<<39,
};

/* Intel P4 performance counters. By far the most complicated of all. */

#define MSR_P4_MISC_ENABLE_PERF_AVAIL   (1<<7)
#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
#define P4_ESCR_OS              (1<<3)
#define P4_ESCR_USR             (1<<2)
#define P4_CCCR_OVF_PMI0        (1<<26)
#define P4_CCCR_OVF_PMI1        (1<<27)
#define P4_CCCR_THRESHOLD(N)    ((N)<<20)
#define P4_CCCR_COMPLEMENT      (1<<19)
#define P4_CCCR_COMPARE         (1<<18)
#define P4_CCCR_REQUIRED        (3<<16)
#define P4_CCCR_ESCR_SELECT(N)  ((N)<<13)
#define P4_CCCR_ENABLE          (1<<12)
#define P4_CCCR_OVF             (1<<31)

/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */

static int setup_p4_watchdog(unsigned nmi_hz)
{
        unsigned int perfctr_msr, evntsel_msr, cccr_msr;
        unsigned int evntsel, cccr_val;
        unsigned int misc_enable, dummy;
        unsigned int ht_num;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
        if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
                return 0;

#ifdef CONFIG_SMP
        /* detect which hyperthread we are on */
        if (smp_num_siblings == 2) {
                unsigned int ebx, apicid;

                ebx = cpuid_ebx(1);
                apicid = (ebx >> 24) & 0xff;
                ht_num = apicid & 1;
        } else
#endif
                ht_num = 0;

        /* performance counters are shared resources;
         * assign each hyperthread its own set
         * (re-use the ESCR0 register, seems safe
         * and keeps the cccr_val the same)
         */
        if (!ht_num) {
                /* logical cpu 0 */
                perfctr_msr = MSR_P4_IQ_PERFCTR0;
                evntsel_msr = MSR_P4_CRU_ESCR0;
                cccr_msr = MSR_P4_IQ_CCCR0;
                cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
        } else {
                /* logical cpu 1 */
                perfctr_msr = MSR_P4_IQ_PERFCTR1;
                evntsel_msr = MSR_P4_CRU_ESCR0;
                cccr_msr = MSR_P4_IQ_CCCR1;
                cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
        }

        evntsel = P4_ESCR_EVENT_SELECT(0x3F)
                | P4_ESCR_OS
                | P4_ESCR_USR;

        cccr_val |= P4_CCCR_THRESHOLD(15)
                 | P4_CCCR_COMPLEMENT
                 | P4_CCCR_COMPARE
                 | P4_CCCR_REQUIRED;

        wrmsr(evntsel_msr, evntsel, 0);
        wrmsr(cccr_msr, cccr_val, 0);
        write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        cccr_val |= P4_CCCR_ENABLE;
        wrmsr(cccr_msr, cccr_val, 0);
        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = cccr_msr;
        return 1;
}

static void stop_p4_watchdog(void)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
        wrmsr(wd->cccr_msr, 0, 0);
        wrmsr(wd->evntsel_msr, 0, 0);
}

static int p4_reserve(void)
{
        if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
                return 0;
#ifdef CONFIG_SMP
        if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
                goto fail1;
#endif
        if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
                goto fail2;
        /* RED-PEN why is ESCR1 not reserved here? */
        return 1;
 fail2:
#ifdef CONFIG_SMP
        if (smp_num_siblings > 1)
                release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
 fail1:
#endif
        release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
        return 0;
}

static void p4_unreserve(void)
{
#ifdef CONFIG_SMP
        if (smp_num_siblings > 1)
                release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
#endif
        release_evntsel_nmi(MSR_P4_CRU_ESCR0);
        release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
}

static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
        unsigned dummy;
        /*
         * P4 quirks:
         * - An overflown perfctr will assert its interrupt
         *   until the OVF flag in its CCCR is cleared.
         * - LVTPC is masked on interrupt and must be
         *   unmasked by the LVTPC handler.
         */
        rdmsrl(wd->cccr_msr, dummy);
        dummy &= ~P4_CCCR_OVF;
        wrmsrl(wd->cccr_msr, dummy);
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        /* start the cycle over again */
        write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p4_wd_ops = {
        .reserve = p4_reserve,
        .unreserve = p4_unreserve,
        .setup = setup_p4_watchdog,
        .rearm = p4_rearm,
        .stop = stop_p4_watchdog,
        /* RED-PEN this is wrong for the other sibling */
        .perfctr = MSR_P4_BPU_PERFCTR0,
        .evntsel = MSR_P4_BSU_ESCR0,
        .checkbit = 1ULL<<39,
};

/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully
   all future Intel CPUs. */

#define ARCH_PERFMON_NMI_EVENT_SEL      ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK    ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK

static struct wd_ops intel_arch_wd_ops;

static int setup_intel_arch_watchdog(unsigned nmi_hz)
{
        unsigned int ebx;
        union cpuid10_eax eax;
        unsigned int unused;
        unsigned int perfctr_msr, evntsel_msr;
        unsigned int evntsel;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        /*
         * Check whether the Architectural PerfMon supports
         * Unhalted Core Cycles Event or not.
         * NOTE: Corresponding bit = 0 in ebx indicates event present.
         */
        cpuid(10, &(eax.full), &ebx, &unused, &unused);
        if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
            (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
                return 0;

        perfctr_msr = wd_ops->perfctr;
        evntsel_msr = wd_ops->evntsel;

        wrmsrl(perfctr_msr, 0UL);

        evntsel = ARCH_PERFMON_EVENTSEL_INT
                | ARCH_PERFMON_EVENTSEL_OS
                | ARCH_PERFMON_EVENTSEL_USR
                | ARCH_PERFMON_NMI_EVENT_SEL
                | ARCH_PERFMON_NMI_EVENT_UMASK;

        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        nmi_hz = adjust_for_32bit_ctr(nmi_hz);
        write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
        wrmsr(evntsel_msr, evntsel, 0);

        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */
        intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
        return 1;
}

static struct wd_ops intel_arch_wd_ops __read_mostly = {
        .reserve = single_msr_reserve,
        .unreserve = single_msr_unreserve,
        .setup = setup_intel_arch_watchdog,
        .rearm = p6_rearm,
        .stop = single_msr_stop_watchdog,
        .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
        .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
};
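
/*
 * Note that intel_arch_wd_ops.checkbit is filled in at runtime by
 * setup_intel_arch_watchdog() from the counter bit width reported by
 * CPUID leaf 0xa, which is why it is not set in the initializer above.
 */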

static struct wd_ops coreduo_wd_ops = {
        .reserve = single_msr_reserve,
        .unreserve = single_msr_unreserve,
        .setup = setup_intel_arch_watchdog,
        .rearm = p6_rearm,
        .stop = single_msr_stop_watchdog,
        .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
        .evntsel = MSR_ARCH_PERFMON_EVENTSEL0,
};

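/*
 * Pick the wd_ops matching the boot CPU.  This is called from
 * lapic_watchdog_init() and leaves wd_ops NULL if the CPU is not
 * supported.
 */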
static void probe_nmi_watchdog(void)
{
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
                    boot_cpu_data.x86 != 16)
                        return;
                wd_ops = &k7_wd_ops;
                break;
        case X86_VENDOR_INTEL:
                /* Work around Core Duo (Yonah) errata AE49 where perfctr1
                   doesn't have a working enable bit. */
                if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
                        wd_ops = &coreduo_wd_ops;
                        break;
                }
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
                        wd_ops = &intel_arch_wd_ops;
                        break;
                }
                switch (boot_cpu_data.x86) {
                case 6:
                        if (boot_cpu_data.x86_model > 0xd)
                                return;

                        wd_ops = &p6_wd_ops;
                        break;
                case 15:
                        if (boot_cpu_data.x86_model > 0x4)
                                return;

                        wd_ops = &p4_wd_ops;
                        break;
                default:
                        return;
                }
                break;
        }
}

/* Interface to nmi.c */

int lapic_watchdog_init(unsigned nmi_hz)
{
        if (!wd_ops) {
                probe_nmi_watchdog();
                if (!wd_ops)
                        return -1;

                if (!wd_ops->reserve()) {
                        printk(KERN_ERR
                                "NMI watchdog: cannot reserve perfctrs\n");
                        return -1;
                }
        }

        if (!(wd_ops->setup(nmi_hz))) {
                printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
                       raw_smp_processor_id());
                return -1;
        }

        return 0;
}

void lapic_watchdog_stop(void)
{
        if (wd_ops)
                wd_ops->stop();
}

unsigned lapic_adjust_nmi_hz(unsigned hz)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
        if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
            wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
                hz = adjust_for_32bit_ctr(hz);
        return hz;
}

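/*
 * Called from the NMI path to check whether this NMI came from the
 * watchdog counter: if the check bit is still set the counter has not
 * overflowed yet, so the NMI was not ours; otherwise rearm the counter
 * for the next period and report a watchdog event.
 */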
int lapic_wd_event(unsigned nmi_hz)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
        u64 ctr;
        rdmsrl(wd->perfctr_msr, ctr);
        if (ctr & wd_ops->checkbit) { /* perfctr still running? */
                return 0;
        }
        wd_ops->rearm(wd, nmi_hz);
        return 1;
}

int lapic_watchdog_ok(void)
{
        return wd_ops != NULL;
}