/* linux/arch/x86/oprofile/op_model_p4.c */
   1/**
   2 * @file op_model_p4.c
   3 * P4 model-specific MSR operations
   4 *
   5 * @remark Copyright 2002 OProfile authors
   6 * @remark Read the file COPYING
   7 *
   8 * @author Graydon Hoare
   9 */
  10
  11#include <linux/oprofile.h>
  12#include <linux/smp.h>
  13#include <linux/ptrace.h>
  14#include <asm/nmi.h>
  15#include <asm/msr.h>
  16#include <asm/fixmap.h>
  17#include <asm/apic.h>
  18
  19
  20#include "op_x86_model.h"
  21#include "op_counter.h"
  22
/* number of entries in the p4_events[] binding table below */
#define NUM_EVENTS 39

/* hardware resources available when HyperThreading is off (or absent) */
#define NUM_COUNTERS_NON_HT 8
#define NUM_ESCRS_NON_HT 45
#define NUM_CCCRS_NON_HT 18
#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

/* with two sibling threads each thread gets half of the resources */
#define NUM_COUNTERS_HT2 4
#define NUM_ESCRS_HT2 23
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

/* counter bit tested in p4_check_ctrs() to spot un-flagged overflows */
#define OP_CTR_OVERFLOW			(1ULL<<31)

/* actual counts in use; switched to the *_HT2 values at setup time
   when the chip turns out to be hyper-threaded */
static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static unsigned int num_controls = NUM_CONTROLS_NON_HT;
  39
  40/* this has to be checked dynamically since the
  41   hyper-threadedness of a chip is discovered at
  42   kernel boot-time. */
  43static inline void setup_num_counters(void)
  44{
  45#ifdef CONFIG_SMP
  46        if (smp_num_siblings == 2) {
  47                num_counters = NUM_COUNTERS_HT2;
  48                num_controls = NUM_CONTROLS_HT2;
  49        }
  50#endif
  51}
  52
  53static inline int addr_increment(void)
  54{
  55#ifdef CONFIG_SMP
  56        return smp_num_siblings == 2 ? 2 : 1;
  57#else
  58        return 1;
  59#endif
  60}
  61
  62
/* tables to simulate simplified hardware view of p4 registers */
struct p4_counter_binding {
	int virt_counter;    /* CTR_* bit naming the virtual counter */
	int counter_address; /* MSR of the hardware performance counter */
	int cccr_address;    /* MSR of that counter's CCCR */
};

struct p4_event_binding {
	int escr_select;  /* value to put in CCCR */
	int event_select; /* value to put in ESCR */
	struct {
		int virt_counter; /* for this counter... */
		int escr_address; /* use this ESCR       */
	} bindings[2];
};
  78
/* nb: these CTR_* defines are a duplicate of defines in
   event/i386.p4*events. */


/* one bit per virtual counter; used to match p4_event_binding.virt_counter */
#define CTR_BPU_0      (1 << 0)
#define CTR_MS_0       (1 << 1)
#define CTR_FLAME_0    (1 << 2)
#define CTR_IQ_4       (1 << 3)
#define CTR_BPU_2      (1 << 4)
#define CTR_MS_2       (1 << 5)
#define CTR_FLAME_2    (1 << 6)
#define CTR_IQ_5       (1 << 7)

/* virtual counter -> (perfctr MSR, CCCR MSR) mapping; in HT mode the
   even thread uses the first half, the odd thread the second half */
static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
};

/* CCCRs that exist in hardware but have no counter assigned above */
#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
 104
/* p4 event codes in libop/op_event.h are indices into this table. */

/* each entry: ESCR-select value, event-select value, and up to two
   (virtual counter, ESCR MSR) pairs the event can be counted on.
   A { 0, 0 } pair means the event has only one usable binding. */
static struct p4_event_binding p4_events[NUM_EVENTS] = {

	{ /* BRANCH_RETIRED */
		0x05, 0x06,
		{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* MISPRED_BRANCH_RETIRED */
		0x04, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* TC_DELIVER_MODE */
		0x01, 0x01,
		{ { CTR_MS_0, MSR_P4_TC_ESCR0},
		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
	},

	{ /* BPU_FETCH_REQUEST */
		0x00, 0x03,
		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
	},

	{ /* ITLB_REFERENCE */
		0x03, 0x18,
		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
	},

	{ /* MEMORY_CANCEL */
		0x05, 0x02,
		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
	},

	{ /* MEMORY_COMPLETE */
		0x02, 0x08,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* LOAD_PORT_REPLAY */
		0x02, 0x04,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* STORE_PORT_REPLAY */
		0x02, 0x05,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
	},

	{ /* MOB_LOAD_REPLAY */
		0x02, 0x03,
		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0},
		  { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
	},

	{ /* PAGE_WALK_TYPE */
		0x04, 0x01,
		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0},
		  { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
	},

	{ /* BSQ_CACHE_REFERENCE */
		0x07, 0x0c,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
	},

	{ /* IOQ_ALLOCATION */
		0x06, 0x03,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { 0, 0 } }
	},

	{ /* IOQ_ACTIVE_ENTRIES */
		0x06, 0x1a,
		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
		  { 0, 0 } }
	},

	{ /* FSB_DATA_ACTIVITY */
		0x06, 0x17,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
	},

	{ /* BSQ_ALLOCATION */
		0x07, 0x05,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
		  { 0, 0 } }
	},

	{ /* BSQ_ACTIVE_ENTRIES */
		0x07, 0x06,
		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
		  { 0, 0 } }
	},

	{ /* X87_ASSIST */
		0x05, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* SSE_INPUT_ASSIST */
		0x01, 0x34,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* PACKED_SP_UOP */
		0x01, 0x08,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* PACKED_DP_UOP */
		0x01, 0x0c,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* SCALAR_SP_UOP */
		0x01, 0x0a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* SCALAR_DP_UOP */
		0x01, 0x0e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* 64BIT_MMX_UOP */
		0x01, 0x02,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* 128BIT_MMX_UOP */
		0x01, 0x1a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* X87_FP_UOP */
		0x01, 0x04,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* X87_SIMD_MOVES_UOP */
		0x01, 0x2e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
	},

	{ /* MACHINE_CLEAR */
		0x05, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* GLOBAL_POWER_EVENTS */
		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
	},

	{ /* TC_MS_XFER */
		0x00, 0x05,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
	},

	{ /* UOP_QUEUE_WRITES */
		0x00, 0x09,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
	},

	{ /* FRONT_END_EVENT */
		0x05, 0x08,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* EXECUTION_EVENT */
		0x05, 0x0c,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* REPLAY_EVENT */
		0x05, 0x09,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
	},

	{ /* INSTR_RETIRED */
		0x04, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* UOPS_RETIRED */
		0x04, 0x01,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
	},

	{ /* UOP_TYPE */
		0x02, 0x02,
		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
	},

	{ /* RETIRED_MISPRED_BRANCH_TYPE */
		0x02, 0x05,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
	},

	{ /* RETIRED_BRANCH_TYPE */
		0x02, 0x04,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
	}
};
 343
 344
 345#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)
 346
 347#define ESCR_RESERVED_BITS 0x80000003
 348#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
 349#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
 350#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
 351#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
 352#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
 353#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
 354#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
 355
 356#define CCCR_RESERVED_BITS 0x38030FFF
 357#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
 358#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
 359#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
 360#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
 361#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
 362#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
 363#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
 364#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
 365#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
 366
 367
 368/* this assigns a "stagger" to the current CPU, which is used throughout
 369   the code in this module as an extra array offset, to select the "even"
 370   or "odd" part of all the divided resources. */
 371static unsigned int get_stagger(void)
 372{
 373#ifdef CONFIG_SMP
 374        int cpu = smp_processor_id();
 375        return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map));
 376#endif
 377        return 0;
 378}
 379
 380
/* finally, mediate access to a real hardware counter
   by passing a "virtual" counter number to this macro,
   along with your stagger setting. */
#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))

/* per-counter reload values; 0 means the counter is unused/disabled */
static unsigned long reset_value[NUM_COUNTERS_NON_HT];
 387
 388static void p4_shutdown(struct op_msrs const * const msrs)
 389{
 390        int i;
 391
 392        for (i = 0; i < num_counters; ++i) {
 393                if (msrs->counters[i].addr)
 394                        release_perfctr_nmi(msrs->counters[i].addr);
 395        }
 396        /*
 397         * some of the control registers are specially reserved in
 398         * conjunction with the counter registers (hence the starting offset).
 399         * This saves a few bits.
 400         */
 401        for (i = num_counters; i < num_controls; ++i) {
 402                if (msrs->controls[i].addr)
 403                        release_evntsel_nmi(msrs->controls[i].addr);
 404        }
 405}
 406
/*
 * Reserve the perfctr/CCCR pairs and all the ESCRs this module may
 * touch, recording each successfully reserved MSR address in msrs.
 * NB: the index i deliberately keeps running across all the ESCR
 * loops below, so controls[] ends up densely packed in a fixed order.
 * Returns 0 on success, -EBUSY if a needed counter is already taken.
 */
static int p4_fill_in_addresses(struct op_msrs * const msrs)
{
	unsigned int i;
	unsigned int addr, cccraddr, stag;

	setup_num_counters();
	stag = get_stagger();

	/* the counter & cccr registers we pay attention to */
	for (i = 0; i < num_counters; ++i) {
		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
		cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
		if (reserve_perfctr_nmi(addr)) {
			msrs->counters[i].addr = addr;
			msrs->controls[i].addr = cccraddr;
		}
	}

	/* 43 ESCR registers in three or four discontiguous group */
	for (addr = MSR_P4_BSU_ESCR0 + stag;
	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	/* no IQ_ESCR0/1 on some models, we save a second time BSU_ESCR0/1
	 * to avoid special case in nmi_{save|restore}_registers() */
	if (boot_cpu_data.x86_model >= 0x3) {
		for (addr = MSR_P4_BSU_ESCR0 + stag;
		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
		}
	} else {
		for (addr = MSR_P4_IQ_ESCR0 + stag;
		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
		}
	}

	for (addr = MSR_P4_RAT_ESCR0 + stag;
	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_MS_ESCR0 + stag;
	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_IX_ESCR0 + stag;
	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	/* there are 2 remaining non-contiguously located ESCRs */

	if (num_counters == NUM_COUNTERS_NON_HT) {
		/* standard non-HT CPUs handle both remaining ESCRs*/
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else if (stag == 0) {
		/* HT CPUs give the first remainder to the even thread, as
		   the 32nd control register */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else {
		/* and two copies of the second to the odd thread,
		   for the 22nd and 23rd control registers */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		}
	}

	/* fail if any counter the user enabled could not be reserved */
	for (i = 0; i < num_counters; ++i) {
		if (!counter_config[i].enabled)
			continue;
		if (msrs->controls[i].addr)
			continue;
		op_x86_warn_reserved(i);
		p4_shutdown(msrs);
		return -EBUSY;
	}

	return 0;
}
 502
 503
/*
 * Program the ESCR and CCCR for one enabled counter: look up the
 * event binding for counter_config[ctr].event (a 1-based index into
 * p4_events[]), then write the ESCR (mode bits, event select/mask)
 * and the CCCR (required bits, ESCR select, per-thread PMI enable).
 * Logs an error and programs nothing if the event code is out of
 * range or has no binding for this virtual counter.
 */
static void pmc_setup_one_p4_counter(unsigned int ctr)
{
	int i;
	int const maxbind = 2;	/* each event has at most 2 bindings */
	unsigned int cccr = 0;
	unsigned int escr = 0;
	unsigned int high = 0;
	unsigned int counter_bit;
	struct p4_event_binding *ev = NULL;
	unsigned int stag;

	stag = get_stagger();

	/* convert from counter *number* to counter *bit* */
	counter_bit = 1 << VIRT_CTR(stag, ctr);

	/* find our event binding structure. */
	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
		printk(KERN_ERR
		       "oprofile: P4 event code 0x%lx out of range\n",
		       counter_config[ctr].event);
		return;
	}

	/* event codes are 1-based, hence the -1 */
	ev = &(p4_events[counter_config[ctr].event - 1]);

	for (i = 0; i < maxbind; i++) {
		if (ev->bindings[i].virt_counter & counter_bit) {

			/* modify ESCR */
			rdmsr(ev->bindings[i].escr_address, escr, high);
			ESCR_CLEAR(escr);
			/* stagger picks the per-thread USR/OS enable bits */
			if (stag == 0) {
				ESCR_SET_USR_0(escr, counter_config[ctr].user);
				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
			} else {
				ESCR_SET_USR_1(escr, counter_config[ctr].user);
				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
			}
			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
			wrmsr(ev->bindings[i].escr_address, escr, high);

			/* modify CCCR */
			rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
			      cccr, high);
			CCCR_CLEAR(cccr);
			CCCR_SET_REQUIRED_BITS(cccr);
			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
			/* raise the PMI on the thread that owns the counter */
			if (stag == 0)
				CCCR_SET_PMI_OVF_0(cccr);
			else
				CCCR_SET_PMI_OVF_1(cccr);
			wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
			      cccr, high);
			/* first matching binding wins */
			return;
		}
	}

	printk(KERN_ERR
	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
	       counter_config[ctr].event, stag, ctr);
}
 567
 568
/*
 * Bring all reserved counters into a known state: clear the CCCRs and
 * ESCRs, then program and arm each enabled counter with its (negated)
 * count so it overflows after counter_config[i].count events.
 * Bails out early if the BIOS has not made the PMCs available.
 */
static void p4_setup_ctrs(struct op_x86_model_spec const *model,
			  struct op_msrs const * const msrs)
{
	unsigned int i;
	unsigned int low, high;
	unsigned int stag;

	stag = get_stagger();

	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!MISC_PMC_ENABLED_P(low)) {
		printk(KERN_ERR "oprofile: P4 PMC not available\n");
		return;
	}

	/* clear the cccrs we will use */
	for (i = 0; i < num_counters; i++) {
		if (unlikely(!msrs->controls[i].addr))
			continue;
		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
		CCCR_CLEAR(low);
		CCCR_SET_REQUIRED_BITS(low);
		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
	}

	/* clear all escrs (including those outside our concern) */
	for (i = num_counters; i < num_controls; i++) {
		if (unlikely(!msrs->controls[i].addr))
			continue;
		wrmsr(msrs->controls[i].addr, 0, 0);
	}

	/* setup all counters */
	for (i = 0; i < num_counters; ++i) {
		if (counter_config[i].enabled && msrs->controls[i].addr) {
			/* remember the reload value for the NMI handler */
			reset_value[i] = counter_config[i].count;
			pmc_setup_one_p4_counter(i);
			/* counters count up; start at -count so bit 31/OVF
			   fires after 'count' events */
			wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address,
			       -(u64)counter_config[i].count);
		} else {
			/* reset_value == 0 marks the counter as inactive */
			reset_value[i] = 0;
		}
	}
}
 613
 614
/*
 * NMI handler body: for every active counter, detect overflow, log a
 * sample, reload the counter and clear the CCCR overflow flag.
 * Always returns 1 ("handled") -- see op_model_ppro.c.
 */
static int p4_check_ctrs(struct pt_regs * const regs,
			 struct op_msrs const * const msrs)
{
	unsigned long ctr, low, high, stag, real;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {

		/* skip counters that p4_setup_ctrs() left inactive */
		if (!reset_value[i])
			continue;

		/*
		 * there is some eccentricity in the hardware which
		 * requires that we perform 2 extra corrections:
		 *
		 * - check both the CCCR:OVF flag for overflow and the
		 *   counter high bit for un-flagged overflows.
		 *
		 * - write the counter back twice to ensure it gets
		 *   updated properly.
		 *
		 * the former seems to be related to extra NMIs happening
		 * during the current NMI; the latter is reported as errata
		 * N15 in intel doc 249199-029, pentium 4 specification
		 * update, though their suggested work-around does not
		 * appear to solve the problem.
		 */

		real = VIRT_CTR(stag, i);

		rdmsr(p4_counters[real].cccr_address, low, high);
		rdmsr(p4_counters[real].counter_address, ctr, high);
		/* overflowed if OVF is set, or if the counter has wrapped
		   past the point where bit 31 would still be set */
		if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
			oprofile_add_sample(regs, i);
			/* first reload (see errata note above) */
			wrmsrl(p4_counters[real].counter_address,
			       -(u64)reset_value[i]);
			CCCR_CLEAR_OVF(low);
			wrmsr(p4_counters[real].cccr_address, low, high);
			/* second reload works around errata N15 */
			wrmsrl(p4_counters[real].counter_address,
			       -(u64)reset_value[i]);
		}
	}

	/* P4 quirk: you have to re-unmask the apic vector */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

	/* See op_model_ppro.c */
	return 1;
}
 666
 667
 668static void p4_start(struct op_msrs const * const msrs)
 669{
 670        unsigned int low, high, stag;
 671        int i;
 672
 673        stag = get_stagger();
 674
 675        for (i = 0; i < num_counters; ++i) {
 676                if (!reset_value[i])
 677                        continue;
 678                rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 679                CCCR_SET_ENABLE(low);
 680                wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 681        }
 682}
 683
 684
 685static void p4_stop(struct op_msrs const * const msrs)
 686{
 687        unsigned int low, high, stag;
 688        int i;
 689
 690        stag = get_stagger();
 691
 692        for (i = 0; i < num_counters; ++i) {
 693                if (!reset_value[i])
 694                        continue;
 695                rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 696                CCCR_SET_DISABLE(low);
 697                wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 698        }
 699}
 700
#ifdef CONFIG_SMP
/* model spec used when two HT siblings share the P4's counters:
   each thread gets half of the counters and control registers */
struct op_x86_model_spec op_p4_ht2_spec = {
	.num_counters		= NUM_COUNTERS_HT2,
	.num_controls		= NUM_CONTROLS_HT2,
	.fill_in_addresses	= &p4_fill_in_addresses,
	.setup_ctrs		= &p4_setup_ctrs,
	.check_ctrs		= &p4_check_ctrs,
	.start			= &p4_start,
	.stop			= &p4_stop,
	.shutdown		= &p4_shutdown
};
#endif
 713
/* model spec for a P4 without HyperThreading: the full register set */
struct op_x86_model_spec op_p4_spec = {
	.num_counters		= NUM_COUNTERS_NON_HT,
	.num_controls		= NUM_CONTROLS_NON_HT,
	.fill_in_addresses	= &p4_fill_in_addresses,
	.setup_ctrs		= &p4_setup_ctrs,
	.check_ctrs		= &p4_check_ctrs,
	.start			= &p4_start,
	.stop			= &p4_stop,
	.shutdown		= &p4_shutdown
};
 724