linux/arch/x86/oprofile/op_model_amd.c
/*
 * @file op_model_amd.c
 * athlon / K7 / K8 / Family 10h model-specific MSR operations
 *
 * @remark Copyright 2002-2009 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 * @author Graydon Hoare
 * @author Robert Richter <robert.richter@amd.com>
 * @author Barry Kasindorf <barry.kasindorf@amd.com>
 * @author Jason Yeh <jason.yeh@amd.com>
 * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 */

#include <linux/oprofile.h>
#include <linux/device.h>
#include <linux/pci.h>
#include <linux/percpu.h>

#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>

#include "op_x86_model.h"
#include "op_counter.h"

#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
#define NUM_VIRT_COUNTERS       32
#else
#define NUM_VIRT_COUNTERS       0
#endif

#define OP_EVENT_MASK                   0x0FFF
#define OP_CTR_OVERFLOW                 (1ULL<<31)

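/*
 * Reserved bits in the event select MSR, preserved across rewrites
 * (see "val &= model->reserved" in op_amd_setup_ctrs()): bit 21 plus
 * the whole upper half except EventSelect[11:8] (bits [35:32]) and
 * the guest/host-only bits ([41:40]).
 */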
#define MSR_AMD_EVENTSEL_RESERVED       ((0xFFFFFCF0ULL<<32)|(1ULL<<21))

static int num_counters;
static unsigned long reset_value[OP_MAX_COUNTER];

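/*
 * Sample payload sizes in buffer data words; oprofile_add_data64()
 * stores each 64-bit value as two words, and the optional branch
 * target address adds one more word to an op sample.
 */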
#define IBS_FETCH_SIZE                  6
#define IBS_OP_SIZE                     12

static u32 ibs_caps;

struct ibs_config {
        unsigned long op_enabled;
        unsigned long fetch_enabled;
        unsigned long max_cnt_fetch;
        unsigned long max_cnt_op;
        unsigned long rand_en;
        unsigned long dispatched_ops;
        unsigned long branch_target;
};

struct ibs_state {
        u64             ibs_op_ctl;
        int             branch_target;
        unsigned long   sample_size;
};

static struct ibs_config ibs_config;
static struct ibs_state ibs_state;

/*
 * IBS randomization macros
 */
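/*
 * A freshly randomized IbsOpCurCnt averages 2^11, which shortens the
 * effective sampling period; IBS_RANDOM_MAXCNT_OFFSET (2^11 divided
 * by the max count granularity of 16) is added to the max count in
 * op_amd_start_ibs() to compensate for that bias.
 */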
#define IBS_RANDOM_BITS                 12
#define IBS_RANDOM_MASK                 ((1ULL << IBS_RANDOM_BITS) - 1)
#define IBS_RANDOM_MAXCNT_OFFSET        (1ULL << (IBS_RANDOM_BITS - 5))

/*
 * 16-bit Linear Feedback Shift Register (LFSR)
 *
 *                       16   14   13    11
 * Feedback polynomial = X  + X  + X  +  X  + 1
 */
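/*
 * The taps at bits 0, 2, 3 and 5 of the current value correspond to
 * the x^16, x^14, x^13 and x^11 terms of the polynomial above for a
 * right-shifting Fibonacci LFSR; the polynomial is maximal, so the
 * sequence only repeats after 2^16 - 1 steps.
 */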
static unsigned int lfsr_random(void)
{
        static unsigned int lfsr_value = 0xF00D;
        unsigned int bit;

        /* Compute next bit to shift in */
        bit = ((lfsr_value >> 0) ^
               (lfsr_value >> 2) ^
               (lfsr_value >> 3) ^
               (lfsr_value >> 5)) & 0x0001;

        /* Advance to next register value */
        lfsr_value = (lfsr_value >> 1) | (bit << 15);

        return lfsr_value;
}

/*
 * IBS software randomization
 *
 * The IBS periodic op counter is randomized in software. The lower 12
 * bits of the 20 bit counter are randomized. IbsOpCurCnt is
 * initialized with a 12 bit random value.
 */
static inline u64 op_amd_randomize_ibs_op(u64 val)
{
        unsigned int random = lfsr_random();

        if (!(ibs_caps & IBS_CAPS_RDWROPCNT))
                /*
                 * Work around hardware that cannot write to IbsOpCurCnt.
                 *
                 * Randomize the lower 8 bits of the 16 bit
                 * IbsOpMaxCnt [15:0] value in the range of -128 to
                 * +127 by adding/subtracting an offset to the
                 * maximum count (IbsOpMaxCnt).
                 *
                 * To avoid over or underflows and protect upper bits
                 * starting at bit 16, the initial value for
                 * IbsOpMaxCnt must fit in the range from 0x0081 to
                 * 0xff80.
                 */
                val += (s8)(random >> 4);
        else
                val |= (u64)(random & IBS_RANDOM_MASK) << 32;

        return val;
}

static inline void
op_amd_handle_ibs(struct pt_regs * const regs,
                  struct op_msrs const * const msrs)
{
        u64 val, ctl;
        struct op_entry entry;

        if (!ibs_caps)
                return;

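        /*
         * A set valid bit (IBS_FETCH_VAL / IBS_OP_VAL) in a control
         * MSR means the hardware has latched a complete sample in
         * the corresponding data registers; copy it into an oprofile
         * record, then clear the valid bit and re-arm sampling.
         */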
        if (ibs_config.fetch_enabled) {
                rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
                if (ctl & IBS_FETCH_VAL) {
                        rdmsrl(MSR_AMD64_IBSFETCHLINAD, val);
                        oprofile_write_reserve(&entry, regs, val,
                                               IBS_FETCH_CODE, IBS_FETCH_SIZE);
                        oprofile_add_data64(&entry, val);
                        oprofile_add_data64(&entry, ctl);
                        rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val);
                        oprofile_add_data64(&entry, val);
                        oprofile_write_commit(&entry);

                        /* reenable the IRQ */
                        ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT);
                        ctl |= IBS_FETCH_ENABLE;
                        wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
                }
        }

        if (ibs_config.op_enabled) {
                rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
                if (ctl & IBS_OP_VAL) {
                        rdmsrl(MSR_AMD64_IBSOPRIP, val);
                        oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,
                                               ibs_state.sample_size);
                        oprofile_add_data64(&entry, val);
                        rdmsrl(MSR_AMD64_IBSOPDATA, val);
                        oprofile_add_data64(&entry, val);
                        rdmsrl(MSR_AMD64_IBSOPDATA2, val);
                        oprofile_add_data64(&entry, val);
                        rdmsrl(MSR_AMD64_IBSOPDATA3, val);
                        oprofile_add_data64(&entry, val);
                        rdmsrl(MSR_AMD64_IBSDCLINAD, val);
                        oprofile_add_data64(&entry, val);
                        rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
                        oprofile_add_data64(&entry, val);
                        if (ibs_state.branch_target) {
                                rdmsrl(MSR_AMD64_IBSBRTARGET, val);
                                oprofile_add_data(&entry, (unsigned long)val);
                        }
                        oprofile_write_commit(&entry);

                        /* reenable the IRQ */
                        ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
                        wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
                }
        }
}

static inline void op_amd_start_ibs(void)
{
        u64 val;

        if (!ibs_caps)
                return;

        memset(&ibs_state, 0, sizeof(ibs_state));

        /*
         * Note: Since the max count settings may be out of range we
         * write back the values actually used so that userland can
         * read them.
         */

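        /*
         * The hardware max count fields hold the count divided by 16
         * (the low four bits are implied zero), hence the >> 4 when
         * programming and the << 4 when writing the effective value
         * back to ibs_config.
         */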
        if (ibs_config.fetch_enabled) {
                val = ibs_config.max_cnt_fetch >> 4;
                val = min(val, IBS_FETCH_MAX_CNT);
                ibs_config.max_cnt_fetch = val << 4;
                val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
                val |= IBS_FETCH_ENABLE;
                wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
        }

        if (ibs_config.op_enabled) {
                val = ibs_config.max_cnt_op >> 4;
                if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
                        /*
                         * IbsOpCurCnt not supported.  See
                         * op_amd_randomize_ibs_op() for details.
                         */
                        val = clamp(val, 0x0081ULL, 0xFF80ULL);
                        ibs_config.max_cnt_op = val << 4;
                } else {
                        /*
                         * The start value is randomized with a
                         * positive offset; compensate with half of
                         * the randomization range. Also avoid
                         * underflows.
                         */
                        val += IBS_RANDOM_MAXCNT_OFFSET;
                        if (ibs_caps & IBS_CAPS_OPCNTEXT)
                                val = min(val, IBS_OP_MAX_CNT_EXT);
                        else
                                val = min(val, IBS_OP_MAX_CNT);
                        ibs_config.max_cnt_op =
                                (val - IBS_RANDOM_MAXCNT_OFFSET) << 4;
                }
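                /*
                 * Split the max count: bits [15:0] stay in the
                 * IbsOpMaxCnt field, any upper bits are shifted into
                 * the extended max count field at IbsOpCtl bits
                 * [26:20] (only present with IBS_CAPS_OPCNTEXT).
                 */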
                val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
                val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
                val |= IBS_OP_ENABLE;
                ibs_state.ibs_op_ctl = val;
                ibs_state.sample_size = IBS_OP_SIZE;
                if (ibs_config.branch_target) {
                        ibs_state.branch_target = 1;
                        ibs_state.sample_size++;
                }
                val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
                wrmsrl(MSR_AMD64_IBSOPCTL, val);
        }
}

static void op_amd_stop_ibs(void)
{
        if (!ibs_caps)
                return;

        if (ibs_config.fetch_enabled)
                /* clear max count and enable */
                wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);

        if (ibs_config.op_enabled)
                /* clear max count and enable */
                wrmsrl(MSR_AMD64_IBSOPCTL, 0);
}

#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX

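/*
 * With multiplexing, more events can be configured than hardware
 * counters exist. On each switch interval the control registers are
 * rewritten with the event selects of the virtual counters that are
 * currently mapped onto the physical ones.
 */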
static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
                               struct op_msrs const * const msrs)
{
        u64 val;
        int i;

        /* enable active counters */
        for (i = 0; i < num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (!reset_value[virt])
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
                val &= model->reserved;
                val |= op_x86_get_ctrl(model, &counter_config[virt]);
                wrmsrl(msrs->controls[i].addr, val);
        }
}

#endif

/* functions for op_amd_spec */

static void op_amd_shutdown(struct op_msrs const * const msrs)
{
        int i;

        for (i = 0; i < num_counters; ++i) {
                if (!msrs->counters[i].addr)
                        continue;
                release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
                release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
        }
}

static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
{
        int i;

        for (i = 0; i < num_counters; i++) {
                if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
                        goto fail;
                if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
                        release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
                        goto fail;
                }
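                /*
                 * Family 15h interleaves the core perf MSRs as
                 * CTL/CTR pairs (MSR_F15H_PERF_CTL + 2*i,
                 * MSR_F15H_PERF_CTR + 2*i), hence the stride of two;
                 * older families use the flat K7 MSR block.
                 */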
                /* both registers must be reserved */
                if (num_counters == AMD64_NUM_COUNTERS_CORE) {
                        msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
                        msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
                } else {
                        msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
                        msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
                }
                continue;
        fail:
                if (!counter_config[i].enabled)
                        continue;
                op_x86_warn_reserved(i);
                op_amd_shutdown(msrs);
                return -EBUSY;
        }

        return 0;
}

static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
                              struct op_msrs const * const msrs)
{
        u64 val;
        int i;

        /* setup reset_value */
        for (i = 0; i < OP_MAX_COUNTER; ++i) {
                if (counter_config[i].enabled
                    && msrs->counters[op_x86_virt_to_phys(i)].addr)
                        reset_value[i] = counter_config[i].count;
                else
                        reset_value[i] = 0;
        }

        /* clear all counters */
        for (i = 0; i < num_counters; ++i) {
                if (!msrs->controls[i].addr)
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
                if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
                        op_x86_warn_in_use(i);
                val &= model->reserved;
                wrmsrl(msrs->controls[i].addr, val);
                /*
                 * avoid a false detection of ctr overflows in NMI
                 * handler
                 */
                wrmsrl(msrs->counters[i].addr, -1LL);
        }

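        /*
         * The counters count upward and raise an NMI on overflow, so
         * each active counter is seeded with the two's complement of
         * its count: it overflows after reset_value events.
         */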
        /* enable active counters */
        for (i = 0; i < num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (!reset_value[virt])
                        continue;

                /* setup counter registers */
                wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);

                /* setup control registers */
                rdmsrl(msrs->controls[i].addr, val);
                val &= model->reserved;
                val |= op_x86_get_ctrl(model, &counter_config[virt]);
                wrmsrl(msrs->controls[i].addr, val);
        }
}

static int op_amd_check_ctrs(struct pt_regs * const regs,
                             struct op_msrs const * const msrs)
{
        u64 val;
        int i;

        for (i = 0; i < num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (!reset_value[virt])
                        continue;
                rdmsrl(msrs->counters[i].addr, val);
                /* bit is clear if overflowed: */
                if (val & OP_CTR_OVERFLOW)
                        continue;
                oprofile_add_sample(regs, virt);
                wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
        }

        op_amd_handle_ibs(regs, msrs);

        /* Always claim the NMI; see op_model_ppro.c for the rationale. */
        return 1;
}

static void op_amd_start(struct op_msrs const * const msrs)
{
        u64 val;
        int i;

        for (i = 0; i < num_counters; ++i) {
                if (!reset_value[op_x86_phys_to_virt(i)])
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
                val |= ARCH_PERFMON_EVENTSEL_ENABLE;
                wrmsrl(msrs->controls[i].addr, val);
        }

        op_amd_start_ibs();
}

static void op_amd_stop(struct op_msrs const * const msrs)
{
        u64 val;
        int i;

        /*
         * Subtle: stop all counters to avoid racing with the setting
         * of our pm callback.
         */
        for (i = 0; i < num_counters; ++i) {
                if (!reset_value[op_x86_phys_to_virt(i)])
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
                val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
                wrmsrl(msrs->controls[i].addr, val);
        }

        op_amd_stop_ibs();
}

/*
 * check and reserve APIC extended interrupt LVT offset for IBS if
 * available
 */
static void init_ibs(void)
{
        ibs_caps = get_ibs_caps();

        if (!ibs_caps)
                return;

        printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
}

static int (*create_arch_files)(struct super_block *sb, struct dentry *root);

static int setup_ibs_files(struct super_block *sb, struct dentry *root)
{
        struct dentry *dir;
        int ret = 0;

        /* architecture specific files */
        if (create_arch_files)
                ret = create_arch_files(sb, root);

        if (ret)
                return ret;

        if (!ibs_caps)
                return ret;

        /* model specific files */

        /* setup some reasonable defaults */
        memset(&ibs_config, 0, sizeof(ibs_config));
        ibs_config.max_cnt_fetch = 250000;
        ibs_config.max_cnt_op = 250000;

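        /*
         * The files below appear under the oprofilefs mount point
         * (typically /dev/oprofile) in the ibs_fetch/ and ibs_op/
         * directories.
         */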
        if (ibs_caps & IBS_CAPS_FETCHSAM) {
                dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
                oprofilefs_create_ulong(sb, dir, "enable",
                                        &ibs_config.fetch_enabled);
                oprofilefs_create_ulong(sb, dir, "max_count",
                                        &ibs_config.max_cnt_fetch);
                oprofilefs_create_ulong(sb, dir, "rand_enable",
                                        &ibs_config.rand_en);
        }

        if (ibs_caps & IBS_CAPS_OPSAM) {
                dir = oprofilefs_mkdir(sb, root, "ibs_op");
                oprofilefs_create_ulong(sb, dir, "enable",
                                        &ibs_config.op_enabled);
                oprofilefs_create_ulong(sb, dir, "max_count",
                                        &ibs_config.max_cnt_op);
                if (ibs_caps & IBS_CAPS_OPCNT)
                        oprofilefs_create_ulong(sb, dir, "dispatched_ops",
                                                &ibs_config.dispatched_ops);
                if (ibs_caps & IBS_CAPS_BRNTRGT)
                        oprofilefs_create_ulong(sb, dir, "branch_target",
                                                &ibs_config.branch_target);
        }

        return 0;
}

struct op_x86_model_spec op_amd_spec;

static int op_amd_init(struct oprofile_operations *ops)
{
        init_ibs();
        create_arch_files = ops->create_files;
        ops->create_files = setup_ibs_files;

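        /* Family 15h exposes six core counters, earlier families four. */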
        if (boot_cpu_data.x86 == 0x15)
                num_counters = AMD64_NUM_COUNTERS_CORE;
        else
                num_counters = AMD64_NUM_COUNTERS;

        op_amd_spec.num_counters = num_counters;
        op_amd_spec.num_controls = num_counters;
        op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);

        return 0;
}

struct op_x86_model_spec op_amd_spec = {
        /* num_counters/num_controls filled in at runtime */
        .reserved               = MSR_AMD_EVENTSEL_RESERVED,
        .event_mask             = OP_EVENT_MASK,
        .init                   = op_amd_init,
        .fill_in_addresses      = &op_amd_fill_in_addresses,
        .setup_ctrs             = &op_amd_setup_ctrs,
        .check_ctrs             = &op_amd_check_ctrs,
        .start                  = &op_amd_start,
        .stop                   = &op_amd_stop,
        .shutdown               = &op_amd_shutdown,
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
        .switch_ctrl            = &op_mux_switch_ctrl,
#endif
};