/* linux/arch/x86/oprofile/op_model_amd.c */
   1/*
   2 * @file op_model_amd.c
   3 * athlon / K7 / K8 / Family 10h model-specific MSR operations
   4 *
   5 * @remark Copyright 2002-2009 OProfile authors
   6 * @remark Read the file COPYING
   7 *
   8 * @author John Levon
   9 * @author Philippe Elie
  10 * @author Graydon Hoare
  11 * @author Robert Richter <robert.richter@amd.com>
  12 * @author Barry Kasindorf <barry.kasindorf@amd.com>
  13 * @author Jason Yeh <jason.yeh@amd.com>
  14 * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
  15 */
  16
  17#include <linux/oprofile.h>
  18#include <linux/device.h>
  19#include <linux/pci.h>
  20#include <linux/percpu.h>
  21
  22#include <asm/ptrace.h>
  23#include <asm/msr.h>
  24#include <asm/nmi.h>
  25#include <asm/apic.h>
  26#include <asm/processor.h>
  27
  28#include "op_x86_model.h"
  29#include "op_counter.h"
  30
  31#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
  32#define NUM_VIRT_COUNTERS       32
  33#else
  34#define NUM_VIRT_COUNTERS       0
  35#endif
  36
  37#define OP_EVENT_MASK                   0x0FFF
  38#define OP_CTR_OVERFLOW                 (1ULL<<31)
  39
  40#define MSR_AMD_EVENTSEL_RESERVED       ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
  41
  42static int num_counters;
  43static unsigned long reset_value[OP_MAX_COUNTER];
  44
  45#define IBS_FETCH_SIZE                  6
  46#define IBS_OP_SIZE                     12
  47
  48static u32 ibs_caps;
  49
  50struct ibs_config {
  51        unsigned long op_enabled;
  52        unsigned long fetch_enabled;
  53        unsigned long max_cnt_fetch;
  54        unsigned long max_cnt_op;
  55        unsigned long rand_en;
  56        unsigned long dispatched_ops;
  57        unsigned long branch_target;
  58};
  59
  60struct ibs_state {
  61        u64             ibs_op_ctl;
  62        int             branch_target;
  63        unsigned long   sample_size;
  64};
  65
  66static struct ibs_config ibs_config;
  67static struct ibs_state ibs_state;
  68
  69/*
  70 * IBS randomization macros
  71 */
  72#define IBS_RANDOM_BITS                 12
  73#define IBS_RANDOM_MASK                 ((1ULL << IBS_RANDOM_BITS) - 1)
  74#define IBS_RANDOM_MAXCNT_OFFSET        (1ULL << (IBS_RANDOM_BITS - 5))
  75
  76/*
  77 * 16-bit Linear Feedback Shift Register (LFSR)
  78 *
  79 *                       16   14   13    11
  80 * Feedback polynomial = X  + X  + X  +  X  + 1
  81 */
  82static unsigned int lfsr_random(void)
  83{
  84        static unsigned int lfsr_value = 0xF00D;
  85        unsigned int bit;
  86
  87        /* Compute next bit to shift in */
  88        bit = ((lfsr_value >> 0) ^
  89               (lfsr_value >> 2) ^
  90               (lfsr_value >> 3) ^
  91               (lfsr_value >> 5)) & 0x0001;
  92
  93        /* Advance to next register value */
  94        lfsr_value = (lfsr_value >> 1) | (bit << 15);
  95
  96        return lfsr_value;
  97}
  98
  99/*
 100 * IBS software randomization
 101 *
 102 * The IBS periodic op counter is randomized in software. The lower 12
 103 * bits of the 20 bit counter are randomized. IbsOpCurCnt is
 104 * initialized with a 12 bit random value.
 105 */
 106static inline u64 op_amd_randomize_ibs_op(u64 val)
 107{
 108        unsigned int random = lfsr_random();
 109
 110        if (!(ibs_caps & IBS_CAPS_RDWROPCNT))
 111                /*
 112                 * Work around if the hw can not write to IbsOpCurCnt
 113                 *
 114                 * Randomize the lower 8 bits of the 16 bit
 115                 * IbsOpMaxCnt [15:0] value in the range of -128 to
 116                 * +127 by adding/subtracting an offset to the
 117                 * maximum count (IbsOpMaxCnt).
 118                 *
 119                 * To avoid over or underflows and protect upper bits
 120                 * starting at bit 16, the initial value for
 121                 * IbsOpMaxCnt must fit in the range from 0x0081 to
 122                 * 0xff80.
 123                 */
 124                val += (s8)(random >> 4);
 125        else
 126                val |= (u64)(random & IBS_RANDOM_MASK) << 32;
 127
 128        return val;
 129}
 130
/*
 * Poll the IBS fetch and op units for a completed sample, push the
 * sample data into the oprofile buffer, and re-arm the unit.  Called
 * from the NMI path via op_amd_check_ctrs().
 */
static inline void
op_amd_handle_ibs(struct pt_regs * const regs,
		  struct op_msrs const * const msrs)
{
	u64 val, ctl;
	struct op_entry entry;

	if (!ibs_caps)
		return;		/* no IBS hardware */

	if (ibs_config.fetch_enabled) {
		rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
		if (ctl & IBS_FETCH_VAL) {
			/* payload: linear address, fetch control, physical address */
			rdmsrl(MSR_AMD64_IBSFETCHLINAD, val);
			oprofile_write_reserve(&entry, regs, val,
					       IBS_FETCH_CODE, IBS_FETCH_SIZE);
			oprofile_add_data64(&entry, val);
			oprofile_add_data64(&entry, ctl);
			rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val);
			oprofile_add_data64(&entry, val);
			oprofile_write_commit(&entry);

			/* reenable the IRQ: clear valid bit and count, set enable */
			ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT);
			ctl |= IBS_FETCH_ENABLE;
			wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
		}
	}

	if (ibs_config.op_enabled) {
		rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
		if (ctl & IBS_OP_VAL) {
			/* sample_size was fixed at start time in op_amd_start_ibs() */
			rdmsrl(MSR_AMD64_IBSOPRIP, val);
			oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,
					       ibs_state.sample_size);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA2, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA3, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSDCLINAD, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
			oprofile_add_data64(&entry, val);
			if (ibs_state.branch_target) {
				/* optional extra word when branch target sampling is on */
				rdmsrl(MSR_AMD64_IBSBRTARGET, val);
				oprofile_add_data(&entry, (unsigned long)val);
			}
			oprofile_write_commit(&entry);

			/* reenable the IRQ */
			ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
			wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
		}
	}
}
 189
/*
 * Program and enable the IBS fetch and/or op units according to
 * ibs_config, caching the op control word in ibs_state so the NMI
 * handler can re-arm the unit after each sample.
 */
static inline void op_amd_start_ibs(void)
{
	u64 val;

	if (!ibs_caps)
		return;

	memset(&ibs_state, 0, sizeof(ibs_state));

	/*
	 * Note: Since the max count settings may out of range we
	 * write back the actual used values so that userland can read
	 * it.
	 */

	if (ibs_config.fetch_enabled) {
		/* user value is scaled down by 16 for the hw max-count field */
		val = ibs_config.max_cnt_fetch >> 4;
		val = min(val, IBS_FETCH_MAX_CNT);
		ibs_config.max_cnt_fetch = val << 4;	/* report clamped value back */
		val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
		val |= IBS_FETCH_ENABLE;
		wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
	}

	if (ibs_config.op_enabled) {
		val = ibs_config.max_cnt_op >> 4;
		if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
			/*
			 * IbsOpCurCnt not supported.  See
			 * op_amd_randomize_ibs_op() for details.
			 */
			val = clamp(val, 0x0081ULL, 0xFF80ULL);
			ibs_config.max_cnt_op = val << 4;
		} else {
			/*
			 * The start value is randomized with a
			 * positive offset, we need to compensate it
			 * with the half of the randomized range. Also
			 * avoid underflows.
			 */
			val += IBS_RANDOM_MAXCNT_OFFSET;
			if (ibs_caps & IBS_CAPS_OPCNTEXT)
				val = min(val, IBS_OP_MAX_CNT_EXT);
			else
				val = min(val, IBS_OP_MAX_CNT);
			ibs_config.max_cnt_op =
				(val - IBS_RANDOM_MAXCNT_OFFSET) << 4;
		}
		/* split the count into the base field and the extended bits */
		val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
		val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
		val |= IBS_OP_ENABLE;
		ibs_state.ibs_op_ctl = val;
		ibs_state.sample_size = IBS_OP_SIZE;
		if (ibs_config.branch_target) {
			/* one extra data word per op sample (branch target) */
			ibs_state.branch_target = 1;
			ibs_state.sample_size++;
		}
		val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
		wrmsrl(MSR_AMD64_IBSOPCTL, val);
	}
}
 251
 252static void op_amd_stop_ibs(void)
 253{
 254        if (!ibs_caps)
 255                return;
 256
 257        if (ibs_config.fetch_enabled)
 258                /* clear max count and enable */
 259                wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);
 260
 261        if (ibs_config.op_enabled)
 262                /* clear max count and enable */
 263                wrmsrl(MSR_AMD64_IBSOPCTL, 0);
 264}
 265
 266#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
 267
 268static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
 269                               struct op_msrs const * const msrs)
 270{
 271        u64 val;
 272        int i;
 273
 274        /* enable active counters */
 275        for (i = 0; i < num_counters; ++i) {
 276                int virt = op_x86_phys_to_virt(i);
 277                if (!reset_value[virt])
 278                        continue;
 279                rdmsrl(msrs->controls[i].addr, val);
 280                val &= model->reserved;
 281                val |= op_x86_get_ctrl(model, &counter_config[virt]);
 282                wrmsrl(msrs->controls[i].addr, val);
 283        }
 284}
 285
 286#endif
 287
 288/* functions for op_amd_spec */
 289
 290static void op_amd_shutdown(struct op_msrs const * const msrs)
 291{
 292        int i;
 293
 294        for (i = 0; i < num_counters; ++i) {
 295                if (!msrs->counters[i].addr)
 296                        continue;
 297                release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
 298                release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
 299        }
 300}
 301
 302static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
 303{
 304        int i;
 305
 306        for (i = 0; i < num_counters; i++) {
 307                if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
 308                        goto fail;
 309                if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
 310                        release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
 311                        goto fail;
 312                }
 313                /* both registers must be reserved */
 314                if (num_counters == AMD64_NUM_COUNTERS_CORE) {
 315                        msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
 316                        msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
 317                } else {
 318                        msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
 319                        msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
 320                }
 321                continue;
 322        fail:
 323                if (!counter_config[i].enabled)
 324                        continue;
 325                op_x86_warn_reserved(i);
 326                op_amd_shutdown(msrs);
 327                return -EBUSY;
 328        }
 329
 330        return 0;
 331}
 332
/*
 * Per-CPU counter setup: derive reset_value[] from the user's counter
 * configuration, clear all reserved control/counter MSRs, then program
 * the active counters to count up toward overflow.
 */
static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
			      struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/* setup reset_value */
	for (i = 0; i < OP_MAX_COUNTER; ++i) {
		if (counter_config[i].enabled
		    && msrs->counters[op_x86_virt_to_phys(i)].addr)
			reset_value[i] = counter_config[i].count;
		else
			reset_value[i] = 0;	/* disabled or not reserved */
	}

	/* clear all counters */
	for (i = 0; i < num_counters; ++i) {
		if (!msrs->controls[i].addr)
			continue;	/* this pair was never reserved */
		rdmsrl(msrs->controls[i].addr, val);
		if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
			op_x86_warn_in_use(i);	/* someone else enabled it */
		val &= model->reserved;
		wrmsrl(msrs->controls[i].addr, val);
		/*
		 * avoid a false detection of ctr overflows in NMI
		 * handler
		 */
		wrmsrl(msrs->counters[i].addr, -1LL);
	}

	/* enable active counters */
	for (i = 0; i < num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (!reset_value[virt])
			continue;

		/* setup counter registers: count up from -reset_value */
		wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);

		/* setup control registers */
		rdmsrl(msrs->controls[i].addr, val);
		val &= model->reserved;
		val |= op_x86_get_ctrl(model, &counter_config[virt]);
		wrmsrl(msrs->controls[i].addr, val);
	}
}
 380
/*
 * NMI handler body: record a sample and re-arm each counter that has
 * overflowed, then poll the IBS units.
 *
 * Returns 1 so the NMI is reported as handled (see op_model_ppro.c).
 */
static int op_amd_check_ctrs(struct pt_regs * const regs,
			     struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	for (i = 0; i < num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (!reset_value[virt])
			continue;	/* counter not active */
		rdmsrl(msrs->counters[i].addr, val);
		/* bit is clear if overflowed: */
		if (val & OP_CTR_OVERFLOW)
			continue;
		oprofile_add_sample(regs, virt);
		/* re-arm: count up again from -reset_value */
		wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
	}

	op_amd_handle_ibs(regs, msrs);

	/* See op_model_ppro.c */
	return 1;
}
 404
 405static void op_amd_start(struct op_msrs const * const msrs)
 406{
 407        u64 val;
 408        int i;
 409
 410        for (i = 0; i < num_counters; ++i) {
 411                if (!reset_value[op_x86_phys_to_virt(i)])
 412                        continue;
 413                rdmsrl(msrs->controls[i].addr, val);
 414                val |= ARCH_PERFMON_EVENTSEL_ENABLE;
 415                wrmsrl(msrs->controls[i].addr, val);
 416        }
 417
 418        op_amd_start_ibs();
 419}
 420
 421static void op_amd_stop(struct op_msrs const * const msrs)
 422{
 423        u64 val;
 424        int i;
 425
 426        /*
 427         * Subtle: stop on all counters to avoid race with setting our
 428         * pm callback
 429         */
 430        for (i = 0; i < num_counters; ++i) {
 431                if (!reset_value[op_x86_phys_to_virt(i)])
 432                        continue;
 433                rdmsrl(msrs->controls[i].addr, val);
 434                val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
 435                wrmsrl(msrs->controls[i].addr, val);
 436        }
 437
 438        op_amd_stop_ibs();
 439}
 440
 441/*
 442 * check and reserve APIC extended interrupt LVT offset for IBS if
 443 * available
 444 */
 445
 446static void init_ibs(void)
 447{
 448        ibs_caps = get_ibs_caps();
 449
 450        if (!ibs_caps)
 451                return;
 452
 453        printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
 454}
 455
 456static int (*create_arch_files)(struct dentry *root);
 457
/*
 * oprofilefs create_files hook (installed by op_amd_init()): first run
 * the saved architecture hook, then create the ibs_fetch/ and ibs_op/
 * control files when the corresponding IBS capability is present.
 */
static int setup_ibs_files(struct dentry *root)
{
	struct dentry *dir;
	int ret = 0;

	/* architecture specific files */
	if (create_arch_files)
		ret = create_arch_files(root);

	if (ret)
		return ret;

	if (!ibs_caps)
		return ret;	/* no IBS: only the arch files exist */

	/* model specific files */

	/* setup some reasonable defaults */
	memset(&ibs_config, 0, sizeof(ibs_config));
	ibs_config.max_cnt_fetch = 250000;
	ibs_config.max_cnt_op = 250000;

	if (ibs_caps & IBS_CAPS_FETCHSAM) {
		dir = oprofilefs_mkdir(root, "ibs_fetch");
		oprofilefs_create_ulong(dir, "enable",
					&ibs_config.fetch_enabled);
		oprofilefs_create_ulong(dir, "max_count",
					&ibs_config.max_cnt_fetch);
		oprofilefs_create_ulong(dir, "rand_enable",
					&ibs_config.rand_en);
	}

	if (ibs_caps & IBS_CAPS_OPSAM) {
		dir = oprofilefs_mkdir(root, "ibs_op");
		oprofilefs_create_ulong(dir, "enable",
					&ibs_config.op_enabled);
		oprofilefs_create_ulong(dir, "max_count",
					&ibs_config.max_cnt_op);
		/* these files only exist when the hw advertises the feature */
		if (ibs_caps & IBS_CAPS_OPCNT)
			oprofilefs_create_ulong(dir, "dispatched_ops",
						&ibs_config.dispatched_ops);
		if (ibs_caps & IBS_CAPS_BRNTRGT)
			oprofilefs_create_ulong(dir, "branch_target",
						&ibs_config.branch_target);
	}

	return 0;
}
 506
 507struct op_x86_model_spec op_amd_spec;
 508
 509static int op_amd_init(struct oprofile_operations *ops)
 510{
 511        init_ibs();
 512        create_arch_files = ops->create_files;
 513        ops->create_files = setup_ibs_files;
 514
 515        if (boot_cpu_data.x86 == 0x15) {
 516                num_counters = AMD64_NUM_COUNTERS_CORE;
 517        } else {
 518                num_counters = AMD64_NUM_COUNTERS;
 519        }
 520
 521        op_amd_spec.num_counters = num_counters;
 522        op_amd_spec.num_controls = num_counters;
 523        op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);
 524
 525        return 0;
 526}
 527
/* Model-specific operations table registered with the oprofile x86 core. */
struct op_x86_model_spec op_amd_spec = {
	/* num_counters/num_controls filled in at runtime */
	.reserved		= MSR_AMD_EVENTSEL_RESERVED,
	.event_mask		= OP_EVENT_MASK,
	.init			= op_amd_init,
	.fill_in_addresses	= &op_amd_fill_in_addresses,
	.setup_ctrs		= &op_amd_setup_ctrs,
	.check_ctrs		= &op_amd_check_ctrs,
	.start			= &op_amd_start,
	.stop			= &op_amd_stop,
	.shutdown		= &op_amd_shutdown,
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
	/* only used when event multiplexing is configured in */
	.switch_ctrl		= &op_mux_switch_ctrl,
#endif
};
 543