linux/arch/powerpc/perf/power8-pmu.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Performance counter support for POWER8 processors.
   4 *
   5 * Copyright 2009 Paul Mackerras, IBM Corporation.
   6 * Copyright 2013 Michael Ellerman, IBM Corporation.
   7 */
   8
   9#define pr_fmt(fmt)     "power8-pmu: " fmt
  10
  11#include "isa207-common.h"
  12
  13/*
  14 * Some power8 event codes.
  15 */
  16#define EVENT(_name, _code)     _name = _code,
  17
  18enum {
  19#include "power8-events-list.h"
  20};
  21
  22#undef EVENT
  23
  /*
   * MMCRA IFM bits - POWER8
   *
   * The IFM encodings and the mask covering them occupy bits 30-31 when the
   * least significant bit is counted as bit 0, which is MMCRA[32:33] in IBM
   * bit numbering.
   */
  #define POWER8_MMCRA_IFM1               (0x1UL << 30)
  #define POWER8_MMCRA_IFM2               (0x2UL << 30)
  #define POWER8_MMCRA_IFM3               (0x3UL << 30)
  #define POWER8_MMCRA_BHRB_MASK          (0x3UL << 30)
  29
  30/*
  31 * Raw event encoding for PowerISA v2.07 (Power8):
  32 *
  33 *        60        56        52        48        44        40        36        32
  34 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
  35 *   | | [ ]                           [      thresh_cmp     ]   [  thresh_ctl   ]
  36 *   | |  |                                                              |
  37 *   | |  *- IFM (Linux)                 thresh start/stop OR FAB match -*
  38 *   | *- BHRB (Linux)
  39 *   *- EBB (Linux)
  40 *
  41 *        28        24        20        16        12         8         4         0
  42 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
  43 *   [   ] [  sample ]   [cache]   [ pmc ]   [unit ]   c     m   [    pmcxsel    ]
  44 *     |        |           |                          |     |
  45 *     |        |           |                          |     *- mark
  46 *     |        |           *- L1/L2/L3 cache_sel      |
  47 *     |        |                                      |
  48 *     |        *- sampling mode for marked events     *- combine
  49 *     |
  50 *     *- thresh_sel
  51 *
  52 * Below uses IBM bit numbering.
  53 *
  54 * MMCR1[x:y] = unit    (PMCxUNIT)
  55 * MMCR1[x]   = combine (PMCxCOMB)
  56 *
  57 * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011
  58 *      # PM_MRK_FAB_RSP_MATCH
  59 *      MMCR1[20:27] = thresh_ctl   (FAB_CRESP_MATCH / FAB_TYPE_MATCH)
  60 * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001
  61 *      # PM_MRK_FAB_RSP_MATCH_CYC
  62 *      MMCR1[20:27] = thresh_ctl   (FAB_CRESP_MATCH / FAB_TYPE_MATCH)
  63 * else
  64 *      MMCRA[48:55] = thresh_ctl   (THRESH START/END)
  65 *
  66 * if thresh_sel:
  67 *      MMCRA[45:47] = thresh_sel
  68 *
  69 * if thresh_cmp:
  70 *      MMCRA[22:24] = thresh_cmp[0:2]
  71 *      MMCRA[25:31] = thresh_cmp[3:9]
  72 *
  73 * if unit == 6 or unit == 7
  74 *      MMCRC[53:55] = cache_sel[1:3]      (L2EVENT_SEL)
  75 * else if unit == 8 or unit == 9:
  76 *      if cache_sel[0] == 0: # L3 bank
  77 *              MMCRC[47:49] = cache_sel[1:3]  (L3EVENT_SEL0)
  78 *      else if cache_sel[0] == 1:
  79 *              MMCRC[50:51] = cache_sel[2:3]  (L3EVENT_SEL1)
  80 * else if cache_sel[1]: # L1 event
  81 *      MMCR1[16] = cache_sel[2]
  82 *      MMCR1[17] = cache_sel[3]
  83 *
  84 * if mark:
  85 *      MMCRA[63]    = 1                (SAMPLE_ENABLE)
  86 *      MMCRA[57:59] = sample[0:2]      (RAND_SAMP_ELIG)
  87 *      MMCRA[61:62] = sample[3:4]      (RAND_SAMP_MODE)
  88 *
  89 * if EBB and BHRB:
  90 *      MMCRA[32:33] = IFM
  91 *
  92 */
  93
  /* PowerISA v2.07 format attribute group (declared here, not defined here). */
  extern struct attribute_group isa207_pmu_format_group;
  96
  97/* Table of alternatives, sorted by column 0 */
  98static const unsigned int event_alternatives[][MAX_ALT] = {
  99        { PM_MRK_ST_CMPL,               PM_MRK_ST_CMPL_ALT },
 100        { PM_BR_MRK_2PATH,              PM_BR_MRK_2PATH_ALT },
 101        { PM_L3_CO_MEPF,                PM_L3_CO_MEPF_ALT },
 102        { PM_MRK_DATA_FROM_L2MISS,      PM_MRK_DATA_FROM_L2MISS_ALT },
 103        { PM_CMPLU_STALL_ALT,           PM_CMPLU_STALL },
 104        { PM_BR_2PATH,                  PM_BR_2PATH_ALT },
 105        { PM_INST_DISP,                 PM_INST_DISP_ALT },
 106        { PM_RUN_CYC_ALT,               PM_RUN_CYC },
 107        { PM_MRK_FILT_MATCH,            PM_MRK_FILT_MATCH_ALT },
 108        { PM_LD_MISS_L1,                PM_LD_MISS_L1_ALT },
 109        { PM_RUN_INST_CMPL_ALT,         PM_RUN_INST_CMPL },
 110};
 111
 112static int power8_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 113{
 114        int num_alt = 0;
 115
 116        num_alt = isa207_get_alternatives(event, alt,
 117                                          ARRAY_SIZE(event_alternatives), flags,
 118                                          event_alternatives);
 119
 120        return num_alt;
 121}
 122
 123GENERIC_EVENT_ATTR(cpu-cycles,                  PM_CYC);
 124GENERIC_EVENT_ATTR(stalled-cycles-frontend,     PM_GCT_NOSLOT_CYC);
 125GENERIC_EVENT_ATTR(stalled-cycles-backend,      PM_CMPLU_STALL);
 126GENERIC_EVENT_ATTR(instructions,                PM_INST_CMPL);
 127GENERIC_EVENT_ATTR(branch-instructions,         PM_BRU_FIN);
 128GENERIC_EVENT_ATTR(branch-misses,               PM_BR_MPRED_CMPL);
 129GENERIC_EVENT_ATTR(cache-references,            PM_LD_REF_L1);
 130GENERIC_EVENT_ATTR(cache-misses,                PM_LD_MISS_L1);
 131GENERIC_EVENT_ATTR(mem_access,                  MEM_ACCESS);
 132
 133CACHE_EVENT_ATTR(L1-dcache-load-misses,         PM_LD_MISS_L1);
 134CACHE_EVENT_ATTR(L1-dcache-loads,               PM_LD_REF_L1);
 135
 136CACHE_EVENT_ATTR(L1-dcache-prefetches,          PM_L1_PREF);
 137CACHE_EVENT_ATTR(L1-dcache-store-misses,        PM_ST_MISS_L1);
 138CACHE_EVENT_ATTR(L1-icache-load-misses,         PM_L1_ICACHE_MISS);
 139CACHE_EVENT_ATTR(L1-icache-loads,               PM_INST_FROM_L1);
 140CACHE_EVENT_ATTR(L1-icache-prefetches,          PM_IC_PREF_WRITE);
 141
 142CACHE_EVENT_ATTR(LLC-load-misses,               PM_DATA_FROM_L3MISS);
 143CACHE_EVENT_ATTR(LLC-loads,                     PM_DATA_FROM_L3);
 144CACHE_EVENT_ATTR(LLC-prefetches,                PM_L3_PREF_ALL);
 145CACHE_EVENT_ATTR(LLC-store-misses,              PM_L2_ST_MISS);
 146CACHE_EVENT_ATTR(LLC-stores,                    PM_L2_ST);
 147
 148CACHE_EVENT_ATTR(branch-load-misses,            PM_BR_MPRED_CMPL);
 149CACHE_EVENT_ATTR(branch-loads,                  PM_BRU_FIN);
 150CACHE_EVENT_ATTR(dTLB-load-misses,              PM_DTLB_MISS);
 151CACHE_EVENT_ATTR(iTLB-load-misses,              PM_ITLB_MISS);
 152
 153static struct attribute *power8_events_attr[] = {
 154        GENERIC_EVENT_PTR(PM_CYC),
 155        GENERIC_EVENT_PTR(PM_GCT_NOSLOT_CYC),
 156        GENERIC_EVENT_PTR(PM_CMPLU_STALL),
 157        GENERIC_EVENT_PTR(PM_INST_CMPL),
 158        GENERIC_EVENT_PTR(PM_BRU_FIN),
 159        GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
 160        GENERIC_EVENT_PTR(PM_LD_REF_L1),
 161        GENERIC_EVENT_PTR(PM_LD_MISS_L1),
 162        GENERIC_EVENT_PTR(MEM_ACCESS),
 163
 164        CACHE_EVENT_PTR(PM_LD_MISS_L1),
 165        CACHE_EVENT_PTR(PM_LD_REF_L1),
 166        CACHE_EVENT_PTR(PM_L1_PREF),
 167        CACHE_EVENT_PTR(PM_ST_MISS_L1),
 168        CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
 169        CACHE_EVENT_PTR(PM_INST_FROM_L1),
 170        CACHE_EVENT_PTR(PM_IC_PREF_WRITE),
 171        CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
 172        CACHE_EVENT_PTR(PM_DATA_FROM_L3),
 173        CACHE_EVENT_PTR(PM_L3_PREF_ALL),
 174        CACHE_EVENT_PTR(PM_L2_ST_MISS),
 175        CACHE_EVENT_PTR(PM_L2_ST),
 176
 177        CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
 178        CACHE_EVENT_PTR(PM_BRU_FIN),
 179
 180        CACHE_EVENT_PTR(PM_DTLB_MISS),
 181        CACHE_EVENT_PTR(PM_ITLB_MISS),
 182        NULL
 183};
 184
 185static struct attribute_group power8_pmu_events_group = {
 186        .name = "events",
 187        .attrs = power8_events_attr,
 188};
 189
 190static const struct attribute_group *power8_pmu_attr_groups[] = {
 191        &isa207_pmu_format_group,
 192        &power8_pmu_events_group,
 193        NULL,
 194};
 195
 196static int power8_generic_events[] = {
 197        [PERF_COUNT_HW_CPU_CYCLES] =                    PM_CYC,
 198        [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =       PM_GCT_NOSLOT_CYC,
 199        [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =        PM_CMPLU_STALL,
 200        [PERF_COUNT_HW_INSTRUCTIONS] =                  PM_INST_CMPL,
 201        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =           PM_BRU_FIN,
 202        [PERF_COUNT_HW_BRANCH_MISSES] =                 PM_BR_MPRED_CMPL,
 203        [PERF_COUNT_HW_CACHE_REFERENCES] =              PM_LD_REF_L1,
 204        [PERF_COUNT_HW_CACHE_MISSES] =                  PM_LD_MISS_L1,
 205};
 206
 207static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 208{
 209        u64 pmu_bhrb_filter = 0;
 210
 211        /* BHRB and regular PMU events share the same privilege state
 212         * filter configuration. BHRB is always recorded along with a
 213         * regular PMU event. As the privilege state filter is handled
 214         * in the basic PMC configuration of the accompanying regular
 215         * PMU event, we ignore any separate BHRB specific request.
 216         */
 217
 218        /* No branch filter requested */
 219        if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
 220                return pmu_bhrb_filter;
 221
 222        /* Invalid branch filter options - HW does not support */
 223        if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
 224                return -1;
 225
 226        if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL)
 227                return -1;
 228
 229        if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL)
 230                return -1;
 231
 232        if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
 233                pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
 234                return pmu_bhrb_filter;
 235        }
 236
 237        /* Every thing else is unsupported */
 238        return -1;
 239}
 240
 241static void power8_config_bhrb(u64 pmu_bhrb_filter)
 242{
 243        pmu_bhrb_filter &= POWER8_MMCRA_BHRB_MASK;
 244
 245        /* Enable BHRB filter in PMU */
 246        mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter));
 247}
 248
 249#define C(x)    PERF_COUNT_HW_CACHE_##x
 250
 251/*
 252 * Table of generalized cache-related events.
 253 * 0 means not supported, -1 means nonsensical, other values
 254 * are event codes.
 255 */
 256static int power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 257        [ C(L1D) ] = {
 258                [ C(OP_READ) ] = {
 259                        [ C(RESULT_ACCESS) ] = PM_LD_REF_L1,
 260                        [ C(RESULT_MISS)   ] = PM_LD_MISS_L1,
 261                },
 262                [ C(OP_WRITE) ] = {
 263                        [ C(RESULT_ACCESS) ] = 0,
 264                        [ C(RESULT_MISS)   ] = PM_ST_MISS_L1,
 265                },
 266                [ C(OP_PREFETCH) ] = {
 267                        [ C(RESULT_ACCESS) ] = PM_L1_PREF,
 268                        [ C(RESULT_MISS)   ] = 0,
 269                },
 270        },
 271        [ C(L1I) ] = {
 272                [ C(OP_READ) ] = {
 273                        [ C(RESULT_ACCESS) ] = PM_INST_FROM_L1,
 274                        [ C(RESULT_MISS)   ] = PM_L1_ICACHE_MISS,
 275                },
 276                [ C(OP_WRITE) ] = {
 277                        [ C(RESULT_ACCESS) ] = PM_L1_DEMAND_WRITE,
 278                        [ C(RESULT_MISS)   ] = -1,
 279                },
 280                [ C(OP_PREFETCH) ] = {
 281                        [ C(RESULT_ACCESS) ] = PM_IC_PREF_WRITE,
 282                        [ C(RESULT_MISS)   ] = 0,
 283                },
 284        },
 285        [ C(LL) ] = {
 286                [ C(OP_READ) ] = {
 287                        [ C(RESULT_ACCESS) ] = PM_DATA_FROM_L3,
 288                        [ C(RESULT_MISS)   ] = PM_DATA_FROM_L3MISS,
 289                },
 290                [ C(OP_WRITE) ] = {
 291                        [ C(RESULT_ACCESS) ] = PM_L2_ST,
 292                        [ C(RESULT_MISS)   ] = PM_L2_ST_MISS,
 293                },
 294                [ C(OP_PREFETCH) ] = {
 295                        [ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL,
 296                        [ C(RESULT_MISS)   ] = 0,
 297                },
 298        },
 299        [ C(DTLB) ] = {
 300                [ C(OP_READ) ] = {
 301                        [ C(RESULT_ACCESS) ] = 0,
 302                        [ C(RESULT_MISS)   ] = PM_DTLB_MISS,
 303                },
 304                [ C(OP_WRITE) ] = {
 305                        [ C(RESULT_ACCESS) ] = -1,
 306                        [ C(RESULT_MISS)   ] = -1,
 307                },
 308                [ C(OP_PREFETCH) ] = {
 309                        [ C(RESULT_ACCESS) ] = -1,
 310                        [ C(RESULT_MISS)   ] = -1,
 311                },
 312        },
 313        [ C(ITLB) ] = {
 314                [ C(OP_READ) ] = {
 315                        [ C(RESULT_ACCESS) ] = 0,
 316                        [ C(RESULT_MISS)   ] = PM_ITLB_MISS,
 317                },
 318                [ C(OP_WRITE) ] = {
 319                        [ C(RESULT_ACCESS) ] = -1,
 320                        [ C(RESULT_MISS)   ] = -1,
 321                },
 322                [ C(OP_PREFETCH) ] = {
 323                        [ C(RESULT_ACCESS) ] = -1,
 324                        [ C(RESULT_MISS)   ] = -1,
 325                },
 326        },
 327        [ C(BPU) ] = {
 328                [ C(OP_READ) ] = {
 329                        [ C(RESULT_ACCESS) ] = PM_BRU_FIN,
 330                        [ C(RESULT_MISS)   ] = PM_BR_MPRED_CMPL,
 331                },
 332                [ C(OP_WRITE) ] = {
 333                        [ C(RESULT_ACCESS) ] = -1,
 334                        [ C(RESULT_MISS)   ] = -1,
 335                },
 336                [ C(OP_PREFETCH) ] = {
 337                        [ C(RESULT_ACCESS) ] = -1,
 338                        [ C(RESULT_MISS)   ] = -1,
 339                },
 340        },
 341        [ C(NODE) ] = {
 342                [ C(OP_READ) ] = {
 343                        [ C(RESULT_ACCESS) ] = -1,
 344                        [ C(RESULT_MISS)   ] = -1,
 345                },
 346                [ C(OP_WRITE) ] = {
 347                        [ C(RESULT_ACCESS) ] = -1,
 348                        [ C(RESULT_MISS)   ] = -1,
 349                },
 350                [ C(OP_PREFETCH) ] = {
 351                        [ C(RESULT_ACCESS) ] = -1,
 352                        [ C(RESULT_MISS)   ] = -1,
 353                },
 354        },
 355};
 356
 357#undef C
 358
 359static struct power_pmu power8_pmu = {
 360        .name                   = "POWER8",
 361        .n_counter              = MAX_PMU_COUNTERS,
 362        .max_alternatives       = MAX_ALT + 1,
 363        .add_fields             = ISA207_ADD_FIELDS,
 364        .test_adder             = ISA207_TEST_ADDER,
 365        .compute_mmcr           = isa207_compute_mmcr,
 366        .config_bhrb            = power8_config_bhrb,
 367        .bhrb_filter_map        = power8_bhrb_filter_map,
 368        .get_constraint         = isa207_get_constraint,
 369        .get_alternatives       = power8_get_alternatives,
 370        .get_mem_data_src       = isa207_get_mem_data_src,
 371        .get_mem_weight         = isa207_get_mem_weight,
 372        .disable_pmc            = isa207_disable_pmc,
 373        .flags                  = PPMU_HAS_SIER | PPMU_ARCH_207S,
 374        .n_generic              = ARRAY_SIZE(power8_generic_events),
 375        .generic_events         = power8_generic_events,
 376        .cache_events           = &power8_cache_events,
 377        .attr_groups            = power8_pmu_attr_groups,
 378        .bhrb_nr                = 32,
 379};
 380
 381int init_power8_pmu(void)
 382{
 383        int rc;
 384
 385        if (!cur_cpu_spec->oprofile_cpu_type ||
 386            strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
 387                return -ENODEV;
 388
 389        rc = register_power_pmu(&power8_pmu);
 390        if (rc)
 391                return rc;
 392
 393        /* Tell userspace that EBB is supported */
 394        cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
 395
 396        if (cpu_has_feature(CPU_FTR_PMAO_BUG))
 397                pr_info("PMAO restore workaround active.\n");
 398
 399        return 0;
 400}
 401