linux/arch/powerpc/perf/isa207-common.c
/*
 * Common Performance counter support functions for PowerISA v2.07 processors.
 *
 * Copyright 2009 Paul Mackerras, IBM Corporation.
 * Copyright 2013 Michael Ellerman, IBM Corporation.
 * Copyright 2016 Madhavan Srinivasan, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include "isa207-common.h"

PMU_FORMAT_ATTR(event,          "config:0-49");
PMU_FORMAT_ATTR(pmcxsel,        "config:0-7");
PMU_FORMAT_ATTR(mark,           "config:8");
PMU_FORMAT_ATTR(combine,        "config:11");
PMU_FORMAT_ATTR(unit,           "config:12-15");
PMU_FORMAT_ATTR(pmc,            "config:16-19");
PMU_FORMAT_ATTR(cache_sel,      "config:20-23");
PMU_FORMAT_ATTR(sample_mode,    "config:24-28");
PMU_FORMAT_ATTR(thresh_sel,     "config:29-31");
PMU_FORMAT_ATTR(thresh_stop,    "config:32-35");
PMU_FORMAT_ATTR(thresh_start,   "config:36-39");
PMU_FORMAT_ATTR(thresh_cmp,     "config:40-49");

struct attribute *isa207_pmu_format_attr[] = {
        &format_attr_event.attr,
        &format_attr_pmcxsel.attr,
        &format_attr_mark.attr,
        &format_attr_combine.attr,
        &format_attr_unit.attr,
        &format_attr_pmc.attr,
        &format_attr_cache_sel.attr,
        &format_attr_sample_mode.attr,
        &format_attr_thresh_sel.attr,
        &format_attr_thresh_stop.attr,
        &format_attr_thresh_start.attr,
        &format_attr_thresh_cmp.attr,
        NULL,
};

struct attribute_group isa207_pmu_format_group = {
        .name = "format",
        .attrs = isa207_pmu_format_attr,
};

static inline bool event_is_fab_match(u64 event)
{
        /* Only check pmc, unit and pmcxsel, ignore the edge bit (0) */
        event &= 0xff0fe;

        /* PM_MRK_FAB_RSP_MATCH & PM_MRK_FAB_RSP_MATCH_CYC */
        return (event == 0x30056 || event == 0x4f052);
}

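/*
 * Reject any event that sets bits outside the architected event format;
 * Power9 (ARCH_300) has its own valid mask because its event encoding
 * differs from the base ISA v2.07 one.
 */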
static bool is_event_valid(u64 event)
{
        u64 valid_mask = EVENT_VALID_MASK;

        if (cpu_has_feature(CPU_FTR_ARCH_300))
                valid_mask = p9_EVENT_VALID_MASK;

        return !(event & ~valid_mask);
}

static inline bool is_event_marked(u64 event)
{
        if (event & EVENT_IS_MARKED)
                return true;

        return false;
}

static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
{
        /*
         * MMCRA[SDAR_MODE] specifies how the SDAR should be updated in
         * continuous sampling mode.
         *
         * In case of Power8:
         * MMCRA[SDAR_MODE] will be programmed as "0b01" for continuous
         * sampling mode and will be unchanged when setting MMCRA[63]
         * (Marked events).
         *
         * In case of Power9:
         * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'),
         *               likewise if the group already has any marked events.
         * For the rest:
         *      MMCRA[SDAR_MODE] will be set from the event code.
         *      If sdar_mode from the event is zero, default to 0b01. Hardware
         *      requires that we set a non-zero value.
         */
        if (cpu_has_feature(CPU_FTR_ARCH_300)) {
                if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
                        *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
                else if (p9_SDAR_MODE(event))
                        *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
                else
                        *mmcra |= MMCRA_SDAR_MODE_DCACHE;
        } else
                *mmcra |= MMCRA_SDAR_MODE_TLB;
}

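/*
 * The threshold compare value sits at a different bit position in MMCRA on
 * Power9 (ARCH_300) than on other ISA v2.07 processors, so pick the shift
 * by CPU feature.
 */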
static u64 thresh_cmp_val(u64 value)
{
        if (cpu_has_feature(CPU_FTR_ARCH_300))
                return value << p9_MMCRA_THR_CMP_SHIFT;

        return value << MMCRA_THR_CMP_SHIFT;
}

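/*
 * The event's combine field and its destination bits in MMCR1 are encoded
 * with Power9-specific macros on ARCH_300 and with the base ISA v2.07
 * macros otherwise, hence these two feature-tested helpers.
 */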
static unsigned long combine_from_event(u64 event)
{
        if (cpu_has_feature(CPU_FTR_ARCH_300))
                return p9_EVENT_COMBINE(event);

        return EVENT_COMBINE(event);
}

static unsigned long combine_shift(unsigned long pmc)
{
        if (cpu_has_feature(CPU_FTR_ARCH_300))
                return p9_MMCR1_COMBINE_SHIFT(pmc);

        return MMCR1_COMBINE_SHIFT(pmc);
}

static inline bool event_is_threshold(u64 event)
{
        return (event >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK;
}

static bool is_thresh_cmp_valid(u64 event)
{
        unsigned int cmp, exp;

        /*
         * Check the mantissa upper two bits are not zero, unless the
         * exponent is also zero. See the THRESH_CMP_MANTISSA doc.
         */
        cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
        exp = cmp >> 7;

        if (exp && (cmp & 0x60) == 0)
                return false;

        return true;
}

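/*
 * Translate the SIER data source encoding (idx and sub_idx) into generic
 * perf_mem_data_src level/snoop bits. An idx of 0 leaves the value as
 * PERF_MEM_NA (no information available).
 */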
static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
{
        u64 ret = PERF_MEM_NA;

        switch(idx) {
        case 0:
                /* Nothing to do */
                break;
        case 1:
                ret = PH(LVL, L1);
                break;
        case 2:
                ret = PH(LVL, L2);
                break;
        case 3:
                ret = PH(LVL, L3);
                break;
        case 4:
                if (sub_idx <= 1)
                        ret = PH(LVL, LOC_RAM);
                else if (sub_idx > 1 && sub_idx <= 2)
                        ret = PH(LVL, REM_RAM1);
                else
                        ret = PH(LVL, REM_RAM2);
                ret |= P(SNOOP, HIT);
                break;
        case 5:
                ret = PH(LVL, REM_CCE1);
                if ((sub_idx == 0) || (sub_idx == 2) || (sub_idx == 4))
                        ret |= P(SNOOP, HIT);
                else if ((sub_idx == 1) || (sub_idx == 3) || (sub_idx == 5))
                        ret |= P(SNOOP, HITM);
                break;
        case 6:
                ret = PH(LVL, REM_CCE2);
                if ((sub_idx == 0) || (sub_idx == 2))
                        ret |= P(SNOOP, HIT);
                else if ((sub_idx == 1) || (sub_idx == 3))
                        ret |= P(SNOOP, HITM);
                break;
        case 7:
                ret = PM(LVL, L1);
                break;
        }

        return ret;
}

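/*
 * Derive the memory data source for the sampled instruction from SIER.
 * This requires SIER support (PPMU_HAS_SIER); a SIER type of 1 is treated
 * as a load and 2 as a store.
 */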
void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
                             struct pt_regs *regs)
{
        u64 idx;
        u32 sub_idx;
        u64 sier;
        u64 val;

        /* Skip if no SIER support */
        if (!(flags & PPMU_HAS_SIER)) {
                dsrc->val = 0;
                return;
        }

        sier = mfspr(SPRN_SIER);
        val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
        if (val == 1 || val == 2) {
                idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT;
                sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT;

                dsrc->val = isa207_find_source(idx, sub_idx);
                dsrc->val |= (val == 1) ? P(OP, LOAD) : P(OP, STORE);
        }
}

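/*
 * MMCRA reports the threshold event counter as a mantissa/exponent pair;
 * the decoded count is mantissa << (2 * exp), i.e. mantissa * 4^exp.
 * For example, exp = 3 and mantissa = 5 decode to 5 << 6 = 320.
 */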
void isa207_get_mem_weight(u64 *weight)
{
        u64 mmcra = mfspr(SPRN_MMCRA);
        u64 exp = MMCRA_THR_CTR_EXP(mmcra);
        u64 mantissa = MMCRA_THR_CTR_MANT(mmcra);

        *weight = mantissa << (2 * exp);
}

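/*
 * Compute the scheduling constraint for @event as a (mask, value) pair.
 * Returns 0 and fills *maskp and *valp on success, or -1 if the event is
 * invalid or uses an unsupported combination (e.g. a bad PMC, a non-zero
 * L2/L3 cache selector, an invalid threshold compare, or BHRB without EBB).
 */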
int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
{
        unsigned int unit, pmc, cache, ebb;
        unsigned long mask, value;

        mask = value = 0;

        if (!is_event_valid(event))
                return -1;

        pmc   = (event >> EVENT_PMC_SHIFT)        & EVENT_PMC_MASK;
        unit  = (event >> EVENT_UNIT_SHIFT)       & EVENT_UNIT_MASK;
        cache = (event >> EVENT_CACHE_SEL_SHIFT)  & EVENT_CACHE_SEL_MASK;
        ebb   = (event >> EVENT_EBB_SHIFT)        & EVENT_EBB_MASK;

        if (pmc) {
                u64 base_event;

                if (pmc > 6)
                        return -1;

                /* Ignore Linux defined bits when checking event below */
                base_event = event & ~EVENT_LINUX_MASK;

                if (pmc >= 5 && base_event != 0x500fa &&
                                base_event != 0x600f4)
                        return -1;

                mask  |= CNST_PMC_MASK(pmc);
                value |= CNST_PMC_VAL(pmc);
        }

        if (pmc <= 4) {
                /*
                 * Add to number of counters in use. Note this includes events with
                 * a PMC of 0 - they still need a PMC, it's just assigned later.
                 * Don't count events on PMC 5 & 6, there is only one valid event
                 * on each of those counters, and they are handled above.
                 */
                mask  |= CNST_NC_MASK;
                value |= CNST_NC_VAL;
        }

        if (unit >= 6 && unit <= 9) {
                /*
                 * L2/L3 events contain a cache selector field, which is
                 * supposed to be programmed into MMCRC. However MMCRC is only
                 * HV writable, and there is no API for guest kernels to modify
                 * it. The solution is for the hypervisor to initialise the
                 * field to zeroes, and for us to only ever allow events that
                 * have a cache selector of zero. The bank selector (bit 3) is
                 * irrelevant, as long as the rest of the value is 0.
                 */
                if (cache & 0x7)
                        return -1;

        } else if (event & EVENT_IS_L1) {
                mask  |= CNST_L1_QUAL_MASK;
                value |= CNST_L1_QUAL_VAL(cache);
        }

        if (is_event_marked(event)) {
                mask  |= CNST_SAMPLE_MASK;
                value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
        }

        if (cpu_has_feature(CPU_FTR_ARCH_300)) {
                if (event_is_threshold(event) && is_thresh_cmp_valid(event)) {
                        mask  |= CNST_THRESH_MASK;
                        value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);
                }
        } else {
                /*
                 * Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC,
                 * the threshold control bits are used for the match value.
                 */
                if (event_is_fab_match(event)) {
                        mask  |= CNST_FAB_MATCH_MASK;
                        value |= CNST_FAB_MATCH_VAL(event >> EVENT_THR_CTL_SHIFT);
                } else {
                        if (!is_thresh_cmp_valid(event))
                                return -1;

                        mask  |= CNST_THRESH_MASK;
                        value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);
                }
        }

        if (!pmc && ebb)
                /* EBB events must specify the PMC */
                return -1;

        if (event & EVENT_WANTS_BHRB) {
                if (!ebb)
                        /* Only EBB events can request BHRB */
                        return -1;

                mask  |= CNST_IFM_MASK;
                value |= CNST_IFM_VAL(event >> EVENT_IFM_SHIFT);
        }

        /*
         * All events must agree on EBB, either all request it or none.
         * EBB events are pinned & exclusive, so this should never actually
         * hit, but we leave it as a fallback in case.
         */
        mask  |= CNST_EBB_VAL(ebb);
        value |= CNST_EBB_MASK;

        *maskp = mask;
        *valp = value;

        return 0;
}

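/*
 * Assign each event in the group to a PMC and build the corresponding MMCR
 * values. On return mmcr[0..3] hold the MMCR0, MMCR1, MMCRA and MMCR2
 * settings respectively, and hwc[i] is the zero-based PMC chosen for
 * event i.
 */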
int isa207_compute_mmcr(u64 event[], int n_ev,
                        unsigned int hwc[], unsigned long mmcr[],
                        struct perf_event *pevents[])
{
        unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
        unsigned int pmc, pmc_inuse;
        int i;

        pmc_inuse = 0;

        /* First pass to count resource use */
        for (i = 0; i < n_ev; ++i) {
                pmc = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
                if (pmc)
                        pmc_inuse |= 1 << pmc;
        }

        mmcra = mmcr1 = mmcr2 = 0;

        /* Second pass: assign PMCs, set all MMCR1 fields */
        for (i = 0; i < n_ev; ++i) {
                pmc     = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
                unit    = (event[i] >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK;
                combine = combine_from_event(event[i]);
                psel    = event[i] & EVENT_PSEL_MASK;

                if (!pmc) {
                        for (pmc = 1; pmc <= 4; ++pmc) {
                                if (!(pmc_inuse & (1 << pmc)))
                                        break;
                        }

                        pmc_inuse |= 1 << pmc;
                }

                if (pmc <= 4) {
                        mmcr1 |= unit << MMCR1_UNIT_SHIFT(pmc);
                        mmcr1 |= combine << combine_shift(pmc);
                        mmcr1 |= psel << MMCR1_PMCSEL_SHIFT(pmc);
                }

                /* In continuous sampling mode, update SDAR on TLB miss */
                mmcra_sdar_mode(event[i], &mmcra);

                if (event[i] & EVENT_IS_L1) {
                        cache = event[i] >> EVENT_CACHE_SEL_SHIFT;
                        mmcr1 |= (cache & 1) << MMCR1_IC_QUAL_SHIFT;
                        cache >>= 1;
                        mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT;
                }

                if (is_event_marked(event[i])) {
                        mmcra |= MMCRA_SAMPLE_ENABLE;

                        val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
                        if (val) {
                                mmcra |= (val &  3) << MMCRA_SAMP_MODE_SHIFT;
                                mmcra |= (val >> 2) << MMCRA_SAMP_ELIG_SHIFT;
                        }
                }

                /*
                 * For PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC,
                 * the threshold bits are used for the match value.
                 */
                if (!cpu_has_feature(CPU_FTR_ARCH_300) && event_is_fab_match(event[i])) {
                        mmcr1 |= ((event[i] >> EVENT_THR_CTL_SHIFT) &
                                  EVENT_THR_CTL_MASK) << MMCR1_FAB_SHIFT;
                } else {
                        val = (event[i] >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK;
                        mmcra |= val << MMCRA_THR_CTL_SHIFT;
                        val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK;
                        mmcra |= val << MMCRA_THR_SEL_SHIFT;
                        val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
                        mmcra |= thresh_cmp_val(val);
                }

                if (event[i] & EVENT_WANTS_BHRB) {
                        val = (event[i] >> EVENT_IFM_SHIFT) & EVENT_IFM_MASK;
                        mmcra |= val << MMCRA_IFM_SHIFT;
                }

                if (pevents[i]->attr.exclude_user)
                        mmcr2 |= MMCR2_FCP(pmc);

                if (pevents[i]->attr.exclude_hv)
                        mmcr2 |= MMCR2_FCH(pmc);

                if (pevents[i]->attr.exclude_kernel) {
                        if (cpu_has_feature(CPU_FTR_HVMODE))
                                mmcr2 |= MMCR2_FCH(pmc);
                        else
                                mmcr2 |= MMCR2_FCS(pmc);
                }

                hwc[i] = pmc - 1;
        }

        /* Return MMCRx values */
        mmcr[0] = 0;

        /* pmc_inuse is 1-based */
        if (pmc_inuse & 2)
                mmcr[0] = MMCR0_PMC1CE;

        if (pmc_inuse & 0x7c)
                mmcr[0] |= MMCR0_PMCjCE;

        /* If we're not using PMC 5 or 6, freeze them */
        if (!(pmc_inuse & 0x60))
                mmcr[0] |= MMCR0_FC56;

        mmcr[1] = mmcr1;
        mmcr[2] = mmcra;
        mmcr[3] = mmcr2;

        return 0;
}

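/*
 * Stop counting on a PMC by clearing its PMCSEL field in MMCR1. The @pmc
 * argument is zero-based here, hence the + 1; only PMC1-4 are handled.
 */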
void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[])
{
        if (pmc <= 3)
                mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1));
}

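/*
 * Look up @event in the alternatives table. The rows appear to be sorted
 * by their first entry, which is what allows the early break; returns the
 * matching row index, or -1 if the event has no alternatives.
 */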
static int find_alternative(u64 event, const unsigned int ev_alt[][MAX_ALT], int size)
{
        int i, j;

        for (i = 0; i < size; ++i) {
                if (event < ev_alt[i][0])
                        break;

                for (j = 0; j < MAX_ALT && ev_alt[i][j]; ++j)
                        if (event == ev_alt[i][j])
                                return i;
        }

        return -1;
}

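/*
 * Collect @event and its alternative encodings into @alt[], starting with
 * the event itself. Returns the number of entries written to @alt[].
 */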
int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
                            const unsigned int ev_alt[][MAX_ALT])
{
        int i, j, num_alt = 0;
        u64 alt_event;

        alt[num_alt++] = event;
        i = find_alternative(event, ev_alt, size);
        if (i >= 0) {
                /* Filter out the original event, it's already in alt[0] */
                for (j = 0; j < MAX_ALT; ++j) {
                        alt_event = ev_alt[i][j];
                        if (alt_event && alt_event != event)
                                alt[num_alt++] = alt_event;
                }
        }

        if (flags & PPMU_ONLY_COUNT_RUN) {
                /*
                 * We're only counting in RUN state, so PM_CYC is equivalent to
                 * PM_RUN_CYC and PM_INST_CMPL is equivalent to PM_RUN_INST_CMPL.
                 */
                j = num_alt;
                for (i = 0; i < num_alt; ++i) {
                        switch (alt[i]) {
                        case 0x1e:                      /* PM_CYC */
                                alt[j++] = 0x600f4;     /* PM_RUN_CYC */
                                break;
                        case 0x600f4:
                                alt[j++] = 0x1e;
                                break;
                        case 0x2:                       /* PM_INST_CMPL */
                                alt[j++] = 0x500fa;     /* PM_RUN_INST_CMPL */
                                break;
                        case 0x500fa:
                                alt[j++] = 0x2;
                                break;
                        }
                }
                num_alt = j;
        }

        return num_alt;
}