linux/arch/powerpc/kernel/power4-pmu.c
<<
>>
Prefs
   1/*
   2 * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors.
   3 *
   4 * Copyright 2009 Paul Mackerras, IBM Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * as published by the Free Software Foundation; either version
   9 * 2 of the License, or (at your option) any later version.
  10 */
  11#include <linux/kernel.h>
  12#include <linux/perf_event.h>
  13#include <linux/string.h>
  14#include <asm/reg.h>
  15#include <asm/cputable.h>
  16
  17/*
  18 * Bits in event code for POWER4
  19 */
  20#define PM_PMC_SH       12      /* PMC number (1-based) for direct events */
  21#define PM_PMC_MSK      0xf
  22#define PM_UNIT_SH      8       /* TTMMUX number and setting - unit select */
  23#define PM_UNIT_MSK     0xf
  24#define PM_LOWER_SH     6
  25#define PM_LOWER_MSK    1
  26#define PM_LOWER_MSKS   0x40
  27#define PM_BYTE_SH      4       /* Byte number of event bus to use */
  28#define PM_BYTE_MSK     3
  29#define PM_PMCSEL_MSK   7
  30
  31/*
  32 * Unit code values
  33 */
  34#define PM_FPU          1
  35#define PM_ISU1         2
  36#define PM_IFU          3
  37#define PM_IDU0         4
  38#define PM_ISU1_ALT     6
  39#define PM_ISU2         7
  40#define PM_IFU_ALT      8
  41#define PM_LSU0         9
  42#define PM_LSU1         0xc
  43#define PM_GPS          0xf
  44
  45/*
  46 * Bits in MMCR0 for POWER4
  47 */
  48#define MMCR0_PMC1SEL_SH        8
  49#define MMCR0_PMC2SEL_SH        1
  50#define MMCR_PMCSEL_MSK         0x1f
  51
  52/*
  53 * Bits in MMCR1 for POWER4
  54 */
  55#define MMCR1_TTM0SEL_SH        62
  56#define MMCR1_TTC0SEL_SH        61
  57#define MMCR1_TTM1SEL_SH        59
  58#define MMCR1_TTC1SEL_SH        58
  59#define MMCR1_TTM2SEL_SH        56
  60#define MMCR1_TTC2SEL_SH        55
  61#define MMCR1_TTM3SEL_SH        53
  62#define MMCR1_TTC3SEL_SH        52
  63#define MMCR1_TTMSEL_MSK        3
  64#define MMCR1_TD_CP_DBG0SEL_SH  50
  65#define MMCR1_TD_CP_DBG1SEL_SH  48
  66#define MMCR1_TD_CP_DBG2SEL_SH  46
  67#define MMCR1_TD_CP_DBG3SEL_SH  44
  68#define MMCR1_DEBUG0SEL_SH      43
  69#define MMCR1_DEBUG1SEL_SH      42
  70#define MMCR1_DEBUG2SEL_SH      41
  71#define MMCR1_DEBUG3SEL_SH      40
  72#define MMCR1_PMC1_ADDER_SEL_SH 39
  73#define MMCR1_PMC2_ADDER_SEL_SH 38
  74#define MMCR1_PMC6_ADDER_SEL_SH 37
  75#define MMCR1_PMC5_ADDER_SEL_SH 36
  76#define MMCR1_PMC8_ADDER_SEL_SH 35
  77#define MMCR1_PMC7_ADDER_SEL_SH 34
  78#define MMCR1_PMC3_ADDER_SEL_SH 33
  79#define MMCR1_PMC4_ADDER_SEL_SH 32
  80#define MMCR1_PMC3SEL_SH        27
  81#define MMCR1_PMC4SEL_SH        22
  82#define MMCR1_PMC5SEL_SH        17
  83#define MMCR1_PMC6SEL_SH        12
  84#define MMCR1_PMC7SEL_SH        7
  85#define MMCR1_PMC8SEL_SH        2       /* note bit 0 is in MMCRA for GP */
  86
  87static short mmcr1_adder_bits[8] = {
  88        MMCR1_PMC1_ADDER_SEL_SH,
  89        MMCR1_PMC2_ADDER_SEL_SH,
  90        MMCR1_PMC3_ADDER_SEL_SH,
  91        MMCR1_PMC4_ADDER_SEL_SH,
  92        MMCR1_PMC5_ADDER_SEL_SH,
  93        MMCR1_PMC6_ADDER_SEL_SH,
  94        MMCR1_PMC7_ADDER_SEL_SH,
  95        MMCR1_PMC8_ADDER_SEL_SH
  96};
  97
  98/*
  99 * Bits in MMCRA
 100 */
 101#define MMCRA_PMC8SEL0_SH       17      /* PMC8SEL bit 0 for GP */
 102
 103/*
 104 * Layout of constraint bits:
 105 * 6666555555555544444444443333333333222222222211111111110000000000
 106 * 3210987654321098765432109876543210987654321098765432109876543210
 107 *        |[  >[  >[   >|||[  >[  ><  ><  ><  ><  ><><><><><><><><>
 108 *        | UC1 UC2 UC3 ||| PS1 PS2 B0  B1  B2  B3 P1P2P3P4P5P6P7P8
 109 *        \SMPL         ||\TTC3SEL
 110 *                      |\TTC_IFU_SEL
 111 *                      \TTM2SEL0
 112 *
 113 * SMPL - SAMPLE_ENABLE constraint
 114 *     56: SAMPLE_ENABLE value 0x0100_0000_0000_0000
 115 *
 116 * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2
 117 *     55: UC1 error 0x0080_0000_0000_0000
 118 *     54: FPU events needed 0x0040_0000_0000_0000
 119 *     53: ISU1 events needed 0x0020_0000_0000_0000
 120 *     52: IDU0|ISU2 events needed 0x0010_0000_0000_0000
 121 *
 122 * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0
 123 *     51: UC2 error 0x0008_0000_0000_0000
 124 *     50: FPU events needed 0x0004_0000_0000_0000
 125 *     49: IFU events needed 0x0002_0000_0000_0000
 126 *     48: LSU0 events needed 0x0001_0000_0000_0000
 127 *
 128 * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1
 129 *     47: UC3 error 0x8000_0000_0000
 130 *     46: LSU0 events needed 0x4000_0000_0000
 131 *     45: IFU events needed 0x2000_0000_0000
 132 *     44: IDU0|ISU2 events needed 0x1000_0000_0000
 133 *     43: ISU1 events needed 0x0800_0000_0000
 134 *
 135 * TTM2SEL0
 136 *     42: 0 = IDU0 events needed
 137 *         1 = ISU2 events needed 0x0400_0000_0000
 138 *
 139 * TTC_IFU_SEL
 140 *     41: 0 = IFU.U events needed
 141 *         1 = IFU.L events needed 0x0200_0000_0000
 142 *
 143 * TTC3SEL
 144 *     40: 0 = LSU1.U events needed
 145 *         1 = LSU1.L events needed 0x0100_0000_0000
 146 *
 147 * PS1
 148 *     39: PS1 error 0x0080_0000_0000
 149 *     36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
 150 *
 151 * PS2
 152 *     35: PS2 error 0x0008_0000_0000
 153 *     32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
 154 *
 155 * B0
 156 *     28-31: Byte 0 event source 0xf000_0000
 157 *         1 = FPU
 158 *         2 = ISU1
 159 *         3 = IFU
 160 *         4 = IDU0
 161 *         7 = ISU2
 162 *         9 = LSU0
 163 *         c = LSU1
 164 *         f = GPS
 165 *
 166 * B1, B2, B3
 167 *     24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
 168 *
 169 * P8
 170 *     15: P8 error 0x8000
 171 *     14-15: Count of events needing PMC8
 172 *
 173 * P1..P7
 174 *     0-13: Count of events needing PMC1..PMC7
 175 *
 176 * Note: this doesn't allow events using IFU.U to be combined with events
 177 * using IFU.L, though that is feasible (using TTM0 and TTM2).  However
 178 * there are no listed events for IFU.L (they are debug events not
 179 * verified for performance monitoring) so this shouldn't cause a
 180 * problem.
 181 */
 182
 183static struct unitinfo {
 184        unsigned long   value, mask;
 185        int             unit;
 186        int             lowerbit;
 187} p4_unitinfo[16] = {
 188        [PM_FPU]  = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 },
 189        [PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
 190        [PM_ISU1_ALT] =
 191                    { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
 192        [PM_IFU]  = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
 193        [PM_IFU_ALT] =
 194                    { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
 195        [PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 },
 196        [PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 },
 197        [PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 },
 198        [PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 },
 199        [PM_GPS]  = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 }
 200};
 201
 202static unsigned char direct_marked_event[8] = {
 203        (1<<2) | (1<<3),        /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
 204        (1<<3) | (1<<5),        /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
 205        (1<<3),                 /* PMC3: PM_MRK_ST_CMPL_INT */
 206        (1<<4) | (1<<5),        /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
 207        (1<<4) | (1<<5),        /* PMC5: PM_MRK_GRP_TIMEO */
 208        (1<<3) | (1<<4) | (1<<5),
 209                /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
 210        (1<<4) | (1<<5),        /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
 211        (1<<4),                 /* PMC8: PM_MRK_LSU_FIN */
 212};
 213
 214/*
 215 * Returns 1 if event counts things relating to marked instructions
 216 * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
 217 */
 218static int p4_marked_instr_event(u64 event)
 219{
 220        int pmc, psel, unit, byte, bit;
 221        unsigned int mask;
 222
 223        pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
 224        psel = event & PM_PMCSEL_MSK;
 225        if (pmc) {
 226                if (direct_marked_event[pmc - 1] & (1 << psel))
 227                        return 1;
 228                if (psel == 0)          /* add events */
 229                        bit = (pmc <= 4)? pmc - 1: 8 - pmc;
 230                else if (psel == 6)     /* decode events */
 231                        bit = 4;
 232                else
 233                        return 0;
 234        } else
 235                bit = psel;
 236
 237        byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
 238        unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
 239        mask = 0;
 240        switch (unit) {
 241        case PM_LSU1:
 242                if (event & PM_LOWER_MSKS)
 243                        mask = 1 << 28;         /* byte 7 bit 4 */
 244                else
 245                        mask = 6 << 24;         /* byte 3 bits 1 and 2 */
 246                break;
 247        case PM_LSU0:
 248                /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
 249                mask = 0x083dff00;
 250        }
 251        return (mask >> (byte * 8 + bit)) & 1;
 252}
 253
 254static int p4_get_constraint(u64 event, unsigned long *maskp,
 255                             unsigned long *valp)
 256{
 257        int pmc, byte, unit, lower, sh;
 258        unsigned long mask = 0, value = 0;
 259        int grp = -1;
 260
 261        pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
 262        if (pmc) {
 263                if (pmc > 8)
 264                        return -1;
 265                sh = (pmc - 1) * 2;
 266                mask |= 2 << sh;
 267                value |= 1 << sh;
 268                grp = ((pmc - 1) >> 1) & 1;
 269        }
 270        unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
 271        byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
 272        if (unit) {
 273                lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
 274
 275                /*
 276                 * Bus events on bytes 0 and 2 can be counted
 277                 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
 278                 */
 279                if (!pmc)
 280                        grp = byte & 1;
 281
 282                if (!p4_unitinfo[unit].unit)
 283                        return -1;
 284                mask  |= p4_unitinfo[unit].mask;
 285                value |= p4_unitinfo[unit].value;
 286                sh = p4_unitinfo[unit].lowerbit;
 287                if (sh > 1)
 288                        value |= (unsigned long)lower << sh;
 289                else if (lower != sh)
 290                        return -1;
 291                unit = p4_unitinfo[unit].unit;
 292
 293                /* Set byte lane select field */
 294                mask  |= 0xfULL << (28 - 4 * byte);
 295                value |= (unsigned long)unit << (28 - 4 * byte);
 296        }
 297        if (grp == 0) {
 298                /* increment PMC1/2/5/6 field */
 299                mask  |= 0x8000000000ull;
 300                value |= 0x1000000000ull;
 301        } else {
 302                /* increment PMC3/4/7/8 field */
 303                mask  |= 0x800000000ull;
 304                value |= 0x100000000ull;
 305        }
 306
 307        /* Marked instruction events need sample_enable set */
 308        if (p4_marked_instr_event(event)) {
 309                mask  |= 1ull << 56;
 310                value |= 1ull << 56;
 311        }
 312
 313        /* PMCSEL=6 decode events on byte 2 need sample_enable clear */
 314        if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
 315                mask  |= 1ull << 56;
 316
 317        *maskp = mask;
 318        *valp = value;
 319        return 0;
 320}
 321
 322static unsigned int ppc_inst_cmpl[] = {
 323        0x1001, 0x4001, 0x6001, 0x7001, 0x8001
 324};
 325
 326static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 327{
 328        int i, j, na;
 329
 330        alt[0] = event;
 331        na = 1;
 332
 333        /* 2 possibilities for PM_GRP_DISP_REJECT */
 334        if (event == 0x8003 || event == 0x0224) {
 335                alt[1] = event ^ (0x8003 ^ 0x0224);
 336                return 2;
 337        }
 338
 339        /* 2 possibilities for PM_ST_MISS_L1 */
 340        if (event == 0x0c13 || event == 0x0c23) {
 341                alt[1] = event ^ (0x0c13 ^ 0x0c23);
 342                return 2;
 343        }
 344
 345        /* several possibilities for PM_INST_CMPL */
 346        for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) {
 347                if (event == ppc_inst_cmpl[i]) {
 348                        for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j)
 349                                if (j != i)
 350                                        alt[na++] = ppc_inst_cmpl[j];
 351                        break;
 352                }
 353        }
 354
 355        return na;
 356}
 357
 358static int p4_compute_mmcr(u64 event[], int n_ev,
 359                           unsigned int hwc[], unsigned long mmcr[])
 360{
 361        unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
 362        unsigned int pmc, unit, byte, psel, lower;
 363        unsigned int ttm, grp;
 364        unsigned int pmc_inuse = 0;
 365        unsigned int pmc_grp_use[2];
 366        unsigned char busbyte[4];
 367        unsigned char unituse[16];
 368        unsigned int unitlower = 0;
 369        int i;
 370
 371        if (n_ev > 8)
 372                return -1;
 373
 374        /* First pass to count resource use */
 375        pmc_grp_use[0] = pmc_grp_use[1] = 0;
 376        memset(busbyte, 0, sizeof(busbyte));
 377        memset(unituse, 0, sizeof(unituse));
 378        for (i = 0; i < n_ev; ++i) {
 379                pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
 380                if (pmc) {
 381                        if (pmc_inuse & (1 << (pmc - 1)))
 382                                return -1;
 383                        pmc_inuse |= 1 << (pmc - 1);
 384                        /* count 1/2/5/6 vs 3/4/7/8 use */
 385                        ++pmc_grp_use[((pmc - 1) >> 1) & 1];
 386                }
 387                unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
 388                byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
 389                lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
 390                if (unit) {
 391                        if (!pmc)
 392                                ++pmc_grp_use[byte & 1];
 393                        if (unit == 6 || unit == 8)
 394                                /* map alt ISU1/IFU codes: 6->2, 8->3 */
 395                                unit = (unit >> 1) - 1;
 396                        if (busbyte[byte] && busbyte[byte] != unit)
 397                                return -1;
 398                        busbyte[byte] = unit;
 399                        lower <<= unit;
 400                        if (unituse[unit] && lower != (unitlower & lower))
 401                                return -1;
 402                        unituse[unit] = 1;
 403                        unitlower |= lower;
 404                }
 405        }
 406        if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
 407                return -1;
 408
 409        /*
 410         * Assign resources and set multiplexer selects.
 411         *
 412         * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2.
 413         * Each TTMx can only select one unit, but since
 414         * units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
 415         * we have some choices.
 416         */
 417        if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
 418                unituse[6] = 1;         /* Move 2 to 6 */
 419                unituse[2] = 0;
 420        }
 421        if (unituse[3] & (unituse[1] | unituse[2])) {
 422                unituse[8] = 1;         /* Move 3 to 8 */
 423                unituse[3] = 0;
 424                unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
 425        }
 426        /* Check only one unit per TTMx */
 427        if (unituse[1] + unituse[2] + unituse[3] > 1 ||
 428            unituse[4] + unituse[6] + unituse[7] > 1 ||
 429            unituse[8] + unituse[9] > 1 ||
 430            (unituse[5] | unituse[10] | unituse[11] |
 431             unituse[13] | unituse[14]))
 432                return -1;
 433
 434        /* Set TTMxSEL fields.  Note, units 1-3 => TTM0SEL codes 0-2 */
 435        mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2])
 436                << MMCR1_TTM0SEL_SH;
 437        mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2)
 438                << MMCR1_TTM1SEL_SH;
 439        mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH;
 440
 441        /* Set TTCxSEL fields. */
 442        if (unitlower & 0xe)
 443                mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
 444        if (unitlower & 0xf0)
 445                mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
 446        if (unitlower & 0xf00)
 447                mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
 448        if (unitlower & 0x7000)
 449                mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;
 450
 451        /* Set byte lane select fields. */
 452        for (byte = 0; byte < 4; ++byte) {
 453                unit = busbyte[byte];
 454                if (!unit)
 455                        continue;
 456                if (unit == 0xf) {
 457                        /* special case for GPS */
 458                        mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
 459                } else {
 460                        if (!unituse[unit])
 461                                ttm = unit - 1;         /* 2->1, 3->2 */
 462                        else
 463                                ttm = unit >> 2;
 464                        mmcr1 |= (unsigned long)ttm
 465                                << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
 466                }
 467        }
 468
 469        /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
 470        for (i = 0; i < n_ev; ++i) {
 471                pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
 472                unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
 473                byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
 474                psel = event[i] & PM_PMCSEL_MSK;
 475                if (!pmc) {
 476                        /* Bus event or 00xxx direct event (off or cycles) */
 477                        if (unit)
 478                                psel |= 0x10 | ((byte & 2) << 2);
 479                        for (pmc = 0; pmc < 8; ++pmc) {
 480                                if (pmc_inuse & (1 << pmc))
 481                                        continue;
 482                                grp = (pmc >> 1) & 1;
 483                                if (unit) {
 484                                        if (grp == (byte & 1))
 485                                                break;
 486                                } else if (pmc_grp_use[grp] < 4) {
 487                                        ++pmc_grp_use[grp];
 488                                        break;
 489                                }
 490                        }
 491                        pmc_inuse |= 1 << pmc;
 492                } else {
 493                        /* Direct event */
 494                        --pmc;
 495                        if (psel == 0 && (byte & 2))
 496                                /* add events on higher-numbered bus */
 497                                mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
 498                        else if (psel == 6 && byte == 3)
 499                                /* seem to need to set sample_enable here */
 500                                mmcra |= MMCRA_SAMPLE_ENABLE;
 501                        psel |= 8;
 502                }
 503                if (pmc <= 1)
 504                        mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
 505                else
 506                        mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
 507                if (pmc == 7)   /* PMC8 */
 508                        mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
 509                hwc[i] = pmc;
 510                if (p4_marked_instr_event(event[i]))
 511                        mmcra |= MMCRA_SAMPLE_ENABLE;
 512        }
 513
 514        if (pmc_inuse & 1)
 515                mmcr0 |= MMCR0_PMC1CE;
 516        if (pmc_inuse & 0xfe)
 517                mmcr0 |= MMCR0_PMCjCE;
 518
 519        mmcra |= 0x2000;        /* mark only one IOP per PPC instruction */
 520
 521        /* Return MMCRx values */
 522        mmcr[0] = mmcr0;
 523        mmcr[1] = mmcr1;
 524        mmcr[2] = mmcra;
 525        return 0;
 526}
 527
 528static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[])
 529{
 530        /*
 531         * Setting the PMCxSEL field to 0 disables PMC x.
 532         * (Note that pmc is 0-based here, not 1-based.)
 533         */
 534        if (pmc <= 1) {
 535                mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
 536        } else {
 537                mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
 538                if (pmc == 7)
 539                        mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
 540        }
 541}
 542
 543static int p4_generic_events[] = {
 544        [PERF_COUNT_HW_CPU_CYCLES]              = 7,
 545        [PERF_COUNT_HW_INSTRUCTIONS]            = 0x1001,
 546        [PERF_COUNT_HW_CACHE_REFERENCES]        = 0x8c10, /* PM_LD_REF_L1 */
 547        [PERF_COUNT_HW_CACHE_MISSES]            = 0x3c10, /* PM_LD_MISS_L1 */
 548        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = 0x330,  /* PM_BR_ISSUED */
 549        [PERF_COUNT_HW_BRANCH_MISSES]           = 0x331,  /* PM_BR_MPRED_CR */
 550};
 551
 552#define C(x)    PERF_COUNT_HW_CACHE_##x
 553
 554/*
 555 * Table of generalized cache-related events.
 556 * 0 means not supported, -1 means nonsensical, other values
 557 * are event codes.
 558 */
 559static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 560        [C(L1D)] = {            /*      RESULT_ACCESS   RESULT_MISS */
 561                [C(OP_READ)] = {        0x8c10,         0x3c10  },
 562                [C(OP_WRITE)] = {       0x7c10,         0xc13   },
 563                [C(OP_PREFETCH)] = {    0xc35,          0       },
 564        },
 565        [C(L1I)] = {            /*      RESULT_ACCESS   RESULT_MISS */
 566                [C(OP_READ)] = {        0,              0       },
 567                [C(OP_WRITE)] = {       -1,             -1      },
 568                [C(OP_PREFETCH)] = {    0,              0       },
 569        },
 570        [C(LL)] = {             /*      RESULT_ACCESS   RESULT_MISS */
 571                [C(OP_READ)] = {        0,              0       },
 572                [C(OP_WRITE)] = {       0,              0       },
 573                [C(OP_PREFETCH)] = {    0xc34,          0       },
 574        },
 575        [C(DTLB)] = {           /*      RESULT_ACCESS   RESULT_MISS */
 576                [C(OP_READ)] = {        0,              0x904   },
 577                [C(OP_WRITE)] = {       -1,             -1      },
 578                [C(OP_PREFETCH)] = {    -1,             -1      },
 579        },
 580        [C(ITLB)] = {           /*      RESULT_ACCESS   RESULT_MISS */
 581                [C(OP_READ)] = {        0,              0x900   },
 582                [C(OP_WRITE)] = {       -1,             -1      },
 583                [C(OP_PREFETCH)] = {    -1,             -1      },
 584        },
 585        [C(BPU)] = {            /*      RESULT_ACCESS   RESULT_MISS */
 586                [C(OP_READ)] = {        0x330,          0x331   },
 587                [C(OP_WRITE)] = {       -1,             -1      },
 588                [C(OP_PREFETCH)] = {    -1,             -1      },
 589        },
 590};
 591
 592static struct power_pmu power4_pmu = {
 593        .name                   = "POWER4/4+",
 594        .n_counter              = 8,
 595        .max_alternatives       = 5,
 596        .add_fields             = 0x0000001100005555ul,
 597        .test_adder             = 0x0011083300000000ul,
 598        .compute_mmcr           = p4_compute_mmcr,
 599        .get_constraint         = p4_get_constraint,
 600        .get_alternatives       = p4_get_alternatives,
 601        .disable_pmc            = p4_disable_pmc,
 602        .n_generic              = ARRAY_SIZE(p4_generic_events),
 603        .generic_events         = p4_generic_events,
 604        .cache_events           = &power4_cache_events,
 605};
 606
 607static int init_power4_pmu(void)
 608{
 609        if (!cur_cpu_spec->oprofile_cpu_type ||
 610            strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4"))
 611                return -ENODEV;
 612
 613        return register_power_pmu(&power4_pmu);
 614}
 615
 616arch_initcall(init_power4_pmu);
 617