1
2
3
4
5
6
7
8
9
10
11#include <linux/kernel.h>
12#include <linux/perf_event.h>
13#include <linux/string.h>
14#include <asm/reg.h>
15#include <asm/cputable.h>
16
17
18
19
/*
 * Bits in event code for POWER4
 *
 * A field is extracted with (code >> PM_x_SH) & PM_x_MSK.  The PMCSEL
 * field sits in the low bits of the code and so has no shift.
 */
#define PM_PMC_SH	12	/* PMC number (1-based); 0 = no specific PMC */
#define PM_PMC_MSK	0xf
#define PM_UNIT_SH	8	/* unit code, one of the PM_* unit values */
#define PM_UNIT_MSK	0xf
#define PM_LOWER_SH	6	/* single-bit "lower" flag */
#define PM_LOWER_MSK	1
/* "lower" flag mask already shifted into place; derived so it cannot drift */
#define PM_LOWER_MSKS	(PM_LOWER_MSK << PM_LOWER_SH)
#define PM_BYTE_SH	4	/* event bus byte number, 0-3 */
#define PM_BYTE_MSK	3
#define PM_PMCSEL_MSK	7	/* PMCSEL value */
30
31
32
33
/*
 * Unit code values
 *
 * These are the values carried in the unit field of an event code and
 * are also the indices used for p4_unitinfo[].
 */
#define PM_FPU		1
#define PM_ISU1		2
#define PM_IFU		3
#define PM_IDU0		4
#define PM_ISU1_ALT	6	/* second code for ISU1; p4_unitinfo maps it to PM_ISU1 */
#define PM_ISU2		7
#define PM_IFU_ALT	8	/* second code for IFU; p4_unitinfo maps it to PM_IFU */
#define PM_LSU0		9
#define PM_LSU1		0xc
#define PM_GPS		0xf
44
45
46
47
/*
 * Bits in MMCR0 for POWER4
 */
#define MMCR0_PMC1SEL_SH	8	/* shift of the PMC1 select field */
#define MMCR0_PMC2SEL_SH	1	/* shift of the PMC2 select field */
#define MMCR_PMCSEL_MSK		0x1f	/* a PMCxSEL field is five bits wide */
51
52
53
54
/*
 * Bits in MMCR1 for POWER4
 *
 * All values are shift counts (bit positions) except MMCR1_TTMSEL_MSK,
 * which is the width mask of a TTMxSEL field.
 */
#define MMCR1_TTM0SEL_SH	62
#define MMCR1_TTC0SEL_SH	61
#define MMCR1_TTM1SEL_SH	59
#define MMCR1_TTC1SEL_SH	58
#define MMCR1_TTM2SEL_SH	56
#define MMCR1_TTC2SEL_SH	55
#define MMCR1_TTM3SEL_SH	53
#define MMCR1_TTC3SEL_SH	52
#define MMCR1_TTMSEL_MSK	3
/* byte-lane selects: one two-bit field per event bus byte, 2 bits apart */
#define MMCR1_TD_CP_DBG0SEL_SH	50
#define MMCR1_TD_CP_DBG1SEL_SH	48
#define MMCR1_TD_CP_DBG2SEL_SH	46
#define MMCR1_TD_CP_DBG3SEL_SH	44
/* one bit per event bus byte, at consecutive descending positions */
#define MMCR1_DEBUG0SEL_SH	43
#define MMCR1_DEBUG1SEL_SH	42
#define MMCR1_DEBUG2SEL_SH	41
#define MMCR1_DEBUG3SEL_SH	40
/* one adder-select bit per PMC; note the order is not PMC1..PMC8 */
#define MMCR1_PMC1_ADDER_SEL_SH	39
#define MMCR1_PMC2_ADDER_SEL_SH	38
#define MMCR1_PMC6_ADDER_SEL_SH	37
#define MMCR1_PMC5_ADDER_SEL_SH	36
#define MMCR1_PMC8_ADDER_SEL_SH	35
#define MMCR1_PMC7_ADDER_SEL_SH	34
#define MMCR1_PMC3_ADDER_SEL_SH	33
#define MMCR1_PMC4_ADDER_SEL_SH	32
/* PMC3..PMC8 select fields, five bits apart */
#define MMCR1_PMC3SEL_SH	27
#define MMCR1_PMC4SEL_SH	22
#define MMCR1_PMC5SEL_SH	17
#define MMCR1_PMC6SEL_SH	12
#define MMCR1_PMC7SEL_SH	7
#define MMCR1_PMC8SEL_SH	2
86
87static short mmcr1_adder_bits[8] = {
88 MMCR1_PMC1_ADDER_SEL_SH,
89 MMCR1_PMC2_ADDER_SEL_SH,
90 MMCR1_PMC3_ADDER_SEL_SH,
91 MMCR1_PMC4_ADDER_SEL_SH,
92 MMCR1_PMC5_ADDER_SEL_SH,
93 MMCR1_PMC6_ADDER_SEL_SH,
94 MMCR1_PMC7_ADDER_SEL_SH,
95 MMCR1_PMC8_ADDER_SEL_SH
96};
97
98
99
100
/*
 * Bits in MMCRA
 */
#define MMCRA_PMC8SEL0_SH	17	/* receives bit 0 of the PMC8 select value */
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183static struct unitinfo {
184 unsigned long value, mask;
185 int unit;
186 int lowerbit;
187} p4_unitinfo[16] = {
188 [PM_FPU] = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 },
189 [PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
190 [PM_ISU1_ALT] =
191 { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
192 [PM_IFU] = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
193 [PM_IFU_ALT] =
194 { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
195 [PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 },
196 [PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 },
197 [PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 },
198 [PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 },
199 [PM_GPS] = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 }
200};
201
/*
 * Direct events that count marked instructions, indexed by 0-based PMC
 * number.  Bit n of an entry is set when PMCSEL value n on that PMC is
 * such an event; p4_marked_instr_event() tests these bits.
 *
 * The table is only ever read, so it is declared const.
 */
static const unsigned char direct_marked_event[8] = {
	(1<<2) | (1<<3),		/* PMC1: PMCSEL 2, 3 */
	(1<<3) | (1<<5),		/* PMC2: PMCSEL 3, 5 */
	(1<<3),				/* PMC3: PMCSEL 3 */
	(1<<4) | (1<<5),		/* PMC4: PMCSEL 4, 5 */
	(1<<4) | (1<<5),		/* PMC5: PMCSEL 4, 5 */
	(1<<3) | (1<<4) | (1<<5),	/* PMC6: PMCSEL 3, 4, 5 */
	(1<<4) | (1<<5),		/* PMC7: PMCSEL 4, 5 */
	(1<<4),				/* PMC8: PMCSEL 4 */
};
213
214
215
216
217
218static int p4_marked_instr_event(u64 event)
219{
220 int pmc, psel, unit, byte, bit;
221 unsigned int mask;
222
223 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
224 psel = event & PM_PMCSEL_MSK;
225 if (pmc) {
226 if (direct_marked_event[pmc - 1] & (1 << psel))
227 return 1;
228 if (psel == 0)
229 bit = (pmc <= 4)? pmc - 1: 8 - pmc;
230 else if (psel == 6)
231 bit = 4;
232 else
233 return 0;
234 } else
235 bit = psel;
236
237 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
238 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
239 mask = 0;
240 switch (unit) {
241 case PM_LSU1:
242 if (event & PM_LOWER_MSKS)
243 mask = 1 << 28;
244 else
245 mask = 6 << 24;
246 break;
247 case PM_LSU0:
248
249 mask = 0x083dff00;
250 }
251 return (mask >> (byte * 8 + bit)) & 1;
252}
253
254static int p4_get_constraint(u64 event, unsigned long *maskp,
255 unsigned long *valp)
256{
257 int pmc, byte, unit, lower, sh;
258 unsigned long mask = 0, value = 0;
259 int grp = -1;
260
261 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
262 if (pmc) {
263 if (pmc > 8)
264 return -1;
265 sh = (pmc - 1) * 2;
266 mask |= 2 << sh;
267 value |= 1 << sh;
268 grp = ((pmc - 1) >> 1) & 1;
269 }
270 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
271 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
272 if (unit) {
273 lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
274
275
276
277
278
279 if (!pmc)
280 grp = byte & 1;
281
282 if (!p4_unitinfo[unit].unit)
283 return -1;
284 mask |= p4_unitinfo[unit].mask;
285 value |= p4_unitinfo[unit].value;
286 sh = p4_unitinfo[unit].lowerbit;
287 if (sh > 1)
288 value |= (unsigned long)lower << sh;
289 else if (lower != sh)
290 return -1;
291 unit = p4_unitinfo[unit].unit;
292
293
294 mask |= 0xfULL << (28 - 4 * byte);
295 value |= (unsigned long)unit << (28 - 4 * byte);
296 }
297 if (grp == 0) {
298
299 mask |= 0x8000000000ull;
300 value |= 0x1000000000ull;
301 } else {
302
303 mask |= 0x800000000ull;
304 value |= 0x100000000ull;
305 }
306
307
308 if (p4_marked_instr_event(event)) {
309 mask |= 1ull << 56;
310 value |= 1ull << 56;
311 }
312
313
314 if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
315 mask |= 1ull << 56;
316
317 *maskp = mask;
318 *valp = value;
319 return 0;
320}
321
/*
 * Event codes that p4_get_alternatives() treats as interchangeable.
 * Each one is PMCSEL 1 as a direct event on a different counter
 * (PMC1, 4, 6, 7 and 8); 0x1001 is also the code used for
 * PERF_COUNT_HW_INSTRUCTIONS in p4_generic_events[].
 *
 * The table is only ever read, so it is declared const.
 */
static const unsigned int ppc_inst_cmpl[] = {
	0x1001, 0x4001, 0x6001, 0x7001, 0x8001
};
325
326static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
327{
328 int i, j, na;
329
330 alt[0] = event;
331 na = 1;
332
333
334 if (event == 0x8003 || event == 0x0224) {
335 alt[1] = event ^ (0x8003 ^ 0x0224);
336 return 2;
337 }
338
339
340 if (event == 0x0c13 || event == 0x0c23) {
341 alt[1] = event ^ (0x0c13 ^ 0x0c23);
342 return 2;
343 }
344
345
346 for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) {
347 if (event == ppc_inst_cmpl[i]) {
348 for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j)
349 if (j != i)
350 alt[na++] = ppc_inst_cmpl[j];
351 break;
352 }
353 }
354
355 return na;
356}
357
358static int p4_compute_mmcr(u64 event[], int n_ev,
359 unsigned int hwc[], unsigned long mmcr[])
360{
361 unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
362 unsigned int pmc, unit, byte, psel, lower;
363 unsigned int ttm, grp;
364 unsigned int pmc_inuse = 0;
365 unsigned int pmc_grp_use[2];
366 unsigned char busbyte[4];
367 unsigned char unituse[16];
368 unsigned int unitlower = 0;
369 int i;
370
371 if (n_ev > 8)
372 return -1;
373
374
375 pmc_grp_use[0] = pmc_grp_use[1] = 0;
376 memset(busbyte, 0, sizeof(busbyte));
377 memset(unituse, 0, sizeof(unituse));
378 for (i = 0; i < n_ev; ++i) {
379 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
380 if (pmc) {
381 if (pmc_inuse & (1 << (pmc - 1)))
382 return -1;
383 pmc_inuse |= 1 << (pmc - 1);
384
385 ++pmc_grp_use[((pmc - 1) >> 1) & 1];
386 }
387 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
388 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
389 lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
390 if (unit) {
391 if (!pmc)
392 ++pmc_grp_use[byte & 1];
393 if (unit == 6 || unit == 8)
394
395 unit = (unit >> 1) - 1;
396 if (busbyte[byte] && busbyte[byte] != unit)
397 return -1;
398 busbyte[byte] = unit;
399 lower <<= unit;
400 if (unituse[unit] && lower != (unitlower & lower))
401 return -1;
402 unituse[unit] = 1;
403 unitlower |= lower;
404 }
405 }
406 if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
407 return -1;
408
409
410
411
412
413
414
415
416
417 if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
418 unituse[6] = 1;
419 unituse[2] = 0;
420 }
421 if (unituse[3] & (unituse[1] | unituse[2])) {
422 unituse[8] = 1;
423 unituse[3] = 0;
424 unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
425 }
426
427 if (unituse[1] + unituse[2] + unituse[3] > 1 ||
428 unituse[4] + unituse[6] + unituse[7] > 1 ||
429 unituse[8] + unituse[9] > 1 ||
430 (unituse[5] | unituse[10] | unituse[11] |
431 unituse[13] | unituse[14]))
432 return -1;
433
434
435 mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2])
436 << MMCR1_TTM0SEL_SH;
437 mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2)
438 << MMCR1_TTM1SEL_SH;
439 mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH;
440
441
442 if (unitlower & 0xe)
443 mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
444 if (unitlower & 0xf0)
445 mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
446 if (unitlower & 0xf00)
447 mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
448 if (unitlower & 0x7000)
449 mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;
450
451
452 for (byte = 0; byte < 4; ++byte) {
453 unit = busbyte[byte];
454 if (!unit)
455 continue;
456 if (unit == 0xf) {
457
458 mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
459 } else {
460 if (!unituse[unit])
461 ttm = unit - 1;
462 else
463 ttm = unit >> 2;
464 mmcr1 |= (unsigned long)ttm
465 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
466 }
467 }
468
469
470 for (i = 0; i < n_ev; ++i) {
471 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
472 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
473 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
474 psel = event[i] & PM_PMCSEL_MSK;
475 if (!pmc) {
476
477 if (unit)
478 psel |= 0x10 | ((byte & 2) << 2);
479 for (pmc = 0; pmc < 8; ++pmc) {
480 if (pmc_inuse & (1 << pmc))
481 continue;
482 grp = (pmc >> 1) & 1;
483 if (unit) {
484 if (grp == (byte & 1))
485 break;
486 } else if (pmc_grp_use[grp] < 4) {
487 ++pmc_grp_use[grp];
488 break;
489 }
490 }
491 pmc_inuse |= 1 << pmc;
492 } else {
493
494 --pmc;
495 if (psel == 0 && (byte & 2))
496
497 mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
498 else if (psel == 6 && byte == 3)
499
500 mmcra |= MMCRA_SAMPLE_ENABLE;
501 psel |= 8;
502 }
503 if (pmc <= 1)
504 mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
505 else
506 mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
507 if (pmc == 7)
508 mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
509 hwc[i] = pmc;
510 if (p4_marked_instr_event(event[i]))
511 mmcra |= MMCRA_SAMPLE_ENABLE;
512 }
513
514 if (pmc_inuse & 1)
515 mmcr0 |= MMCR0_PMC1CE;
516 if (pmc_inuse & 0xfe)
517 mmcr0 |= MMCR0_PMCjCE;
518
519 mmcra |= 0x2000;
520
521
522 mmcr[0] = mmcr0;
523 mmcr[1] = mmcr1;
524 mmcr[2] = mmcra;
525 return 0;
526}
527
528static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[])
529{
530
531
532
533
534 if (pmc <= 1) {
535 mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
536 } else {
537 mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
538 if (pmc == 7)
539 mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
540 }
541}
542
/*
 * Event codes for the generic hardware events, indexed by the
 * PERF_COUNT_HW_* event id.  Registered through power4_pmu.
 */
static int p4_generic_events[] = {
	/* no PMC or unit field, PMCSEL 7 */
	[PERF_COUNT_HW_CPU_CYCLES] = 7,
	/* PMCSEL 1 on PMC1; first entry of ppc_inst_cmpl[] */
	[PERF_COUNT_HW_INSTRUCTIONS] = 0x1001,
	/* the next two match the L1D read row of power4_cache_events */
	[PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10,
	[PERF_COUNT_HW_CACHE_MISSES] = 0x3c10,
	/* the next two match the BPU read row of power4_cache_events */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330,
	[PERF_COUNT_HW_BRANCH_MISSES] = 0x331,
};
551
/* Shorthand: C(L1D) expands to PERF_COUNT_HW_CACHE_L1D, and so on. */
#define C(x) PERF_COUNT_HW_CACHE_##x

/*
 * Table of generalized cache-related events, indexed by
 * [cache][operation][result].  The innermost pair is given
 * positionally: the first value is result index 0 and the second is
 * result index 1 (access and miss in the perf ABI's result enum).
 *
 * NOTE(review): 0 and -1 look like markers rather than event codes.
 * Upstream is believed to use 0 for "not supported" and -1 for
 * "nonsensical" - confirm against the powerpc perf core, which is the
 * consumer of this table.  Caches with no entry here are zero-filled.
 */
static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {
		[C(OP_READ)] = { 0x8c10, 0x3c10 },
		[C(OP_WRITE)] = { 0x7c10, 0xc13 },
		[C(OP_PREFETCH)] = { 0xc35, 0 },
	},
	[C(L1I)] = {
		[C(OP_READ)] = { 0, 0 },
		[C(OP_WRITE)] = { -1, -1 },
		[C(OP_PREFETCH)] = { 0, 0 },
	},
	[C(LL)] = {
		[C(OP_READ)] = { 0, 0 },
		[C(OP_WRITE)] = { 0, 0 },
		[C(OP_PREFETCH)] = { 0xc34, 0 },
	},
	[C(DTLB)] = {
		[C(OP_READ)] = { 0, 0x904 },
		[C(OP_WRITE)] = { -1, -1 },
		[C(OP_PREFETCH)] = { -1, -1 },
	},
	[C(ITLB)] = {
		[C(OP_READ)] = { 0, 0x900 },
		[C(OP_WRITE)] = { -1, -1 },
		[C(OP_PREFETCH)] = { -1, -1 },
	},
	[C(BPU)] = {
		/* same codes as the generic branch events */
		[C(OP_READ)] = { 0x330, 0x331 },
		[C(OP_WRITE)] = { -1, -1 },
		[C(OP_PREFETCH)] = { -1, -1 },
	},
};
591
/*
 * PMU description handed to register_power_pmu() by init_power4_pmu().
 * It ties together the callbacks and tables defined in this file.
 */
static struct power_pmu power4_pmu = {
	.name = "POWER4/4+",
	.n_counter = 8,			/* PMC1..PMC8 */
	/* ppc_inst_cmpl[] yields the largest set: the event plus 4 others */
	.max_alternatives = 5,
	/*
	 * NOTE(review): add_fields and test_adder are consumed by the
	 * generic powerpc perf code and presumably must agree with the
	 * constraint layout built in p4_get_constraint() - that cannot
	 * be checked from this file.
	 */
	.add_fields = 0x0000001100005555ul,
	.test_adder = 0x0011083300000000ul,
	.compute_mmcr = p4_compute_mmcr,
	.get_constraint = p4_get_constraint,
	.get_alternatives = p4_get_alternatives,
	.disable_pmc = p4_disable_pmc,
	.n_generic = ARRAY_SIZE(p4_generic_events),
	.generic_events = p4_generic_events,
	.cache_events = &power4_cache_events,
};
606
607static int init_power4_pmu(void)
608{
609 if (!cur_cpu_spec->oprofile_cpu_type ||
610 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4"))
611 return -ENODEV;
612
613 return register_power_pmu(&power4_pmu);
614}
615
616early_initcall(init_power4_pmu);
617