1
2
3
4
5
6
7#include <linux/kernel.h>
8#include <linux/perf_event.h>
9#include <linux/string.h>
10#include <asm/reg.h>
11#include <asm/cputable.h>
12
13#include "internal.h"
14
15
16
17
/*
 * Fields of a POWER5 event code, as decoded by the functions in this file.
 * Each field is extracted as (event >> *_SH) & *_MSK.
 */
#define PM_PMC_SH	20	/* PMC number, 1..6; 0 = no fixed PMC */
#define PM_PMC_MSK	0xf
#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)	/* PMC field in place */
#define PM_UNIT_SH	16	/* unit number, see the PM_<unit> values */
#define PM_UNIT_MSK	0xf
#define PM_BYTE_SH	12	/* event-bus byte number, 0..7 */
#define PM_BYTE_MSK	0x7
#define PM_GRS_SH	8	/* value for the MMCR1 GRS_*SEL field */
#define PM_GRS_MSK	0x7
#define PM_BUSEVENT_MSK	0x80	/* set when the event uses the event bus */
#define PM_PMCSEL_MSK	0x7f	/* value for the MMCR1 PMCxSEL field */
29
30
/*
 * Values of the unit field (PM_UNIT_SH/PM_UNIT_MSK) of an event code.
 * PM_ISU0_ALT is folded onto PM_ISU0 when an event is decoded, and
 * PM_LASTUNIT is the largest unit number accepted from an event code.
 */
#define PM_FPU		0x0
#define PM_ISU0		0x1
#define PM_IFU		0x2
#define PM_ISU1		0x3
#define PM_IDU		0x4
#define PM_ISU0_ALT	0x6
#define PM_GRS		0x7
#define PM_LSU0		0x8
#define PM_LSU1		0xc
#define PM_LASTUNIT	0xc
41
42
43
44
/*
 * Layout of MMCR1: bit position (shift) and width (mask) of each field
 * that this file programs.
 */

/* TTMxSEL selects */
#define MMCR1_TTM0SEL_SH		62
#define MMCR1_TTM1SEL_SH		60
#define MMCR1_TTM2SEL_SH		58
#define MMCR1_TTM3SEL_SH		56
#define MMCR1_TTMSEL_MSK		3

/* Per-byte TD_CP_DBGxSEL selects, two bits each */
#define MMCR1_TD_CP_DBG0SEL_SH		54
#define MMCR1_TD_CP_DBG1SEL_SH		52
#define MMCR1_TD_CP_DBG2SEL_SH		50
#define MMCR1_TD_CP_DBG3SEL_SH		48

/* GRS_*SEL selects */
#define MMCR1_GRS_L2SEL_SH		46
#define MMCR1_GRS_L2SEL_MSK		3
#define MMCR1_GRS_L3SEL_SH		44
#define MMCR1_GRS_L3SEL_MSK		3
#define MMCR1_GRS_MCSEL_SH		41
#define MMCR1_GRS_MCSEL_MSK		7
#define MMCR1_GRS_FABSEL_SH		39
#define MMCR1_GRS_FABSEL_MSK		3

/* PMCx_ADDER_SEL, one bit per PMC1..4 */
#define MMCR1_PMC1_ADDER_SEL_SH		35
#define MMCR1_PMC2_ADDER_SEL_SH		34
#define MMCR1_PMC3_ADDER_SEL_SH		33
#define MMCR1_PMC4_ADDER_SEL_SH		32

/* PMCxSEL, seven bits per PMC1..4; n below is the zero-based PMC index */
#define MMCR1_PMCSEL_MSK		0x7f
#define MMCR1_PMC1SEL_SH		25
#define MMCR1_PMCSEL_SH(n)		(MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMC2SEL_SH		MMCR1_PMCSEL_SH(1)
#define MMCR1_PMC3SEL_SH		MMCR1_PMCSEL_SH(2)
#define MMCR1_PMC4SEL_SH		MMCR1_PMCSEL_SH(3)
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122static const int grsel_shift[8] = {
123 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
124 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
125 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
126};
127
128
129static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
130 [PM_FPU] = { 0xc0002000000000ul, 0x00001000000000ul },
131 [PM_ISU0] = { 0x00002000000000ul, 0x00000800000000ul },
132 [PM_ISU1] = { 0xc0002000000000ul, 0xc0001000000000ul },
133 [PM_IFU] = { 0xc0002000000000ul, 0x80001000000000ul },
134 [PM_IDU] = { 0x30002000000000ul, 0x00000400000000ul },
135 [PM_GRS] = { 0x30002000000000ul, 0x30000400000000ul },
136};
137
138static int power5_get_constraint(u64 event, unsigned long *maskp,
139 unsigned long *valp, u64 event_config1 __maybe_unused)
140{
141 int pmc, byte, unit, sh;
142 int bit, fmask;
143 unsigned long mask = 0, value = 0;
144 int grp = -1;
145
146 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
147 if (pmc) {
148 if (pmc > 6)
149 return -1;
150 sh = (pmc - 1) * 2;
151 mask |= 2 << sh;
152 value |= 1 << sh;
153 if (pmc <= 4)
154 grp = (pmc - 1) >> 1;
155 else if (event != 0x500009 && event != 0x600005)
156 return -1;
157 }
158 if (event & PM_BUSEVENT_MSK) {
159 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
160 if (unit > PM_LASTUNIT)
161 return -1;
162 if (unit == PM_ISU0_ALT)
163 unit = PM_ISU0;
164 mask |= unit_cons[unit][0];
165 value |= unit_cons[unit][1];
166 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
167 if (byte >= 4) {
168 if (unit != PM_LSU1)
169 return -1;
170
171 ++unit;
172 byte &= 3;
173 }
174 if (unit == PM_GRS) {
175 bit = event & 7;
176 fmask = (bit == 6)? 7: 3;
177 sh = grsel_shift[bit];
178 mask |= (unsigned long)fmask << sh;
179 value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
180 << sh;
181 }
182
183
184
185
186 if (!pmc)
187 grp = byte & 1;
188
189 mask |= 0xfUL << (24 - 4 * byte);
190 value |= (unsigned long)unit << (24 - 4 * byte);
191 }
192 if (grp == 0) {
193
194 mask |= 0x200000000ul;
195 value |= 0x080000000ul;
196 } else if (grp == 1) {
197
198 mask |= 0x40000000ul;
199 value |= 0x10000000ul;
200 }
201 if (pmc < 5) {
202
203 mask |= 0x8000000000000ul;
204 value |= 0x1000000000000ul;
205 }
206 *maskp = mask;
207 *valp = value;
208 return 0;
209}
210
/* Maximum number of equivalent codes in one row of event_alternatives[] */
#define MAX_ALT	3

/*
 * Groups of event codes that are alternatives for one another.  Unused
 * slots are 0.  find_alternative() stops scanning as soon as the event
 * is smaller than a row's first entry, so rows must stay sorted in
 * ascending order of their first column.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	{ 0x0120e4, 0x400002, 0x000000 },
	{ 0x0410c7, 0x441084, 0x000000 },
	{ 0x100005, 0x600005, 0x000000 },
	{ 0x100009, 0x200009, 0x500009 },
	{ 0x300009, 0x400009, 0x000000 },
};
220
221
222
223
224
225static int find_alternative(u64 event)
226{
227 int i, j;
228
229 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
230 if (event < event_alternatives[i][0])
231 break;
232 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
233 if (event == event_alternatives[i][j])
234 return i;
235 }
236 return -1;
237}
238
/*
 * PMCSEL values that have a counterpart on the "mirror" PMC
 * (1 <-> 4, 2 <-> 3).  Row index is PMC number minus one; the entry in
 * column j of one row is equivalent to column j of the mirror row.
 */
static const unsigned char bytedecode_alternatives[4][4] = {
	[0] = { 0x21, 0x23, 0x25, 0x27 },	/* PMC1 */
	[1] = { 0x07, 0x17, 0x0e, 0x1e },	/* PMC2 */
	[2] = { 0x20, 0x22, 0x24, 0x26 },	/* PMC3 */
	[3] = { 0x07, 0x17, 0x0e, 0x1e },	/* PMC4 */
};
245
246
247
248
249
250
251static s64 find_alternative_bdecode(u64 event)
252{
253 int pmc, altpmc, pp, j;
254
255 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
256 if (pmc == 0 || pmc > 4)
257 return -1;
258 altpmc = 5 - pmc;
259 pp = event & PM_PMCSEL_MSK;
260 for (j = 0; j < 4; ++j) {
261 if (bytedecode_alternatives[pmc - 1][j] == pp) {
262 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
263 (altpmc << PM_PMC_SH) |
264 bytedecode_alternatives[altpmc - 1][j];
265 }
266 }
267 return -1;
268}
269
270static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[])
271{
272 int i, j, nalt = 1;
273 s64 ae;
274
275 alt[0] = event;
276 nalt = 1;
277 i = find_alternative(event);
278 if (i >= 0) {
279 for (j = 0; j < MAX_ALT; ++j) {
280 ae = event_alternatives[i][j];
281 if (ae && ae != event)
282 alt[nalt++] = ae;
283 }
284 } else {
285 ae = find_alternative_bdecode(event);
286 if (ae > 0)
287 alt[nalt++] = ae;
288 }
289 return nalt;
290}
291
292
293
294
295
296
297
/*
 * Marked-instruction map for direct events, indexed by PMCSEL value.
 * power5_marked_instr_event() tests bit n (value 1 << n) for an event
 * whose PMC field is n, and treats the 0x80 bit as "byte-decode PMCSEL,
 * look at bus bit 4".  PMCSEL values not listed here are 0.
 */
static unsigned char direct_event_is_marked[0x28] = {
	[0x01] = 0x1f,
	[0x02] = 0x02,
	[0x03] = 0x0e,
	[0x05] = 0x1c,
	[0x06] = 0x80,
	[0x07] = 0x80,
	[0x0b] = 0x18,
	[0x0d] = 0x80,
	[0x0e] = 0x80,
	[0x11] = 0x14,
	[0x13] = 0x10,
	[0x14] = 0x1f,
	[0x15] = 0x02,
	[0x16] = 0x80,
	[0x17] = 0x80,
	[0x1d] = 0x80,
	[0x1e] = 0x80,
	[0x20] = 0x80,
	[0x21] = 0x80,
	[0x22] = 0x80,
	[0x23] = 0x80,
	[0x24] = 0x80,
	[0x25] = 0x80,
	[0x26] = 0x80,
	[0x27] = 0x80,
};
334
335
336
337
338
339static int power5_marked_instr_event(u64 event)
340{
341 int pmc, psel;
342 int bit, byte, unit;
343 u32 mask;
344
345 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
346 psel = event & PM_PMCSEL_MSK;
347 if (pmc >= 5)
348 return 0;
349
350 bit = -1;
351 if (psel < sizeof(direct_event_is_marked)) {
352 if (direct_event_is_marked[psel] & (1 << pmc))
353 return 1;
354 if (direct_event_is_marked[psel] & 0x80)
355 bit = 4;
356 else if (psel == 0x08)
357 bit = pmc - 1;
358 else if (psel == 0x10)
359 bit = 4 - pmc;
360 else if (psel == 0x1b && (pmc == 1 || pmc == 3))
361 bit = 4;
362 } else if ((psel & 0x58) == 0x40)
363 bit = psel & 7;
364
365 if (!(event & PM_BUSEVENT_MSK))
366 return 0;
367
368 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
369 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
370 if (unit == PM_LSU0) {
371
372 mask = 0x5dff00;
373 } else if (unit == PM_LSU1 && byte >= 4) {
374 byte -= 4;
375
376 mask = 0x5f00c0aa;
377 } else
378 return 0;
379
380 return (mask >> (byte * 8 + bit)) & 1;
381}
382
383static int power5_compute_mmcr(u64 event[], int n_ev,
384 unsigned int hwc[], struct mmcr_regs *mmcr,
385 struct perf_event *pevents[],
386 u32 flags __maybe_unused)
387{
388 unsigned long mmcr1 = 0;
389 unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
390 unsigned int pmc, unit, byte, psel;
391 unsigned int ttm, grp;
392 int i, isbus, bit, grsel;
393 unsigned int pmc_inuse = 0;
394 unsigned int pmc_grp_use[2];
395 unsigned char busbyte[4];
396 unsigned char unituse[16];
397 int ttmuse;
398
399 if (n_ev > 6)
400 return -1;
401
402
403 pmc_grp_use[0] = pmc_grp_use[1] = 0;
404 memset(busbyte, 0, sizeof(busbyte));
405 memset(unituse, 0, sizeof(unituse));
406 for (i = 0; i < n_ev; ++i) {
407 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
408 if (pmc) {
409 if (pmc > 6)
410 return -1;
411 if (pmc_inuse & (1 << (pmc - 1)))
412 return -1;
413 pmc_inuse |= 1 << (pmc - 1);
414
415 if (pmc <= 4)
416 ++pmc_grp_use[(pmc - 1) >> 1];
417 }
418 if (event[i] & PM_BUSEVENT_MSK) {
419 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
420 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
421 if (unit > PM_LASTUNIT)
422 return -1;
423 if (unit == PM_ISU0_ALT)
424 unit = PM_ISU0;
425 if (byte >= 4) {
426 if (unit != PM_LSU1)
427 return -1;
428 ++unit;
429 byte &= 3;
430 }
431 if (!pmc)
432 ++pmc_grp_use[byte & 1];
433 if (busbyte[byte] && busbyte[byte] != unit)
434 return -1;
435 busbyte[byte] = unit;
436 unituse[unit] = 1;
437 }
438 }
439 if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
440 return -1;
441
442
443
444
445
446
447
448 if (unituse[PM_ISU0] &
449 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
450 unituse[PM_ISU0_ALT] = 1;
451 unituse[PM_ISU0] = 0;
452 }
453
454 ttmuse = 0;
455 for (i = PM_FPU; i <= PM_ISU1; ++i) {
456 if (!unituse[i])
457 continue;
458 if (ttmuse++)
459 return -1;
460 mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
461 }
462 ttmuse = 0;
463 for (; i <= PM_GRS; ++i) {
464 if (!unituse[i])
465 continue;
466 if (ttmuse++)
467 return -1;
468 mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
469 }
470 if (ttmuse > 1)
471 return -1;
472
473
474 for (byte = 0; byte < 4; ++byte) {
475 unit = busbyte[byte];
476 if (!unit)
477 continue;
478 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
479
480 unit = PM_ISU0_ALT;
481 } else if (unit == PM_LSU1 + 1) {
482
483 mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
484 }
485 ttm = unit >> 2;
486 mmcr1 |= (unsigned long)ttm
487 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
488 }
489
490
491 for (i = 0; i < n_ev; ++i) {
492 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
493 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
494 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
495 psel = event[i] & PM_PMCSEL_MSK;
496 isbus = event[i] & PM_BUSEVENT_MSK;
497 if (!pmc) {
498
499 for (pmc = 0; pmc < 4; ++pmc) {
500 if (pmc_inuse & (1 << pmc))
501 continue;
502 grp = (pmc >> 1) & 1;
503 if (isbus) {
504 if (grp == (byte & 1))
505 break;
506 } else if (pmc_grp_use[grp] < 2) {
507 ++pmc_grp_use[grp];
508 break;
509 }
510 }
511 pmc_inuse |= 1 << pmc;
512 } else if (pmc <= 4) {
513
514 --pmc;
515 if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
516
517 mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
518 } else {
519
520 --pmc;
521 }
522 if (isbus && unit == PM_GRS) {
523 bit = psel & 7;
524 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
525 mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
526 }
527 if (power5_marked_instr_event(event[i]))
528 mmcra |= MMCRA_SAMPLE_ENABLE;
529 if (pmc <= 3)
530 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
531 hwc[i] = pmc;
532 }
533
534
535 mmcr->mmcr0 = 0;
536 if (pmc_inuse & 1)
537 mmcr->mmcr0 = MMCR0_PMC1CE;
538 if (pmc_inuse & 0x3e)
539 mmcr->mmcr0 |= MMCR0_PMCjCE;
540 mmcr->mmcr1 = mmcr1;
541 mmcr->mmcra = mmcra;
542 return 0;
543}
544
545static void power5_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
546{
547 if (pmc <= 3)
548 mmcr->mmcr1 &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
549}
550
551static int power5_generic_events[] = {
552 [PERF_COUNT_HW_CPU_CYCLES] = 0xf,
553 [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
554 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090,
555 [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088,
556 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4,
557 [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5,
558};
559
560#define C(x) PERF_COUNT_HW_CACHE_##x
561
562
563
564
565
566
567static u64 power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
568 [C(L1D)] = {
569 [C(OP_READ)] = { 0x4c1090, 0x3c1088 },
570 [C(OP_WRITE)] = { 0x3c1090, 0xc10c3 },
571 [C(OP_PREFETCH)] = { 0xc70e7, 0 },
572 },
573 [C(L1I)] = {
574 [C(OP_READ)] = { 0, 0 },
575 [C(OP_WRITE)] = { -1, -1 },
576 [C(OP_PREFETCH)] = { 0, 0 },
577 },
578 [C(LL)] = {
579 [C(OP_READ)] = { 0, 0x3c309b },
580 [C(OP_WRITE)] = { 0, 0 },
581 [C(OP_PREFETCH)] = { 0xc50c3, 0 },
582 },
583 [C(DTLB)] = {
584 [C(OP_READ)] = { 0x2c4090, 0x800c4 },
585 [C(OP_WRITE)] = { -1, -1 },
586 [C(OP_PREFETCH)] = { -1, -1 },
587 },
588 [C(ITLB)] = {
589 [C(OP_READ)] = { 0, 0x800c0 },
590 [C(OP_WRITE)] = { -1, -1 },
591 [C(OP_PREFETCH)] = { -1, -1 },
592 },
593 [C(BPU)] = {
594 [C(OP_READ)] = { 0x230e4, 0x230e5 },
595 [C(OP_WRITE)] = { -1, -1 },
596 [C(OP_PREFETCH)] = { -1, -1 },
597 },
598 [C(NODE)] = {
599 [C(OP_READ)] = { -1, -1 },
600 [C(OP_WRITE)] = { -1, -1 },
601 [C(OP_PREFETCH)] = { -1, -1 },
602 },
603};
604
605static struct power_pmu power5_pmu = {
606 .name = "POWER5",
607 .n_counter = 6,
608 .max_alternatives = MAX_ALT,
609 .add_fields = 0x7000090000555ul,
610 .test_adder = 0x3000490000000ul,
611 .compute_mmcr = power5_compute_mmcr,
612 .get_constraint = power5_get_constraint,
613 .get_alternatives = power5_get_alternatives,
614 .disable_pmc = power5_disable_pmc,
615 .n_generic = ARRAY_SIZE(power5_generic_events),
616 .generic_events = power5_generic_events,
617 .cache_events = &power5_cache_events,
618 .flags = PPMU_HAS_SSLOT,
619};
620
621int init_power5_pmu(void)
622{
623 if (!cur_cpu_spec->oprofile_cpu_type ||
624 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5"))
625 return -ENODEV;
626
627 return register_power_pmu(&power5_pmu);
628}
629