1
2
3
4
5
6
7
8
9
10
11#include <linux/kernel.h>
12#include <linux/perf_event.h>
13#include <linux/string.h>
14#include <asm/reg.h>
15#include <asm/cputable.h>
16
17
18
19
/*
 * Fields of a POWER5 event code, as decoded by the functions in this file.
 */
#define PM_PMC_SH	20	/* PMC number field; 0 = no specific PMC */
#define PM_PMC_MSK	0xf
#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)
#define PM_UNIT_SH	16	/* unit number field */
#define PM_UNIT_MSK	0xf
#define PM_BYTE_SH	12	/* event-bus byte number field */
#define PM_BYTE_MSK	7
#define PM_GRS_SH	8	/* value copied into an MMCR1 GRS_*SEL field */
#define PM_GRS_MSK	7
#define PM_BUSEVENT_MSK	0x80	/* set when the event is a bus event */
#define PM_PMCSEL_MSK	0x7f	/* PMCxSEL value */
31
32
/* Values of the unit number field of an event code */
#define PM_FPU		0
#define PM_ISU0		1
#define PM_IFU		2
#define PM_ISU1		3
#define PM_IDU		4
#define PM_ISU0_ALT	6	/* treated as PM_ISU0 when decoding events */
#define PM_GRS		7
#define PM_LSU0		8
#define PM_LSU1		0xc
#define PM_LASTUNIT	0xc	/* largest unit number accepted */
43
44
45
46
/*
 * Bit positions (shift counts) and field masks used when building the
 * MMCR1 value.
 */
#define MMCR1_TTM0SEL_SH	62
#define MMCR1_TTM1SEL_SH	60
#define MMCR1_TTM2SEL_SH	58
#define MMCR1_TTM3SEL_SH	56
#define MMCR1_TTMSEL_MSK	3
#define MMCR1_TD_CP_DBG0SEL_SH	54
#define MMCR1_TD_CP_DBG1SEL_SH	52
#define MMCR1_TD_CP_DBG2SEL_SH	50
#define MMCR1_TD_CP_DBG3SEL_SH	48
#define MMCR1_GRS_L2SEL_SH	46
#define MMCR1_GRS_L2SEL_MSK	3
#define MMCR1_GRS_L3SEL_SH	44
#define MMCR1_GRS_L3SEL_MSK	3
#define MMCR1_GRS_MCSEL_SH	41
#define MMCR1_GRS_MCSEL_MSK	7
#define MMCR1_GRS_FABSEL_SH	39
#define MMCR1_GRS_FABSEL_MSK	3
#define MMCR1_PMC1_ADDER_SEL_SH	35
#define MMCR1_PMC2_ADDER_SEL_SH	34
#define MMCR1_PMC3_ADDER_SEL_SH	33
#define MMCR1_PMC4_ADDER_SEL_SH	32
#define MMCR1_PMC1SEL_SH	25
#define MMCR1_PMC2SEL_SH	17
#define MMCR1_PMC3SEL_SH	9
#define MMCR1_PMC4SEL_SH	1
/* Shift of the PMCxSEL field for zero-based counter index n (0..3) */
#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK	0x7f
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124static const int grsel_shift[8] = {
125 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
126 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
127 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
128};
129
130
131static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
132 [PM_FPU] = { 0xc0002000000000ul, 0x00001000000000ul },
133 [PM_ISU0] = { 0x00002000000000ul, 0x00000800000000ul },
134 [PM_ISU1] = { 0xc0002000000000ul, 0xc0001000000000ul },
135 [PM_IFU] = { 0xc0002000000000ul, 0x80001000000000ul },
136 [PM_IDU] = { 0x30002000000000ul, 0x00000400000000ul },
137 [PM_GRS] = { 0x30002000000000ul, 0x30000400000000ul },
138};
139
140static int power5_get_constraint(u64 event, unsigned long *maskp,
141 unsigned long *valp)
142{
143 int pmc, byte, unit, sh;
144 int bit, fmask;
145 unsigned long mask = 0, value = 0;
146 int grp = -1;
147
148 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
149 if (pmc) {
150 if (pmc > 6)
151 return -1;
152 sh = (pmc - 1) * 2;
153 mask |= 2 << sh;
154 value |= 1 << sh;
155 if (pmc <= 4)
156 grp = (pmc - 1) >> 1;
157 else if (event != 0x500009 && event != 0x600005)
158 return -1;
159 }
160 if (event & PM_BUSEVENT_MSK) {
161 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
162 if (unit > PM_LASTUNIT)
163 return -1;
164 if (unit == PM_ISU0_ALT)
165 unit = PM_ISU0;
166 mask |= unit_cons[unit][0];
167 value |= unit_cons[unit][1];
168 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
169 if (byte >= 4) {
170 if (unit != PM_LSU1)
171 return -1;
172
173 ++unit;
174 byte &= 3;
175 }
176 if (unit == PM_GRS) {
177 bit = event & 7;
178 fmask = (bit == 6)? 7: 3;
179 sh = grsel_shift[bit];
180 mask |= (unsigned long)fmask << sh;
181 value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
182 << sh;
183 }
184
185
186
187
188 if (!pmc)
189 grp = byte & 1;
190
191 mask |= 0xfUL << (24 - 4 * byte);
192 value |= (unsigned long)unit << (24 - 4 * byte);
193 }
194 if (grp == 0) {
195
196 mask |= 0x200000000ul;
197 value |= 0x080000000ul;
198 } else if (grp == 1) {
199
200 mask |= 0x40000000ul;
201 value |= 0x10000000ul;
202 }
203 if (pmc < 5) {
204
205 mask |= 0x8000000000000ul;
206 value |= 0x1000000000000ul;
207 }
208 *maskp = mask;
209 *valp = value;
210 return 0;
211}
212
/* Largest number of event codes in one row of event_alternatives[] */
#define MAX_ALT	3

/*
 * Groups of interchangeable event codes.  Unused trailing slots are 0.
 * Rows must stay sorted by ascending first element, because
 * find_alternative() stops at the first row whose first element is
 * larger than the event being looked up.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	{ 0x120e4,  0x400002, 0 },
	{ 0x410c7,  0x441084, 0 },
	{ 0x100005, 0x600005, 0 },
	{ 0x100009, 0x200009, 0x500009 },
	{ 0x300009, 0x400009, 0 },
};
222
223
224
225
226
227static int find_alternative(u64 event)
228{
229 int i, j;
230
231 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
232 if (event < event_alternatives[i][0])
233 break;
234 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
235 if (event == event_alternatives[i][j])
236 return i;
237 }
238 return -1;
239}
240
/*
 * PMCSEL values that have an equivalent on another counter, indexed by
 * [PMC number - 1][column].  find_alternative_bdecode() pairs PMC n with
 * PMC 5 - n; the same column in both rows gives the matching PMCSEL values.
 */
static const unsigned char bytedecode_alternatives[4][4] = {
	[0] = { 0x21, 0x23, 0x25, 0x27 },
	[1] = { 0x07, 0x17, 0x0e, 0x1e },
	[2] = { 0x20, 0x22, 0x24, 0x26 },
	[3] = { 0x07, 0x17, 0x0e, 0x1e },
};
247
248
249
250
251
252
253static s64 find_alternative_bdecode(u64 event)
254{
255 int pmc, altpmc, pp, j;
256
257 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
258 if (pmc == 0 || pmc > 4)
259 return -1;
260 altpmc = 5 - pmc;
261 pp = event & PM_PMCSEL_MSK;
262 for (j = 0; j < 4; ++j) {
263 if (bytedecode_alternatives[pmc - 1][j] == pp) {
264 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
265 (altpmc << PM_PMC_SH) |
266 bytedecode_alternatives[altpmc - 1][j];
267 }
268 }
269 return -1;
270}
271
272static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[])
273{
274 int i, j, nalt = 1;
275 s64 ae;
276
277 alt[0] = event;
278 nalt = 1;
279 i = find_alternative(event);
280 if (i >= 0) {
281 for (j = 0; j < MAX_ALT; ++j) {
282 ae = event_alternatives[i][j];
283 if (ae && ae != event)
284 alt[nalt++] = ae;
285 }
286 } else {
287 ae = find_alternative_bdecode(event);
288 if (ae > 0)
289 alt[nalt++] = ae;
290 }
291 return nalt;
292}
293
294
295
296
297
298
299
/*
 * Lookup table indexed by PMCSEL value (0x00-0x27), consulted by
 * power5_marked_instr_event().  Bit n (n = 0..4) set means the event
 * counts as marked when its PMC field is n.  Bit 7 (0x80) set means
 * that PMCSEL value selects bit 4 of an event-bus byte instead.
 * Entries not listed here are 0.
 */
static unsigned char direct_event_is_marked[0x28] = {
	[0x01] = 0x1f,
	[0x02] = 0x02,
	[0x03] = 0x0e,
	[0x05] = 0x1c,
	[0x06] = 0x80,
	[0x07] = 0x80,
	[0x0b] = 0x18,
	[0x0d] = 0x80,
	[0x0e] = 0x80,
	[0x11] = 0x14,
	[0x13] = 0x10,
	[0x14] = 0x1f,
	[0x15] = 0x02,
	[0x16] = 0x80,
	[0x17] = 0x80,
	[0x1d] = 0x80,
	[0x1e] = 0x80,
	[0x20] = 0x80,
	[0x21] = 0x80,
	[0x22] = 0x80,
	[0x23] = 0x80,
	[0x24] = 0x80,
	[0x25] = 0x80,
	[0x26] = 0x80,
	[0x27] = 0x80,
};
336
337
338
339
340
341static int power5_marked_instr_event(u64 event)
342{
343 int pmc, psel;
344 int bit, byte, unit;
345 u32 mask;
346
347 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
348 psel = event & PM_PMCSEL_MSK;
349 if (pmc >= 5)
350 return 0;
351
352 bit = -1;
353 if (psel < sizeof(direct_event_is_marked)) {
354 if (direct_event_is_marked[psel] & (1 << pmc))
355 return 1;
356 if (direct_event_is_marked[psel] & 0x80)
357 bit = 4;
358 else if (psel == 0x08)
359 bit = pmc - 1;
360 else if (psel == 0x10)
361 bit = 4 - pmc;
362 else if (psel == 0x1b && (pmc == 1 || pmc == 3))
363 bit = 4;
364 } else if ((psel & 0x58) == 0x40)
365 bit = psel & 7;
366
367 if (!(event & PM_BUSEVENT_MSK))
368 return 0;
369
370 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
371 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
372 if (unit == PM_LSU0) {
373
374 mask = 0x5dff00;
375 } else if (unit == PM_LSU1 && byte >= 4) {
376 byte -= 4;
377
378 mask = 0x5f00c0aa;
379 } else
380 return 0;
381
382 return (mask >> (byte * 8 + bit)) & 1;
383}
384
385static int power5_compute_mmcr(u64 event[], int n_ev,
386 unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
387{
388 unsigned long mmcr1 = 0;
389 unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
390 unsigned int pmc, unit, byte, psel;
391 unsigned int ttm, grp;
392 int i, isbus, bit, grsel;
393 unsigned int pmc_inuse = 0;
394 unsigned int pmc_grp_use[2];
395 unsigned char busbyte[4];
396 unsigned char unituse[16];
397 int ttmuse;
398
399 if (n_ev > 6)
400 return -1;
401
402
403 pmc_grp_use[0] = pmc_grp_use[1] = 0;
404 memset(busbyte, 0, sizeof(busbyte));
405 memset(unituse, 0, sizeof(unituse));
406 for (i = 0; i < n_ev; ++i) {
407 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
408 if (pmc) {
409 if (pmc > 6)
410 return -1;
411 if (pmc_inuse & (1 << (pmc - 1)))
412 return -1;
413 pmc_inuse |= 1 << (pmc - 1);
414
415 if (pmc <= 4)
416 ++pmc_grp_use[(pmc - 1) >> 1];
417 }
418 if (event[i] & PM_BUSEVENT_MSK) {
419 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
420 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
421 if (unit > PM_LASTUNIT)
422 return -1;
423 if (unit == PM_ISU0_ALT)
424 unit = PM_ISU0;
425 if (byte >= 4) {
426 if (unit != PM_LSU1)
427 return -1;
428 ++unit;
429 byte &= 3;
430 }
431 if (!pmc)
432 ++pmc_grp_use[byte & 1];
433 if (busbyte[byte] && busbyte[byte] != unit)
434 return -1;
435 busbyte[byte] = unit;
436 unituse[unit] = 1;
437 }
438 }
439 if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2)
440 return -1;
441
442
443
444
445
446
447
448 if (unituse[PM_ISU0] &
449 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
450 unituse[PM_ISU0_ALT] = 1;
451 unituse[PM_ISU0] = 0;
452 }
453
454 ttmuse = 0;
455 for (i = PM_FPU; i <= PM_ISU1; ++i) {
456 if (!unituse[i])
457 continue;
458 if (ttmuse++)
459 return -1;
460 mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
461 }
462 ttmuse = 0;
463 for (; i <= PM_GRS; ++i) {
464 if (!unituse[i])
465 continue;
466 if (ttmuse++)
467 return -1;
468 mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
469 }
470 if (ttmuse > 1)
471 return -1;
472
473
474 for (byte = 0; byte < 4; ++byte) {
475 unit = busbyte[byte];
476 if (!unit)
477 continue;
478 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
479
480 unit = PM_ISU0_ALT;
481 } else if (unit == PM_LSU1 + 1) {
482
483 mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
484 }
485 ttm = unit >> 2;
486 mmcr1 |= (unsigned long)ttm
487 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
488 }
489
490
491 for (i = 0; i < n_ev; ++i) {
492 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
493 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
494 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
495 psel = event[i] & PM_PMCSEL_MSK;
496 isbus = event[i] & PM_BUSEVENT_MSK;
497 if (!pmc) {
498
499 for (pmc = 0; pmc < 4; ++pmc) {
500 if (pmc_inuse & (1 << pmc))
501 continue;
502 grp = (pmc >> 1) & 1;
503 if (isbus) {
504 if (grp == (byte & 1))
505 break;
506 } else if (pmc_grp_use[grp] < 2) {
507 ++pmc_grp_use[grp];
508 break;
509 }
510 }
511 pmc_inuse |= 1 << pmc;
512 } else if (pmc <= 4) {
513
514 --pmc;
515 if ((psel == 8 || psel == 0x10) && isbus && (byte & 2))
516
517 mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
518 } else {
519
520 --pmc;
521 }
522 if (isbus && unit == PM_GRS) {
523 bit = psel & 7;
524 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
525 mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
526 }
527 if (power5_marked_instr_event(event[i]))
528 mmcra |= MMCRA_SAMPLE_ENABLE;
529 if (pmc <= 3)
530 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
531 hwc[i] = pmc;
532 }
533
534
535 mmcr[0] = 0;
536 if (pmc_inuse & 1)
537 mmcr[0] = MMCR0_PMC1CE;
538 if (pmc_inuse & 0x3e)
539 mmcr[0] |= MMCR0_PMCjCE;
540 mmcr[1] = mmcr1;
541 mmcr[2] = mmcra;
542 return 0;
543}
544
/*
 * Clear the PMCxSEL field for one counter in a saved MMCR set.
 *
 * @pmc:  zero-based counter index; indices above 3 are left untouched
 * @mmcr: MMCR array whose element 1 (the MMCR1 value) is modified
 */
static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[])
{
	if (pmc > 3)
		return;

	mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
}
550
551static int power5_generic_events[] = {
552 [PERF_COUNT_HW_CPU_CYCLES] = 0xf,
553 [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
554 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090,
555 [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088,
556 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4,
557 [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5,
558};
559
560#define C(x) PERF_COUNT_HW_CACHE_##x
561
562
563
564
565
566
567static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
568 [C(L1D)] = {
569 [C(OP_READ)] = { 0x4c1090, 0x3c1088 },
570 [C(OP_WRITE)] = { 0x3c1090, 0xc10c3 },
571 [C(OP_PREFETCH)] = { 0xc70e7, 0 },
572 },
573 [C(L1I)] = {
574 [C(OP_READ)] = { 0, 0 },
575 [C(OP_WRITE)] = { -1, -1 },
576 [C(OP_PREFETCH)] = { 0, 0 },
577 },
578 [C(LL)] = {
579 [C(OP_READ)] = { 0, 0x3c309b },
580 [C(OP_WRITE)] = { 0, 0 },
581 [C(OP_PREFETCH)] = { 0xc50c3, 0 },
582 },
583 [C(DTLB)] = {
584 [C(OP_READ)] = { 0x2c4090, 0x800c4 },
585 [C(OP_WRITE)] = { -1, -1 },
586 [C(OP_PREFETCH)] = { -1, -1 },
587 },
588 [C(ITLB)] = {
589 [C(OP_READ)] = { 0, 0x800c0 },
590 [C(OP_WRITE)] = { -1, -1 },
591 [C(OP_PREFETCH)] = { -1, -1 },
592 },
593 [C(BPU)] = {
594 [C(OP_READ)] = { 0x230e4, 0x230e5 },
595 [C(OP_WRITE)] = { -1, -1 },
596 [C(OP_PREFETCH)] = { -1, -1 },
597 },
598 [C(NODE)] = {
599 [C(OP_READ)] = { -1, -1 },
600 [C(OP_WRITE)] = { -1, -1 },
601 [C(OP_PREFETCH)] = { -1, -1 },
602 },
603};
604
605static struct power_pmu power5_pmu = {
606 .name = "POWER5",
607 .n_counter = 6,
608 .max_alternatives = MAX_ALT,
609 .add_fields = 0x7000090000555ul,
610 .test_adder = 0x3000490000000ul,
611 .compute_mmcr = power5_compute_mmcr,
612 .get_constraint = power5_get_constraint,
613 .get_alternatives = power5_get_alternatives,
614 .disable_pmc = power5_disable_pmc,
615 .n_generic = ARRAY_SIZE(power5_generic_events),
616 .generic_events = power5_generic_events,
617 .cache_events = &power5_cache_events,
618 .flags = PPMU_HAS_SSLOT,
619};
620
621static int __init init_power5_pmu(void)
622{
623 if (!cur_cpu_spec->oprofile_cpu_type ||
624 strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5"))
625 return -ENODEV;
626
627 return register_power_pmu(&power5_pmu);
628}
629
630early_initcall(init_power5_pmu);
631