#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"

enum {
	CTX_BIT_USER	= 1 << 0,
	CTX_BIT_KERNEL	= 1 << 1,
	CTX_BIT_HV	= 1 << 2,
	CTX_BIT_HOST	= 1 << 3,
	CTX_BIT_IDLE	= 1 << 4,
	CTX_BIT_MAX	= 1 << 5,
};

#define NUM_CTX CTX_BIT_MAX

/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
static struct rblist runtime_saved_values;
static bool have_frontend_stalled;

struct stats walltime_nsecs_stats;

struct saved_value {
	struct rb_node rb_node;
	struct perf_evsel *evsel;
	int cpu;
	struct stats stats;
};
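
/*
 * The runtime_saved_values rblist caches one stats entry per (evsel, cpu)
 * pair so that metric expressions can reference counts collected by other
 * events; saved_value_cmp() below orders nodes by cpu first, then by the
 * evsel pointer itself.
 */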

static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
	struct saved_value *a = container_of(rb_node,
					     struct saved_value,
					     rb_node);
	const struct saved_value *b = entry;

	if (a->cpu != b->cpu)
		return a->cpu - b->cpu;
	if (a->evsel == b->evsel)
		return 0;
	if ((char *)a->evsel < (char *)b->evsel)
		return -1;
	return +1;
}

static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
				       const void *entry)
{
	struct saved_value *nd = malloc(sizeof(struct saved_value));

	if (!nd)
		return NULL;
	memcpy(nd, entry, sizeof(struct saved_value));
	return &nd->rb_node;
}

static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
					      int cpu,
					      bool create)
{
	struct rb_node *nd;
	struct saved_value dm = {
		.cpu = cpu,
		.evsel = evsel,
	};
	nd = rblist__find(&runtime_saved_values, &dm);
	if (nd)
		return container_of(nd, struct saved_value, rb_node);
	if (create) {
		rblist__add_node(&runtime_saved_values, &dm);
		nd = rblist__find(&runtime_saved_values, &dm);
		if (nd)
			return container_of(nd, struct saved_value, rb_node);
	}
	return NULL;
}

void perf_stat__init_shadow_stats(void)
{
	have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
	rblist__init(&runtime_saved_values);
	runtime_saved_values.node_cmp = saved_value_cmp;
	runtime_saved_values.node_new = saved_value_new;
	/* No delete for now */
}

static int evsel_context(struct perf_evsel *evsel)
{
	int ctx = 0;

	if (evsel->attr.exclude_kernel)
		ctx |= CTX_BIT_KERNEL;
	if (evsel->attr.exclude_user)
		ctx |= CTX_BIT_USER;
	if (evsel->attr.exclude_hv)
		ctx |= CTX_BIT_HV;
	if (evsel->attr.exclude_host)
		ctx |= CTX_BIT_HOST;
	if (evsel->attr.exclude_idle)
		ctx |= CTX_BIT_IDLE;

	return ctx;
}
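
/*
 * Example: evsel_context() maps an event opened with exclude_kernel and
 * exclude_hv set to ctx = CTX_BIT_KERNEL | CTX_BIT_HV = 0x6, keeping its
 * shadow stats separate from those of an unrestricted event (ctx = 0).
 */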

void perf_stat__reset_shadow_stats(void)
{
	struct rb_node *pos, *next;

	memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
	memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
	memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
	memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
	memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
	memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
	memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
	memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
	memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
	memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
	memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
	memset(runtime_cycles_in_tx_stats, 0,
	       sizeof(runtime_cycles_in_tx_stats));
	memset(runtime_transaction_stats, 0,
	       sizeof(runtime_transaction_stats));
	memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
	memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots));
	memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired));
	memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
	memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
	memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
	memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
	memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));

	next = rb_first(&runtime_saved_values.entries);
	while (next) {
		pos = next;
		next = rb_next(pos);
		memset(&container_of(pos, struct saved_value, rb_node)->stats,
		       0,
		       sizeof(struct stats));
	}
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
				    int cpu)
{
	int ctx = evsel_context(counter);

	count *= counter->scale;

	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) ||
	    perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK))
		update_stats(&runtime_nsecs_stats[cpu], count);
	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_stats(&runtime_cycles_stats[ctx][cpu], count);
	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
		update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count);
	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
		update_stats(&runtime_transaction_stats[ctx][cpu], count);
	else if (perf_stat_evsel__is(counter, ELISION_START))
		update_stats(&runtime_elision_stats[ctx][cpu], count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
		update_stats(&runtime_topdown_total_slots[ctx][cpu], count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
		update_stats(&runtime_topdown_slots_issued[ctx][cpu], count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
		update_stats(&runtime_topdown_slots_retired[ctx][cpu], count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
		update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
		update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count);
	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count);
	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_stats(&runtime_branches_stats[ctx][cpu], count);
	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_stats(&runtime_cacherefs_stats[ctx][cpu], count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_stats(&runtime_l1_dcache_stats[ctx][cpu], count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_stats(&runtime_l1_icache_stats[ctx][cpu], count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_stats(&runtime_ll_cache_stats[ctx][cpu], count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count);
	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count);
	else if (perf_stat_evsel__is(counter, SMI_NUM))
		update_stats(&runtime_smi_num_stats[ctx][cpu], count);
	else if (perf_stat_evsel__is(counter, APERF))
		update_stats(&runtime_aperf_stats[ctx][cpu], count);

	if (counter->collect_stat) {
		struct saved_value *v = saved_value_lookup(counter, cpu, true);

		update_stats(&v->stats, count);
	}
}

enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES]	= { 20.0, 10.0, 5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}
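
/*
 * Example: get_ratio_color(GRC_CACHE_MISSES, 12.0) exceeds the 10.0
 * threshold but not the 20.0 one, so it returns magenta; ratios at or
 * below 5.0 keep the normal color.
 */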

static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
						const char *name)
{
	struct perf_evsel *c2;

	evlist__for_each_entry (evsel_list, c2) {
		if (!strcasecmp(c2->name, name))
			return c2;
	}
	return NULL;
}

/*
 * Mark the events referenced by each counter's MetricExpr and link the
 * counter to them, so that their stats are collected and available when
 * the expression is evaluated later.
 */
void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
{
	struct perf_evsel *counter, *leader, **metric_events, *oc;
	bool found;
	const char **metric_names;
	int i;
	int num_metric_names;

	evlist__for_each_entry(evsel_list, counter) {
		bool invalid = false;

		leader = counter->leader;
		if (!counter->metric_expr)
			continue;
		metric_events = counter->metric_events;
		if (!metric_events) {
			if (expr__find_other(counter->metric_expr, counter->name,
					     &metric_names, &num_metric_names) < 0)
				continue;

			metric_events = calloc(sizeof(struct perf_evsel *),
					       num_metric_names + 1);
			if (!metric_events)
				return;
			counter->metric_events = metric_events;
		}

		for (i = 0; i < num_metric_names; i++) {
			found = false;
			if (leader) {
				/* Search in group */
				for_each_group_member (oc, leader) {
					if (!strcasecmp(oc->name, metric_names[i])) {
						found = true;
						break;
					}
				}
			}
			if (!found) {
				/* Search ignoring groups */
				oc = perf_stat__find_event(evsel_list, metric_names[i]);
			}
			if (!oc) {
				/* Deduping one is good enough to handle duplicated PMUs. */
				static char *printed;

				/*
				 * Adding events automatically would be difficult, because
				 * it would risk creating groups that are not schedulable.
				 * perf stat doesn't understand all the scheduling constraints
				 * of events. So we ask the user instead to add the missing
				 * events.
				 */
				if (!printed || strcasecmp(printed, metric_names[i])) {
					fprintf(stderr,
						"Add %s event to groups to get metric expression for %s\n",
						metric_names[i],
						counter->name);
					printed = strdup(metric_names[i]);
				}
				invalid = true;
				continue;
			}
			metric_events[i] = oc;
			oc->collect_stat = true;
		}
		metric_events[i] = NULL;
		free(metric_names);
		if (invalid) {
			free(metric_events);
			counter->metric_events = NULL;
			counter->metric_expr = NULL;
		}
	}
}

static void print_stalled_cycles_frontend(int cpu,
					  struct perf_evsel *evsel, double avg,
					  struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	if (ratio)
		out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
				  ratio);
	else
		out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
}

static void print_stalled_cycles_backend(int cpu,
					 struct perf_evsel *evsel, double avg,
					 struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}

static void print_branch_misses(int cpu,
				struct perf_evsel *evsel,
				double avg,
				struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_branches_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
}

static void print_l1_dcache_misses(int cpu,
				   struct perf_evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
}

static void print_l1_icache_misses(int cpu,
				   struct perf_evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
}

static void print_dtlb_cache_misses(int cpu,
				    struct perf_evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
}

static void print_itlb_cache_misses(int cpu,
				    struct perf_evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
}

static void print_ll_cache_misses(int cpu,
				  struct perf_evsel *evsel,
				  double avg,
				  struct perf_stat_output_ctx *out)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}

/*
 * High level "TopDown" CPU core pipeline bottleneck break down, following
 * Yasin, "A Top Down Method for Performance Analysis and Counters
 * Architecture", ISPASS14.
 *
 * Each issue slot of the pipeline is attributed to one of four categories.
 * All values are fractions of the total slots:
 *
 *	FrontendBound	= FetchBubbles / TotalSlots
 *	BadSpeculation	= (SlotsIssued - SlotsRetired + RecoveryBubbles) /
 *			  TotalSlots
 *	Retiring	= SlotsRetired / TotalSlots
 *	BackendBound	= 1.0 - FrontendBound - BadSpeculation - Retiring
 *
 * Counter measurement noise can produce small negative fractions;
 * sanitize_val() clamps values in [-0.02, 0) to zero.
 */
static double sanitize_val(double x)
{
	if (x < 0 && x >= -0.02)
		return 0.0;
	return x;
}

static double td_total_slots(int ctx, int cpu)
{
	return avg_stats(&runtime_topdown_total_slots[ctx][cpu]);
}

static double td_bad_spec(int ctx, int cpu)
{
	double bad_spec = 0;
	double total_slots;
	double total;

	total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) -
		avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) +
		avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]);
	total_slots = td_total_slots(ctx, cpu);
	if (total_slots)
		bad_spec = total / total_slots;
	return sanitize_val(bad_spec);
}

static double td_retiring(int ctx, int cpu)
{
	double retiring = 0;
	double total_slots = td_total_slots(ctx, cpu);
	double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]);

	if (total_slots)
		retiring = ret_slots / total_slots;
	return retiring;
}

static double td_fe_bound(int ctx, int cpu)
{
	double fe_bound = 0;
	double total_slots = td_total_slots(ctx, cpu);
	double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]);

	if (total_slots)
		fe_bound = fetch_bub / total_slots;
	return fe_bound;
}

static double td_be_bound(int ctx, int cpu)
{
	double sum = (td_fe_bound(ctx, cpu) +
		      td_bad_spec(ctx, cpu) +
		      td_retiring(ctx, cpu));
	if (sum == 0)
		return 0;
	return sanitize_val(1.0 - sum);
}
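
/*
 * Worked example for the td_* helpers above, with made-up numbers:
 * TotalSlots = 1000, SlotsIssued = 600, SlotsRetired = 500,
 * RecoveryBubbles = 100, FetchBubbles = 200. Then
 * BadSpeculation = (600 - 500 + 100) / 1000 = 0.2,
 * Retiring = 500 / 1000 = 0.5, FrontendBound = 200 / 1000 = 0.2, and
 * BackendBound = 1.0 - (0.2 + 0.5 + 0.2) = 0.1.
 */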

static void print_smi_cost(int cpu, struct perf_evsel *evsel,
			   struct perf_stat_output_ctx *out)
{
	double smi_num, aperf, cycles, cost = 0.0;
	int ctx = evsel_context(evsel);
	const char *color = NULL;

	smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
	aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
	cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);

	if ((cycles == 0) || (aperf == 0))
		return;

	if (smi_num)
		cost = (aperf - cycles) / aperf * 100.00;

	if (cost > 10)
		color = PERF_COLOR_RED;
	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
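
/*
 * print_smi_cost() treats the gap between APERF and measured cycles as
 * time stolen by System Management Interrupts: e.g. aperf = 1000 and
 * cycles = 900 give (1000 - 900) / 1000 * 100 = 10% SMI cost.
 */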

static void generic_metric(const char *metric_expr,
			   struct perf_evsel **metric_events,
			   char *name,
			   const char *metric_name,
			   double avg,
			   int cpu,
			   struct perf_stat_output_ctx *out)
{
	print_metric_t print_metric = out->print_metric;
	struct parse_ctx pctx;
	double ratio;
	int i;
	void *ctxp = out->ctx;

	expr__ctx_init(&pctx);
	expr__add_id(&pctx, name, avg);
	for (i = 0; metric_events[i]; i++) {
		struct saved_value *v;
		struct stats *stats;
		double scale;

		if (!strcmp(metric_events[i]->name, "duration_time")) {
			stats = &walltime_nsecs_stats;
			scale = 1e-9;
		} else {
			v = saved_value_lookup(metric_events[i], cpu, false);
			if (!v)
				break;
			stats = &v->stats;
			scale = 1.0;
		}
		expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale);
	}
	if (!metric_events[i]) {
		const char *p = metric_expr;

		if (expr__parse(&ratio, &pctx, &p) == 0)
			print_metric(ctxp, NULL, "%8.1f",
				     metric_name ?
				     metric_name :
				     out->force_header ? name : "",
				     ratio);
		else
			print_metric(ctxp, NULL, NULL,
				     out->force_header ?
				     (metric_name ? metric_name : name) : "", 0);
	} else
		print_metric(ctxp, NULL, NULL, "", 0);
}
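
/*
 * Illustration (hypothetical metric): for a metric_expr of
 * "inst_retired / cycles" attached to the inst_retired event,
 * generic_metric() maps inst_retired to this counter's average and cycles
 * to its saved per-cpu average, then expr__parse() evaluates the division
 * into the printed ratio. If a referenced event has no saved value yet,
 * the metric prints as empty instead.
 */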

void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out,
				   struct rblist *metric_events)
{
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
	double total, ratio = 0.0, total2;
	const char *color = NULL;
	int ctx = evsel_context(evsel);
	struct metric_event *me;
	int num = 1;

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
		if (total) {
			ratio = avg / total;
			print_metric(ctxp, NULL, "%7.2f ",
				     "insn per cycle", ratio);
		} else {
			print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
		}
		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));

		if (total && avg) {
			out->new_line(ctxp);
			ratio = total / avg;
			print_metric(ctxp, NULL, "%7.2f ",
				     "stalled cycles per insn",
				     ratio);
		} else if (have_frontend_stalled) {
			print_metric(ctxp, NULL, NULL,
				     "stalled cycles per insn", 0);
		}
	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_branches_stats[ctx][cpu].n != 0)
			print_branch_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all branches", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_l1_dcache_stats[ctx][cpu].n != 0)
			print_l1_dcache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_l1_icache_stats[ctx][cpu].n != 0)
			print_l1_icache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_dtlb_cache_stats[ctx][cpu].n != 0)
			print_dtlb_cache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_itlb_cache_stats[ctx][cpu].n != 0)
			print_itlb_cache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_ll_cache_stats[ctx][cpu].n != 0)
			print_ll_cache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);

		if (total)
			ratio = avg * 100 / total;

		if (runtime_cacherefs_stats[ctx][cpu].n != 0)
			print_metric(ctxp, NULL, "%8.3f %%",
				     "of all cache refs", ratio);
		else
			print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(cpu, evsel, avg, out);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(cpu, evsel, avg, out);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total) {
			ratio = avg / total;
			print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
		} else {
			print_metric(ctxp, NULL, NULL, "GHz", 0);
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
		if (total)
			print_metric(ctxp, NULL,
				     "%7.2f%%", "transactional cycles",
				     100.0 * (avg / total));
		else
			print_metric(ctxp, NULL, NULL, "transactional cycles",
				     0);
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
		if (total2 < avg)
			total2 = avg;
		if (total)
			print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
				     100.0 * ((total2-avg) / total));
		else
			print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);

		if (avg)
			ratio = total / avg;

		if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0)
			print_metric(ctxp, NULL, "%8.0f",
				     "cycles / transaction", ratio);
		else
			print_metric(ctxp, NULL, NULL, "cycles / transaction",
				     0);
	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);

		if (avg)
			ratio = total / avg;

		print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
	} else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK) ||
		   perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK)) {
		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
			print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
				     avg / ratio);
		else
			print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
		double fe_bound = td_fe_bound(ctx, cpu);

		if (fe_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(ctxp, color, "%8.1f%%", "frontend bound",
			     fe_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
		double retiring = td_retiring(ctx, cpu);

		if (retiring > 0.7)
			color = PERF_COLOR_GREEN;
		print_metric(ctxp, color, "%8.1f%%", "retiring",
			     retiring * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
		double bad_spec = td_bad_spec(ctx, cpu);

		if (bad_spec > 0.1)
			color = PERF_COLOR_RED;
		print_metric(ctxp, color, "%8.1f%%", "bad speculation",
			     bad_spec * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
		double be_bound = td_be_bound(ctx, cpu);
		const char *name = "backend bound";
		static int have_recovery_bubbles = -1;

		/* In case the CPU does not support topdown-recovery-bubbles */
		if (have_recovery_bubbles < 0)
			have_recovery_bubbles = pmu_have_event("cpu",
					"topdown-recovery-bubbles");
		if (!have_recovery_bubbles)
			name = "backend bound/bad spec";

		if (be_bound > 0.2)
			color = PERF_COLOR_RED;
		if (td_total_slots(ctx, cpu) > 0)
			print_metric(ctxp, color, "%8.1f%%", name,
				     be_bound * 100.);
		else
			print_metric(ctxp, NULL, NULL, name, 0);
	} else if (evsel->metric_expr) {
		generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name,
			       evsel->metric_name, avg, cpu, out);
	} else if (runtime_nsecs_stats[cpu].n != 0) {
		char unit = 'M';
		char unit_buf[10];

		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}
		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
		print_smi_cost(cpu, evsel, out);
	} else {
		num = 0;
	}

	if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
		struct metric_expr *mexp;

		list_for_each_entry (mexp, &me->head, nd) {
			if (num++ > 0)
				out->new_line(ctxp);
			generic_metric(mexp->metric_expr, mexp->metric_events,
				       evsel->name, mexp->metric_name,
				       avg, cpu, out);
		}
	}
	if (num == 0)
		print_metric(ctxp, NULL, NULL, NULL, 0);
}