1
2#include <stdio.h>
3#include "evsel.h"
4#include "stat.h"
5#include "color.h"
6#include "pmu.h"
7#include "rblist.h"
8#include "evlist.h"
9#include "expr.h"
10#include "metricgroup.h"
11#include <linux/zalloc.h>
12
13
14
15
16
17
18
19
20
/* Probed at init time: does the "cpu" PMU expose stalled-cycles-frontend? */
static bool have_frontend_stalled;

/* Global shadow counters used by the default `perf stat` output path. */
struct runtime_stat rt_stat;
/* Wall-clock runtime of the measured workload, in nanoseconds. */
struct stats walltime_nsecs_stats;
25
/*
 * One node of a runtime_stat's rblist.  A saved value is keyed by
 * (cpu, type, ctx, evsel-or-stat) -- see saved_value_cmp() -- and
 * accumulates samples in 'stats'.
 */
struct saved_value {
	struct rb_node rb_node;		/* linkage in runtime_stat->value_list */
	struct perf_evsel *evsel;	/* NULL for STAT_* typed values */
	enum stat_type type;
	int ctx;			/* CTX_BIT_* mask from evsel_context() */
	int cpu;
	struct runtime_stat *stat;	/* disambiguates entries when evsel is NULL */
	struct stats stats;
};
35
36static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
37{
38 struct saved_value *a = container_of(rb_node,
39 struct saved_value,
40 rb_node);
41 const struct saved_value *b = entry;
42
43 if (a->cpu != b->cpu)
44 return a->cpu - b->cpu;
45
46
47
48
49
50
51
52
53 if (a->type != b->type)
54 return a->type - b->type;
55
56 if (a->ctx != b->ctx)
57 return a->ctx - b->ctx;
58
59 if (a->evsel == NULL && b->evsel == NULL) {
60 if (a->stat == b->stat)
61 return 0;
62
63 if ((char *)a->stat < (char *)b->stat)
64 return -1;
65
66 return 1;
67 }
68
69 if (a->evsel == b->evsel)
70 return 0;
71 if ((char *)a->evsel < (char *)b->evsel)
72 return -1;
73 return +1;
74}
75
76static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
77 const void *entry)
78{
79 struct saved_value *nd = malloc(sizeof(struct saved_value));
80
81 if (!nd)
82 return NULL;
83 memcpy(nd, entry, sizeof(struct saved_value));
84 return &nd->rb_node;
85}
86
87static void saved_value_delete(struct rblist *rblist __maybe_unused,
88 struct rb_node *rb_node)
89{
90 struct saved_value *v;
91
92 BUG_ON(!rb_node);
93 v = container_of(rb_node, struct saved_value, rb_node);
94 free(v);
95}
96
97static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
98 int cpu,
99 bool create,
100 enum stat_type type,
101 int ctx,
102 struct runtime_stat *st)
103{
104 struct rblist *rblist;
105 struct rb_node *nd;
106 struct saved_value dm = {
107 .cpu = cpu,
108 .evsel = evsel,
109 .type = type,
110 .ctx = ctx,
111 .stat = st,
112 };
113
114 rblist = &st->value_list;
115
116 nd = rblist__find(rblist, &dm);
117 if (nd)
118 return container_of(nd, struct saved_value, rb_node);
119 if (create) {
120 rblist__add_node(rblist, &dm);
121 nd = rblist__find(rblist, &dm);
122 if (nd)
123 return container_of(nd, struct saved_value, rb_node);
124 }
125 return NULL;
126}
127
128void runtime_stat__init(struct runtime_stat *st)
129{
130 struct rblist *rblist = &st->value_list;
131
132 rblist__init(rblist);
133 rblist->node_cmp = saved_value_cmp;
134 rblist->node_new = saved_value_new;
135 rblist->node_delete = saved_value_delete;
136}
137
/* Release every saved value held by @st. */
void runtime_stat__exit(struct runtime_stat *st)
{
	rblist__exit(&st->value_list);
}
142
/* One-time setup of the global shadow-stat state. */
void perf_stat__init_shadow_stats(void)
{
	/* probe once whether the core PMU has a frontend-stall event */
	have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
	runtime_stat__init(&rt_stat);
}
148
149static int evsel_context(struct perf_evsel *evsel)
150{
151 int ctx = 0;
152
153 if (evsel->attr.exclude_kernel)
154 ctx |= CTX_BIT_KERNEL;
155 if (evsel->attr.exclude_user)
156 ctx |= CTX_BIT_USER;
157 if (evsel->attr.exclude_hv)
158 ctx |= CTX_BIT_HV;
159 if (evsel->attr.exclude_host)
160 ctx |= CTX_BIT_HOST;
161 if (evsel->attr.exclude_idle)
162 ctx |= CTX_BIT_IDLE;
163
164 return ctx;
165}
166
167static void reset_stat(struct runtime_stat *st)
168{
169 struct rblist *rblist;
170 struct rb_node *pos, *next;
171
172 rblist = &st->value_list;
173 next = rb_first_cached(&rblist->entries);
174 while (next) {
175 pos = next;
176 next = rb_next(pos);
177 memset(&container_of(pos, struct saved_value, rb_node)->stats,
178 0,
179 sizeof(struct stats));
180 }
181}
182
/* Reset the global shadow counters and the wall-clock stats. */
void perf_stat__reset_shadow_stats(void)
{
	reset_stat(&rt_stat);
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}
188
/* Reset the shadow counters of one runtime_stat (e.g. between intervals). */
void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
	reset_stat(st);
}
193
194static void update_runtime_stat(struct runtime_stat *st,
195 enum stat_type type,
196 int ctx, int cpu, u64 count)
197{
198 struct saved_value *v = saved_value_lookup(NULL, cpu, true,
199 type, ctx, st);
200
201 if (v)
202 update_stats(&v->stats, count);
203}
204
205
206
207
208
209
210void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
211 int cpu, struct runtime_stat *st)
212{
213 int ctx = evsel_context(counter);
214 u64 count_ns = count;
215
216 count *= counter->scale;
217
218 if (perf_evsel__is_clock(counter))
219 update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
220 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
221 update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
222 else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
223 update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
224 else if (perf_stat_evsel__is(counter, TRANSACTION_START))
225 update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
226 else if (perf_stat_evsel__is(counter, ELISION_START))
227 update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
228 else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
229 update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
230 ctx, cpu, count);
231 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
232 update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
233 ctx, cpu, count);
234 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
235 update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
236 ctx, cpu, count);
237 else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
238 update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
239 ctx, cpu, count);
240 else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
241 update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
242 ctx, cpu, count);
243 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
244 update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
245 ctx, cpu, count);
246 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
247 update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
248 ctx, cpu, count);
249 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
250 update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
251 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
252 update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
253 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
254 update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
255 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
256 update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
257 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
258 update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
259 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
260 update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
261 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
262 update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
263 else if (perf_stat_evsel__is(counter, SMI_NUM))
264 update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
265 else if (perf_stat_evsel__is(counter, APERF))
266 update_runtime_stat(st, STAT_APERF, ctx, cpu, count);
267
268 if (counter->collect_stat) {
269 struct saved_value *v = saved_value_lookup(counter, cpu, true,
270 STAT_NONE, 0, st);
271 update_stats(&v->stats, count);
272 }
273}
274
275
/* Metric classes with distinct red/magenta/yellow colour thresholds. */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};
282
283static const char *get_ratio_color(enum grc_type type, double ratio)
284{
285 static const double grc_table[GRC_MAX_NR][3] = {
286 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
287 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
288 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
289 };
290 const char *color = PERF_COLOR_NORMAL;
291
292 if (ratio > grc_table[type][0])
293 color = PERF_COLOR_RED;
294 else if (ratio > grc_table[type][1])
295 color = PERF_COLOR_MAGENTA;
296 else if (ratio > grc_table[type][2])
297 color = PERF_COLOR_YELLOW;
298
299 return color;
300}
301
302static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
303 const char *name)
304{
305 struct perf_evsel *c2;
306
307 evlist__for_each_entry (evsel_list, c2) {
308 if (!strcasecmp(c2->name, name) && !c2->collect_stat)
309 return c2;
310 }
311 return NULL;
312}
313
314
/*
 * For every event carrying a metric_expr, resolve the other event names
 * referenced by the expression to concrete evsels and cache them in
 * counter->metric_events (NULL-terminated array, owned by the counter).
 * If any referenced event cannot be found, the metric is disabled for
 * that counter.
 */
void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
{
	struct perf_evsel *counter, *leader, **metric_events, *oc;
	bool found;
	const char **metric_names;
	int i;
	int num_metric_names;

	evlist__for_each_entry(evsel_list, counter) {
		bool invalid = false;

		leader = counter->leader;
		if (!counter->metric_expr)
			continue;
		metric_events = counter->metric_events;
		if (!metric_events) {
			/* parse the expression to learn which names it needs */
			if (expr__find_other(counter->metric_expr, counter->name,
					     &metric_names, &num_metric_names) < 0)
				continue;

			/* +1 slot for the NULL terminator */
			metric_events = calloc(sizeof(struct perf_evsel *),
					       num_metric_names + 1);
			if (!metric_events)
				return;
			counter->metric_events = metric_events;
		}

		for (i = 0; i < num_metric_names; i++) {
			found = false;
			if (leader) {
				/* Search in group first. */
				for_each_group_member (oc, leader) {
					if (!strcasecmp(oc->name, metric_names[i]) &&
					    !oc->collect_stat) {
						found = true;
						break;
					}
				}
			}
			if (!found) {
				/* Search ignoring groups. */
				oc = perf_stat__find_event(evsel_list, metric_names[i]);
			}
			if (!oc) {
				/*
				 * Warn once per distinct missing event name.
				 * NOTE(review): 'printed' is intentionally
				 * static and the previous strdup() is never
				 * freed; a bounded one-entry leak.
				 */
				static char *printed;

				if (!printed || strcasecmp(printed, metric_names[i])) {
					fprintf(stderr,
						"Add %s event to groups to get metric expression for %s\n",
						metric_names[i],
						counter->name);
					printed = strdup(metric_names[i]);
				}
				invalid = true;
				continue;
			}
			metric_events[i] = oc;
			oc->collect_stat = true;
		}
		/* NULL-terminate the resolved event array */
		metric_events[i] = NULL;
		free(metric_names);
		if (invalid) {
			/* disable the metric: unresolved references remain */
			free(metric_events);
			counter->metric_events = NULL;
			counter->metric_expr = NULL;
		}
	}
}
391
392static double runtime_stat_avg(struct runtime_stat *st,
393 enum stat_type type, int ctx, int cpu)
394{
395 struct saved_value *v;
396
397 v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
398 if (!v)
399 return 0.0;
400
401 return avg_stats(&v->stats);
402}
403
404static double runtime_stat_n(struct runtime_stat *st,
405 enum stat_type type, int ctx, int cpu)
406{
407 struct saved_value *v;
408
409 v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
410 if (!v)
411 return 0.0;
412
413 return v->stats.n;
414}
415
416static void print_stalled_cycles_frontend(struct perf_stat_config *config,
417 int cpu,
418 struct perf_evsel *evsel, double avg,
419 struct perf_stat_output_ctx *out,
420 struct runtime_stat *st)
421{
422 double total, ratio = 0.0;
423 const char *color;
424 int ctx = evsel_context(evsel);
425
426 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
427
428 if (total)
429 ratio = avg / total * 100.0;
430
431 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
432
433 if (ratio)
434 out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
435 ratio);
436 else
437 out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
438}
439
440static void print_stalled_cycles_backend(struct perf_stat_config *config,
441 int cpu,
442 struct perf_evsel *evsel, double avg,
443 struct perf_stat_output_ctx *out,
444 struct runtime_stat *st)
445{
446 double total, ratio = 0.0;
447 const char *color;
448 int ctx = evsel_context(evsel);
449
450 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
451
452 if (total)
453 ratio = avg / total * 100.0;
454
455 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
456
457 out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
458}
459
460static void print_branch_misses(struct perf_stat_config *config,
461 int cpu,
462 struct perf_evsel *evsel,
463 double avg,
464 struct perf_stat_output_ctx *out,
465 struct runtime_stat *st)
466{
467 double total, ratio = 0.0;
468 const char *color;
469 int ctx = evsel_context(evsel);
470
471 total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);
472
473 if (total)
474 ratio = avg / total * 100.0;
475
476 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
477
478 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
479}
480
481static void print_l1_dcache_misses(struct perf_stat_config *config,
482 int cpu,
483 struct perf_evsel *evsel,
484 double avg,
485 struct perf_stat_output_ctx *out,
486 struct runtime_stat *st)
487
488{
489 double total, ratio = 0.0;
490 const char *color;
491 int ctx = evsel_context(evsel);
492
493 total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);
494
495 if (total)
496 ratio = avg / total * 100.0;
497
498 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
499
500 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
501}
502
503static void print_l1_icache_misses(struct perf_stat_config *config,
504 int cpu,
505 struct perf_evsel *evsel,
506 double avg,
507 struct perf_stat_output_ctx *out,
508 struct runtime_stat *st)
509
510{
511 double total, ratio = 0.0;
512 const char *color;
513 int ctx = evsel_context(evsel);
514
515 total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);
516
517 if (total)
518 ratio = avg / total * 100.0;
519
520 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
521 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
522}
523
524static void print_dtlb_cache_misses(struct perf_stat_config *config,
525 int cpu,
526 struct perf_evsel *evsel,
527 double avg,
528 struct perf_stat_output_ctx *out,
529 struct runtime_stat *st)
530{
531 double total, ratio = 0.0;
532 const char *color;
533 int ctx = evsel_context(evsel);
534
535 total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);
536
537 if (total)
538 ratio = avg / total * 100.0;
539
540 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
541 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
542}
543
544static void print_itlb_cache_misses(struct perf_stat_config *config,
545 int cpu,
546 struct perf_evsel *evsel,
547 double avg,
548 struct perf_stat_output_ctx *out,
549 struct runtime_stat *st)
550{
551 double total, ratio = 0.0;
552 const char *color;
553 int ctx = evsel_context(evsel);
554
555 total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);
556
557 if (total)
558 ratio = avg / total * 100.0;
559
560 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
561 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
562}
563
564static void print_ll_cache_misses(struct perf_stat_config *config,
565 int cpu,
566 struct perf_evsel *evsel,
567 double avg,
568 struct perf_stat_output_ctx *out,
569 struct runtime_stat *st)
570{
571 double total, ratio = 0.0;
572 const char *color;
573 int ctx = evsel_context(evsel);
574
575 total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);
576
577 if (total)
578 ratio = avg / total * 100.0;
579
580 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
581 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
582}
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
/*
 * Squash tiny negative values (rounding noise, down to -2%) to zero so
 * topdown fractions never display as slightly negative.
 */
static double sanitize_val(double x)
{
	return (x < 0 && x >= -0.02) ? 0.0 : x;
}
632
/* Total pipeline slots: the denominator of every topdown fraction. */
static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
{
	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
}
637
638static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
639{
640 double bad_spec = 0;
641 double total_slots;
642 double total;
643
644 total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
645 runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
646 runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);
647
648 total_slots = td_total_slots(ctx, cpu, st);
649 if (total_slots)
650 bad_spec = total / total_slots;
651 return sanitize_val(bad_spec);
652}
653
654static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
655{
656 double retiring = 0;
657 double total_slots = td_total_slots(ctx, cpu, st);
658 double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
659 ctx, cpu);
660
661 if (total_slots)
662 retiring = ret_slots / total_slots;
663 return retiring;
664}
665
666static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
667{
668 double fe_bound = 0;
669 double total_slots = td_total_slots(ctx, cpu, st);
670 double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
671 ctx, cpu);
672
673 if (total_slots)
674 fe_bound = fetch_bub / total_slots;
675 return fe_bound;
676}
677
/*
 * Topdown "backend bound" fraction: whatever is left after frontend,
 * bad speculation and retiring.  Returns 0 when no data was collected.
 */
static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double others = td_fe_bound(ctx, cpu, st) +
			td_bad_spec(ctx, cpu, st) +
			td_retiring(ctx, cpu, st);

	return others == 0 ? 0 : sanitize_val(1.0 - others);
}
687
688static void print_smi_cost(struct perf_stat_config *config,
689 int cpu, struct perf_evsel *evsel,
690 struct perf_stat_output_ctx *out,
691 struct runtime_stat *st)
692{
693 double smi_num, aperf, cycles, cost = 0.0;
694 int ctx = evsel_context(evsel);
695 const char *color = NULL;
696
697 smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
698 aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
699 cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
700
701 if ((cycles == 0) || (aperf == 0))
702 return;
703
704 if (smi_num)
705 cost = (aperf - cycles) / aperf * 100.00;
706
707 if (cost > 10)
708 color = PERF_COLOR_RED;
709 out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
710 out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
711}
712
/*
 * Evaluate a user/JSON-defined metric expression and print the result.
 *
 * @metric_expr:   the expression text
 * @metric_events: NULL-terminated evsels referenced by the expression
 * @name:          name of the triggering event (bound to @avg)
 * @metric_name:   display name; falls back to @name under force_header
 * @avg:           averaged value of the triggering event
 *
 * If any referenced event has no saved value yet, the metric is printed
 * as an empty cell instead of being evaluated.
 */
static void generic_metric(struct perf_stat_config *config,
			   const char *metric_expr,
			   struct perf_evsel **metric_events,
			   char *name,
			   const char *metric_name,
			   double avg,
			   int cpu,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	print_metric_t print_metric = out->print_metric;
	struct parse_ctx pctx;
	double ratio;
	int i;
	void *ctxp = out->ctx;
	char *n, *pn;

	expr__ctx_init(&pctx);
	/* bind the triggering event's own value under its name */
	expr__add_id(&pctx, name, avg);
	for (i = 0; metric_events[i]; i++) {
		struct saved_value *v;
		struct stats *stats;
		double scale;

		if (!strcmp(metric_events[i]->name, "duration_time")) {
			/* special case: wall clock, converted ns -> s */
			stats = &walltime_nsecs_stats;
			scale = 1e-9;
		} else {
			v = saved_value_lookup(metric_events[i], cpu, false,
					       STAT_NONE, 0, st);
			if (!v)
				break;	/* no data yet for this event */
			stats = &v->stats;
			scale = 1.0;
		}

		/* NOTE(review): early return below leaks prior strdup'd ids */
		n = strdup(metric_events[i]->name);
		if (!n)
			return;
		/*
		 * This display code with --no-merge adds [cpu] postfixes.
		 * These are not supported by the parser. Remove everything
		 * after the space.
		 */
		pn = strchr(n, ' ');
		if (pn)
			*pn = 0;
		expr__add_id(&pctx, n, avg_stats(stats)*scale);
	}
	if (!metric_events[i]) {
		/* all referenced events resolved: evaluate and print */
		const char *p = metric_expr;

		if (expr__parse(&ratio, &pctx, &p) == 0)
			print_metric(config, ctxp, NULL, "%8.1f",
				     metric_name ?
				     metric_name :
				     out->force_header ?  name : "",
				     ratio);
		else
			print_metric(config, ctxp, NULL, NULL,
				     out->force_header ?
				     (metric_name ? metric_name : name) : "", 0);
	} else
		print_metric(config, ctxp, NULL, NULL, "", 0);

	/* id 0 is 'name' (not strdup'd), so start freeing at 1 */
	for (i = 1; i < pctx.num_ids; i++)
		zfree(&pctx.ids[i].name);
}
781
/*
 * Print the derived ("shadow") metric for one counter reading: IPC,
 * miss ratios, GHz, topdown fractions, transaction stats, generic metric
 * expressions, or event rate as a fallback.  Finally prints any metric
 * group expressions attached to the event; emits an empty cell when no
 * metric applies so columns stay aligned.
 */
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
				   struct perf_evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out,
				   struct rblist *metric_events,
				   struct runtime_stat *st)
{
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
	double total, ratio = 0.0, total2;
	const char *color = NULL;
	int ctx = evsel_context(evsel);
	struct metric_event *me;
	int num = 1;	/* number of metric lines printed so far */

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		/* instructions: print IPC, then stalls-per-insn */
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%7.2f ",
					"insn per cycle", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
		}

		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
					 ctx, cpu);

		/* use the larger of frontend/backend stalls */
		total = max(total, runtime_stat_avg(st,
						    STAT_STALLED_CYCLES_BACK,
						    ctx, cpu));

		if (total && avg) {
			out->new_line(config, ctxp);
			ratio = total / avg;
			print_metric(config, ctxp, NULL, "%7.2f ",
					"stalled cycles per insn",
					ratio);
		} else if (have_frontend_stalled) {
			out->new_line(config, ctxp);
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "stalled cycles per insn", 0);
		}
	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
			print_branch_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		/* L1-dcache read misses */
		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
			print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		/* L1-icache read misses */
		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
			print_l1_icache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		/* dTLB read misses */
		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
			print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		/* iTLB read misses */
		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
			print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0);
	} else if (
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		/* last-level-cache read misses */
		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
			print_ll_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);

		if (total)
			ratio = avg * 100 / total;

		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.3f %%",
				     "of all cache refs", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		/* cycles / elapsed ns = clock frequency in GHz */
		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total)
			print_metric(config, ctxp, NULL,
					"%7.2f%%", "transactional cycles",
					100.0 * (avg / total));
		else
			print_metric(config, ctxp, NULL, NULL, "transactional cycles",
				     0);
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);

		/* aborted cycles can't exceed cycles in tx */
		if (total2 < avg)
			total2 = avg;
		if (total)
			print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
				100.0 * ((total2-avg) / total));
		else
			print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.0f",
				     "cycles / transaction", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
				      0);
	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
	} else if (perf_evsel__is_clock(evsel)) {
		/* clock event vs wall time = CPUs utilized */
		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
			print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
				     avg / (ratio * evsel->scale));
		else
			print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
		double fe_bound = td_fe_bound(ctx, cpu, st);

		if (fe_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
				fe_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
		double retiring = td_retiring(ctx, cpu, st);

		if (retiring > 0.7)
			color = PERF_COLOR_GREEN;
		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
				retiring * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
		double bad_spec = td_bad_spec(ctx, cpu, st);

		if (bad_spec > 0.1)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
				bad_spec * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
		double be_bound = td_be_bound(ctx, cpu, st);
		const char *name = "backend bound";
		static int have_recovery_bubbles = -1;

		/* In case the CPU does not support topdown-recovery-bubbles */
		if (have_recovery_bubbles < 0)
			have_recovery_bubbles = pmu_have_event("cpu",
					"topdown-recovery-bubbles");
		if (!have_recovery_bubbles)
			name = "backend bound/bad spec";

		if (be_bound > 0.2)
			color = PERF_COLOR_RED;
		if (td_total_slots(ctx, cpu, st) > 0)
			print_metric(config, ctxp, color, "%8.1f%%", name,
					be_bound * 100.);
		else
			print_metric(config, ctxp, NULL, NULL, name, 0);
	} else if (evsel->metric_expr) {
		generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name,
				evsel->metric_name, avg, cpu, out, st);
	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
		/* fallback: show the event's rate per second */
		char unit = 'M';
		char unit_buf[10];

		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}
		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
		print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
		print_smi_cost(config, cpu, evsel, out, st);
	} else {
		num = 0;	/* nothing matched; no metric printed yet */
	}

	/* additionally print any metric-group expressions for this event */
	if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
		struct metric_expr *mexp;

		list_for_each_entry (mexp, &me->head, nd) {
			if (num++ > 0)
				out->new_line(config, ctxp);
			generic_metric(config, mexp->metric_expr, mexp->metric_events,
					evsel->name, mexp->metric_name,
					avg, cpu, out, st);
		}
	}
	/* keep the output columns aligned even with no metric */
	if (num == 0)
		print_metric(config, ctxp, NULL, NULL, NULL, 0);
}
1030