1#include <stdio.h>
2#include "evsel.h"
3#include "stat.h"
4#include "color.h"
5#include "pmu.h"
6#include "rblist.h"
7#include "evlist.h"
8#include "expr.h"
9
/*
 * Bit flags recording which execution contexts an event excludes
 * (user, kernel, hypervisor, host, idle).  The combined bitmask is
 * the first index into the runtime_*_stats tables below, so events
 * counted with different exclusion settings never share a baseline
 * when ratios are computed.
 */
enum {
	CTX_BIT_USER	= 1 << 0,
	CTX_BIT_KERNEL	= 1 << 1,
	CTX_BIT_HV	= 1 << 2,
	CTX_BIT_HOST	= 1 << 3,
	CTX_BIT_IDLE	= 1 << 4,
	CTX_BIT_MAX	= 1 << 5,
};

/* Number of distinct exclusion-context combinations (table dimension). */
#define NUM_CTX CTX_BIT_MAX

/*
 * Running statistics for the "shadow" counters that perf stat keeps
 * so that derived metrics (IPC, miss ratios, GHz, transactional and
 * topdown breakdowns, ...) can be printed next to the raw counts.
 * Indexed by [exclusion context][cpu], except runtime_nsecs_stats
 * which is per-cpu only (clock events carry no exclusion bits that
 * matter here).
 */
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
/* rbtree of saved_value nodes used by metric expressions */
static struct rblist runtime_saved_values;
/* set at init if the PMU exposes a stalled-cycles-frontend event */
static bool have_frontend_stalled;

struct stats walltime_nsecs_stats;

/*
 * One saved counter value per (evsel, cpu, ctx) triple, kept in
 * runtime_saved_values so metric expressions can reference the
 * averaged value of other events.
 */
struct saved_value {
	struct rb_node rb_node;
	struct perf_evsel *evsel;
	int cpu;
	int ctx;
	struct stats stats;
};
61
62static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
63{
64 struct saved_value *a = container_of(rb_node,
65 struct saved_value,
66 rb_node);
67 const struct saved_value *b = entry;
68
69 if (a->ctx != b->ctx)
70 return a->ctx - b->ctx;
71 if (a->cpu != b->cpu)
72 return a->cpu - b->cpu;
73 return a->evsel - b->evsel;
74}
75
76static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
77 const void *entry)
78{
79 struct saved_value *nd = malloc(sizeof(struct saved_value));
80
81 if (!nd)
82 return NULL;
83 memcpy(nd, entry, sizeof(struct saved_value));
84 return &nd->rb_node;
85}
86
/*
 * Find the saved_value for the (evsel, cpu, ctx) triple in
 * runtime_saved_values.  If @create is true and no node exists,
 * insert one (rblist__add_node copies the on-stack template via
 * saved_value_new) and look it up again so the heap-resident copy is
 * returned, not the stack template.  Returns NULL when the node is
 * absent and not created (or creation failed).
 */
static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
					      int cpu, int ctx,
					      bool create)
{
	struct rb_node *nd;
	struct saved_value dm = {
		.cpu = cpu,
		.ctx = ctx,
		.evsel = evsel,
	};
	nd = rblist__find(&runtime_saved_values, &dm);
	if (nd)
		return container_of(nd, struct saved_value, rb_node);
	if (create) {
		rblist__add_node(&runtime_saved_values, &dm);
		/* re-find to get the node allocated by saved_value_new() */
		nd = rblist__find(&runtime_saved_values, &dm);
		if (nd)
			return container_of(nd, struct saved_value, rb_node);
	}
	return NULL;
}
108
109void perf_stat__init_shadow_stats(void)
110{
111 have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
112 rblist__init(&runtime_saved_values);
113 runtime_saved_values.node_cmp = saved_value_cmp;
114 runtime_saved_values.node_new = saved_value_new;
115
116}
117
118static int evsel_context(struct perf_evsel *evsel)
119{
120 int ctx = 0;
121
122 if (evsel->attr.exclude_kernel)
123 ctx |= CTX_BIT_KERNEL;
124 if (evsel->attr.exclude_user)
125 ctx |= CTX_BIT_USER;
126 if (evsel->attr.exclude_hv)
127 ctx |= CTX_BIT_HV;
128 if (evsel->attr.exclude_host)
129 ctx |= CTX_BIT_HOST;
130 if (evsel->attr.exclude_idle)
131 ctx |= CTX_BIT_IDLE;
132
133 return ctx;
134}
135
/*
 * Zero every shadow-stat table and the walltime stats, and clear the
 * stats embedded in each saved_value node (the rbtree nodes
 * themselves are kept — only their accumulated stats are reset).
 * Called between measurement intervals / runs.
 */
void perf_stat__reset_shadow_stats(void)
{
	struct rb_node *pos, *next;

	memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
	memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
	memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
	memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
	memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
	memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
	memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
	memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
	memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
	memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
	memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
	memset(runtime_cycles_in_tx_stats, 0,
			sizeof(runtime_cycles_in_tx_stats));
	memset(runtime_transaction_stats, 0,
		sizeof(runtime_transaction_stats));
	memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
	memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots));
	memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired));
	memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
	memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
	memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
	memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
	memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));

	/* walk the saved-value tree and zero each node's stats in place */
	next = rb_first(&runtime_saved_values.entries);
	while (next) {
		pos = next;
		next = rb_next(pos);
		memset(&container_of(pos, struct saved_value, rb_node)->stats,
		       0,
		       sizeof(struct stats));
	}
}
174
175
176
177
178
179
180void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
181 int cpu)
182{
183 int ctx = evsel_context(counter);
184
185 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) ||
186 perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK))
187 update_stats(&runtime_nsecs_stats[cpu], count[0]);
188 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
189 update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
190 else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
191 update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]);
192 else if (perf_stat_evsel__is(counter, TRANSACTION_START))
193 update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
194 else if (perf_stat_evsel__is(counter, ELISION_START))
195 update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
196 else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
197 update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]);
198 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
199 update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]);
200 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
201 update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]);
202 else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
203 update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu],count[0]);
204 else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
205 update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]);
206 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
207 update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
208 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
209 update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
210 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
211 update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
212 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
213 update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
214 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
215 update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
216 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
217 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
218 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
219 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
220 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
221 update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
222 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
223 update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
224 else if (perf_stat_evsel__is(counter, SMI_NUM))
225 update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
226 else if (perf_stat_evsel__is(counter, APERF))
227 update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
228
229 if (counter->collect_stat) {
230 struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
231 true);
232 update_stats(&v->stats, count[0]);
233 }
234}
235
236
237enum grc_type {
238 GRC_STALLED_CYCLES_FE,
239 GRC_STALLED_CYCLES_BE,
240 GRC_CACHE_MISSES,
241 GRC_MAX_NR
242};
243
244static const char *get_ratio_color(enum grc_type type, double ratio)
245{
246 static const double grc_table[GRC_MAX_NR][3] = {
247 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
248 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
249 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
250 };
251 const char *color = PERF_COLOR_NORMAL;
252
253 if (ratio > grc_table[type][0])
254 color = PERF_COLOR_RED;
255 else if (ratio > grc_table[type][1])
256 color = PERF_COLOR_MAGENTA;
257 else if (ratio > grc_table[type][2])
258 color = PERF_COLOR_YELLOW;
259
260 return color;
261}
262
263static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
264 const char *name)
265{
266 struct perf_evsel *c2;
267
268 evlist__for_each_entry (evsel_list, c2) {
269 if (!strcasecmp(c2->name, name))
270 return c2;
271 }
272 return NULL;
273}
274
275
276void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
277{
278 struct perf_evsel *counter, *leader, **metric_events, *oc;
279 bool found;
280 const char **metric_names;
281 int i;
282 int num_metric_names;
283
284 evlist__for_each_entry(evsel_list, counter) {
285 bool invalid = false;
286
287 leader = counter->leader;
288 if (!counter->metric_expr)
289 continue;
290 metric_events = counter->metric_events;
291 if (!metric_events) {
292 if (expr__find_other(counter->metric_expr, counter->name,
293 &metric_names, &num_metric_names) < 0)
294 continue;
295
296 metric_events = calloc(sizeof(struct perf_evsel *),
297 num_metric_names + 1);
298 if (!metric_events)
299 return;
300 counter->metric_events = metric_events;
301 }
302
303 for (i = 0; i < num_metric_names; i++) {
304 found = false;
305 if (leader) {
306
307 for_each_group_member (oc, leader) {
308 if (!strcasecmp(oc->name, metric_names[i])) {
309 found = true;
310 break;
311 }
312 }
313 }
314 if (!found) {
315
316 oc = perf_stat__find_event(evsel_list, metric_names[i]);
317 }
318 if (!oc) {
319
320 static char *printed;
321
322
323
324
325
326
327
328
329 if (!printed || strcasecmp(printed, metric_names[i])) {
330 fprintf(stderr,
331 "Add %s event to groups to get metric expression for %s\n",
332 metric_names[i],
333 counter->name);
334 printed = strdup(metric_names[i]);
335 }
336 invalid = true;
337 continue;
338 }
339 metric_events[i] = oc;
340 oc->collect_stat = true;
341 }
342 metric_events[i] = NULL;
343 free(metric_names);
344 if (invalid) {
345 free(metric_events);
346 counter->metric_events = NULL;
347 counter->metric_expr = NULL;
348 }
349 }
350}
351
352static void print_stalled_cycles_frontend(int cpu,
353 struct perf_evsel *evsel, double avg,
354 struct perf_stat_output_ctx *out)
355{
356 double total, ratio = 0.0;
357 const char *color;
358 int ctx = evsel_context(evsel);
359
360 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
361
362 if (total)
363 ratio = avg / total * 100.0;
364
365 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
366
367 if (ratio)
368 out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
369 ratio);
370 else
371 out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
372}
373
374static void print_stalled_cycles_backend(int cpu,
375 struct perf_evsel *evsel, double avg,
376 struct perf_stat_output_ctx *out)
377{
378 double total, ratio = 0.0;
379 const char *color;
380 int ctx = evsel_context(evsel);
381
382 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
383
384 if (total)
385 ratio = avg / total * 100.0;
386
387 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
388
389 out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
390}
391
392static void print_branch_misses(int cpu,
393 struct perf_evsel *evsel,
394 double avg,
395 struct perf_stat_output_ctx *out)
396{
397 double total, ratio = 0.0;
398 const char *color;
399 int ctx = evsel_context(evsel);
400
401 total = avg_stats(&runtime_branches_stats[ctx][cpu]);
402
403 if (total)
404 ratio = avg / total * 100.0;
405
406 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
407
408 out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
409}
410
411static void print_l1_dcache_misses(int cpu,
412 struct perf_evsel *evsel,
413 double avg,
414 struct perf_stat_output_ctx *out)
415{
416 double total, ratio = 0.0;
417 const char *color;
418 int ctx = evsel_context(evsel);
419
420 total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
421
422 if (total)
423 ratio = avg / total * 100.0;
424
425 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
426
427 out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
428}
429
430static void print_l1_icache_misses(int cpu,
431 struct perf_evsel *evsel,
432 double avg,
433 struct perf_stat_output_ctx *out)
434{
435 double total, ratio = 0.0;
436 const char *color;
437 int ctx = evsel_context(evsel);
438
439 total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
440
441 if (total)
442 ratio = avg / total * 100.0;
443
444 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
445 out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
446}
447
448static void print_dtlb_cache_misses(int cpu,
449 struct perf_evsel *evsel,
450 double avg,
451 struct perf_stat_output_ctx *out)
452{
453 double total, ratio = 0.0;
454 const char *color;
455 int ctx = evsel_context(evsel);
456
457 total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
458
459 if (total)
460 ratio = avg / total * 100.0;
461
462 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
463 out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
464}
465
466static void print_itlb_cache_misses(int cpu,
467 struct perf_evsel *evsel,
468 double avg,
469 struct perf_stat_output_ctx *out)
470{
471 double total, ratio = 0.0;
472 const char *color;
473 int ctx = evsel_context(evsel);
474
475 total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
476
477 if (total)
478 ratio = avg / total * 100.0;
479
480 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
481 out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
482}
483
484static void print_ll_cache_misses(int cpu,
485 struct perf_evsel *evsel,
486 double avg,
487 struct perf_stat_output_ctx *out)
488{
489 double total, ratio = 0.0;
490 const char *color;
491 int ctx = evsel_context(evsel);
492
493 total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
494
495 if (total)
496 ratio = avg / total * 100.0;
497
498 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
499 out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
500}
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
/*
 * Snap small negative values (down to -0.02) to zero; anything else
 * passes through unchanged.  Topdown fractions are computed by
 * subtraction and can come out slightly negative from measurement
 * noise — presumably counter multiplexing jitter (NOTE(review):
 * confirm the tolerance rationale).
 */
static double sanitize_val(double x)
{
	return (x < 0 && x >= -0.02) ? 0.0 : x;
}
550
/* Averaged topdown total-slots baseline for (ctx, cpu). */
static double td_total_slots(int ctx, int cpu)
{
	return avg_stats(&runtime_topdown_total_slots[ctx][cpu]);
}
555
556static double td_bad_spec(int ctx, int cpu)
557{
558 double bad_spec = 0;
559 double total_slots;
560 double total;
561
562 total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) -
563 avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) +
564 avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]);
565 total_slots = td_total_slots(ctx, cpu);
566 if (total_slots)
567 bad_spec = total / total_slots;
568 return sanitize_val(bad_spec);
569}
570
571static double td_retiring(int ctx, int cpu)
572{
573 double retiring = 0;
574 double total_slots = td_total_slots(ctx, cpu);
575 double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]);
576
577 if (total_slots)
578 retiring = ret_slots / total_slots;
579 return retiring;
580}
581
582static double td_fe_bound(int ctx, int cpu)
583{
584 double fe_bound = 0;
585 double total_slots = td_total_slots(ctx, cpu);
586 double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]);
587
588 if (total_slots)
589 fe_bound = fetch_bub / total_slots;
590 return fe_bound;
591}
592
/*
 * Topdown backend-bound fraction: the remainder after frontend,
 * bad speculation and retiring, clamped from tiny negatives to zero.
 * If the other three are all zero (no data), report zero rather than
 * a meaningless 100%.
 */
static double td_be_bound(int ctx, int cpu)
{
	double others = td_fe_bound(ctx, cpu) +
			td_bad_spec(ctx, cpu) +
			td_retiring(ctx, cpu);

	return others == 0 ? 0 : sanitize_val(1.0 - others);
}
602
/*
 * Print the cycle cost of System Management Interrupts plus the raw
 * SMI count.  Cost is (aperf - cycles) / aperf — presumably because
 * aperf keeps counting during SMM while the cycles counter does not,
 * so the gap is time lost to SMIs (NOTE(review): confirm against the
 * msr PMU's aperf/smi event semantics).  Nothing is printed if either
 * baseline is missing; costs above 10% are highlighted red.
 */
static void print_smi_cost(int cpu, struct perf_evsel *evsel,
			   struct perf_stat_output_ctx *out)
{
	double smi_num, aperf, cycles, cost = 0.0;
	int ctx = evsel_context(evsel);
	const char *color = NULL;

	smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
	aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
	cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);

	if ((cycles == 0) || (aperf == 0))
		return;

	/* only attribute the gap to SMIs when some actually occurred */
	if (smi_num)
		cost = (aperf - cycles) / aperf * 100.00;

	if (cost > 10)
		color = PERF_COLOR_RED;
	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
625
/*
 * Print the derived ("shadow") metric that accompanies @evsel's raw
 * count: IPC for instructions, miss percentages for cache events,
 * GHz for cycles, transactional/topdown breakdowns, a user-supplied
 * metric expression, or — as a fallback — the event rate against
 * task-clock time.  @avg is the averaged counter value for this
 * cpu/aggregation; @out supplies the print callbacks.
 */
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out)
{
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
	double total, ratio = 0.0, total2;
	const char *color = NULL;
	int ctx = evsel_context(evsel);

	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		/* instructions / cycles = IPC */
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
		if (total) {
			ratio = avg / total;
			print_metric(ctxp, NULL, "%7.2f ",
					"insn per cycle", ratio);
		} else {
			print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
		}
		/* second line: worst of frontend/backend stalls per insn */
		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));

		if (total && avg) {
			out->new_line(ctxp);
			ratio = total / avg;
			print_metric(ctxp, NULL, "%7.2f ",
					"stalled cycles per insn",
					ratio);
		} else if (have_frontend_stalled) {
			/* placeholder keeps columns aligned when the PMU has the event */
			print_metric(ctxp, NULL, NULL,
				     "stalled cycles per insn", 0);
		}
	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_branches_stats[ctx][cpu].n != 0)
			print_branch_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all branches", 0);
	} else if (
		/* L1D read misses vs. the L1D read-access baseline */
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_l1_dcache_stats[ctx][cpu].n != 0)
			print_l1_dcache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
	} else if (
		/* L1I read misses vs. the L1I read-access baseline */
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_l1_icache_stats[ctx][cpu].n != 0)
			print_l1_icache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
	} else if (
		/* dTLB read misses vs. the dTLB read-access baseline */
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_dtlb_cache_stats[ctx][cpu].n != 0)
			print_dtlb_cache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
	} else if (
		/* iTLB read misses vs. the iTLB read-access baseline */
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_itlb_cache_stats[ctx][cpu].n != 0)
			print_itlb_cache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
	} else if (
		/* LL read misses vs. the LL read-access baseline */
		evsel->attr.type == PERF_TYPE_HW_CACHE &&
		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
		if (runtime_ll_cache_stats[ctx][cpu].n != 0)
			print_ll_cache_misses(cpu, evsel, avg, out);
		else
			print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
		/* cache misses as a percentage of cache references */
		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);

		if (total)
			ratio = avg * 100 / total;

		if (runtime_cacherefs_stats[ctx][cpu].n != 0)
			print_metric(ctxp, NULL, "%8.3f %%",
				     "of all cache refs", ratio);
		else
			print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(cpu, evsel, avg, out);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(cpu, evsel, avg, out);
	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		/* cycles / nanoseconds = clock frequency in GHz */
		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total) {
			ratio = avg / total;
			print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
		} else {
			/*
			 * NOTE(review): label is "Ghz" here but "GHz"
			 * above — runtime string, deliberately left
			 * untouched in this documentation pass.
			 */
			print_metric(ctxp, NULL, NULL, "Ghz", 0);
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		/* share of all cycles spent inside transactions */
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
		if (total)
			print_metric(ctxp, NULL,
				     "%7.2f%%", "transactional cycles",
				     100.0 * (avg / total));
		else
			print_metric(ctxp, NULL, NULL, "transactional cycles",
				     0);
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		/* cycles in aborted transactions = in-tx minus committed */
		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
		if (total2 < avg)
			total2 = avg;
		if (total)
			print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
				100.0 * ((total2-avg) / total));
		else
			print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
		/* average transaction length in cycles */
		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);

		if (avg)
			ratio = total / avg;

		if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0)
			print_metric(ctxp, NULL, "%8.0f",
				     "cycles / transaction", ratio);
		else
			print_metric(ctxp, NULL, NULL, "cycles / transaction",
				     0);
	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
		/* average elision region length in cycles */
		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);

		if (avg)
			ratio = total / avg;

		print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
	} else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK) ||
		   perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK)) {
		/* task time / wall time = CPUs utilized */
		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
			print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
				     avg / ratio);
		else
			print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
		double fe_bound = td_fe_bound(ctx, cpu);

		if (fe_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(ctxp, color, "%8.1f%%", "frontend bound",
				fe_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
		double retiring = td_retiring(ctx, cpu);

		if (retiring > 0.7)
			color = PERF_COLOR_GREEN;
		print_metric(ctxp, color, "%8.1f%%", "retiring",
				retiring * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
		double bad_spec = td_bad_spec(ctx, cpu);

		if (bad_spec > 0.1)
			color = PERF_COLOR_RED;
		print_metric(ctxp, color, "%8.1f%%", "bad speculation",
				bad_spec * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
		double be_bound = td_be_bound(ctx, cpu);
		const char *name = "backend bound";
		static int have_recovery_bubbles = -1;

		/* probe once whether bad-spec can be split out */
		if (have_recovery_bubbles < 0)
			have_recovery_bubbles = pmu_have_event("cpu",
					"topdown-recovery-bubbles");
		if (!have_recovery_bubbles)
			name = "backend bound/bad spec";

		if (be_bound > 0.2)
			color = PERF_COLOR_RED;
		if (td_total_slots(ctx, cpu) > 0)
			print_metric(ctxp, color, "%8.1f%%", name,
					be_bound * 100.);
		else
			print_metric(ctxp, NULL, NULL, name, 0);
	} else if (evsel->metric_expr) {
		/* evaluate the user/JSON metric expression */
		struct parse_ctx pctx;
		int i;

		expr__ctx_init(&pctx);
		expr__add_id(&pctx, evsel->name, avg);
		/* bind every referenced event's saved average */
		for (i = 0; evsel->metric_events[i]; i++) {
			struct saved_value *v;

			v = saved_value_lookup(evsel->metric_events[i], cpu, ctx, false);
			if (!v)
				break;
			expr__add_id(&pctx, evsel->metric_events[i]->name,
				     avg_stats(&v->stats));
		}
		/* only evaluate if every referenced event had a value */
		if (!evsel->metric_events[i]) {
			const char *p = evsel->metric_expr;

			if (expr__parse(&ratio, &pctx, &p) == 0)
				print_metric(ctxp, NULL, "%8.1f",
					evsel->metric_name ?
					evsel->metric_name :
					out->force_header ?  evsel->name : "",
					ratio);
			else
				print_metric(ctxp, NULL, NULL, "", 0);
		} else
			print_metric(ctxp, NULL, NULL, "", 0);
	} else if (runtime_nsecs_stats[cpu].n != 0) {
		/* fallback: event rate per second of task time */
		char unit = 'M';
		char unit_buf[10];

		total = avg_stats(&runtime_nsecs_stats[cpu]);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}
		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
		print_smi_cost(cpu, evsel, out);
	} else {
		/* nothing sensible to derive — print an empty cell */
		print_metric(ctxp, NULL, NULL, NULL, 0);
	}
}
865