linux/tools/perf/builtin-stat.c
/*
 * builtin-stat.c
 *
 * Builtin stat command: Give a precise performance counter summary
 * overview of any workload, CPU or specific PID.
 *
 * Sample output:

   $ perf stat ./hackbench 10

  Time: 0.118

  Performance counter stats for './hackbench 10':

       1708.761321 task-clock                #   11.037 CPUs utilized
            41,190 context-switches          #    0.024 M/sec
             6,735 CPU-migrations            #    0.004 M/sec
            17,318 page-faults               #    0.010 M/sec
     5,205,202,243 cycles                    #    3.046 GHz
     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
     2,603,501,247 instructions              #    0.50  insns per cycle
                                             #    1.48  stalled cycles per insn
       484,357,498 branches                  #  283.455 M/sec
         6,388,934 branch-misses             #    1.32% of all branches

        0.154822978  seconds time elapsed

 *
 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include "perf.h"
#include "builtin.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/pmu.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/color.h"
#include "util/stat.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
#include "util/thread_map.h"

#include <stdlib.h>
#include <sys/prctl.h>
#include <locale.h>

#define DEFAULT_SEPARATOR       " "
#define CNTR_NOT_SUPPORTED      "<not supported>"
#define CNTR_NOT_COUNTED        "<not counted>"

static void print_stat(int argc, const char **argv);
static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
static void print_counter(struct perf_evsel *counter, char *prefix);
static void print_aggr(char *prefix);

/* Default events used for perf stat -T */
static const char * const transaction_attrs[] = {
        "task-clock",
        "{"
        "instructions,"
        "cycles,"
        "cpu/cycles-t/,"
        "cpu/tx-start/,"
        "cpu/el-start/,"
        "cpu/cycles-ct/"
        "}"
};

/* More limited version when the CPU does not have all events. */
static const char * const transaction_limited_attrs[] = {
        "task-clock",
        "{"
        "instructions,"
        "cycles,"
        "cpu/cycles-t/,"
        "cpu/tx-start/"
        "}"
};

/* Must match transaction_attrs and the beginning of transaction_limited_attrs. */
enum {
        T_TASK_CLOCK,
        T_INSTRUCTIONS,
        T_CYCLES,
        T_CYCLES_IN_TX,
        T_TRANSACTION_START,
        T_ELISION_START,
        T_CYCLES_IN_TX_CP,
};
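
/*
 * These indices are positions in the evsel list that the strings above parse
 * into, so e.g. nth_evsel(T_CYCLES_IN_TX) is expected to return the evsel
 * created for "cpu/cycles-t/" when -T is used.
 */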

static struct perf_evlist       *evsel_list;

static struct target target = {
        .uid    = UINT_MAX,
};

enum aggr_mode {
        AGGR_NONE,
        AGGR_GLOBAL,
        AGGR_SOCKET,
        AGGR_CORE,
};

static int                      run_count                       =  1;
static bool                     no_inherit                      = false;
static bool                     scale                           =  true;
static enum aggr_mode           aggr_mode                       = AGGR_GLOBAL;
static volatile pid_t           child_pid                       = -1;
static bool                     null_run                        =  false;
static int                      detailed_run                    =  0;
static bool                     transaction_run;
static bool                     big_num                         =  true;
static int                      big_num_opt                     =  -1;
static const char               *csv_sep                        = NULL;
static bool                     csv_output                      = false;
static bool                     group                           = false;
static FILE                     *output                         = NULL;
static const char               *pre_cmd                        = NULL;
static const char               *post_cmd                       = NULL;
static bool                     sync_run                        = false;
static unsigned int             interval                        = 0;
static unsigned int             initial_delay                   = 0;
static unsigned int             unit_width                      = 4; /* strlen("unit") */
static bool                     forever                         = false;
static struct timespec          ref_time;
static struct cpu_map           *aggr_map;
static int                      (*aggr_get_id)(struct cpu_map *m, int cpu);

static volatile int done = 0;

struct perf_stat {
        struct stats      res_stats[3];
};

static inline void diff_timespec(struct timespec *r, struct timespec *a,
                                 struct timespec *b)
{
        r->tv_sec = a->tv_sec - b->tv_sec;
        if (a->tv_nsec < b->tv_nsec) {
                r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
                r->tv_sec--;
        } else {
                r->tv_nsec = a->tv_nsec - b->tv_nsec;
        }
}
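
/*
 * Example of the borrow above: a = { 5, 100000000 } and b = { 3, 900000000 }
 * give r = { 1, 200000000 }, i.e. 1.2s, rather than a negative tv_nsec.
 */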

static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
{
        return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
}

static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
{
        return perf_evsel__cpus(evsel)->nr;
}

static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
{
        memset(evsel->priv, 0, sizeof(struct perf_stat));
}

static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
{
        evsel->priv = zalloc(sizeof(struct perf_stat));
        return evsel->priv == NULL ? -ENOMEM : 0;
}

static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
{
        zfree(&evsel->priv);
}

static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
{
        void *addr;
        size_t sz;

        sz = sizeof(*evsel->counts) +
             (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));

        addr = zalloc(sz);
        if (!addr)
                return -ENOMEM;

        evsel->prev_raw_counts = addr;

        return 0;
}

static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
{
        zfree(&evsel->prev_raw_counts);
}

static void perf_evlist__free_stats(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;

        evlist__for_each(evlist, evsel) {
                perf_evsel__free_stat_priv(evsel);
                perf_evsel__free_counts(evsel);
                perf_evsel__free_prev_raw_counts(evsel);
        }
}

static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
{
        struct perf_evsel *evsel;

        evlist__for_each(evlist, evsel) {
                if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
                    perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 ||
                    (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0))
                        goto out_free;
        }

        return 0;

out_free:
        perf_evlist__free_stats(evlist);
        return -1;
}

static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
static struct stats runtime_branches_stats[MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
static struct stats walltime_nsecs_stats;
static struct stats runtime_transaction_stats[MAX_NR_CPUS];
static struct stats runtime_elision_stats[MAX_NR_CPUS];

static void perf_stat__reset_stats(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;

        evlist__for_each(evlist, evsel) {
                perf_evsel__reset_stat_priv(evsel);
                perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
        }

        memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
        memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
        memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
        memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
        memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
        memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
        memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
        memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
        memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
        memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
        memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
        memset(runtime_cycles_in_tx_stats, 0,
                        sizeof(runtime_cycles_in_tx_stats));
        memset(runtime_transaction_stats, 0,
                sizeof(runtime_transaction_stats));
        memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
        memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

static int create_perf_stat_counter(struct perf_evsel *evsel)
{
        struct perf_event_attr *attr = &evsel->attr;

        if (scale)
                attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
                                    PERF_FORMAT_TOTAL_TIME_RUNNING;

        attr->inherit = !no_inherit;

        if (target__has_cpu(&target))
                return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));

        if (!target__has_task(&target) && perf_evsel__is_group_leader(evsel)) {
                attr->disabled = 1;
                if (!initial_delay)
                        attr->enable_on_exec = 1;
        }

        return perf_evsel__open_per_thread(evsel, evsel_list->threads);
}
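
/*
 * Note: for a forked workload the group leaders above are opened disabled.
 * Without a delay, enable_on_exec lets the kernel start counting exactly at
 * exec() of the workload; with a non-zero initial_delay (-D/--delay),
 * handle_initial_delay() instead enables the counters from user space after
 * the requested number of milliseconds.
 */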

/*
 * Does the counter have nsecs as a unit?
 */
static inline int nsec_counter(struct perf_evsel *evsel)
{
        if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
            perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
                return 1;

        return 0;
}

static struct perf_evsel *nth_evsel(int n)
{
        static struct perf_evsel **array;
        static int array_len;
        struct perf_evsel *ev;
        int j;

        /* Assumes this is only called once evsel_list no longer changes. */
        if (!array) {
                evlist__for_each(evsel_list, ev)
                        array_len++;
                array = malloc(array_len * sizeof(void *));
                if (!array)
                        exit(ENOMEM);
                j = 0;
                evlist__for_each(evsel_list, ev)
                        array[j++] = ev;
        }
        if (n < array_len)
                return array[n];
        return NULL;
}
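
/*
 * The array above is built lazily on first use and simply caches the list
 * order, so the transaction events can be looked up by position, e.g.
 * nth_evsel(T_TRANSACTION_START) for the cpu/tx-start/ counter. It is never
 * freed, which is fine for a one-shot command like perf stat.
 */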

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
{
        if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
                update_stats(&runtime_nsecs_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
                update_stats(&runtime_cycles_stats[0], count[0]);
        else if (transaction_run &&
                 perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX)))
                update_stats(&runtime_cycles_in_tx_stats[0], count[0]);
        else if (transaction_run &&
                 perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
                update_stats(&runtime_transaction_stats[0], count[0]);
        else if (transaction_run &&
                 perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
                update_stats(&runtime_elision_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
                update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
                update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
                update_stats(&runtime_branches_stats[0], count[0]);
        else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
                update_stats(&runtime_cacherefs_stats[0], count[0]);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
                update_stats(&runtime_l1_dcache_stats[0], count[0]);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
                update_stats(&runtime_l1_icache_stats[0], count[0]);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
                update_stats(&runtime_ll_cache_stats[0], count[0]);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
                update_stats(&runtime_dtlb_cache_stats[0], count[0]);
        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
                update_stats(&runtime_itlb_cache_stats[0], count[0]);
}
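
/*
 * These "shadow" stats are what the nsec_printout()/abs_printout() helpers
 * further down divide by to derive the annotations in the sample output at
 * the top of this file, e.g. instructions / runtime_cycles_stats gives the
 * "insns per cycle" column and cycles / runtime_nsecs_stats gives GHz.
 */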

/*
 * Read out the results of a single counter:
 * aggregate counts across CPUs in system-wide mode
 */
static int read_counter_aggr(struct perf_evsel *counter)
{
        struct perf_stat *ps = counter->priv;
        u64 *count = counter->counts->aggr.values;
        int i;

        if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
                               thread_map__nr(evsel_list->threads), scale) < 0)
                return -1;

        for (i = 0; i < 3; i++)
                update_stats(&ps->res_stats[i], count[i]);

        if (verbose) {
                fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
                        perf_evsel__name(counter), count[0], count[1], count[2]);
        }

        /*
         * Save the full runtime - to allow normalization during printout:
         */
        update_shadow_stats(counter, count);

        return 0;
}

/*
 * Read out the results of a single counter:
 * do not aggregate counts across CPUs in system-wide mode
 */
static int read_counter(struct perf_evsel *counter)
{
        u64 *count;
        int cpu;

        for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
                if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
                        return -1;

                count = counter->counts->cpu[cpu].values;

                update_shadow_stats(counter, count);
        }

        return 0;
}

static void print_interval(void)
{
        static int num_print_interval;
        struct perf_evsel *counter;
        struct perf_stat *ps;
        struct timespec ts, rs;
        char prefix[64];

        if (aggr_mode == AGGR_GLOBAL) {
                evlist__for_each(evsel_list, counter) {
                        ps = counter->priv;
                        memset(ps->res_stats, 0, sizeof(ps->res_stats));
                        read_counter_aggr(counter);
                }
        } else {
                evlist__for_each(evsel_list, counter) {
                        ps = counter->priv;
                        memset(ps->res_stats, 0, sizeof(ps->res_stats));
                        read_counter(counter);
                }
        }

        clock_gettime(CLOCK_MONOTONIC, &ts);
        diff_timespec(&rs, &ts, &ref_time);
        sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);

        if (num_print_interval == 0 && !csv_output) {
                switch (aggr_mode) {
                case AGGR_SOCKET:
                        fprintf(output, "#           time socket cpus             counts %*s events\n", unit_width, "unit");
                        break;
                case AGGR_CORE:
                        fprintf(output, "#           time core         cpus             counts %*s events\n", unit_width, "unit");
                        break;
                case AGGR_NONE:
                        fprintf(output, "#           time CPU                counts %*s events\n", unit_width, "unit");
                        break;
                case AGGR_GLOBAL:
                default:
                        fprintf(output, "#           time             counts %*s events\n", unit_width, "unit");
                }
        }

        if (++num_print_interval == 25)
                num_print_interval = 0;

        switch (aggr_mode) {
        case AGGR_CORE:
        case AGGR_SOCKET:
                print_aggr(prefix);
                break;
        case AGGR_NONE:
                evlist__for_each(evsel_list, counter)
                        print_counter(counter, prefix);
                break;
        case AGGR_GLOBAL:
        default:
                evlist__for_each(evsel_list, counter)
                        print_counter_aggr(counter, prefix);
        }

        fflush(output);
}

static void handle_initial_delay(void)
{
        struct perf_evsel *counter;

        if (initial_delay) {
                const int ncpus = cpu_map__nr(evsel_list->cpus),
                        nthreads = thread_map__nr(evsel_list->threads);

                usleep(initial_delay * 1000);
                evlist__for_each(evsel_list, counter)
                        perf_evsel__enable(counter, ncpus, nthreads);
        }
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails,
 * since we asked for that by setting its want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
                                        void *ucontext __maybe_unused)
{
        workload_exec_errno = info->si_value.sival_int;
}

static int __run_perf_stat(int argc, const char **argv)
{
        char msg[512];
        unsigned long long t0, t1;
        struct perf_evsel *counter;
        struct timespec ts;
        size_t l;
        int status = 0;
        const bool forks = (argc > 0);

        if (interval) {
                ts.tv_sec  = interval / 1000;
                ts.tv_nsec = (interval % 1000) * 1000000;
        } else {
                ts.tv_sec  = 1;
                ts.tv_nsec = 0;
        }

        if (forks) {
                if (perf_evlist__prepare_workload(evsel_list, &target, argv, false,
                                                  workload_exec_failed_signal) < 0) {
                        perror("failed to prepare workload");
                        return -1;
                }
                child_pid = evsel_list->workload.pid;
        }

        if (group)
                perf_evlist__set_leader(evsel_list);

        evlist__for_each(evsel_list, counter) {
                if (create_perf_stat_counter(counter) < 0) {
                        /*
                         * PPC returns ENXIO for HW counters until 2.6.37
                         * (behavior changed with commit b0a873e).
                         */
                        if (errno == EINVAL || errno == ENOSYS ||
                            errno == ENOENT || errno == EOPNOTSUPP ||
                            errno == ENXIO) {
                                if (verbose)
                                        ui__warning("%s event is not supported by the kernel.\n",
                                                    perf_evsel__name(counter));
                                counter->supported = false;
                                continue;
                        }

                        perf_evsel__open_strerror(counter, &target,
                                                  errno, msg, sizeof(msg));
                        ui__error("%s\n", msg);

                        if (child_pid != -1)
                                kill(child_pid, SIGTERM);

                        return -1;
                }
                counter->supported = true;

                l = strlen(counter->unit);
                if (l > unit_width)
                        unit_width = l;
        }

        if (perf_evlist__apply_filters(evsel_list)) {
                error("failed to set filter with %d (%s)\n", errno,
                        strerror(errno));
                return -1;
        }

        /*
         * Enable counters and exec the command:
         */
        t0 = rdclock();
        clock_gettime(CLOCK_MONOTONIC, &ref_time);

        if (forks) {
                perf_evlist__start_workload(evsel_list);
                handle_initial_delay();

                if (interval) {
                        while (!waitpid(child_pid, &status, WNOHANG)) {
                                nanosleep(&ts, NULL);
                                print_interval();
                        }
                }
                wait(&status);

                if (workload_exec_errno) {
                        const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
                        pr_err("Workload failed: %s\n", emsg);
                        return -1;
                }

                if (WIFSIGNALED(status))
                        psignal(WTERMSIG(status), argv[0]);
        } else {
                handle_initial_delay();
                while (!done) {
                        nanosleep(&ts, NULL);
                        if (interval)
                                print_interval();
                }
        }

        t1 = rdclock();

        update_stats(&walltime_nsecs_stats, t1 - t0);

        if (aggr_mode == AGGR_GLOBAL) {
                evlist__for_each(evsel_list, counter) {
                        read_counter_aggr(counter);
                        perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
                                             thread_map__nr(evsel_list->threads));
                }
        } else {
                evlist__for_each(evsel_list, counter) {
                        read_counter(counter);
                        perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
                }
        }

        return WEXITSTATUS(status);
}

static int run_perf_stat(int argc, const char **argv)
{
        int ret;

        if (pre_cmd) {
                ret = system(pre_cmd);
                if (ret)
                        return ret;
        }

        if (sync_run)
                sync();

        ret = __run_perf_stat(argc, argv);
        if (ret)
                return ret;

        if (post_cmd) {
                ret = system(post_cmd);
                if (ret)
                        return ret;
        }

        return ret;
}
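
/*
 * Example (assuming the usual --pre/--post/--sync options that set pre_cmd,
 * post_cmd and sync_run further down in this file):
 *
 *   perf stat --sync --pre 'make -s clean' --post 'sync' -- make -j8
 *
 * runs pre_cmd first, then sync()s pending writeback out of the way,
 * measures the workload, and finally runs post_cmd.
 */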

static void print_noise_pct(double total, double avg)
{
        double pct = rel_stddev_stats(total, avg);

        if (csv_output)
                fprintf(output, "%s%.2f%%", csv_sep, pct);
        else if (pct)
                fprintf(output, "  ( +-%6.2f%% )", pct);
}
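
/*
 * rel_stddev_stats() returns the standard deviation as a percentage of the
 * mean, so e.g. a walltime average of 1.00s with a stddev of 0.015s across
 * -r runs is printed as "( +-  1.50% )".
 */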

static void print_noise(struct perf_evsel *evsel, double avg)
{
        struct perf_stat *ps;

        if (run_count == 1)
                return;

        ps = evsel->priv;
        print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
}

static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
{
        switch (aggr_mode) {
        case AGGR_CORE:
                fprintf(output, "S%d-C%*d%s%*d%s",
                        cpu_map__id_to_socket(id),
                        csv_output ? 0 : -8,
                        cpu_map__id_to_cpu(id),
                        csv_sep,
                        csv_output ? 0 : 4,
                        nr,
                        csv_sep);
                break;
        case AGGR_SOCKET:
                fprintf(output, "S%*d%s%*d%s",
                        csv_output ? 0 : -5,
                        id,
                        csv_sep,
                        csv_output ? 0 : 4,
                        nr,
                        csv_sep);
                break;
        case AGGR_NONE:
                fprintf(output, "CPU%*d%s",
                        csv_output ? 0 : -4,
                        perf_evsel__cpus(evsel)->map[id], csv_sep);
                break;
        case AGGR_GLOBAL:
        default:
                break;
        }
}
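
/*
 * For example, with core aggregation (--per-core) the prefix printed above
 * looks roughly like "S0-C1            2 ", i.e. socket 0, core 1, counts
 * aggregated over the 2 CPUs (hardware threads) that aggr_get_id() mapped
 * to that core.
 */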

static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{
        double msecs = avg / 1e6;
        const char *fmt_v, *fmt_n;
        char name[25];

        fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
        fmt_n = csv_output ? "%s" : "%-25s";

        aggr_printout(evsel, cpu, nr);

        scnprintf(name, sizeof(name), "%s%s",
                  perf_evsel__name(evsel), csv_output ? "" : " (msec)");

        fprintf(output, fmt_v, msecs, csv_sep);

        if (csv_output)
                fprintf(output, "%s%s", evsel->unit, csv_sep);
        else
                fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep);

        fprintf(output, fmt_n, name);

        if (evsel->cgrp)
                fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

        if (csv_output || interval)
                return;

        if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
                fprintf(output, " # %8.3f CPUs utilized          ",
                        avg / avg_stats(&walltime_nsecs_stats));
        else
                fprintf(output, "                                   ");
}

/* used for get_ratio_color() */
enum grc_type {
        GRC_STALLED_CYCLES_FE,
        GRC_STALLED_CYCLES_BE,
        GRC_CACHE_MISSES,
        GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
        static const double grc_table[GRC_MAX_NR][3] = {
                [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
                [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
                [GRC_CACHE_MISSES]      = { 20.0, 10.0, 5.0 },
        };
        const char *color = PERF_COLOR_NORMAL;

        if (ratio > grc_table[type][0])
                color = PERF_COLOR_RED;
        else if (ratio > grc_table[type][1])
                color = PERF_COLOR_MAGENTA;
        else if (ratio > grc_table[type][2])
                color = PERF_COLOR_YELLOW;

        return color;
}
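
/*
 * Example: a frontend-stall ratio of 74.09% (as in the sample output at the
 * top of this file) exceeds the 50.0 threshold and is therefore printed in
 * red; 35% would be magenta, 15% yellow, and anything at or below 10% stays
 * uncolored.
 */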

static void print_stalled_cycles_frontend(int cpu,
                                          struct perf_evsel *evsel
                                          __maybe_unused, double avg)
{
        double total, ratio = 0.0;
        const char *color;

        total = avg_stats(&runtime_cycles_stats[cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

        fprintf(output, " #  ");
        color_fprintf(output, color, "%6.2f%%", ratio);
        fprintf(output, " frontend cycles idle   ");
}

static void print_stalled_cycles_backend(int cpu,
                                         struct perf_evsel *evsel
                                         __maybe_unused, double avg)
{
        double total, ratio = 0.0;
        const char *color;

        total = avg_stats(&runtime_cycles_stats[cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

        fprintf(output, " #  ");
        color_fprintf(output, color, "%6.2f%%", ratio);
        fprintf(output, " backend  cycles idle   ");
}

static void print_branch_misses(int cpu,
                                struct perf_evsel *evsel __maybe_unused,
                                double avg)
{
        double total, ratio = 0.0;
        const char *color;

        total = avg_stats(&runtime_branches_stats[cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        fprintf(output, " #  ");
        color_fprintf(output, color, "%6.2f%%", ratio);
        fprintf(output, " of all branches        ");
}

static void print_l1_dcache_misses(int cpu,
                                   struct perf_evsel *evsel __maybe_unused,
                                   double avg)
{
        double total, ratio = 0.0;
        const char *color;

        total = avg_stats(&runtime_l1_dcache_stats[cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        fprintf(output, " #  ");
        color_fprintf(output, color, "%6.2f%%", ratio);
        fprintf(output, " of all L1-dcache hits  ");
}

static void print_l1_icache_misses(int cpu,
                                   struct perf_evsel *evsel __maybe_unused,
                                   double avg)
{
        double total, ratio = 0.0;
        const char *color;

        total = avg_stats(&runtime_l1_icache_stats[cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        fprintf(output, " #  ");
        color_fprintf(output, color, "%6.2f%%", ratio);
        fprintf(output, " of all L1-icache hits  ");
}

static void print_dtlb_cache_misses(int cpu,
                                    struct perf_evsel *evsel __maybe_unused,
                                    double avg)
{
        double total, ratio = 0.0;
        const char *color;

        total = avg_stats(&runtime_dtlb_cache_stats[cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        fprintf(output, " #  ");
        color_fprintf(output, color, "%6.2f%%", ratio);
        fprintf(output, " of all dTLB cache hits ");
}

static void print_itlb_cache_misses(int cpu,
                                    struct perf_evsel *evsel __maybe_unused,
                                    double avg)
{
        double total, ratio = 0.0;
        const char *color;

        total = avg_stats(&runtime_itlb_cache_stats[cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        fprintf(output, " #  ");
        color_fprintf(output, color, "%6.2f%%", ratio);
        fprintf(output, " of all iTLB cache hits ");
}

static void print_ll_cache_misses(int cpu,
                                  struct perf_evsel *evsel __maybe_unused,
                                  double avg)
{
        double total, ratio = 0.0;
        const char *color;

        total = avg_stats(&runtime_ll_cache_stats[cpu]);

        if (total)
                ratio = avg / total * 100.0;

        color = get_ratio_color(GRC_CACHE_MISSES, ratio);

        fprintf(output, " #  ");
        color_fprintf(output, color, "%6.2f%%", ratio);
        fprintf(output, " of all LL-cache hits   ");
}

static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{
        double total, ratio = 0.0, total2;
        double sc = evsel->scale;
        const char *fmt;

        if (csv_output) {
                fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s";
        } else {
                if (big_num)
                        fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s";
                else
                        fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s";
        }

        aggr_printout(evsel, cpu, nr);

        if (aggr_mode == AGGR_GLOBAL)
                cpu = 0;

        fprintf(output, fmt, avg, csv_sep);

        if (evsel->unit)
                fprintf(output, "%-*s%s",
                        csv_output ? 0 : unit_width,
                        evsel->unit, csv_sep);

        fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));

        if (evsel->cgrp)
                fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);

        if (csv_output || interval)
                return;

        if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
                total = avg_stats(&runtime_cycles_stats[cpu]);
                if (total) {
                        ratio = avg / total;
                        fprintf(output, " #   %5.2f  insns per cycle        ", ratio);
                }
                total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
                total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));

                if (total && avg) {
                        ratio = total / avg;
                        fprintf(output, "\n");
                        if (aggr_mode == AGGR_NONE)
                                fprintf(output, "        ");
                        fprintf(output, "                                                  #   %5.2f  stalled cycles per insn", ratio);
                }

        } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
                        runtime_branches_stats[cpu].n != 0) {
                print_branch_misses(cpu, evsel, avg);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
                        runtime_l1_dcache_stats[cpu].n != 0) {
                print_l1_dcache_misses(cpu, evsel, avg);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
                        runtime_l1_icache_stats[cpu].n != 0) {
                print_l1_icache_misses(cpu, evsel, avg);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
                        runtime_dtlb_cache_stats[cpu].n != 0) {
                print_dtlb_cache_misses(cpu, evsel, avg);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
                        runtime_itlb_cache_stats[cpu].n != 0) {
                print_itlb_cache_misses(cpu, evsel, avg);
        } else if (
                evsel->attr.type == PERF_TYPE_HW_CACHE &&
                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
                        runtime_ll_cache_stats[cpu].n != 0) {
                print_ll_cache_misses(cpu, evsel, avg);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
                        runtime_cacherefs_stats[cpu].n != 0) {
                total = avg_stats(&runtime_cacherefs_stats[cpu]);

                if (total)
                        ratio = avg * 100 / total;

                fprintf(output, " # %8.3f %% of all cache refs    ", ratio);

        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
                print_stalled_cycles_frontend(cpu, evsel, avg);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
                print_stalled_cycles_backend(cpu, evsel, avg);
        } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
                total = avg_stats(&runtime_nsecs_stats[cpu]);

                if (total) {
                        ratio = avg / total;
                        fprintf(output, " # %8.3f GHz                    ", ratio);
                }
        } else if (transaction_run &&
                   perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) {
                total = avg_stats(&runtime_cycles_stats[cpu]);
                if (total)
                        fprintf(output,
                                " #   %5.2f%% transactional cycles   ",
                                100.0 * (avg / total));
        } else if (transaction_run &&
                   perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) {
                total = avg_stats(&runtime_cycles_stats[cpu]);
                total2 = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
                if (total2 < avg)
                        total2 = avg;
                if (total)
                        fprintf(output,
                                " #   %5.2f%% aborted cycles         ",
                                100.0 * ((total2-avg) / total));
        } else if (transaction_run &&
                   perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) &&
                   avg > 0 &&
                   runtime_cycles_in_tx_stats[cpu].n != 0) {
                total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);

                if (total)
                        ratio = total / avg;

                fprintf(output, " # %8.0f cycles / transaction   ", ratio);
        } else if (transaction_run &&
                   perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) &&
                   avg > 0 &&
                   runtime_cycles_in_tx_stats[cpu].n != 0) {
                total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);

                if (total)
                        ratio = total / avg;

                fprintf(output, " # %8.0f cycles / elision       ", ratio);
        } else if (runtime_nsecs_stats[cpu].n != 0) {
                char unit = 'M';

                total = avg_stats(&runtime_nsecs_stats[cpu]);

                if (total)
                        ratio = 1000.0 * avg / total;
                if (ratio < 0.001) {
                        ratio *= 1000;
                        unit = 'K';
                }

                fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
        } else {
                fprintf(output, "                                   ");
        }
}
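
/*
 * The PERF_TYPE_HW_CACHE comparisons above follow the kernel's config
 * encoding: cache_id | (op << 8) | (result << 16). For example the L1D
 * read-miss case matches config = PERF_COUNT_HW_CACHE_L1D (0) |
 * (PERF_COUNT_HW_CACHE_OP_READ (0) << 8) |
 * (PERF_COUNT_HW_CACHE_RESULT_MISS (1) << 16) = 0x10000, which is the same
 * encoding used by the -d/--detailed attribute tables further down.
 */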

static void print_aggr(char *prefix)
{
        struct perf_evsel *counter;
        int cpu, cpu2, s, s2, id, nr;
        double uval;
        u64 ena, run, val;

        if (!(aggr_map || aggr_get_id))
                return;

        for (s = 0; s < aggr_map->nr; s++) {
                id = aggr_map->map[s];
                evlist__for_each(evsel_list, counter) {
                        val = ena = run = 0;
                        nr = 0;
                        for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
                                cpu2 = perf_evsel__cpus(counter)->map[cpu];
                                s2 = aggr_get_id(evsel_list->cpus, cpu2);
                                if (s2 != id)
                                        continue;
                                val += counter->counts->cpu[cpu].val;
                                ena += counter->counts->cpu[cpu].ena;
                                run += counter->counts->cpu[cpu].run;
                                nr++;
                        }
                        if (prefix)
                                fprintf(output, "%s", prefix);

                        if (run == 0 || ena == 0) {
                                aggr_printout(counter, id, nr);

                                fprintf(output, "%*s%s",
                                        csv_output ? 0 : 18,
                                        counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
                                        csv_sep);

                                fprintf(output, "%-*s%s",
                                        csv_output ? 0 : unit_width,
                                        counter->unit, csv_sep);

                                fprintf(output, "%*s",
                                        csv_output ? 0 : -25,
                                        perf_evsel__name(counter));

                                if (counter->cgrp)
                                        fprintf(output, "%s%s",
                                                csv_sep, counter->cgrp->name);

                                fputc('\n', output);
                                continue;
                        }
                        uval = val * counter->scale;

                        if (nsec_counter(counter))
                                nsec_printout(id, nr, counter, uval);
                        else
                                abs_printout(id, nr, counter, uval);

                        if (!csv_output) {
                                print_noise(counter, 1.0);

                                if (run != ena)
                                        fprintf(output, "  (%.2f%%)",
                                                100.0 * run / ena);
                        }
                        fputc('\n', output);
                }
        }
}

/*
 * Print out the results of a single counter:
 * aggregated counts in system-wide mode
 */
static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
{
        struct perf_stat *ps = counter->priv;
        double avg = avg_stats(&ps->res_stats[0]);
        int scaled = counter->counts->scaled;
        double uval;

        if (prefix)
                fprintf(output, "%s", prefix);

        if (scaled == -1) {
                fprintf(output, "%*s%s",
                        csv_output ? 0 : 18,
                        counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
                        csv_sep);
                fprintf(output, "%-*s%s",
                        csv_output ? 0 : unit_width,
                        counter->unit, csv_sep);
                fprintf(output, "%*s",
                        csv_output ? 0 : -25,
                        perf_evsel__name(counter));

                if (counter->cgrp)
                        fprintf(output, "%s%s", csv_sep, counter->cgrp->name);

                fputc('\n', output);
                return;
        }

        uval = avg * counter->scale;

        if (nsec_counter(counter))
                nsec_printout(-1, 0, counter, uval);
        else
                abs_printout(-1, 0, counter, uval);

        print_noise(counter, avg);

        if (csv_output) {
                fputc('\n', output);
                return;
        }

        if (scaled) {
                double avg_enabled, avg_running;

                avg_enabled = avg_stats(&ps->res_stats[1]);
                avg_running = avg_stats(&ps->res_stats[2]);

                fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled);
        }
        fprintf(output, "\n");
}

/*
 * Print out the results of a single counter:
 * does not aggregate counts across CPUs in system-wide mode
 */
static void print_counter(struct perf_evsel *counter, char *prefix)
{
        u64 ena, run, val;
        double uval;
        int cpu;

        for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
                val = counter->counts->cpu[cpu].val;
                ena = counter->counts->cpu[cpu].ena;
                run = counter->counts->cpu[cpu].run;

                if (prefix)
                        fprintf(output, "%s", prefix);

                if (run == 0 || ena == 0) {
                        fprintf(output, "CPU%*d%s%*s%s",
                                csv_output ? 0 : -4,
                                perf_evsel__cpus(counter)->map[cpu], csv_sep,
                                csv_output ? 0 : 18,
                                counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
                                csv_sep);

                        fprintf(output, "%-*s%s",
                                csv_output ? 0 : unit_width,
                                counter->unit, csv_sep);

                        fprintf(output, "%*s",
                                csv_output ? 0 : -25,
                                perf_evsel__name(counter));

                        if (counter->cgrp)
                                fprintf(output, "%s%s",
                                        csv_sep, counter->cgrp->name);

                        fputc('\n', output);
                        continue;
                }

                uval = val * counter->scale;

                if (nsec_counter(counter))
                        nsec_printout(cpu, 0, counter, uval);
                else
                        abs_printout(cpu, 0, counter, uval);

                if (!csv_output) {
                        print_noise(counter, 1.0);

                        if (run != ena)
                                fprintf(output, "  (%.2f%%)",
                                        100.0 * run / ena);
                }
                fputc('\n', output);
        }
}

static void print_stat(int argc, const char **argv)
{
        struct perf_evsel *counter;
        int i;

        fflush(stdout);

        if (!csv_output) {
                fprintf(output, "\n");
                fprintf(output, " Performance counter stats for ");
                if (target.system_wide)
                        fprintf(output, "\'system wide");
                else if (target.cpu_list)
                        fprintf(output, "\'CPU(s) %s", target.cpu_list);
                else if (!target__has_task(&target)) {
                        fprintf(output, "\'%s", argv[0]);
                        for (i = 1; i < argc; i++)
                                fprintf(output, " %s", argv[i]);
                } else if (target.pid)
                        fprintf(output, "process id \'%s", target.pid);
                else
                        fprintf(output, "thread id \'%s", target.tid);

                fprintf(output, "\'");
                if (run_count > 1)
                        fprintf(output, " (%d runs)", run_count);
                fprintf(output, ":\n\n");
        }

        switch (aggr_mode) {
        case AGGR_CORE:
        case AGGR_SOCKET:
                print_aggr(NULL);
                break;
        case AGGR_GLOBAL:
                evlist__for_each(evsel_list, counter)
                        print_counter_aggr(counter, NULL);
                break;
        case AGGR_NONE:
                evlist__for_each(evsel_list, counter)
                        print_counter(counter, NULL);
                break;
        default:
                break;
        }

        if (!csv_output) {
                if (!null_run)
                        fprintf(output, "\n");
                fprintf(output, " %17.9f seconds time elapsed",
                                avg_stats(&walltime_nsecs_stats)/1e9);
                if (run_count > 1) {
                        fprintf(output, "                                        ");
                        print_noise_pct(stddev_stats(&walltime_nsecs_stats),
                                        avg_stats(&walltime_nsecs_stats));
                }
                fprintf(output, "\n\n");
        }
}

static volatile int signr = -1;

static void skip_signal(int signo)
{
        if ((child_pid == -1) || interval)
                done = 1;

        signr = signo;
        /*
         * Render child_pid harmless so that we won't send SIGTERM to a
         * random process in case of a race condition combined with fast
         * PID recycling.
         */
        child_pid = -1;
}

static void sig_atexit(void)
{
        sigset_t set, oset;

        /*
         * Avoid racing with the SIGCHLD handler in skip_signal(), which
         * modifies child_pid; the goal is to avoid sending SIGTERM to a
         * random process.
         */
        sigemptyset(&set);
        sigaddset(&set, SIGCHLD);
        sigprocmask(SIG_BLOCK, &set, &oset);

        if (child_pid != -1)
                kill(child_pid, SIGTERM);

        sigprocmask(SIG_SETMASK, &oset, NULL);

        if (signr == -1)
                return;

        signal(signr, SIG_DFL);
        kill(getpid(), signr);
}

static int stat__set_big_num(const struct option *opt __maybe_unused,
                             const char *s __maybe_unused, int unset)
{
        big_num_opt = unset ? 0 : 1;
        return 0;
}

static int perf_stat_init_aggr_mode(void)
{
        switch (aggr_mode) {
        case AGGR_SOCKET:
                if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
                        perror("cannot build socket map");
                        return -1;
                }
                aggr_get_id = cpu_map__get_socket;
                break;
        case AGGR_CORE:
                if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
                        perror("cannot build core map");
                        return -1;
                }
                aggr_get_id = cpu_map__get_core;
                break;
        case AGGR_NONE:
        case AGGR_GLOBAL:
        default:
                break;
        }
        return 0;
}
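
/*
 * After this, aggr_map holds one entry per socket (or per core) and
 * aggr_get_id() maps a CPU in evsel_list->cpus to the matching id, which is
 * what print_aggr() uses to decide which per-CPU counts to sum together for
 * --per-socket / --per-core output.
 */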

static int setup_events(const char * const *attrs, unsigned len)
{
        unsigned i;

        for (i = 0; i < len; i++) {
                if (parse_events(evsel_list, attrs[i]))
                        return -1;
        }
        return 0;
}

/*
 * Add default attributes, if there were no attributes specified or
 * if -d/--detailed, -d -d or -d -d -d is used:
 */
static int add_default_attributes(void)
{
        struct perf_event_attr default_attrs[] = {

  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK              },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES        },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS          },
  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS             },

  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES              },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND  },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS            },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS     },
  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES           },

};
1463
1464/*
1465 * Detailed stats (-d), covering the L1 and last level data caches:
1466 */
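/*
 * PERF_TYPE_HW_CACHE events encode the cache id in bits 0-7, the
 * operation in bits 8-15 and the result in bits 16-23 of attr.config
 * (see the hw_cache enums in include/uapi/linux/perf_event.h):
 */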
1467        struct perf_event_attr detailed_attrs[] = {
1468
1469  { .type = PERF_TYPE_HW_CACHE,
1470    .config =
1471         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1472        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1473        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1474
1475  { .type = PERF_TYPE_HW_CACHE,
1476    .config =
1477         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1478        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1479        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1480
1481  { .type = PERF_TYPE_HW_CACHE,
1482    .config =
1483         PERF_COUNT_HW_CACHE_LL                 <<  0  |
1484        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1485        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1486
1487  { .type = PERF_TYPE_HW_CACHE,
1488    .config =
1489         PERF_COUNT_HW_CACHE_LL                 <<  0  |
1490        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1491        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1492};
1493
1494/*
1495 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
1496 */
1497        struct perf_event_attr very_detailed_attrs[] = {
1498
1499  { .type = PERF_TYPE_HW_CACHE,
1500    .config =
1501         PERF_COUNT_HW_CACHE_L1I                <<  0  |
1502        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1503        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1504
1505  { .type = PERF_TYPE_HW_CACHE,
1506    .config =
1507         PERF_COUNT_HW_CACHE_L1I                <<  0  |
1508        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1509        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1510
1511  { .type = PERF_TYPE_HW_CACHE,
1512    .config =
1513         PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1514        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1515        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1516
1517  { .type = PERF_TYPE_HW_CACHE,
1518    .config =
1519         PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1520        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1521        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1522
1523  { .type = PERF_TYPE_HW_CACHE,
1524    .config =
1525         PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1526        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1527        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1528
1529  { .type = PERF_TYPE_HW_CACHE,
1530    .config =
1531         PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1532        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1533        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1534
1535};
1536
1537/*
1538 * Very, very detailed stats (-d -d -d), adding prefetch events:
1539 */
1540        struct perf_event_attr very_very_detailed_attrs[] = {
1541
1542  { .type = PERF_TYPE_HW_CACHE,
1543    .config =
1544         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1545        (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1546        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1547
1548  { .type = PERF_TYPE_HW_CACHE,
1549    .config =
1550         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1551        (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1552        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1553};
1554
1555        /* Set attrs if no event is selected and !null_run: */
1556        if (null_run)
1557                return 0;
1558
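        /*
         * -T: use the full transaction event set only when the PMU
         * exposes the cycles-ct and el-start events, otherwise fall
         * back to the limited set:
         */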
1559        if (transaction_run) {
1560                int err;
1561                if (pmu_have_event("cpu", "cycles-ct") &&
1562                    pmu_have_event("cpu", "el-start"))
1563                        err = setup_events(transaction_attrs,
1564                                        ARRAY_SIZE(transaction_attrs));
1565                else
1566                        err = setup_events(transaction_limited_attrs,
1567                                 ARRAY_SIZE(transaction_limited_attrs));
1568                if (err < 0) {
1569                        fprintf(stderr, "Cannot set up transaction events\n");
1570                        return -1;
1571                }
1572                return 0;
1573        }
1574
1575        if (!evsel_list->nr_entries) {
1576                if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
1577                        return -1;
1578        }
1579
1580        /* Detailed events get appended to the event list: */
1581
1582        if (detailed_run < 1)
1583                return 0;
1584
1585        /* Append detailed run extra attributes: */
1586        if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1587                return -1;
1588
1589        if (detailed_run < 2)
1590                return 0;
1591
1592        /* Append very detailed run extra attributes: */
1593        if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1594                return -1;
1595
1596        if (detailed_run < 3)
1597                return 0;
1598
1599        /* Append very, very detailed run extra attributes: */
1600        return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1601}
1602
1603int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1604{
1605        bool append_file = false;
1606        int output_fd = 0;
1607        const char *output_name = NULL;
1608        const struct option options[] = {
1609        OPT_BOOLEAN('T', "transaction", &transaction_run,
1610                    "hardware transaction statistics"),
1611        OPT_CALLBACK('e', "event", &evsel_list, "event",
1612                     "event selector. use 'perf list' to list available events",
1613                     parse_events_option),
1614        OPT_CALLBACK(0, "filter", &evsel_list, "filter",
1615                     "event filter", parse_filter),
1616        OPT_BOOLEAN('i', "no-inherit", &no_inherit,
1617                    "child tasks do not inherit counters"),
1618        OPT_STRING('p', "pid", &target.pid, "pid",
1619                   "stat events on existing process id"),
1620        OPT_STRING('t', "tid", &target.tid, "tid",
1621                   "stat events on existing thread id"),
1622        OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
1623                    "system-wide collection from all CPUs"),
1624        OPT_BOOLEAN('g', "group", &group,
1625                    "put the counters into a counter group"),
1626        OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
1627        OPT_INCR('v', "verbose", &verbose,
1628                    "be more verbose (show counter open errors, etc)"),
1629        OPT_INTEGER('r', "repeat", &run_count,
1630                    "repeat command and print average + stddev (max: 100, forever: 0)"),
1631        OPT_BOOLEAN('n', "null", &null_run,
1632                    "null run - don't start any counters"),
1633        OPT_INCR('d', "detailed", &detailed_run,
1634                    "detailed run - start a lot of events"),
1635        OPT_BOOLEAN('S', "sync", &sync_run,
1636                    "call sync() before starting a run"),
1637        OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
1638                           "print large numbers with thousands' separators",
1639                           stat__set_big_num),
1640        OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
1641                    "list of CPUs to monitor in system-wide mode"),
1642        OPT_SET_UINT('A', "no-aggr", &aggr_mode,
1643                    "disable CPU count aggregation", AGGR_NONE),
1644        OPT_STRING('x', "field-separator", &csv_sep, "separator",
1645                   "print counts with custom separator"),
1646        OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
1647                     "monitor event in cgroup name only", parse_cgroups),
1648        OPT_STRING('o', "output", &output_name, "file", "output file name"),
1649        OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
1650        OPT_INTEGER(0, "log-fd", &output_fd,
1651                    "log output to fd, instead of stderr"),
1652        OPT_STRING(0, "pre", &pre_cmd, "command",
1653                        "command to run prior to the measured command"),
1654        OPT_STRING(0, "post", &post_cmd, "command",
1655                        "command to run after the measured command"),
1656        OPT_UINTEGER('I', "interval-print", &interval,
1657                    "print counts at a regular interval in ms (>= 100)"),
1658        OPT_SET_UINT(0, "per-socket", &aggr_mode,
1659                     "aggregate counts per processor socket", AGGR_SOCKET),
1660        OPT_SET_UINT(0, "per-core", &aggr_mode,
1661                     "aggregate counts per physical processor core", AGGR_CORE),
1662        OPT_UINTEGER('D', "delay", &initial_delay,
1663                     "ms to wait before starting measurement after program start"),
1664        OPT_END()
1665        };
1666        const char * const stat_usage[] = {
1667                "perf stat [<options>] [<command>]",
1668                NULL
1669        };
1670        int status = -EINVAL, run_idx;
1671        const char *mode;
1672
1673        setlocale(LC_ALL, "");
1674
1675        evsel_list = perf_evlist__new();
1676        if (evsel_list == NULL)
1677                return -ENOMEM;
1678
1679        argc = parse_options(argc, argv, options, stat_usage,
1680                PARSE_OPT_STOP_AT_NON_OPTION);
1681
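        /* default to stderr; "--output -" keeps stderr, any other file name is opened below */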
1682        output = stderr;
1683        if (output_name && strcmp(output_name, "-"))
1684                output = NULL;
1685
1686        if (output_name && output_fd) {
1687                fprintf(stderr, "cannot use both --output and --log-fd\n");
1688                parse_options_usage(stat_usage, options, "o", 1);
1689                parse_options_usage(NULL, options, "log-fd", 0);
1690                goto out;
1691        }
1692
1693        if (output_fd < 0) {
1694                fprintf(stderr, "argument to --log-fd must be > 0\n");
1695                parse_options_usage(stat_usage, options, "log-fd", 0);
1696                goto out;
1697        }
1698
1699        if (!output) {
1700                struct timespec tm;
1701                mode = append_file ? "a" : "w";
1702
1703                output = fopen(output_name, mode);
1704                if (!output) {
1705                        perror("failed to create output file");
1706                        return -1;
1707                }
1708                clock_gettime(CLOCK_REALTIME, &tm);
1709                fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
1710        } else if (output_fd > 0) {
1711                mode = append_file ? "a" : "w";
1712                output = fdopen(output_fd, mode);
1713                if (!output) {
1714                        perror("Failed opening logfd");
1715                        return -errno;
1716                }
1717        }
1718
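        /* any -x separator enables CSV output; a literal "\t" argument means a tab */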
1719        if (csv_sep) {
1720                csv_output = true;
1721                if (!strcmp(csv_sep, "\\t"))
1722                        csv_sep = "\t";
1723        } else
1724                csv_sep = DEFAULT_SEPARATOR;
1725
1726        /*
1727         * let the spreadsheet do the pretty-printing
1728         */
1729        if (csv_output) {
1730                /* User explicitly passed -B? */
1731                if (big_num_opt == 1) {
1732                        fprintf(stderr, "-B option not supported with -x\n");
1733                        parse_options_usage(stat_usage, options, "B", 1);
1734                        parse_options_usage(NULL, options, "x", 1);
1735                        goto out;
1736                } else /* Nope, so disable big number formatting */
1737                        big_num = false;
1738        } else if (big_num_opt == 0) /* User passed --no-big-num */
1739                big_num = false;
1740
1741        if (!argc && target__none(&target))
1742                usage_with_options(stat_usage, options);
1743
1744        if (run_count < 0) {
1745                pr_err("Run count must not be negative (use 0 to repeat forever)\n");
1746                parse_options_usage(stat_usage, options, "r", 1);
1747                goto out;
1748        } else if (run_count == 0) {
1749                forever = true;
1750                run_count = 1;
1751        }
1752
1753        /* no_aggr, cgroup are for system-wide only */
1754        if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) &&
1755            !target__has_cpu(&target)) {
1756                fprintf(stderr, "both cgroup and no-aggregation "
1757                        "modes are only available in system-wide mode\n");
1758
1759                parse_options_usage(stat_usage, options, "G", 1);
1760                parse_options_usage(NULL, options, "A", 1);
1761                parse_options_usage(NULL, options, "a", 1);
1762                goto out;
1763        }
1764
1765        if (add_default_attributes())
1766                goto out;
1767
1768        target__validate(&target);
1769
1770        if (perf_evlist__create_maps(evsel_list, &target) < 0) {
1771                if (target__has_task(&target)) {
1772                        pr_err("Problems finding threads to monitor\n");
1773                        parse_options_usage(stat_usage, options, "p", 1);
1774                        parse_options_usage(NULL, options, "t", 1);
1775                } else if (target__has_cpu(&target)) {
1776                        perror("failed to parse CPUs map");
1777                        parse_options_usage(stat_usage, options, "C", 1);
1778                        parse_options_usage(NULL, options, "a", 1);
1779                }
1780                goto out;
1781        }
1782        if (interval && interval < 100) {
1783                pr_err("print interval must be >= 100ms\n");
1784                parse_options_usage(stat_usage, options, "I", 1);
1785                goto out;
1786        }
1787
1788        if (perf_evlist__alloc_stats(evsel_list, interval))
1789                goto out;
1790
1791        if (perf_stat_init_aggr_mode())
1792                goto out;
1793
1794        /*
1795         * We don't want to block the signals - that would cause
1796         * child tasks to inherit the blocked mask and Ctrl-C would
1797         * not work. What we want is for Ctrl-C to work in the
1798         * exec()-ed task, while being ignored by perf stat itself:
1799         */
1800        atexit(sig_atexit);
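        /*
         * in forever mode (-r 0) leave SIGINT at its default disposition
         * so that Ctrl-C actually terminates the endless run loop
         */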
1801        if (!forever)
1802                signal(SIGINT,  skip_signal);
1803        signal(SIGCHLD, skip_signal);
1804        signal(SIGALRM, skip_signal);
1805        signal(SIGABRT, skip_signal);
1806
1807        status = 0;
1808        for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
1809                if (run_count != 1 && verbose)
1810                        fprintf(output, "[ perf stat: executing run #%d ... ]\n",
1811                                run_idx + 1);
1812
1813                status = run_perf_stat(argc, argv);
1814                if (forever && status != -1) {
1815                        print_stat(argc, argv);
1816                        perf_stat__reset_stats(evsel_list);
1817                }
1818        }
1819
1820        if (!forever && status != -1 && !interval)
1821                print_stat(argc, argv);
1822
1823        perf_evlist__free_stats(evsel_list);
1824out:
1825        perf_evlist__delete(evsel_list);
1826        return status;
1827}
1828