linux/tools/perf/builtin-stat.c
<<
>>
Prefs
   1/*
   2 * builtin-stat.c
   3 *
   4 * Builtin stat command: Give a precise performance counters summary
   5 * overview about any workload, CPU or specific PID.
   6 *
   7 * Sample output:
   8
   9   $ perf stat ./hackbench 10
  10
  11  Time: 0.118
  12
  13  Performance counter stats for './hackbench 10':
  14
  15       1708.761321 task-clock                #   11.037 CPUs utilized
  16            41,190 context-switches          #    0.024 M/sec
  17             6,735 CPU-migrations            #    0.004 M/sec
  18            17,318 page-faults               #    0.010 M/sec
  19     5,205,202,243 cycles                    #    3.046 GHz
  20     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
  21     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
  22     2,603,501,247 instructions              #    0.50  insns per cycle
  23                                             #    1.48  stalled cycles per insn
  24       484,357,498 branches                  #  283.455 M/sec
  25         6,388,934 branch-misses             #    1.32% of all branches
  26
  27        0.154822978  seconds time elapsed
  28
  29 *
  30 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
  31 *
  32 * Improvements and fixes by:
  33 *
  34 *   Arjan van de Ven <arjan@linux.intel.com>
  35 *   Yanmin Zhang <yanmin.zhang@intel.com>
  36 *   Wu Fengguang <fengguang.wu@intel.com>
  37 *   Mike Galbraith <efault@gmx.de>
  38 *   Paul Mackerras <paulus@samba.org>
  39 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
  40 *
  41 * Released under the GPL v2. (and only v2, not any later version)
  42 */
  43
  44#include "perf.h"
  45#include "builtin.h"
  46#include "util/util.h"
  47#include "util/parse-options.h"
  48#include "util/parse-events.h"
  49#include "util/event.h"
  50#include "util/evlist.h"
  51#include "util/evsel.h"
  52#include "util/debug.h"
  53#include "util/color.h"
  54#include "util/stat.h"
  55#include "util/header.h"
  56#include "util/cpumap.h"
  57#include "util/thread.h"
  58#include "util/thread_map.h"
  59
  60#include <stdlib.h>
  61#include <sys/prctl.h>
  62#include <locale.h>
  63
  64#define DEFAULT_SEPARATOR       " "
  65#define CNTR_NOT_SUPPORTED      "<not supported>"
  66#define CNTR_NOT_COUNTED        "<not counted>"
  67
  68static void print_stat(int argc, const char **argv);
  69static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
  70static void print_counter(struct perf_evsel *counter, char *prefix);
  71static void print_aggr(char *prefix);
  72
  73static struct perf_evlist       *evsel_list;
  74
  75static struct perf_target       target = {
  76        .uid    = UINT_MAX,
  77};
  78
  79enum aggr_mode {
  80        AGGR_NONE,
  81        AGGR_GLOBAL,
  82        AGGR_SOCKET,
  83        AGGR_CORE,
  84};
  85
  86static int                      run_count                       =  1;
  87static bool                     no_inherit                      = false;
  88static bool                     scale                           =  true;
  89static enum aggr_mode           aggr_mode                       = AGGR_GLOBAL;
  90static volatile pid_t           child_pid                       = -1;
  91static bool                     null_run                        =  false;
  92static int                      detailed_run                    =  0;
  93static bool                     big_num                         =  true;
  94static int                      big_num_opt                     =  -1;
  95static const char               *csv_sep                        = NULL;
  96static bool                     csv_output                      = false;
  97static bool                     group                           = false;
  98static FILE                     *output                         = NULL;
  99static const char               *pre_cmd                        = NULL;
 100static const char               *post_cmd                       = NULL;
 101static bool                     sync_run                        = false;
 102static unsigned int             interval                        = 0;
 103static unsigned int             initial_delay                   = 0;
 104static bool                     forever                         = false;
 105static struct timespec          ref_time;
 106static struct cpu_map           *aggr_map;
 107static int                      (*aggr_get_id)(struct cpu_map *m, int cpu);
 108
 109static volatile int done = 0;
 110
 111struct perf_stat {
 112        struct stats      res_stats[3];
 113};
 114
 115static inline void diff_timespec(struct timespec *r, struct timespec *a,
 116                                 struct timespec *b)
 117{
 118        r->tv_sec = a->tv_sec - b->tv_sec;
 119        if (a->tv_nsec < b->tv_nsec) {
 120                r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
 121                r->tv_sec--;
 122        } else {
 123                r->tv_nsec = a->tv_nsec - b->tv_nsec ;
 124        }
 125}
 126
 127static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
 128{
 129        return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
 130}
 131
 132static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
 133{
 134        return perf_evsel__cpus(evsel)->nr;
 135}
 136
 137static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
 138{
 139        memset(evsel->priv, 0, sizeof(struct perf_stat));
 140}
 141
 142static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
 143{
 144        evsel->priv = zalloc(sizeof(struct perf_stat));
 145        return evsel->priv == NULL ? -ENOMEM : 0;
 146}
 147
 148static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
 149{
 150        free(evsel->priv);
 151        evsel->priv = NULL;
 152}
 153
 154static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
 155{
 156        void *addr;
 157        size_t sz;
 158
 159        sz = sizeof(*evsel->counts) +
 160             (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
 161
 162        addr = zalloc(sz);
 163        if (!addr)
 164                return -ENOMEM;
 165
 166        evsel->prev_raw_counts =  addr;
 167
 168        return 0;
 169}
 170
 171static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
 172{
 173        free(evsel->prev_raw_counts);
 174        evsel->prev_raw_counts = NULL;
 175}
 176
 177static void perf_evlist__free_stats(struct perf_evlist *evlist)
 178{
 179        struct perf_evsel *evsel;
 180
 181        list_for_each_entry(evsel, &evlist->entries, node) {
 182                perf_evsel__free_stat_priv(evsel);
 183                perf_evsel__free_counts(evsel);
 184                perf_evsel__free_prev_raw_counts(evsel);
 185        }
 186}
 187
 188static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
 189{
 190        struct perf_evsel *evsel;
 191
 192        list_for_each_entry(evsel, &evlist->entries, node) {
 193                if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
 194                    perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 ||
 195                    (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0))
 196                        goto out_free;
 197        }
 198
 199        return 0;
 200
 201out_free:
 202        perf_evlist__free_stats(evlist);
 203        return -1;
 204}
 205
 206static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
 207static struct stats runtime_cycles_stats[MAX_NR_CPUS];
 208static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
 209static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
 210static struct stats runtime_branches_stats[MAX_NR_CPUS];
 211static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
 212static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
 213static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
 214static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
 215static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
 216static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
 217static struct stats walltime_nsecs_stats;
 218
 219static void perf_stat__reset_stats(struct perf_evlist *evlist)
 220{
 221        struct perf_evsel *evsel;
 222
 223        list_for_each_entry(evsel, &evlist->entries, node) {
 224                perf_evsel__reset_stat_priv(evsel);
 225                perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
 226        }
 227
 228        memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
 229        memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
 230        memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
 231        memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
 232        memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
 233        memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
 234        memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
 235        memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
 236        memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
 237        memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
 238        memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
 239        memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
 240}
 241
 242static int create_perf_stat_counter(struct perf_evsel *evsel)
 243{
 244        struct perf_event_attr *attr = &evsel->attr;
 245
 246        if (scale)
 247                attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
 248                                    PERF_FORMAT_TOTAL_TIME_RUNNING;
 249
 250        attr->inherit = !no_inherit;
 251
 252        if (perf_target__has_cpu(&target))
 253                return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
 254
 255        if (!perf_target__has_task(&target) &&
 256            perf_evsel__is_group_leader(evsel)) {
 257                attr->disabled = 1;
 258                if (!initial_delay)
 259                        attr->enable_on_exec = 1;
 260        }
 261
 262        return perf_evsel__open_per_thread(evsel, evsel_list->threads);
 263}
 264
 265/*
 266 * Does the counter have nsecs as a unit?
 267 */
 268static inline int nsec_counter(struct perf_evsel *evsel)
 269{
 270        if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
 271            perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
 272                return 1;
 273
 274        return 0;
 275}
 276
 277/*
 278 * Update various tracking values we maintain to print
 279 * more semantic information such as miss/hit ratios,
 280 * instruction rates, etc:
 281 */
 282static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
 283{
 284        if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
 285                update_stats(&runtime_nsecs_stats[0], count[0]);
 286        else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
 287                update_stats(&runtime_cycles_stats[0], count[0]);
 288        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
 289                update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
 290        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
 291                update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
 292        else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
 293                update_stats(&runtime_branches_stats[0], count[0]);
 294        else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
 295                update_stats(&runtime_cacherefs_stats[0], count[0]);
 296        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
 297                update_stats(&runtime_l1_dcache_stats[0], count[0]);
 298        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
 299                update_stats(&runtime_l1_icache_stats[0], count[0]);
 300        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
 301                update_stats(&runtime_ll_cache_stats[0], count[0]);
 302        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
 303                update_stats(&runtime_dtlb_cache_stats[0], count[0]);
 304        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
 305                update_stats(&runtime_itlb_cache_stats[0], count[0]);
 306}
 307
 308/*
 309 * Read out the results of a single counter:
 310 * aggregate counts across CPUs in system-wide mode
 311 */
 312static int read_counter_aggr(struct perf_evsel *counter)
 313{
 314        struct perf_stat *ps = counter->priv;
 315        u64 *count = counter->counts->aggr.values;
 316        int i;
 317
 318        if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
 319                               thread_map__nr(evsel_list->threads), scale) < 0)
 320                return -1;
 321
 322        for (i = 0; i < 3; i++)
 323                update_stats(&ps->res_stats[i], count[i]);
 324
 325        if (verbose) {
 326                fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
 327                        perf_evsel__name(counter), count[0], count[1], count[2]);
 328        }
 329
 330        /*
 331         * Save the full runtime - to allow normalization during printout:
 332         */
 333        update_shadow_stats(counter, count);
 334
 335        return 0;
 336}
 337
 338/*
 339 * Read out the results of a single counter:
 340 * do not aggregate counts across CPUs in system-wide mode
 341 */
 342static int read_counter(struct perf_evsel *counter)
 343{
 344        u64 *count;
 345        int cpu;
 346
 347        for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
 348                if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
 349                        return -1;
 350
 351                count = counter->counts->cpu[cpu].values;
 352
 353                update_shadow_stats(counter, count);
 354        }
 355
 356        return 0;
 357}
 358
 359static void print_interval(void)
 360{
 361        static int num_print_interval;
 362        struct perf_evsel *counter;
 363        struct perf_stat *ps;
 364        struct timespec ts, rs;
 365        char prefix[64];
 366
 367        if (aggr_mode == AGGR_GLOBAL) {
 368                list_for_each_entry(counter, &evsel_list->entries, node) {
 369                        ps = counter->priv;
 370                        memset(ps->res_stats, 0, sizeof(ps->res_stats));
 371                        read_counter_aggr(counter);
 372                }
 373        } else  {
 374                list_for_each_entry(counter, &evsel_list->entries, node) {
 375                        ps = counter->priv;
 376                        memset(ps->res_stats, 0, sizeof(ps->res_stats));
 377                        read_counter(counter);
 378                }
 379        }
 380
 381        clock_gettime(CLOCK_MONOTONIC, &ts);
 382        diff_timespec(&rs, &ts, &ref_time);
 383        sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
 384
 385        if (num_print_interval == 0 && !csv_output) {
 386                switch (aggr_mode) {
 387                case AGGR_SOCKET:
 388                        fprintf(output, "#           time socket cpus             counts events\n");
 389                        break;
 390                case AGGR_CORE:
 391                        fprintf(output, "#           time core         cpus             counts events\n");
 392                        break;
 393                case AGGR_NONE:
 394                        fprintf(output, "#           time CPU                 counts events\n");
 395                        break;
 396                case AGGR_GLOBAL:
 397                default:
 398                        fprintf(output, "#           time             counts events\n");
 399                }
 400        }
 401
 402        if (++num_print_interval == 25)
 403                num_print_interval = 0;
 404
 405        switch (aggr_mode) {
 406        case AGGR_CORE:
 407        case AGGR_SOCKET:
 408                print_aggr(prefix);
 409                break;
 410        case AGGR_NONE:
 411                list_for_each_entry(counter, &evsel_list->entries, node)
 412                        print_counter(counter, prefix);
 413                break;
 414        case AGGR_GLOBAL:
 415        default:
 416                list_for_each_entry(counter, &evsel_list->entries, node)
 417                        print_counter_aggr(counter, prefix);
 418        }
 419
 420        fflush(output);
 421}
 422
 423static void handle_initial_delay(void)
 424{
 425        struct perf_evsel *counter;
 426
 427        if (initial_delay) {
 428                const int ncpus = cpu_map__nr(evsel_list->cpus),
 429                        nthreads = thread_map__nr(evsel_list->threads);
 430
 431                usleep(initial_delay * 1000);
 432                list_for_each_entry(counter, &evsel_list->entries, node)
 433                        perf_evsel__enable(counter, ncpus, nthreads);
 434        }
 435}
 436
 437static int __run_perf_stat(int argc, const char **argv)
 438{
 439        char msg[512];
 440        unsigned long long t0, t1;
 441        struct perf_evsel *counter;
 442        struct timespec ts;
 443        int status = 0;
 444        const bool forks = (argc > 0);
 445
 446        if (interval) {
 447                ts.tv_sec  = interval / 1000;
 448                ts.tv_nsec = (interval % 1000) * 1000000;
 449        } else {
 450                ts.tv_sec  = 1;
 451                ts.tv_nsec = 0;
 452        }
 453
 454        if (forks) {
 455                if (perf_evlist__prepare_workload(evsel_list, &target, argv,
 456                                                  false, false) < 0) {
 457                        perror("failed to prepare workload");
 458                        return -1;
 459                }
 460                child_pid = evsel_list->workload.pid;
 461        }
 462
 463        if (group)
 464                perf_evlist__set_leader(evsel_list);
 465
 466        list_for_each_entry(counter, &evsel_list->entries, node) {
 467                if (create_perf_stat_counter(counter) < 0) {
 468                        /*
 469                         * PPC returns ENXIO for HW counters until 2.6.37
 470                         * (behavior changed with commit b0a873e).
 471                         */
 472                        if (errno == EINVAL || errno == ENOSYS ||
 473                            errno == ENOENT || errno == EOPNOTSUPP ||
 474                            errno == ENXIO) {
 475                                if (verbose)
 476                                        ui__warning("%s event is not supported by the kernel.\n",
 477                                                    perf_evsel__name(counter));
 478                                counter->supported = false;
 479                                continue;
 480                        }
 481
 482                        perf_evsel__open_strerror(counter, &target,
 483                                                  errno, msg, sizeof(msg));
 484                        ui__error("%s\n", msg);
 485
 486                        if (child_pid != -1)
 487                                kill(child_pid, SIGTERM);
 488
 489                        return -1;
 490                }
 491                counter->supported = true;
 492        }
 493
 494        if (perf_evlist__apply_filters(evsel_list)) {
 495                error("failed to set filter with %d (%s)\n", errno,
 496                        strerror(errno));
 497                return -1;
 498        }
 499
 500        /*
 501         * Enable counters and exec the command:
 502         */
 503        t0 = rdclock();
 504        clock_gettime(CLOCK_MONOTONIC, &ref_time);
 505
 506        if (forks) {
 507                perf_evlist__start_workload(evsel_list);
 508                handle_initial_delay();
 509
 510                if (interval) {
 511                        while (!waitpid(child_pid, &status, WNOHANG)) {
 512                                nanosleep(&ts, NULL);
 513                                print_interval();
 514                        }
 515                }
 516                wait(&status);
 517                if (WIFSIGNALED(status))
 518                        psignal(WTERMSIG(status), argv[0]);
 519        } else {
 520                handle_initial_delay();
 521                while (!done) {
 522                        nanosleep(&ts, NULL);
 523                        if (interval)
 524                                print_interval();
 525                }
 526        }
 527
 528        t1 = rdclock();
 529
 530        update_stats(&walltime_nsecs_stats, t1 - t0);
 531
 532        if (aggr_mode == AGGR_GLOBAL) {
 533                list_for_each_entry(counter, &evsel_list->entries, node) {
 534                        read_counter_aggr(counter);
 535                        perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
 536                                             thread_map__nr(evsel_list->threads));
 537                }
 538        } else {
 539                list_for_each_entry(counter, &evsel_list->entries, node) {
 540                        read_counter(counter);
 541                        perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
 542                }
 543        }
 544
 545        return WEXITSTATUS(status);
 546}
 547
 548static int run_perf_stat(int argc __maybe_unused, const char **argv)
 549{
 550        int ret;
 551
 552        if (pre_cmd) {
 553                ret = system(pre_cmd);
 554                if (ret)
 555                        return ret;
 556        }
 557
 558        if (sync_run)
 559                sync();
 560
 561        ret = __run_perf_stat(argc, argv);
 562        if (ret)
 563                return ret;
 564
 565        if (post_cmd) {
 566                ret = system(post_cmd);
 567                if (ret)
 568                        return ret;
 569        }
 570
 571        return ret;
 572}
 573
 574static void print_noise_pct(double total, double avg)
 575{
 576        double pct = rel_stddev_stats(total, avg);
 577
 578        if (csv_output)
 579                fprintf(output, "%s%.2f%%", csv_sep, pct);
 580        else if (pct)
 581                fprintf(output, "  ( +-%6.2f%% )", pct);
 582}
 583
 584static void print_noise(struct perf_evsel *evsel, double avg)
 585{
 586        struct perf_stat *ps;
 587
 588        if (run_count == 1)
 589                return;
 590
 591        ps = evsel->priv;
 592        print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
 593}
 594
 595static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
 596{
 597        switch (aggr_mode) {
 598        case AGGR_CORE:
 599                fprintf(output, "S%d-C%*d%s%*d%s",
 600                        cpu_map__id_to_socket(id),
 601                        csv_output ? 0 : -8,
 602                        cpu_map__id_to_cpu(id),
 603                        csv_sep,
 604                        csv_output ? 0 : 4,
 605                        nr,
 606                        csv_sep);
 607                break;
 608        case AGGR_SOCKET:
 609                fprintf(output, "S%*d%s%*d%s",
 610                        csv_output ? 0 : -5,
 611                        id,
 612                        csv_sep,
 613                        csv_output ? 0 : 4,
 614                        nr,
 615                        csv_sep);
 616                        break;
 617        case AGGR_NONE:
 618                fprintf(output, "CPU%*d%s",
 619                        csv_output ? 0 : -4,
 620                        perf_evsel__cpus(evsel)->map[id], csv_sep);
 621                break;
 622        case AGGR_GLOBAL:
 623        default:
 624                break;
 625        }
 626}
 627
 628static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 629{
 630        double msecs = avg / 1e6;
 631        const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s";
 632
 633        aggr_printout(evsel, cpu, nr);
 634
 635        fprintf(output, fmt, msecs, csv_sep, perf_evsel__name(evsel));
 636
 637        if (evsel->cgrp)
 638                fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 639
 640        if (csv_output || interval)
 641                return;
 642
 643        if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
 644                fprintf(output, " # %8.3f CPUs utilized          ",
 645                        avg / avg_stats(&walltime_nsecs_stats));
 646        else
 647                fprintf(output, "                                   ");
 648}
 649
 650/* used for get_ratio_color() */
 651enum grc_type {
 652        GRC_STALLED_CYCLES_FE,
 653        GRC_STALLED_CYCLES_BE,
 654        GRC_CACHE_MISSES,
 655        GRC_MAX_NR
 656};
 657
 658static const char *get_ratio_color(enum grc_type type, double ratio)
 659{
 660        static const double grc_table[GRC_MAX_NR][3] = {
 661                [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
 662                [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
 663                [GRC_CACHE_MISSES]      = { 20.0, 10.0, 5.0 },
 664        };
 665        const char *color = PERF_COLOR_NORMAL;
 666
 667        if (ratio > grc_table[type][0])
 668                color = PERF_COLOR_RED;
 669        else if (ratio > grc_table[type][1])
 670                color = PERF_COLOR_MAGENTA;
 671        else if (ratio > grc_table[type][2])
 672                color = PERF_COLOR_YELLOW;
 673
 674        return color;
 675}
 676
 677static void print_stalled_cycles_frontend(int cpu,
 678                                          struct perf_evsel *evsel
 679                                          __maybe_unused, double avg)
 680{
 681        double total, ratio = 0.0;
 682        const char *color;
 683
 684        total = avg_stats(&runtime_cycles_stats[cpu]);
 685
 686        if (total)
 687                ratio = avg / total * 100.0;
 688
 689        color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
 690
 691        fprintf(output, " #  ");
 692        color_fprintf(output, color, "%6.2f%%", ratio);
 693        fprintf(output, " frontend cycles idle   ");
 694}
 695
 696static void print_stalled_cycles_backend(int cpu,
 697                                         struct perf_evsel *evsel
 698                                         __maybe_unused, double avg)
 699{
 700        double total, ratio = 0.0;
 701        const char *color;
 702
 703        total = avg_stats(&runtime_cycles_stats[cpu]);
 704
 705        if (total)
 706                ratio = avg / total * 100.0;
 707
 708        color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
 709
 710        fprintf(output, " #  ");
 711        color_fprintf(output, color, "%6.2f%%", ratio);
 712        fprintf(output, " backend  cycles idle   ");
 713}
 714
 715static void print_branch_misses(int cpu,
 716                                struct perf_evsel *evsel __maybe_unused,
 717                                double avg)
 718{
 719        double total, ratio = 0.0;
 720        const char *color;
 721
 722        total = avg_stats(&runtime_branches_stats[cpu]);
 723
 724        if (total)
 725                ratio = avg / total * 100.0;
 726
 727        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 728
 729        fprintf(output, " #  ");
 730        color_fprintf(output, color, "%6.2f%%", ratio);
 731        fprintf(output, " of all branches        ");
 732}
 733
 734static void print_l1_dcache_misses(int cpu,
 735                                   struct perf_evsel *evsel __maybe_unused,
 736                                   double avg)
 737{
 738        double total, ratio = 0.0;
 739        const char *color;
 740
 741        total = avg_stats(&runtime_l1_dcache_stats[cpu]);
 742
 743        if (total)
 744                ratio = avg / total * 100.0;
 745
 746        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 747
 748        fprintf(output, " #  ");
 749        color_fprintf(output, color, "%6.2f%%", ratio);
 750        fprintf(output, " of all L1-dcache hits  ");
 751}
 752
 753static void print_l1_icache_misses(int cpu,
 754                                   struct perf_evsel *evsel __maybe_unused,
 755                                   double avg)
 756{
 757        double total, ratio = 0.0;
 758        const char *color;
 759
 760        total = avg_stats(&runtime_l1_icache_stats[cpu]);
 761
 762        if (total)
 763                ratio = avg / total * 100.0;
 764
 765        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 766
 767        fprintf(output, " #  ");
 768        color_fprintf(output, color, "%6.2f%%", ratio);
 769        fprintf(output, " of all L1-icache hits  ");
 770}
 771
 772static void print_dtlb_cache_misses(int cpu,
 773                                    struct perf_evsel *evsel __maybe_unused,
 774                                    double avg)
 775{
 776        double total, ratio = 0.0;
 777        const char *color;
 778
 779        total = avg_stats(&runtime_dtlb_cache_stats[cpu]);
 780
 781        if (total)
 782                ratio = avg / total * 100.0;
 783
 784        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 785
 786        fprintf(output, " #  ");
 787        color_fprintf(output, color, "%6.2f%%", ratio);
 788        fprintf(output, " of all dTLB cache hits ");
 789}
 790
 791static void print_itlb_cache_misses(int cpu,
 792                                    struct perf_evsel *evsel __maybe_unused,
 793                                    double avg)
 794{
 795        double total, ratio = 0.0;
 796        const char *color;
 797
 798        total = avg_stats(&runtime_itlb_cache_stats[cpu]);
 799
 800        if (total)
 801                ratio = avg / total * 100.0;
 802
 803        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 804
 805        fprintf(output, " #  ");
 806        color_fprintf(output, color, "%6.2f%%", ratio);
 807        fprintf(output, " of all iTLB cache hits ");
 808}
 809
 810static void print_ll_cache_misses(int cpu,
 811                                  struct perf_evsel *evsel __maybe_unused,
 812                                  double avg)
 813{
 814        double total, ratio = 0.0;
 815        const char *color;
 816
 817        total = avg_stats(&runtime_ll_cache_stats[cpu]);
 818
 819        if (total)
 820                ratio = avg / total * 100.0;
 821
 822        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 823
 824        fprintf(output, " #  ");
 825        color_fprintf(output, color, "%6.2f%%", ratio);
 826        fprintf(output, " of all LL-cache hits   ");
 827}
 828
 829static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 830{
 831        double total, ratio = 0.0;
 832        const char *fmt;
 833
 834        if (csv_output)
 835                fmt = "%.0f%s%s";
 836        else if (big_num)
 837                fmt = "%'18.0f%s%-25s";
 838        else
 839                fmt = "%18.0f%s%-25s";
 840
 841        aggr_printout(evsel, cpu, nr);
 842
 843        if (aggr_mode == AGGR_GLOBAL)
 844                cpu = 0;
 845
 846        fprintf(output, fmt, avg, csv_sep, perf_evsel__name(evsel));
 847
 848        if (evsel->cgrp)
 849                fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 850
 851        if (csv_output || interval)
 852                return;
 853
 854        if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
 855                total = avg_stats(&runtime_cycles_stats[cpu]);
 856                if (total)
 857                        ratio = avg / total;
 858
 859                fprintf(output, " #   %5.2f  insns per cycle        ", ratio);
 860
 861                total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
 862                total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
 863
 864                if (total && avg) {
 865                        ratio = total / avg;
 866                        fprintf(output, "\n                                             #   %5.2f  stalled cycles per insn", ratio);
 867                }
 868
 869        } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
 870                        runtime_branches_stats[cpu].n != 0) {
 871                print_branch_misses(cpu, evsel, avg);
 872        } else if (
 873                evsel->attr.type == PERF_TYPE_HW_CACHE &&
 874                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
 875                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 876                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
 877                        runtime_l1_dcache_stats[cpu].n != 0) {
 878                print_l1_dcache_misses(cpu, evsel, avg);
 879        } else if (
 880                evsel->attr.type == PERF_TYPE_HW_CACHE &&
 881                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
 882                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 883                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
 884                        runtime_l1_icache_stats[cpu].n != 0) {
 885                print_l1_icache_misses(cpu, evsel, avg);
 886        } else if (
 887                evsel->attr.type == PERF_TYPE_HW_CACHE &&
 888                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
 889                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 890                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
 891                        runtime_dtlb_cache_stats[cpu].n != 0) {
 892                print_dtlb_cache_misses(cpu, evsel, avg);
 893        } else if (
 894                evsel->attr.type == PERF_TYPE_HW_CACHE &&
 895                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
 896                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 897                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
 898                        runtime_itlb_cache_stats[cpu].n != 0) {
 899                print_itlb_cache_misses(cpu, evsel, avg);
 900        } else if (
 901                evsel->attr.type == PERF_TYPE_HW_CACHE &&
 902                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
 903                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 904                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
 905                        runtime_ll_cache_stats[cpu].n != 0) {
 906                print_ll_cache_misses(cpu, evsel, avg);
 907        } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
 908                        runtime_cacherefs_stats[cpu].n != 0) {
 909                total = avg_stats(&runtime_cacherefs_stats[cpu]);
 910
 911                if (total)
 912                        ratio = avg * 100 / total;
 913
 914                fprintf(output, " # %8.3f %% of all cache refs    ", ratio);
 915
 916        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
 917                print_stalled_cycles_frontend(cpu, evsel, avg);
 918        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
 919                print_stalled_cycles_backend(cpu, evsel, avg);
 920        } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
 921                total = avg_stats(&runtime_nsecs_stats[cpu]);
 922
 923                if (total)
 924                        ratio = 1.0 * avg / total;
 925
 926                fprintf(output, " # %8.3f GHz                    ", ratio);
 927        } else if (runtime_nsecs_stats[cpu].n != 0) {
 928                char unit = 'M';
 929
 930                total = avg_stats(&runtime_nsecs_stats[cpu]);
 931
 932                if (total)
 933                        ratio = 1000.0 * avg / total;
 934                if (ratio < 0.001) {
 935                        ratio *= 1000;
 936                        unit = 'K';
 937                }
 938
 939                fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
 940        } else {
 941                fprintf(output, "                                   ");
 942        }
 943}
 944
 945static void print_aggr(char *prefix)
 946{
 947        struct perf_evsel *counter;
 948        int cpu, cpu2, s, s2, id, nr;
 949        u64 ena, run, val;
 950
 951        if (!(aggr_map || aggr_get_id))
 952                return;
 953
 954        for (s = 0; s < aggr_map->nr; s++) {
 955                id = aggr_map->map[s];
 956                list_for_each_entry(counter, &evsel_list->entries, node) {
 957                        val = ena = run = 0;
 958                        nr = 0;
 959                        for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
 960                                cpu2 = perf_evsel__cpus(counter)->map[cpu];
 961                                s2 = aggr_get_id(evsel_list->cpus, cpu2);
 962                                if (s2 != id)
 963                                        continue;
 964                                val += counter->counts->cpu[cpu].val;
 965                                ena += counter->counts->cpu[cpu].ena;
 966                                run += counter->counts->cpu[cpu].run;
 967                                nr++;
 968                        }
 969                        if (prefix)
 970                                fprintf(output, "%s", prefix);
 971
 972                        if (run == 0 || ena == 0) {
 973                                aggr_printout(counter, id, nr);
 974
 975                                fprintf(output, "%*s%s%*s",
 976                                        csv_output ? 0 : 18,
 977                                        counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
 978                                        csv_sep,
 979                                        csv_output ? 0 : -24,
 980                                        perf_evsel__name(counter));
 981
 982                                if (counter->cgrp)
 983                                        fprintf(output, "%s%s",
 984                                                csv_sep, counter->cgrp->name);
 985
 986                                fputc('\n', output);
 987                                continue;
 988                        }
 989
 990                        if (nsec_counter(counter))
 991                                nsec_printout(id, nr, counter, val);
 992                        else
 993                                abs_printout(id, nr, counter, val);
 994
 995                        if (!csv_output) {
 996                                print_noise(counter, 1.0);
 997
 998                                if (run != ena)
 999                                        fprintf(output, "  (%.2f%%)",
1000                                                100.0 * run / ena);
1001                        }
1002                        fputc('\n', output);
1003                }
1004        }
1005}
1006
1007/*
1008 * Print out the results of a single counter:
1009 * aggregated counts in system-wide mode
1010 */
1011static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
1012{
1013        struct perf_stat *ps = counter->priv;
1014        double avg = avg_stats(&ps->res_stats[0]);
1015        int scaled = counter->counts->scaled;
1016
1017        if (prefix)
1018                fprintf(output, "%s", prefix);
1019
1020        if (scaled == -1) {
1021                fprintf(output, "%*s%s%*s",
1022                        csv_output ? 0 : 18,
1023                        counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
1024                        csv_sep,
1025                        csv_output ? 0 : -24,
1026                        perf_evsel__name(counter));
1027
1028                if (counter->cgrp)
1029                        fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
1030
1031                fputc('\n', output);
1032                return;
1033        }
1034
1035        if (nsec_counter(counter))
1036                nsec_printout(-1, 0, counter, avg);
1037        else
1038                abs_printout(-1, 0, counter, avg);
1039
1040        print_noise(counter, avg);
1041
1042        if (csv_output) {
1043                fputc('\n', output);
1044                return;
1045        }
1046
1047        if (scaled) {
1048                double avg_enabled, avg_running;
1049
1050                avg_enabled = avg_stats(&ps->res_stats[1]);
1051                avg_running = avg_stats(&ps->res_stats[2]);
1052
1053                fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled);
1054        }
1055        fprintf(output, "\n");
1056}
1057
1058/*
1059 * Print out the results of a single counter:
1060 * does not use aggregated count in system-wide
1061 */
1062static void print_counter(struct perf_evsel *counter, char *prefix)
1063{
1064        u64 ena, run, val;
1065        int cpu;
1066
1067        for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
1068                val = counter->counts->cpu[cpu].val;
1069                ena = counter->counts->cpu[cpu].ena;
1070                run = counter->counts->cpu[cpu].run;
1071
1072                if (prefix)
1073                        fprintf(output, "%s", prefix);
1074
1075                if (run == 0 || ena == 0) {
1076                        fprintf(output, "CPU%*d%s%*s%s%*s",
1077                                csv_output ? 0 : -4,
1078                                perf_evsel__cpus(counter)->map[cpu], csv_sep,
1079                                csv_output ? 0 : 18,
1080                                counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
1081                                csv_sep,
1082                                csv_output ? 0 : -24,
1083                                perf_evsel__name(counter));
1084
1085                        if (counter->cgrp)
1086                                fprintf(output, "%s%s",
1087                                        csv_sep, counter->cgrp->name);
1088
1089                        fputc('\n', output);
1090                        continue;
1091                }
1092
1093                if (nsec_counter(counter))
1094                        nsec_printout(cpu, 0, counter, val);
1095                else
1096                        abs_printout(cpu, 0, counter, val);
1097
1098                if (!csv_output) {
1099                        print_noise(counter, 1.0);
1100
1101                        if (run != ena)
1102                                fprintf(output, "  (%.2f%%)",
1103                                        100.0 * run / ena);
1104                }
1105                fputc('\n', output);
1106        }
1107}
1108
1109static void print_stat(int argc, const char **argv)
1110{
1111        struct perf_evsel *counter;
1112        int i;
1113
1114        fflush(stdout);
1115
1116        if (!csv_output) {
1117                fprintf(output, "\n");
1118                fprintf(output, " Performance counter stats for ");
1119                if (!perf_target__has_task(&target)) {
1120                        fprintf(output, "\'%s", argv[0]);
1121                        for (i = 1; i < argc; i++)
1122                                fprintf(output, " %s", argv[i]);
1123                } else if (target.pid)
1124                        fprintf(output, "process id \'%s", target.pid);
1125                else
1126                        fprintf(output, "thread id \'%s", target.tid);
1127
1128                fprintf(output, "\'");
1129                if (run_count > 1)
1130                        fprintf(output, " (%d runs)", run_count);
1131                fprintf(output, ":\n\n");
1132        }
1133
1134        switch (aggr_mode) {
1135        case AGGR_CORE:
1136        case AGGR_SOCKET:
1137                print_aggr(NULL);
1138                break;
1139        case AGGR_GLOBAL:
1140                list_for_each_entry(counter, &evsel_list->entries, node)
1141                        print_counter_aggr(counter, NULL);
1142                break;
1143        case AGGR_NONE:
1144                list_for_each_entry(counter, &evsel_list->entries, node)
1145                        print_counter(counter, NULL);
1146                break;
1147        default:
1148                break;
1149        }
1150
1151        if (!csv_output) {
1152                if (!null_run)
1153                        fprintf(output, "\n");
1154                fprintf(output, " %17.9f seconds time elapsed",
1155                                avg_stats(&walltime_nsecs_stats)/1e9);
1156                if (run_count > 1) {
1157                        fprintf(output, "                                        ");
1158                        print_noise_pct(stddev_stats(&walltime_nsecs_stats),
1159                                        avg_stats(&walltime_nsecs_stats));
1160                }
1161                fprintf(output, "\n\n");
1162        }
1163}
1164
1165static volatile int signr = -1;
1166
1167static void skip_signal(int signo)
1168{
1169        if ((child_pid == -1) || interval)
1170                done = 1;
1171
1172        signr = signo;
1173        /*
1174         * render child_pid harmless
1175         * won't send SIGTERM to a random
1176         * process in case of race condition
1177         * and fast PID recycling
1178         */
1179        child_pid = -1;
1180}
1181
1182static void sig_atexit(void)
1183{
1184        sigset_t set, oset;
1185
1186        /*
1187         * avoid race condition with SIGCHLD handler
1188         * in skip_signal() which is modifying child_pid
1189         * goal is to avoid send SIGTERM to a random
1190         * process
1191         */
1192        sigemptyset(&set);
1193        sigaddset(&set, SIGCHLD);
1194        sigprocmask(SIG_BLOCK, &set, &oset);
1195
1196        if (child_pid != -1)
1197                kill(child_pid, SIGTERM);
1198
1199        sigprocmask(SIG_SETMASK, &oset, NULL);
1200
1201        if (signr == -1)
1202                return;
1203
1204        signal(signr, SIG_DFL);
1205        kill(getpid(), signr);
1206}
1207
1208static int stat__set_big_num(const struct option *opt __maybe_unused,
1209                             const char *s __maybe_unused, int unset)
1210{
1211        big_num_opt = unset ? 0 : 1;
1212        return 0;
1213}
1214
1215static int perf_stat_init_aggr_mode(void)
1216{
1217        switch (aggr_mode) {
1218        case AGGR_SOCKET:
1219                if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
1220                        perror("cannot build socket map");
1221                        return -1;
1222                }
1223                aggr_get_id = cpu_map__get_socket;
1224                break;
1225        case AGGR_CORE:
1226                if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
1227                        perror("cannot build core map");
1228                        return -1;
1229                }
1230                aggr_get_id = cpu_map__get_core;
1231                break;
1232        case AGGR_NONE:
1233        case AGGR_GLOBAL:
1234        default:
1235                break;
1236        }
1237        return 0;
1238}
1239
1240
1241/*
1242 * Add default attributes, if there were no attributes specified or
1243 * if -d/--detailed, -d -d or -d -d -d is used:
1244 */
1245static int add_default_attributes(void)
1246{
1247        struct perf_event_attr default_attrs[] = {
1248
1249  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK              },
1250  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES        },
1251  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS          },
1252  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS             },
1253
1254  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES              },
1255  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
1256  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND  },
1257  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS            },
1258  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS     },
1259  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES           },
1260
1261};
1262
1263/*
1264 * Detailed stats (-d), covering the L1 and last level data caches:
1265 */
1266        struct perf_event_attr detailed_attrs[] = {
1267
1268  { .type = PERF_TYPE_HW_CACHE,
1269    .config =
1270         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1271        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1272        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1273
1274  { .type = PERF_TYPE_HW_CACHE,
1275    .config =
1276         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1277        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1278        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1279
1280  { .type = PERF_TYPE_HW_CACHE,
1281    .config =
1282         PERF_COUNT_HW_CACHE_LL                 <<  0  |
1283        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1284        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1285
1286  { .type = PERF_TYPE_HW_CACHE,
1287    .config =
1288         PERF_COUNT_HW_CACHE_LL                 <<  0  |
1289        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1290        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1291};
1292
1293/*
1294 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
1295 */
1296        struct perf_event_attr very_detailed_attrs[] = {
1297
1298  { .type = PERF_TYPE_HW_CACHE,
1299    .config =
1300         PERF_COUNT_HW_CACHE_L1I                <<  0  |
1301        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1302        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1303
1304  { .type = PERF_TYPE_HW_CACHE,
1305    .config =
1306         PERF_COUNT_HW_CACHE_L1I                <<  0  |
1307        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1308        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1309
1310  { .type = PERF_TYPE_HW_CACHE,
1311    .config =
1312         PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1313        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1314        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1315
1316  { .type = PERF_TYPE_HW_CACHE,
1317    .config =
1318         PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1319        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1320        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1321
1322  { .type = PERF_TYPE_HW_CACHE,
1323    .config =
1324         PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1325        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1326        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1327
1328  { .type = PERF_TYPE_HW_CACHE,
1329    .config =
1330         PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1331        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1332        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1333
1334};
1335
1336/*
1337 * Very, very detailed stats (-d -d -d), adding prefetch events:
1338 */
1339        struct perf_event_attr very_very_detailed_attrs[] = {
1340
1341  { .type = PERF_TYPE_HW_CACHE,
1342    .config =
1343         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1344        (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1345        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1346
1347  { .type = PERF_TYPE_HW_CACHE,
1348    .config =
1349         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1350        (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1351        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1352};
1353
1354        /* Set attrs if no event is selected and !null_run: */
1355        if (null_run)
1356                return 0;
1357
1358        if (!evsel_list->nr_entries) {
1359                if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
1360                        return -1;
1361        }
1362
1363        /* Detailed events get appended to the event list: */
1364
1365        if (detailed_run <  1)
1366                return 0;
1367
1368        /* Append detailed run extra attributes: */
1369        if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1370                return -1;
1371
1372        if (detailed_run < 2)
1373                return 0;
1374
1375        /* Append very detailed run extra attributes: */
1376        if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1377                return -1;
1378
1379        if (detailed_run < 3)
1380                return 0;
1381
1382        /* Append very, very detailed run extra attributes: */
1383        return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1384}
1385
1386int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1387{
1388        bool append_file = false;
1389        int output_fd = 0;
1390        const char *output_name = NULL;
1391        const struct option options[] = {
1392        OPT_CALLBACK('e', "event", &evsel_list, "event",
1393                     "event selector. use 'perf list' to list available events",
1394                     parse_events_option),
1395        OPT_CALLBACK(0, "filter", &evsel_list, "filter",
1396                     "event filter", parse_filter),
1397        OPT_BOOLEAN('i', "no-inherit", &no_inherit,
1398                    "child tasks do not inherit counters"),
1399        OPT_STRING('p', "pid", &target.pid, "pid",
1400                   "stat events on existing process id"),
1401        OPT_STRING('t', "tid", &target.tid, "tid",
1402                   "stat events on existing thread id"),
1403        OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
1404                    "system-wide collection from all CPUs"),
1405        OPT_BOOLEAN('g', "group", &group,
1406                    "put the counters into a counter group"),
1407        OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
1408        OPT_INCR('v', "verbose", &verbose,
1409                    "be more verbose (show counter open errors, etc)"),
1410        OPT_INTEGER('r', "repeat", &run_count,
1411                    "repeat command and print average + stddev (max: 100, forever: 0)"),
1412        OPT_BOOLEAN('n', "null", &null_run,
1413                    "null run - dont start any counters"),
1414        OPT_INCR('d', "detailed", &detailed_run,
1415                    "detailed run - start a lot of events"),
1416        OPT_BOOLEAN('S', "sync", &sync_run,
1417                    "call sync() before starting a run"),
1418        OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 
1419                           "print large numbers with thousands\' separators",
1420                           stat__set_big_num),
1421        OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
1422                    "list of cpus to monitor in system-wide"),
1423        OPT_SET_UINT('A', "no-aggr", &aggr_mode,
1424                    "disable CPU count aggregation", AGGR_NONE),
1425        OPT_STRING('x', "field-separator", &csv_sep, "separator",
1426                   "print counts with custom separator"),
1427        OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
1428                     "monitor event in cgroup name only", parse_cgroups),
1429        OPT_STRING('o', "output", &output_name, "file", "output file name"),
1430        OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
1431        OPT_INTEGER(0, "log-fd", &output_fd,
1432                    "log output to fd, instead of stderr"),
1433        OPT_STRING(0, "pre", &pre_cmd, "command",
1434                        "command to run prior to the measured command"),
1435        OPT_STRING(0, "post", &post_cmd, "command",
1436                        "command to run after to the measured command"),
1437        OPT_UINTEGER('I', "interval-print", &interval,
1438                    "print counts at regular interval in ms (>= 100)"),
1439        OPT_SET_UINT(0, "per-socket", &aggr_mode,
1440                     "aggregate counts per processor socket", AGGR_SOCKET),
1441        OPT_SET_UINT(0, "per-core", &aggr_mode,
1442                     "aggregate counts per physical processor core", AGGR_CORE),
1443        OPT_UINTEGER('D', "delay", &initial_delay,
1444                     "ms to wait before starting measurement after program start"),
1445        OPT_END()
1446        };
1447        const char * const stat_usage[] = {
1448                "perf stat [<options>] [<command>]",
1449                NULL
1450        };
1451        int status = -ENOMEM, run_idx;
1452        const char *mode;
1453
1454        setlocale(LC_ALL, "");
1455
1456        evsel_list = perf_evlist__new();
1457        if (evsel_list == NULL)
1458                return -ENOMEM;
1459
1460        argc = parse_options(argc, argv, options, stat_usage,
1461                PARSE_OPT_STOP_AT_NON_OPTION);
1462
1463        output = stderr;
1464        if (output_name && strcmp(output_name, "-"))
1465                output = NULL;
1466
1467        if (output_name && output_fd) {
1468                fprintf(stderr, "cannot use both --output and --log-fd\n");
1469                usage_with_options(stat_usage, options);
1470        }
1471
1472        if (output_fd < 0) {
1473                fprintf(stderr, "argument to --log-fd must be a > 0\n");
1474                usage_with_options(stat_usage, options);
1475        }
1476
1477        if (!output) {
1478                struct timespec tm;
1479                mode = append_file ? "a" : "w";
1480
1481                output = fopen(output_name, mode);
1482                if (!output) {
1483                        perror("failed to create output file");
1484                        return -1;
1485                }
1486                clock_gettime(CLOCK_REALTIME, &tm);
1487                fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
1488        } else if (output_fd > 0) {
1489                mode = append_file ? "a" : "w";
1490                output = fdopen(output_fd, mode);
1491                if (!output) {
1492                        perror("Failed opening logfd");
1493                        return -errno;
1494                }
1495        }
1496
1497        if (csv_sep) {
1498                csv_output = true;
1499                if (!strcmp(csv_sep, "\\t"))
1500                        csv_sep = "\t";
1501        } else
1502                csv_sep = DEFAULT_SEPARATOR;
1503
1504        /*
1505         * let the spreadsheet do the pretty-printing
1506         */
1507        if (csv_output) {
1508                /* User explicitly passed -B? */
1509                if (big_num_opt == 1) {
1510                        fprintf(stderr, "-B option not supported with -x\n");
1511                        usage_with_options(stat_usage, options);
1512                } else /* Nope, so disable big number formatting */
1513                        big_num = false;
1514        } else if (big_num_opt == 0) /* User passed --no-big-num */
1515                big_num = false;
1516
1517        if (!argc && !perf_target__has_task(&target))
1518                usage_with_options(stat_usage, options);
1519        if (run_count < 0) {
1520                usage_with_options(stat_usage, options);
1521        } else if (run_count == 0) {
1522                forever = true;
1523                run_count = 1;
1524        }
1525
1526        /* no_aggr, cgroup are for system-wide only */
1527        if ((aggr_mode != AGGR_GLOBAL || nr_cgroups)
1528             && !perf_target__has_cpu(&target)) {
1529                fprintf(stderr, "both cgroup and no-aggregation "
1530                        "modes only available in system-wide mode\n");
1531
1532                usage_with_options(stat_usage, options);
1533                return -1;
1534        }
1535
1536        if (add_default_attributes())
1537                goto out;
1538
1539        perf_target__validate(&target);
1540
1541        if (perf_evlist__create_maps(evsel_list, &target) < 0) {
1542                if (perf_target__has_task(&target))
1543                        pr_err("Problems finding threads of monitor\n");
1544                if (perf_target__has_cpu(&target))
1545                        perror("failed to parse CPUs map");
1546
1547                usage_with_options(stat_usage, options);
1548                return -1;
1549        }
1550        if (interval && interval < 100) {
1551                pr_err("print interval must be >= 100ms\n");
1552                usage_with_options(stat_usage, options);
1553                return -1;
1554        }
1555
1556        if (perf_evlist__alloc_stats(evsel_list, interval))
1557                goto out_free_maps;
1558
1559        if (perf_stat_init_aggr_mode())
1560                goto out;
1561
1562        /*
1563         * We dont want to block the signals - that would cause
1564         * child tasks to inherit that and Ctrl-C would not work.
1565         * What we want is for Ctrl-C to work in the exec()-ed
1566         * task, but being ignored by perf stat itself:
1567         */
1568        atexit(sig_atexit);
1569        if (!forever)
1570                signal(SIGINT,  skip_signal);
1571        signal(SIGCHLD, skip_signal);
1572        signal(SIGALRM, skip_signal);
1573        signal(SIGABRT, skip_signal);
1574
1575        status = 0;
1576        for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
1577                if (run_count != 1 && verbose)
1578                        fprintf(output, "[ perf stat: executing run #%d ... ]\n",
1579                                run_idx + 1);
1580
1581                status = run_perf_stat(argc, argv);
1582                if (forever && status != -1) {
1583                        print_stat(argc, argv);
1584                        perf_stat__reset_stats(evsel_list);
1585                }
1586        }
1587
1588        if (!forever && status != -1 && !interval)
1589                print_stat(argc, argv);
1590
1591        perf_evlist__free_stats(evsel_list);
1592out_free_maps:
1593        perf_evlist__delete_maps(evsel_list);
1594out:
1595        perf_evlist__delete(evsel_list);
1596        return status;
1597}
1598