linux/tools/perf/builtin-stat.c
<<
>>
Prefs
   1/*
   2 * builtin-stat.c
   3 *
   4 * Builtin stat command: Give a precise performance counters summary
   5 * overview about any workload, CPU or specific PID.
   6 *
   7 * Sample output:
   8
   9   $ perf stat ./hackbench 10
  10
  11  Time: 0.118
  12
  13  Performance counter stats for './hackbench 10':
  14
  15       1708.761321 task-clock                #   11.037 CPUs utilized
  16            41,190 context-switches          #    0.024 M/sec
  17             6,735 CPU-migrations            #    0.004 M/sec
  18            17,318 page-faults               #    0.010 M/sec
  19     5,205,202,243 cycles                    #    3.046 GHz
  20     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
  21     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
  22     2,603,501,247 instructions              #    0.50  insns per cycle
  23                                             #    1.48  stalled cycles per insn
  24       484,357,498 branches                  #  283.455 M/sec
  25         6,388,934 branch-misses             #    1.32% of all branches
  26
  27        0.154822978  seconds time elapsed
  28
  29 *
  30 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
  31 *
  32 * Improvements and fixes by:
  33 *
  34 *   Arjan van de Ven <arjan@linux.intel.com>
  35 *   Yanmin Zhang <yanmin.zhang@intel.com>
  36 *   Wu Fengguang <fengguang.wu@intel.com>
  37 *   Mike Galbraith <efault@gmx.de>
  38 *   Paul Mackerras <paulus@samba.org>
  39 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
  40 *
  41 * Released under the GPL v2. (and only v2, not any later version)
  42 */
  43
  44#include "perf.h"
  45#include "builtin.h"
  46#include "util/util.h"
  47#include "util/parse-options.h"
  48#include "util/parse-events.h"
  49#include "util/event.h"
  50#include "util/evlist.h"
  51#include "util/evsel.h"
  52#include "util/debug.h"
  53#include "util/color.h"
  54#include "util/stat.h"
  55#include "util/header.h"
  56#include "util/cpumap.h"
  57#include "util/thread.h"
  58#include "util/thread_map.h"
  59
  60#include <stdlib.h>
  61#include <sys/prctl.h>
  62#include <locale.h>
  63
   /*
    * String constants used by the printing code.  Their uses are outside the
    * part of the file visible here; presumably DEFAULT_SEPARATOR is the column
    * separator when none is configured and the CNTR_* strings stand in for a
    * count that could not be obtained - TODO confirm against the callers.
    */
   #define DEFAULT_SEPARATOR       " "
   #define CNTR_NOT_SUPPORTED      "<not supported>"
   #define CNTR_NOT_COUNTED        "<not counted>"

   /*
    * Forward declarations: print_interval() calls the three per-mode printers
    * before their definitions appear in the file.
    */
   static void print_stat(int argc, const char **argv);
   static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
   static void print_counter(struct perf_evsel *counter, char *prefix);
   static void print_aggr(char *prefix);
  72
   /* The list of events being counted; iterated by nearly every function below. */
   static struct perf_evlist       *evsel_list;

   /*
    * What is being measured (CPUs, tasks, ...).  Queried through the
    * perf_target__has_cpu()/perf_target__has_task() helpers and target.cpu_list.
    * NOTE(review): UINT_MAX presumably means "no uid filter" - confirm.
    */
   static struct perf_target       target = {
           .uid    = UINT_MAX,
   };
  78
   /*
    * How counts are grouped when they are read and printed (see
    * aggr_printout() and print_interval()):
    *   AGGR_NONE   - one line per CPU, with a "CPU<n>" column
    *   AGGR_GLOBAL - one line per event, no extra column
    *   AGGR_SOCKET - an "S<id>" column followed by a cpus column
    *   AGGR_CORE   - an "S<socket>-C<core>" column followed by a cpus column
    */
   enum aggr_mode {
           AGGR_NONE,
           AGGR_GLOBAL,
           AGGR_SOCKET,
           AGGR_CORE,
   };
  85
   /*
    * Run-time configuration and state shared by the functions in this file.
    * What the visible part of the file shows about them:
    *   run_count    - print_noise() prints nothing when this is 1
    *   no_inherit   - inverted into attr->inherit when a counter is created
    *   scale        - requests the enabled/running time read format and is
    *                  passed to the counter read helpers
    *   aggr_mode    - selects the read path and the print path
    *   child_pid    - pid that __run_perf_stat() waits on and signals
    *   big_num      - selects the "%'" number format in abs_printout()
    *   csv_sep      - separator string placed between printed fields
    *   csv_output   - switches the printers to unpadded CSV formats
    *   group        - makes __run_perf_stat() call perf_evlist__set_leader()
    *   output       - stream all results are written to
    *   pre_cmd,
    *   post_cmd     - commands handed to system() before/after a run
    *   sync_run     - call sync() before a run
    *   interval     - period in milliseconds between print_interval() calls;
    *                  0 disables interval printing
    *   ref_time     - CLOCK_MONOTONIC timestamp taken when counting starts
    * null_run, detailed_run, big_num_opt, forever, aggr_map and aggr_get_id
    * are not referenced in the part of the file visible here.
    */
   static int                      run_count                       =  1;
   static bool                     no_inherit                      = false;
   static bool                     scale                           =  true;
   static enum aggr_mode           aggr_mode                       = AGGR_GLOBAL;
   static volatile pid_t           child_pid                       = -1;
   static bool                     null_run                        =  false;
   static int                      detailed_run                    =  0;
   static bool                     big_num                         =  true;
   static int                      big_num_opt                     =  -1;
   static const char               *csv_sep                        = NULL;
   static bool                     csv_output                      = false;
   static bool                     group                           = false;
   static FILE                     *output                         = NULL;
   static const char               *pre_cmd                        = NULL;
   static const char               *post_cmd                       = NULL;
   static bool                     sync_run                        = false;
   static unsigned int             interval                        = 0;
   static bool                     forever                         = false;
   static struct timespec          ref_time;
   static struct cpu_map           *aggr_map;
   static int                      (*aggr_get_id)(struct cpu_map *m, int cpu);

   /*
    * Polled by the wait loop in __run_perf_stat() when no workload is forked;
    * presumably set from a signal handler defined elsewhere - TODO confirm.
    */
   static volatile int done = 0;
 109
  /*
   * Per-event private data, stored in evsel->priv.
   *
   * read_counter_aggr() feeds counts->aggr.values[i] into res_stats[i] for
   * i = 0..2.  NOTE(review): the three slots presumably correspond to the
   * count, time enabled and time running - confirm against perf_counts_values.
   */
  struct perf_stat {
          struct stats      res_stats[3];
  };
 113
 114static inline void diff_timespec(struct timespec *r, struct timespec *a,
 115                                 struct timespec *b)
 116{
 117        r->tv_sec = a->tv_sec - b->tv_sec;
 118        if (a->tv_nsec < b->tv_nsec) {
 119                r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
 120                r->tv_sec--;
 121        } else {
 122                r->tv_nsec = a->tv_nsec - b->tv_nsec ;
 123        }
 124}
 125
 126static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
 127{
 128        return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
 129}
 130
 131static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
 132{
 133        return perf_evsel__cpus(evsel)->nr;
 134}
 135
 136static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
 137{
 138        memset(evsel->priv, 0, sizeof(struct perf_stat));
 139}
 140
 141static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
 142{
 143        evsel->priv = zalloc(sizeof(struct perf_stat));
 144        return evsel->priv == NULL ? -ENOMEM : 0;
 145}
 146
 147static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
 148{
 149        free(evsel->priv);
 150        evsel->priv = NULL;
 151}
 152
 153static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
 154{
 155        void *addr;
 156        size_t sz;
 157
 158        sz = sizeof(*evsel->counts) +
 159             (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
 160
 161        addr = zalloc(sz);
 162        if (!addr)
 163                return -ENOMEM;
 164
 165        evsel->prev_raw_counts =  addr;
 166
 167        return 0;
 168}
 169
 170static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
 171{
 172        free(evsel->prev_raw_counts);
 173        evsel->prev_raw_counts = NULL;
 174}
 175
 176static void perf_evlist__free_stats(struct perf_evlist *evlist)
 177{
 178        struct perf_evsel *evsel;
 179
 180        list_for_each_entry(evsel, &evlist->entries, node) {
 181                perf_evsel__free_stat_priv(evsel);
 182                perf_evsel__free_counts(evsel);
 183                perf_evsel__free_prev_raw_counts(evsel);
 184        }
 185}
 186
 187static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
 188{
 189        struct perf_evsel *evsel;
 190
 191        list_for_each_entry(evsel, &evlist->entries, node) {
 192                if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
 193                    perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 ||
 194                    (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0))
 195                        goto out_free;
 196        }
 197
 198        return 0;
 199
 200out_free:
 201        perf_evlist__free_stats(evlist);
 202        return -1;
 203}
 204
  /*
   * "Shadow" statistics used to derive ratios at print time (cycles for
   * insns-per-cycle, branches for the branch-miss percentage, and so on).
   * They are filled by update_shadow_stats() and cleared by
   * perf_stat__reset_stats().  The arrays have one slot per CPU, but the
   * update code visible in this file only ever writes slot 0.
   *
   * walltime_nsecs_stats accumulates the elapsed time of each run
   * (t1 - t0 in __run_perf_stat()).
   */
  static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
  static struct stats runtime_cycles_stats[MAX_NR_CPUS];
  static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
  static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
  static struct stats runtime_branches_stats[MAX_NR_CPUS];
  static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
  static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
  static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
  static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
  static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
  static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
  static struct stats walltime_nsecs_stats;
 217
 218static void perf_stat__reset_stats(struct perf_evlist *evlist)
 219{
 220        struct perf_evsel *evsel;
 221
 222        list_for_each_entry(evsel, &evlist->entries, node) {
 223                perf_evsel__reset_stat_priv(evsel);
 224                perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
 225        }
 226
 227        memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
 228        memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
 229        memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
 230        memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
 231        memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
 232        memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
 233        memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
 234        memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
 235        memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
 236        memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
 237        memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
 238        memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
 239}
 240
 241static int create_perf_stat_counter(struct perf_evsel *evsel)
 242{
 243        struct perf_event_attr *attr = &evsel->attr;
 244
 245        if (scale)
 246                attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
 247                                    PERF_FORMAT_TOTAL_TIME_RUNNING;
 248
 249        attr->inherit = !no_inherit;
 250
 251        if (perf_target__has_cpu(&target))
 252                return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
 253
 254        if (!perf_target__has_task(&target) &&
 255            perf_evsel__is_group_leader(evsel)) {
 256                attr->disabled = 1;
 257                attr->enable_on_exec = 1;
 258        }
 259
 260        return perf_evsel__open_per_thread(evsel, evsel_list->threads);
 261}
 262
 263/*
 264 * Does the counter have nsecs as a unit?
 265 */
 266static inline int nsec_counter(struct perf_evsel *evsel)
 267{
 268        if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
 269            perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
 270                return 1;
 271
 272        return 0;
 273}
 274
 275/*
 276 * Update various tracking values we maintain to print
 277 * more semantic information such as miss/hit ratios,
 278 * instruction rates, etc:
 279 */
 280static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
 281{
 282        if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
 283                update_stats(&runtime_nsecs_stats[0], count[0]);
 284        else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
 285                update_stats(&runtime_cycles_stats[0], count[0]);
 286        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
 287                update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
 288        else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
 289                update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
 290        else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
 291                update_stats(&runtime_branches_stats[0], count[0]);
 292        else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
 293                update_stats(&runtime_cacherefs_stats[0], count[0]);
 294        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
 295                update_stats(&runtime_l1_dcache_stats[0], count[0]);
 296        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
 297                update_stats(&runtime_l1_icache_stats[0], count[0]);
 298        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
 299                update_stats(&runtime_ll_cache_stats[0], count[0]);
 300        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
 301                update_stats(&runtime_dtlb_cache_stats[0], count[0]);
 302        else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
 303                update_stats(&runtime_itlb_cache_stats[0], count[0]);
 304}
 305
 306/*
 307 * Read out the results of a single counter:
 308 * aggregate counts across CPUs in system-wide mode
 309 */
 310static int read_counter_aggr(struct perf_evsel *counter)
 311{
 312        struct perf_stat *ps = counter->priv;
 313        u64 *count = counter->counts->aggr.values;
 314        int i;
 315
 316        if (__perf_evsel__read(counter, perf_evsel__nr_cpus(counter),
 317                               thread_map__nr(evsel_list->threads), scale) < 0)
 318                return -1;
 319
 320        for (i = 0; i < 3; i++)
 321                update_stats(&ps->res_stats[i], count[i]);
 322
 323        if (verbose) {
 324                fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
 325                        perf_evsel__name(counter), count[0], count[1], count[2]);
 326        }
 327
 328        /*
 329         * Save the full runtime - to allow normalization during printout:
 330         */
 331        update_shadow_stats(counter, count);
 332
 333        return 0;
 334}
 335
 336/*
 337 * Read out the results of a single counter:
 338 * do not aggregate counts across CPUs in system-wide mode
 339 */
 340static int read_counter(struct perf_evsel *counter)
 341{
 342        u64 *count;
 343        int cpu;
 344
 345        for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
 346                if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
 347                        return -1;
 348
 349                count = counter->counts->cpu[cpu].values;
 350
 351                update_shadow_stats(counter, count);
 352        }
 353
 354        return 0;
 355}
 356
 357static void print_interval(void)
 358{
 359        static int num_print_interval;
 360        struct perf_evsel *counter;
 361        struct perf_stat *ps;
 362        struct timespec ts, rs;
 363        char prefix[64];
 364
 365        if (aggr_mode == AGGR_GLOBAL) {
 366                list_for_each_entry(counter, &evsel_list->entries, node) {
 367                        ps = counter->priv;
 368                        memset(ps->res_stats, 0, sizeof(ps->res_stats));
 369                        read_counter_aggr(counter);
 370                }
 371        } else  {
 372                list_for_each_entry(counter, &evsel_list->entries, node) {
 373                        ps = counter->priv;
 374                        memset(ps->res_stats, 0, sizeof(ps->res_stats));
 375                        read_counter(counter);
 376                }
 377        }
 378
 379        clock_gettime(CLOCK_MONOTONIC, &ts);
 380        diff_timespec(&rs, &ts, &ref_time);
 381        sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
 382
 383        if (num_print_interval == 0 && !csv_output) {
 384                switch (aggr_mode) {
 385                case AGGR_SOCKET:
 386                        fprintf(output, "#           time socket cpus             counts events\n");
 387                        break;
 388                case AGGR_CORE:
 389                        fprintf(output, "#           time core         cpus             counts events\n");
 390                        break;
 391                case AGGR_NONE:
 392                        fprintf(output, "#           time CPU                 counts events\n");
 393                        break;
 394                case AGGR_GLOBAL:
 395                default:
 396                        fprintf(output, "#           time             counts events\n");
 397                }
 398        }
 399
 400        if (++num_print_interval == 25)
 401                num_print_interval = 0;
 402
 403        switch (aggr_mode) {
 404        case AGGR_CORE:
 405        case AGGR_SOCKET:
 406                print_aggr(prefix);
 407                break;
 408        case AGGR_NONE:
 409                list_for_each_entry(counter, &evsel_list->entries, node)
 410                        print_counter(counter, prefix);
 411                break;
 412        case AGGR_GLOBAL:
 413        default:
 414                list_for_each_entry(counter, &evsel_list->entries, node)
 415                        print_counter_aggr(counter, prefix);
 416        }
 417}
 418
  /*
   * Perform one measurement run: open all counters, run the workload (or
   * wait until 'done' is set when there is none), then read and close the
   * counters.
   *
   * @argc/@argv: the workload command line; a workload is forked only when
   *              argc > 0.
   *
   * Returns -1 when the workload cannot be prepared, a counter cannot be
   * opened for a reason other than "not supported", or the event filters
   * cannot be applied.  Otherwise returns WEXITSTATUS() of the wait status,
   * which is 0 when nothing was forked because 'status' starts out as 0.
   */
  static int __run_perf_stat(int argc, const char **argv)
  {
          char msg[512];
          unsigned long long t0, t1;
          struct perf_evsel *counter;
          struct timespec ts;
          int status = 0;
          const bool forks = (argc > 0);

          /*
           * Sleep period for the wait loops below: the interval (given in
           * milliseconds) when interval printing is on, one second otherwise.
           */
          if (interval) {
                  ts.tv_sec  = interval / 1000;
                  ts.tv_nsec = (interval % 1000) * 1000000;
          } else {
                  ts.tv_sec  = 1;
                  ts.tv_nsec = 0;
          }

          if (forks) {
                  if (perf_evlist__prepare_workload(evsel_list, &target, argv,
                                                    false, false) < 0) {
                          perror("failed to prepare workload");
                          return -1;
                  }
          }

          if (group)
                  perf_evlist__set_leader(evsel_list);

          list_for_each_entry(counter, &evsel_list->entries, node) {
                  if (create_perf_stat_counter(counter) < 0) {
                          /*
                           * PPC returns ENXIO for HW counters until 2.6.37
                           * (behavior changed with commit b0a873e).
                           */
                          if (errno == EINVAL || errno == ENOSYS ||
                              errno == ENOENT || errno == EOPNOTSUPP ||
                              errno == ENXIO) {
                                  /*
                                   * Treated as "event not supported": mark it
                                   * and carry on with the remaining events.
                                   */
                                  if (verbose)
                                          ui__warning("%s event is not supported by the kernel.\n",
                                                      perf_evsel__name(counter));
                                  counter->supported = false;
                                  continue;
                          }

                          /* Any other error is fatal for the run. */
                          perf_evsel__open_strerror(counter, &target,
                                                    errno, msg, sizeof(msg));
                          ui__error("%s\n", msg);

                          /* Do not leave the prepared workload behind. */
                          if (child_pid != -1)
                                  kill(child_pid, SIGTERM);

                          return -1;
                  }
                  counter->supported = true;
          }

          if (perf_evlist__apply_filters(evsel_list)) {
                  error("failed to set filter with %d (%s)\n", errno,
                          strerror(errno));
                  return -1;
          }

          /*
           * Enable counters and exec the command:
           */
          t0 = rdclock();
          /* Reference point for the timestamps printed by print_interval(). */
          clock_gettime(CLOCK_MONOTONIC, &ref_time);

          if (forks) {
                  perf_evlist__start_workload(evsel_list);

                  if (interval) {
                          /*
                           * Poll without blocking so that a report can be
                           * printed after every sleep while the child runs.
                           */
                          while (!waitpid(child_pid, &status, WNOHANG)) {
                                  nanosleep(&ts, NULL);
                                  print_interval();
                          }
                  }
                  wait(&status);
                  if (WIFSIGNALED(status))
                          psignal(WTERMSIG(status), argv[0]);
          } else {
                  /* No workload: count until 'done' is set. */
                  while (!done) {
                          nanosleep(&ts, NULL);
                          if (interval)
                                  print_interval();
                  }
          }

          t1 = rdclock();

          update_stats(&walltime_nsecs_stats, t1 - t0);

          /* Final read of every counter, then close its file descriptors. */
          if (aggr_mode == AGGR_GLOBAL) {
                  list_for_each_entry(counter, &evsel_list->entries, node) {
                          read_counter_aggr(counter);
                          perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
                                               thread_map__nr(evsel_list->threads));
                  }
          } else {
                  list_for_each_entry(counter, &evsel_list->entries, node) {
                          read_counter(counter);
                          perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1);
                  }
          }

          /*
           * NOTE(review): WEXITSTATUS() is applied even when the child was
           * terminated by a signal (the WIFSIGNALED case above).
           */
          return WEXITSTATUS(status);
  }
 526
 527static int run_perf_stat(int argc __maybe_unused, const char **argv)
 528{
 529        int ret;
 530
 531        if (pre_cmd) {
 532                ret = system(pre_cmd);
 533                if (ret)
 534                        return ret;
 535        }
 536
 537        if (sync_run)
 538                sync();
 539
 540        ret = __run_perf_stat(argc, argv);
 541        if (ret)
 542                return ret;
 543
 544        if (post_cmd) {
 545                ret = system(post_cmd);
 546                if (ret)
 547                        return ret;
 548        }
 549
 550        return ret;
 551}
 552
 553static void print_noise_pct(double total, double avg)
 554{
 555        double pct = rel_stddev_stats(total, avg);
 556
 557        if (csv_output)
 558                fprintf(output, "%s%.2f%%", csv_sep, pct);
 559        else if (pct)
 560                fprintf(output, "  ( +-%6.2f%% )", pct);
 561}
 562
 563static void print_noise(struct perf_evsel *evsel, double avg)
 564{
 565        struct perf_stat *ps;
 566
 567        if (run_count == 1)
 568                return;
 569
 570        ps = evsel->priv;
 571        print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
 572}
 573
 574static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
 575{
 576        switch (aggr_mode) {
 577        case AGGR_CORE:
 578                fprintf(output, "S%d-C%*d%s%*d%s",
 579                        cpu_map__id_to_socket(id),
 580                        csv_output ? 0 : -8,
 581                        cpu_map__id_to_cpu(id),
 582                        csv_sep,
 583                        csv_output ? 0 : 4,
 584                        nr,
 585                        csv_sep);
 586                break;
 587        case AGGR_SOCKET:
 588                fprintf(output, "S%*d%s%*d%s",
 589                        csv_output ? 0 : -5,
 590                        id,
 591                        csv_sep,
 592                        csv_output ? 0 : 4,
 593                        nr,
 594                        csv_sep);
 595                        break;
 596        case AGGR_NONE:
 597                fprintf(output, "CPU%*d%s",
 598                        csv_output ? 0 : -4,
 599                        perf_evsel__cpus(evsel)->map[id], csv_sep);
 600                break;
 601        case AGGR_GLOBAL:
 602        default:
 603                break;
 604        }
 605}
 606
 607static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 608{
 609        double msecs = avg / 1e6;
 610        const char *fmt = csv_output ? "%.6f%s%s" : "%18.6f%s%-25s";
 611
 612        aggr_printout(evsel, cpu, nr);
 613
 614        fprintf(output, fmt, msecs, csv_sep, perf_evsel__name(evsel));
 615
 616        if (evsel->cgrp)
 617                fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 618
 619        if (csv_output || interval)
 620                return;
 621
 622        if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
 623                fprintf(output, " # %8.3f CPUs utilized          ",
 624                        avg / avg_stats(&walltime_nsecs_stats));
 625        else
 626                fprintf(output, "                                   ");
 627}
 628
  /*
   * Ratio kinds understood by get_ratio_color(); each value selects one row
   * of color thresholds in that function's table.  GRC_MAX_NR is the number
   * of kinds and sizes the table.
   */
  enum grc_type {
          GRC_STALLED_CYCLES_FE,
          GRC_STALLED_CYCLES_BE,
          GRC_CACHE_MISSES,
          GRC_MAX_NR
  };
 636
 637static const char *get_ratio_color(enum grc_type type, double ratio)
 638{
 639        static const double grc_table[GRC_MAX_NR][3] = {
 640                [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
 641                [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
 642                [GRC_CACHE_MISSES]      = { 20.0, 10.0, 5.0 },
 643        };
 644        const char *color = PERF_COLOR_NORMAL;
 645
 646        if (ratio > grc_table[type][0])
 647                color = PERF_COLOR_RED;
 648        else if (ratio > grc_table[type][1])
 649                color = PERF_COLOR_MAGENTA;
 650        else if (ratio > grc_table[type][2])
 651                color = PERF_COLOR_YELLOW;
 652
 653        return color;
 654}
 655
 656static void print_stalled_cycles_frontend(int cpu,
 657                                          struct perf_evsel *evsel
 658                                          __maybe_unused, double avg)
 659{
 660        double total, ratio = 0.0;
 661        const char *color;
 662
 663        total = avg_stats(&runtime_cycles_stats[cpu]);
 664
 665        if (total)
 666                ratio = avg / total * 100.0;
 667
 668        color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
 669
 670        fprintf(output, " #  ");
 671        color_fprintf(output, color, "%6.2f%%", ratio);
 672        fprintf(output, " frontend cycles idle   ");
 673}
 674
 675static void print_stalled_cycles_backend(int cpu,
 676                                         struct perf_evsel *evsel
 677                                         __maybe_unused, double avg)
 678{
 679        double total, ratio = 0.0;
 680        const char *color;
 681
 682        total = avg_stats(&runtime_cycles_stats[cpu]);
 683
 684        if (total)
 685                ratio = avg / total * 100.0;
 686
 687        color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
 688
 689        fprintf(output, " #  ");
 690        color_fprintf(output, color, "%6.2f%%", ratio);
 691        fprintf(output, " backend  cycles idle   ");
 692}
 693
 694static void print_branch_misses(int cpu,
 695                                struct perf_evsel *evsel __maybe_unused,
 696                                double avg)
 697{
 698        double total, ratio = 0.0;
 699        const char *color;
 700
 701        total = avg_stats(&runtime_branches_stats[cpu]);
 702
 703        if (total)
 704                ratio = avg / total * 100.0;
 705
 706        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 707
 708        fprintf(output, " #  ");
 709        color_fprintf(output, color, "%6.2f%%", ratio);
 710        fprintf(output, " of all branches        ");
 711}
 712
 713static void print_l1_dcache_misses(int cpu,
 714                                   struct perf_evsel *evsel __maybe_unused,
 715                                   double avg)
 716{
 717        double total, ratio = 0.0;
 718        const char *color;
 719
 720        total = avg_stats(&runtime_l1_dcache_stats[cpu]);
 721
 722        if (total)
 723                ratio = avg / total * 100.0;
 724
 725        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 726
 727        fprintf(output, " #  ");
 728        color_fprintf(output, color, "%6.2f%%", ratio);
 729        fprintf(output, " of all L1-dcache hits  ");
 730}
 731
 732static void print_l1_icache_misses(int cpu,
 733                                   struct perf_evsel *evsel __maybe_unused,
 734                                   double avg)
 735{
 736        double total, ratio = 0.0;
 737        const char *color;
 738
 739        total = avg_stats(&runtime_l1_icache_stats[cpu]);
 740
 741        if (total)
 742                ratio = avg / total * 100.0;
 743
 744        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 745
 746        fprintf(output, " #  ");
 747        color_fprintf(output, color, "%6.2f%%", ratio);
 748        fprintf(output, " of all L1-icache hits  ");
 749}
 750
 751static void print_dtlb_cache_misses(int cpu,
 752                                    struct perf_evsel *evsel __maybe_unused,
 753                                    double avg)
 754{
 755        double total, ratio = 0.0;
 756        const char *color;
 757
 758        total = avg_stats(&runtime_dtlb_cache_stats[cpu]);
 759
 760        if (total)
 761                ratio = avg / total * 100.0;
 762
 763        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 764
 765        fprintf(output, " #  ");
 766        color_fprintf(output, color, "%6.2f%%", ratio);
 767        fprintf(output, " of all dTLB cache hits ");
 768}
 769
 770static void print_itlb_cache_misses(int cpu,
 771                                    struct perf_evsel *evsel __maybe_unused,
 772                                    double avg)
 773{
 774        double total, ratio = 0.0;
 775        const char *color;
 776
 777        total = avg_stats(&runtime_itlb_cache_stats[cpu]);
 778
 779        if (total)
 780                ratio = avg / total * 100.0;
 781
 782        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 783
 784        fprintf(output, " #  ");
 785        color_fprintf(output, color, "%6.2f%%", ratio);
 786        fprintf(output, " of all iTLB cache hits ");
 787}
 788
 789static void print_ll_cache_misses(int cpu,
 790                                  struct perf_evsel *evsel __maybe_unused,
 791                                  double avg)
 792{
 793        double total, ratio = 0.0;
 794        const char *color;
 795
 796        total = avg_stats(&runtime_ll_cache_stats[cpu]);
 797
 798        if (total)
 799                ratio = avg / total * 100.0;
 800
 801        color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 802
 803        fprintf(output, " #  ");
 804        color_fprintf(output, color, "%6.2f%%", ratio);
 805        fprintf(output, " of all LL-cache hits   ");
 806}
 807
 808static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 809{
 810        double total, ratio = 0.0;
 811        const char *fmt;
 812
 813        if (csv_output)
 814                fmt = "%.0f%s%s";
 815        else if (big_num)
 816                fmt = "%'18.0f%s%-25s";
 817        else
 818                fmt = "%18.0f%s%-25s";
 819
 820        aggr_printout(evsel, cpu, nr);
 821
 822        if (aggr_mode == AGGR_GLOBAL)
 823                cpu = 0;
 824
 825        fprintf(output, fmt, avg, csv_sep, perf_evsel__name(evsel));
 826
 827        if (evsel->cgrp)
 828                fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 829
 830        if (csv_output || interval)
 831                return;
 832
 833        if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
 834                total = avg_stats(&runtime_cycles_stats[cpu]);
 835                if (total)
 836                        ratio = avg / total;
 837
 838                fprintf(output, " #   %5.2f  insns per cycle        ", ratio);
 839
 840                total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
 841                total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
 842
 843                if (total && avg) {
 844                        ratio = total / avg;
 845                        fprintf(output, "\n                                             #   %5.2f  stalled cycles per insn", ratio);
 846                }
 847
 848        } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
 849                        runtime_branches_stats[cpu].n != 0) {
 850                print_branch_misses(cpu, evsel, avg);
 851        } else if (
 852                evsel->attr.type == PERF_TYPE_HW_CACHE &&
 853                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
 854                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 855                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
 856                        runtime_l1_dcache_stats[cpu].n != 0) {
 857                print_l1_dcache_misses(cpu, evsel, avg);
 858        } else if (
 859                evsel->attr.type == PERF_TYPE_HW_CACHE &&
 860                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
 861                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 862                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
 863                        runtime_l1_icache_stats[cpu].n != 0) {
 864                print_l1_icache_misses(cpu, evsel, avg);
 865        } else if (
 866                evsel->attr.type == PERF_TYPE_HW_CACHE &&
 867                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
 868                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 869                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
 870                        runtime_dtlb_cache_stats[cpu].n != 0) {
 871                print_dtlb_cache_misses(cpu, evsel, avg);
 872        } else if (
 873                evsel->attr.type == PERF_TYPE_HW_CACHE &&
 874                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
 875                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 876                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
 877                        runtime_itlb_cache_stats[cpu].n != 0) {
 878                print_itlb_cache_misses(cpu, evsel, avg);
 879        } else if (
 880                evsel->attr.type == PERF_TYPE_HW_CACHE &&
 881                evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
 882                                        ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 883                                        ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
 884                        runtime_ll_cache_stats[cpu].n != 0) {
 885                print_ll_cache_misses(cpu, evsel, avg);
 886        } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
 887                        runtime_cacherefs_stats[cpu].n != 0) {
 888                total = avg_stats(&runtime_cacherefs_stats[cpu]);
 889
 890                if (total)
 891                        ratio = avg * 100 / total;
 892
 893                fprintf(output, " # %8.3f %% of all cache refs    ", ratio);
 894
 895        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
 896                print_stalled_cycles_frontend(cpu, evsel, avg);
 897        } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
 898                print_stalled_cycles_backend(cpu, evsel, avg);
 899        } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
 900                total = avg_stats(&runtime_nsecs_stats[cpu]);
 901
 902                if (total)
 903                        ratio = 1.0 * avg / total;
 904
 905                fprintf(output, " # %8.3f GHz                    ", ratio);
 906        } else if (runtime_nsecs_stats[cpu].n != 0) {
 907                char unit = 'M';
 908
 909                total = avg_stats(&runtime_nsecs_stats[cpu]);
 910
 911                if (total)
 912                        ratio = 1000.0 * avg / total;
 913                if (ratio < 0.001) {
 914                        ratio *= 1000;
 915                        unit = 'K';
 916                }
 917
 918                fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
 919        } else {
 920                fprintf(output, "                                   ");
 921        }
 922}
 923
 924static void print_aggr(char *prefix)
 925{
 926        struct perf_evsel *counter;
 927        int cpu, cpu2, s, s2, id, nr;
 928        u64 ena, run, val;
 929
 930        if (!(aggr_map || aggr_get_id))
 931                return;
 932
 933        for (s = 0; s < aggr_map->nr; s++) {
 934                id = aggr_map->map[s];
 935                list_for_each_entry(counter, &evsel_list->entries, node) {
 936                        val = ena = run = 0;
 937                        nr = 0;
 938                        for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
 939                                cpu2 = perf_evsel__cpus(counter)->map[cpu];
 940                                s2 = aggr_get_id(evsel_list->cpus, cpu2);
 941                                if (s2 != id)
 942                                        continue;
 943                                val += counter->counts->cpu[cpu].val;
 944                                ena += counter->counts->cpu[cpu].ena;
 945                                run += counter->counts->cpu[cpu].run;
 946                                nr++;
 947                        }
 948                        if (prefix)
 949                                fprintf(output, "%s", prefix);
 950
 951                        if (run == 0 || ena == 0) {
 952                                aggr_printout(counter, id, nr);
 953
 954                                fprintf(output, "%*s%s%*s",
 955                                        csv_output ? 0 : 18,
 956                                        counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
 957                                        csv_sep,
 958                                        csv_output ? 0 : -24,
 959                                        perf_evsel__name(counter));
 960
 961                                if (counter->cgrp)
 962                                        fprintf(output, "%s%s",
 963                                                csv_sep, counter->cgrp->name);
 964
 965                                fputc('\n', output);
 966                                continue;
 967                        }
 968
 969                        if (nsec_counter(counter))
 970                                nsec_printout(id, nr, counter, val);
 971                        else
 972                                abs_printout(id, nr, counter, val);
 973
 974                        if (!csv_output) {
 975                                print_noise(counter, 1.0);
 976
 977                                if (run != ena)
 978                                        fprintf(output, "  (%.2f%%)",
 979                                                100.0 * run / ena);
 980                        }
 981                        fputc('\n', output);
 982                }
 983        }
 984}
 985
 986/*
 987 * Print out the results of a single counter:
 988 * aggregated counts in system-wide mode
 989 */
 990static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
 991{
 992        struct perf_stat *ps = counter->priv;
 993        double avg = avg_stats(&ps->res_stats[0]);
 994        int scaled = counter->counts->scaled;
 995
 996        if (prefix)
 997                fprintf(output, "%s", prefix);
 998
 999        if (scaled == -1) {
1000                fprintf(output, "%*s%s%*s",
1001                        csv_output ? 0 : 18,
1002                        counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
1003                        csv_sep,
1004                        csv_output ? 0 : -24,
1005                        perf_evsel__name(counter));
1006
1007                if (counter->cgrp)
1008                        fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
1009
1010                fputc('\n', output);
1011                return;
1012        }
1013
1014        if (nsec_counter(counter))
1015                nsec_printout(-1, 0, counter, avg);
1016        else
1017                abs_printout(-1, 0, counter, avg);
1018
1019        print_noise(counter, avg);
1020
1021        if (csv_output) {
1022                fputc('\n', output);
1023                return;
1024        }
1025
1026        if (scaled) {
1027                double avg_enabled, avg_running;
1028
1029                avg_enabled = avg_stats(&ps->res_stats[1]);
1030                avg_running = avg_stats(&ps->res_stats[2]);
1031
1032                fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled);
1033        }
1034        fprintf(output, "\n");
1035}
1036
1037/*
1038 * Print out the results of a single counter:
1039 * does not use aggregated count in system-wide
1040 */
1041static void print_counter(struct perf_evsel *counter, char *prefix)
1042{
1043        u64 ena, run, val;
1044        int cpu;
1045
1046        for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
1047                val = counter->counts->cpu[cpu].val;
1048                ena = counter->counts->cpu[cpu].ena;
1049                run = counter->counts->cpu[cpu].run;
1050
1051                if (prefix)
1052                        fprintf(output, "%s", prefix);
1053
1054                if (run == 0 || ena == 0) {
1055                        fprintf(output, "CPU%*d%s%*s%s%*s",
1056                                csv_output ? 0 : -4,
1057                                perf_evsel__cpus(counter)->map[cpu], csv_sep,
1058                                csv_output ? 0 : 18,
1059                                counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
1060                                csv_sep,
1061                                csv_output ? 0 : -24,
1062                                perf_evsel__name(counter));
1063
1064                        if (counter->cgrp)
1065                                fprintf(output, "%s%s",
1066                                        csv_sep, counter->cgrp->name);
1067
1068                        fputc('\n', output);
1069                        continue;
1070                }
1071
1072                if (nsec_counter(counter))
1073                        nsec_printout(cpu, 0, counter, val);
1074                else
1075                        abs_printout(cpu, 0, counter, val);
1076
1077                if (!csv_output) {
1078                        print_noise(counter, 1.0);
1079
1080                        if (run != ena)
1081                                fprintf(output, "  (%.2f%%)",
1082                                        100.0 * run / ena);
1083                }
1084                fputc('\n', output);
1085        }
1086}
1087
1088static void print_stat(int argc, const char **argv)
1089{
1090        struct perf_evsel *counter;
1091        int i;
1092
1093        fflush(stdout);
1094
1095        if (!csv_output) {
1096                fprintf(output, "\n");
1097                fprintf(output, " Performance counter stats for ");
1098                if (!perf_target__has_task(&target)) {
1099                        fprintf(output, "\'%s", argv[0]);
1100                        for (i = 1; i < argc; i++)
1101                                fprintf(output, " %s", argv[i]);
1102                } else if (target.pid)
1103                        fprintf(output, "process id \'%s", target.pid);
1104                else
1105                        fprintf(output, "thread id \'%s", target.tid);
1106
1107                fprintf(output, "\'");
1108                if (run_count > 1)
1109                        fprintf(output, " (%d runs)", run_count);
1110                fprintf(output, ":\n\n");
1111        }
1112
1113        switch (aggr_mode) {
1114        case AGGR_CORE:
1115        case AGGR_SOCKET:
1116                print_aggr(NULL);
1117                break;
1118        case AGGR_GLOBAL:
1119                list_for_each_entry(counter, &evsel_list->entries, node)
1120                        print_counter_aggr(counter, NULL);
1121                break;
1122        case AGGR_NONE:
1123                list_for_each_entry(counter, &evsel_list->entries, node)
1124                        print_counter(counter, NULL);
1125                break;
1126        default:
1127                break;
1128        }
1129
1130        if (!csv_output) {
1131                if (!null_run)
1132                        fprintf(output, "\n");
1133                fprintf(output, " %17.9f seconds time elapsed",
1134                                avg_stats(&walltime_nsecs_stats)/1e9);
1135                if (run_count > 1) {
1136                        fprintf(output, "                                        ");
1137                        print_noise_pct(stddev_stats(&walltime_nsecs_stats),
1138                                        avg_stats(&walltime_nsecs_stats));
1139                }
1140                fprintf(output, "\n\n");
1141        }
1142}
1143
1144static volatile int signr = -1;
1145
1146static void skip_signal(int signo)
1147{
1148        if ((child_pid == -1) || interval)
1149                done = 1;
1150
1151        signr = signo;
1152        /*
1153         * render child_pid harmless
1154         * won't send SIGTERM to a random
1155         * process in case of race condition
1156         * and fast PID recycling
1157         */
1158        child_pid = -1;
1159}
1160
1161static void sig_atexit(void)
1162{
1163        sigset_t set, oset;
1164
1165        /*
1166         * avoid race condition with SIGCHLD handler
1167         * in skip_signal() which is modifying child_pid
1168         * goal is to avoid send SIGTERM to a random
1169         * process
1170         */
1171        sigemptyset(&set);
1172        sigaddset(&set, SIGCHLD);
1173        sigprocmask(SIG_BLOCK, &set, &oset);
1174
1175        if (child_pid != -1)
1176                kill(child_pid, SIGTERM);
1177
1178        sigprocmask(SIG_SETMASK, &oset, NULL);
1179
1180        if (signr == -1)
1181                return;
1182
1183        signal(signr, SIG_DFL);
1184        kill(getpid(), signr);
1185}
1186
1187static int stat__set_big_num(const struct option *opt __maybe_unused,
1188                             const char *s __maybe_unused, int unset)
1189{
1190        big_num_opt = unset ? 0 : 1;
1191        return 0;
1192}
1193
1194static int perf_stat_init_aggr_mode(void)
1195{
1196        switch (aggr_mode) {
1197        case AGGR_SOCKET:
1198                if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
1199                        perror("cannot build socket map");
1200                        return -1;
1201                }
1202                aggr_get_id = cpu_map__get_socket;
1203                break;
1204        case AGGR_CORE:
1205                if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
1206                        perror("cannot build core map");
1207                        return -1;
1208                }
1209                aggr_get_id = cpu_map__get_core;
1210                break;
1211        case AGGR_NONE:
1212        case AGGR_GLOBAL:
1213        default:
1214                break;
1215        }
1216        return 0;
1217}
1218
1219
1220/*
1221 * Add default attributes, if there were no attributes specified or
1222 * if -d/--detailed, -d -d or -d -d -d is used:
1223 */
1224static int add_default_attributes(void)
1225{
1226        struct perf_event_attr default_attrs[] = {
1227
1228  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK              },
1229  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES        },
1230  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS          },
1231  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS             },
1232
1233  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES              },
1234  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
1235  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND  },
1236  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS            },
1237  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS     },
1238  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES           },
1239
1240};
1241
1242/*
1243 * Detailed stats (-d), covering the L1 and last level data caches:
1244 */
1245        struct perf_event_attr detailed_attrs[] = {
1246
1247  { .type = PERF_TYPE_HW_CACHE,
1248    .config =
1249         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1250        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1251        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1252
1253  { .type = PERF_TYPE_HW_CACHE,
1254    .config =
1255         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1256        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1257        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1258
1259  { .type = PERF_TYPE_HW_CACHE,
1260    .config =
1261         PERF_COUNT_HW_CACHE_LL                 <<  0  |
1262        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1263        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1264
1265  { .type = PERF_TYPE_HW_CACHE,
1266    .config =
1267         PERF_COUNT_HW_CACHE_LL                 <<  0  |
1268        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1269        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1270};
1271
1272/*
1273 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
1274 */
1275        struct perf_event_attr very_detailed_attrs[] = {
1276
1277  { .type = PERF_TYPE_HW_CACHE,
1278    .config =
1279         PERF_COUNT_HW_CACHE_L1I                <<  0  |
1280        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1281        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1282
1283  { .type = PERF_TYPE_HW_CACHE,
1284    .config =
1285         PERF_COUNT_HW_CACHE_L1I                <<  0  |
1286        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1287        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1288
1289  { .type = PERF_TYPE_HW_CACHE,
1290    .config =
1291         PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1292        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1293        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1294
1295  { .type = PERF_TYPE_HW_CACHE,
1296    .config =
1297         PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1298        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1299        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1300
1301  { .type = PERF_TYPE_HW_CACHE,
1302    .config =
1303         PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1304        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1305        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1306
1307  { .type = PERF_TYPE_HW_CACHE,
1308    .config =
1309         PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1310        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1311        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1312
1313};
1314
1315/*
1316 * Very, very detailed stats (-d -d -d), adding prefetch events:
1317 */
1318        struct perf_event_attr very_very_detailed_attrs[] = {
1319
1320  { .type = PERF_TYPE_HW_CACHE,
1321    .config =
1322         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1323        (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1324        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1325
1326  { .type = PERF_TYPE_HW_CACHE,
1327    .config =
1328         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1329        (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1330        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1331};
1332
1333        /* Set attrs if no event is selected and !null_run: */
1334        if (null_run)
1335                return 0;
1336
1337        if (!evsel_list->nr_entries) {
1338                if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
1339                        return -1;
1340        }
1341
1342        /* Detailed events get appended to the event list: */
1343
1344        if (detailed_run <  1)
1345                return 0;
1346
1347        /* Append detailed run extra attributes: */
1348        if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1349                return -1;
1350
1351        if (detailed_run < 2)
1352                return 0;
1353
1354        /* Append very detailed run extra attributes: */
1355        if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1356                return -1;
1357
1358        if (detailed_run < 3)
1359                return 0;
1360
1361        /* Append very, very detailed run extra attributes: */
1362        return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1363}
1364
1365int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1366{
1367        bool append_file = false;
1368        int output_fd = 0;
1369        const char *output_name = NULL;
1370        const struct option options[] = {
1371        OPT_CALLBACK('e', "event", &evsel_list, "event",
1372                     "event selector. use 'perf list' to list available events",
1373                     parse_events_option),
1374        OPT_CALLBACK(0, "filter", &evsel_list, "filter",
1375                     "event filter", parse_filter),
1376        OPT_BOOLEAN('i', "no-inherit", &no_inherit,
1377                    "child tasks do not inherit counters"),
1378        OPT_STRING('p', "pid", &target.pid, "pid",
1379                   "stat events on existing process id"),
1380        OPT_STRING('t', "tid", &target.tid, "tid",
1381                   "stat events on existing thread id"),
1382        OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
1383                    "system-wide collection from all CPUs"),
1384        OPT_BOOLEAN('g', "group", &group,
1385                    "put the counters into a counter group"),
1386        OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
1387        OPT_INCR('v', "verbose", &verbose,
1388                    "be more verbose (show counter open errors, etc)"),
1389        OPT_INTEGER('r', "repeat", &run_count,
1390                    "repeat command and print average + stddev (max: 100, forever: 0)"),
1391        OPT_BOOLEAN('n', "null", &null_run,
1392                    "null run - dont start any counters"),
1393        OPT_INCR('d', "detailed", &detailed_run,
1394                    "detailed run - start a lot of events"),
1395        OPT_BOOLEAN('S', "sync", &sync_run,
1396                    "call sync() before starting a run"),
1397        OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 
1398                           "print large numbers with thousands\' separators",
1399                           stat__set_big_num),
1400        OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
1401                    "list of cpus to monitor in system-wide"),
1402        OPT_SET_UINT('A', "no-aggr", &aggr_mode,
1403                    "disable CPU count aggregation", AGGR_NONE),
1404        OPT_STRING('x', "field-separator", &csv_sep, "separator",
1405                   "print counts with custom separator"),
1406        OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
1407                     "monitor event in cgroup name only", parse_cgroups),
1408        OPT_STRING('o', "output", &output_name, "file", "output file name"),
1409        OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
1410        OPT_INTEGER(0, "log-fd", &output_fd,
1411                    "log output to fd, instead of stderr"),
1412        OPT_STRING(0, "pre", &pre_cmd, "command",
1413                        "command to run prior to the measured command"),
1414        OPT_STRING(0, "post", &post_cmd, "command",
1415                        "command to run after to the measured command"),
1416        OPT_UINTEGER('I', "interval-print", &interval,
1417                    "print counts at regular interval in ms (>= 100)"),
1418        OPT_SET_UINT(0, "per-socket", &aggr_mode,
1419                     "aggregate counts per processor socket", AGGR_SOCKET),
1420        OPT_SET_UINT(0, "per-core", &aggr_mode,
1421                     "aggregate counts per physical processor core", AGGR_CORE),
1422        OPT_END()
1423        };
1424        const char * const stat_usage[] = {
1425                "perf stat [<options>] [<command>]",
1426                NULL
1427        };
1428        int status = -ENOMEM, run_idx;
1429        const char *mode;
1430
1431        setlocale(LC_ALL, "");
1432
1433        evsel_list = perf_evlist__new();
1434        if (evsel_list == NULL)
1435                return -ENOMEM;
1436
1437        argc = parse_options(argc, argv, options, stat_usage,
1438                PARSE_OPT_STOP_AT_NON_OPTION);
1439
1440        output = stderr;
1441        if (output_name && strcmp(output_name, "-"))
1442                output = NULL;
1443
1444        if (output_name && output_fd) {
1445                fprintf(stderr, "cannot use both --output and --log-fd\n");
1446                usage_with_options(stat_usage, options);
1447        }
1448
1449        if (output_fd < 0) {
1450                fprintf(stderr, "argument to --log-fd must be a > 0\n");
1451                usage_with_options(stat_usage, options);
1452        }
1453
1454        if (!output) {
1455                struct timespec tm;
1456                mode = append_file ? "a" : "w";
1457
1458                output = fopen(output_name, mode);
1459                if (!output) {
1460                        perror("failed to create output file");
1461                        return -1;
1462                }
1463                clock_gettime(CLOCK_REALTIME, &tm);
1464                fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
1465        } else if (output_fd > 0) {
1466                mode = append_file ? "a" : "w";
1467                output = fdopen(output_fd, mode);
1468                if (!output) {
1469                        perror("Failed opening logfd");
1470                        return -errno;
1471                }
1472        }
1473
1474        if (csv_sep) {
1475                csv_output = true;
1476                if (!strcmp(csv_sep, "\\t"))
1477                        csv_sep = "\t";
1478        } else
1479                csv_sep = DEFAULT_SEPARATOR;
1480
1481        /*
1482         * let the spreadsheet do the pretty-printing
1483         */
1484        if (csv_output) {
1485                /* User explicitly passed -B? */
1486                if (big_num_opt == 1) {
1487                        fprintf(stderr, "-B option not supported with -x\n");
1488                        usage_with_options(stat_usage, options);
1489                } else /* Nope, so disable big number formatting */
1490                        big_num = false;
1491        } else if (big_num_opt == 0) /* User passed --no-big-num */
1492                big_num = false;
1493
1494        if (!argc && !perf_target__has_task(&target))
1495                usage_with_options(stat_usage, options);
1496        if (run_count < 0) {
1497                usage_with_options(stat_usage, options);
1498        } else if (run_count == 0) {
1499                forever = true;
1500                run_count = 1;
1501        }
1502
1503        /* no_aggr, cgroup are for system-wide only */
1504        if ((aggr_mode != AGGR_GLOBAL || nr_cgroups)
1505             && !perf_target__has_cpu(&target)) {
1506                fprintf(stderr, "both cgroup and no-aggregation "
1507                        "modes only available in system-wide mode\n");
1508
1509                usage_with_options(stat_usage, options);
1510                return -1;
1511        }
1512
1513        if (add_default_attributes())
1514                goto out;
1515
1516        perf_target__validate(&target);
1517
1518        if (perf_evlist__create_maps(evsel_list, &target) < 0) {
1519                if (perf_target__has_task(&target))
1520                        pr_err("Problems finding threads of monitor\n");
1521                if (perf_target__has_cpu(&target))
1522                        perror("failed to parse CPUs map");
1523
1524                usage_with_options(stat_usage, options);
1525                return -1;
1526        }
1527        if (interval && interval < 100) {
1528                pr_err("print interval must be >= 100ms\n");
1529                usage_with_options(stat_usage, options);
1530                return -1;
1531        }
1532
1533        if (perf_evlist__alloc_stats(evsel_list, interval))
1534                goto out_free_maps;
1535
1536        if (perf_stat_init_aggr_mode())
1537                goto out;
1538
1539        /*
1540         * We dont want to block the signals - that would cause
1541         * child tasks to inherit that and Ctrl-C would not work.
1542         * What we want is for Ctrl-C to work in the exec()-ed
1543         * task, but being ignored by perf stat itself:
1544         */
1545        atexit(sig_atexit);
1546        if (!forever)
1547                signal(SIGINT,  skip_signal);
1548        signal(SIGCHLD, skip_signal);
1549        signal(SIGALRM, skip_signal);
1550        signal(SIGABRT, skip_signal);
1551
1552        status = 0;
1553        for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
1554                if (run_count != 1 && verbose)
1555                        fprintf(output, "[ perf stat: executing run #%d ... ]\n",
1556                                run_idx + 1);
1557
1558                status = run_perf_stat(argc, argv);
1559                if (forever && status != -1) {
1560                        print_stat(argc, argv);
1561                        perf_stat__reset_stats(evsel_list);
1562                }
1563        }
1564
1565        if (!forever && status != -1 && !interval)
1566                print_stat(argc, argv);
1567
1568        perf_evlist__free_stats(evsel_list);
1569out_free_maps:
1570        perf_evlist__delete_maps(evsel_list);
1571out:
1572        perf_evlist__delete(evsel_list);
1573        return status;
1574}
1575