linux/tools/perf/builtin-stat.c
<<
>>
Prefs
   1/*
   2 * builtin-stat.c
   3 *
   4 * Builtin stat command: Give a precise performance counters summary
   5 * overview about any workload, CPU or specific PID.
   6 *
   7 * Sample output:
   8
   9   $ perf stat ./hackbench 10
  10
  11  Time: 0.118
  12
  13  Performance counter stats for './hackbench 10':
  14
  15       1708.761321 task-clock                #   11.037 CPUs utilized
  16            41,190 context-switches          #    0.024 M/sec
  17             6,735 CPU-migrations            #    0.004 M/sec
  18            17,318 page-faults               #    0.010 M/sec
  19     5,205,202,243 cycles                    #    3.046 GHz
  20     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
  21     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
  22     2,603,501,247 instructions              #    0.50  insns per cycle
  23                                             #    1.48  stalled cycles per insn
  24       484,357,498 branches                  #  283.455 M/sec
  25         6,388,934 branch-misses             #    1.32% of all branches
  26
  27        0.154822978  seconds time elapsed
  28
  29 *
  30 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
  31 *
  32 * Improvements and fixes by:
  33 *
  34 *   Arjan van de Ven <arjan@linux.intel.com>
  35 *   Yanmin Zhang <yanmin.zhang@intel.com>
  36 *   Wu Fengguang <fengguang.wu@intel.com>
  37 *   Mike Galbraith <efault@gmx.de>
  38 *   Paul Mackerras <paulus@samba.org>
  39 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
  40 *
  41 * Released under the GPL v2. (and only v2, not any later version)
  42 */
  43
  44#include "perf.h"
  45#include "builtin.h"
  46#include "util/cgroup.h"
  47#include "util/util.h"
  48#include "util/parse-options.h"
  49#include "util/parse-events.h"
  50#include "util/pmu.h"
  51#include "util/event.h"
  52#include "util/evlist.h"
  53#include "util/evsel.h"
  54#include "util/debug.h"
  55#include "util/color.h"
  56#include "util/stat.h"
  57#include "util/header.h"
  58#include "util/cpumap.h"
  59#include "util/thread.h"
  60#include "util/thread_map.h"
  61
  62#include <stdlib.h>
  63#include <sys/prctl.h>
  64#include <locale.h>
  65
  66#define DEFAULT_SEPARATOR       " "
  67#define CNTR_NOT_SUPPORTED      "<not supported>"
  68#define CNTR_NOT_COUNTED        "<not counted>"
  69
  70static void print_counters(struct timespec *ts, int argc, const char **argv);
  71
  72/* Default events used for perf stat -T */
  73static const char *transaction_attrs = {
  74        "task-clock,"
  75        "{"
  76        "instructions,"
  77        "cycles,"
  78        "cpu/cycles-t/,"
  79        "cpu/tx-start/,"
  80        "cpu/el-start/,"
  81        "cpu/cycles-ct/"
  82        "}"
  83};
  84
  85/* More limited version when the CPU does not have all events. */
  86static const char * transaction_limited_attrs = {
  87        "task-clock,"
  88        "{"
  89        "instructions,"
  90        "cycles,"
  91        "cpu/cycles-t/,"
  92        "cpu/tx-start/"
  93        "}"
  94};
  95
  96static struct perf_evlist       *evsel_list;
  97
  98static struct target target = {
  99        .uid    = UINT_MAX,
 100};
 101
 102static int                      run_count                       =  1;
 103static bool                     no_inherit                      = false;
 104static bool                     scale                           =  true;
 105static enum aggr_mode           aggr_mode                       = AGGR_GLOBAL;
 106static volatile pid_t           child_pid                       = -1;
 107static bool                     null_run                        =  false;
 108static int                      detailed_run                    =  0;
 109static bool                     transaction_run;
 110static bool                     big_num                         =  true;
 111static int                      big_num_opt                     =  -1;
 112static const char               *csv_sep                        = NULL;
 113static bool                     csv_output                      = false;
 114static bool                     group                           = false;
 115static FILE                     *output                         = NULL;
 116static const char               *pre_cmd                        = NULL;
 117static const char               *post_cmd                       = NULL;
 118static bool                     sync_run                        = false;
 119static unsigned int             interval                        = 0;
 120static unsigned int             initial_delay                   = 0;
 121static unsigned int             unit_width                      = 4; /* strlen("unit") */
 122static bool                     forever                         = false;
 123static struct timespec          ref_time;
 124static struct cpu_map           *aggr_map;
 125static int                      (*aggr_get_id)(struct cpu_map *m, int cpu);
 126
 127static volatile int done = 0;
 128
 129static inline void diff_timespec(struct timespec *r, struct timespec *a,
 130                                 struct timespec *b)
 131{
 132        r->tv_sec = a->tv_sec - b->tv_sec;
 133        if (a->tv_nsec < b->tv_nsec) {
 134                r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
 135                r->tv_sec--;
 136        } else {
 137                r->tv_nsec = a->tv_nsec - b->tv_nsec ;
 138        }
 139}
 140
 141static void perf_stat__reset_stats(void)
 142{
 143        perf_evlist__reset_stats(evsel_list);
 144        perf_stat__reset_shadow_stats();
 145}
 146
 147static int create_perf_stat_counter(struct perf_evsel *evsel)
 148{
 149        struct perf_event_attr *attr = &evsel->attr;
 150
 151        if (scale)
 152                attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
 153                                    PERF_FORMAT_TOTAL_TIME_RUNNING;
 154
 155        attr->inherit = !no_inherit;
 156
 157        if (target__has_cpu(&target))
 158                return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
 159
 160        if (!target__has_task(&target) && perf_evsel__is_group_leader(evsel)) {
 161                attr->disabled = 1;
 162                if (!initial_delay)
 163                        attr->enable_on_exec = 1;
 164        }
 165
 166        return perf_evsel__open_per_thread(evsel, evsel_list->threads);
 167}
 168
 169/*
 170 * Does the counter have nsecs as a unit?
 171 */
 172static inline int nsec_counter(struct perf_evsel *evsel)
 173{
 174        if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
 175            perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
 176                return 1;
 177
 178        return 0;
 179}
 180
 181static void zero_per_pkg(struct perf_evsel *counter)
 182{
 183        if (counter->per_pkg_mask)
 184                memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
 185}
 186
 187static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
 188{
 189        unsigned long *mask = counter->per_pkg_mask;
 190        struct cpu_map *cpus = perf_evsel__cpus(counter);
 191        int s;
 192
 193        *skip = false;
 194
 195        if (!counter->per_pkg)
 196                return 0;
 197
 198        if (cpu_map__empty(cpus))
 199                return 0;
 200
 201        if (!mask) {
 202                mask = zalloc(MAX_NR_CPUS);
 203                if (!mask)
 204                        return -ENOMEM;
 205
 206                counter->per_pkg_mask = mask;
 207        }
 208
 209        s = cpu_map__get_socket(cpus, cpu);
 210        if (s < 0)
 211                return -1;
 212
 213        *skip = test_and_set_bit(s, mask) == 1;
 214        return 0;
 215}
 216
 217static int
 218process_counter_values(struct perf_evsel *evsel, int cpu, int thread,
 219                       struct perf_counts_values *count)
 220{
 221        struct perf_counts_values *aggr = &evsel->counts->aggr;
 222        static struct perf_counts_values zero;
 223        bool skip = false;
 224
 225        if (check_per_pkg(evsel, cpu, &skip)) {
 226                pr_err("failed to read per-pkg counter\n");
 227                return -1;
 228        }
 229
 230        if (skip)
 231                count = &zero;
 232
 233        switch (aggr_mode) {
 234        case AGGR_THREAD:
 235        case AGGR_CORE:
 236        case AGGR_SOCKET:
 237        case AGGR_NONE:
 238                if (!evsel->snapshot)
 239                        perf_evsel__compute_deltas(evsel, cpu, thread, count);
 240                perf_counts_values__scale(count, scale, NULL);
 241                if (aggr_mode == AGGR_NONE)
 242                        perf_stat__update_shadow_stats(evsel, count->values, cpu);
 243                break;
 244        case AGGR_GLOBAL:
 245                aggr->val += count->val;
 246                if (scale) {
 247                        aggr->ena += count->ena;
 248                        aggr->run += count->run;
 249                }
 250        default:
 251                break;
 252        }
 253
 254        return 0;
 255}
 256
 257static int process_counter_maps(struct perf_evsel *counter)
 258{
 259        int nthreads = thread_map__nr(counter->threads);
 260        int ncpus = perf_evsel__nr_cpus(counter);
 261        int cpu, thread;
 262
 263        if (counter->system_wide)
 264                nthreads = 1;
 265
 266        for (thread = 0; thread < nthreads; thread++) {
 267                for (cpu = 0; cpu < ncpus; cpu++) {
 268                        if (process_counter_values(counter, cpu, thread,
 269                                                   perf_counts(counter->counts, cpu, thread)))
 270                                return -1;
 271                }
 272        }
 273
 274        return 0;
 275}
 276
 277static int process_counter(struct perf_evsel *counter)
 278{
 279        struct perf_counts_values *aggr = &counter->counts->aggr;
 280        struct perf_stat *ps = counter->priv;
 281        u64 *count = counter->counts->aggr.values;
 282        int i, ret;
 283
 284        aggr->val = aggr->ena = aggr->run = 0;
 285        init_stats(ps->res_stats);
 286
 287        if (counter->per_pkg)
 288                zero_per_pkg(counter);
 289
 290        ret = process_counter_maps(counter);
 291        if (ret)
 292                return ret;
 293
 294        if (aggr_mode != AGGR_GLOBAL)
 295                return 0;
 296
 297        if (!counter->snapshot)
 298                perf_evsel__compute_deltas(counter, -1, -1, aggr);
 299        perf_counts_values__scale(aggr, scale, &counter->counts->scaled);
 300
 301        for (i = 0; i < 3; i++)
 302                update_stats(&ps->res_stats[i], count[i]);
 303
 304        if (verbose) {
 305                fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
 306                        perf_evsel__name(counter), count[0], count[1], count[2]);
 307        }
 308
 309        /*
 310         * Save the full runtime - to allow normalization during printout:
 311         */
 312        perf_stat__update_shadow_stats(counter, count, 0);
 313
 314        return 0;
 315}
 316
 317/*
 318 * Read out the results of a single counter:
 319 * do not aggregate counts across CPUs in system-wide mode
 320 */
 321static int read_counter(struct perf_evsel *counter)
 322{
 323        int nthreads = thread_map__nr(evsel_list->threads);
 324        int ncpus = perf_evsel__nr_cpus(counter);
 325        int cpu, thread;
 326
 327        if (!counter->supported)
 328                return -ENOENT;
 329
 330        if (counter->system_wide)
 331                nthreads = 1;
 332
 333        for (thread = 0; thread < nthreads; thread++) {
 334                for (cpu = 0; cpu < ncpus; cpu++) {
 335                        struct perf_counts_values *count;
 336
 337                        count = perf_counts(counter->counts, cpu, thread);
 338                        if (perf_evsel__read(counter, cpu, thread, count))
 339                                return -1;
 340                }
 341        }
 342
 343        return 0;
 344}
 345
 346static void read_counters(bool close_counters)
 347{
 348        struct perf_evsel *counter;
 349
 350        evlist__for_each(evsel_list, counter) {
 351                if (read_counter(counter))
 352                        pr_warning("failed to read counter %s\n", counter->name);
 353
 354                if (process_counter(counter))
 355                        pr_warning("failed to process counter %s\n", counter->name);
 356
 357                if (close_counters) {
 358                        perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
 359                                             thread_map__nr(evsel_list->threads));
 360                }
 361        }
 362}
 363
 364static void process_interval(void)
 365{
 366        struct timespec ts, rs;
 367
 368        read_counters(false);
 369
 370        clock_gettime(CLOCK_MONOTONIC, &ts);
 371        diff_timespec(&rs, &ts, &ref_time);
 372
 373        print_counters(&rs, 0, NULL);
 374}
 375
 376static void handle_initial_delay(void)
 377{
 378        struct perf_evsel *counter;
 379
 380        if (initial_delay) {
 381                const int ncpus = cpu_map__nr(evsel_list->cpus),
 382                        nthreads = thread_map__nr(evsel_list->threads);
 383
 384                usleep(initial_delay * 1000);
 385                evlist__for_each(evsel_list, counter)
 386                        perf_evsel__enable(counter, ncpus, nthreads);
 387        }
 388}
 389
 390static volatile int workload_exec_errno;
 391
 392/*
 393 * perf_evlist__prepare_workload will send a SIGUSR1
 394 * if the fork fails, since we asked by setting its
 395 * want_signal to true.
 396 */
 397static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
 398                                        void *ucontext __maybe_unused)
 399{
 400        workload_exec_errno = info->si_value.sival_int;
 401}
 402
 403static int __run_perf_stat(int argc, const char **argv)
 404{
 405        char msg[512];
 406        unsigned long long t0, t1;
 407        struct perf_evsel *counter;
 408        struct timespec ts;
 409        size_t l;
 410        int status = 0;
 411        const bool forks = (argc > 0);
 412
 413        if (interval) {
 414                ts.tv_sec  = interval / 1000;
 415                ts.tv_nsec = (interval % 1000) * 1000000;
 416        } else {
 417                ts.tv_sec  = 1;
 418                ts.tv_nsec = 0;
 419        }
 420
 421        if (forks) {
 422                if (perf_evlist__prepare_workload(evsel_list, &target, argv, false,
 423                                                  workload_exec_failed_signal) < 0) {
 424                        perror("failed to prepare workload");
 425                        return -1;
 426                }
 427                child_pid = evsel_list->workload.pid;
 428        }
 429
 430        if (group)
 431                perf_evlist__set_leader(evsel_list);
 432
 433        evlist__for_each(evsel_list, counter) {
 434                if (create_perf_stat_counter(counter) < 0) {
 435                        /*
 436                         * PPC returns ENXIO for HW counters until 2.6.37
 437                         * (behavior changed with commit b0a873e).
 438                         */
 439                        if (errno == EINVAL || errno == ENOSYS ||
 440                            errno == ENOENT || errno == EOPNOTSUPP ||
 441                            errno == ENXIO) {
 442                                if (verbose)
 443                                        ui__warning("%s event is not supported by the kernel.\n",
 444                                                    perf_evsel__name(counter));
 445                                counter->supported = false;
 446
 447                                if ((counter->leader != counter) ||
 448                                    !(counter->leader->nr_members > 1))
 449                                        continue;
 450                        }
 451
 452                        perf_evsel__open_strerror(counter, &target,
 453                                                  errno, msg, sizeof(msg));
 454                        ui__error("%s\n", msg);
 455
 456                        if (child_pid != -1)
 457                                kill(child_pid, SIGTERM);
 458
 459                        return -1;
 460                }
 461                counter->supported = true;
 462
 463                l = strlen(counter->unit);
 464                if (l > unit_width)
 465                        unit_width = l;
 466        }
 467
 468        if (perf_evlist__apply_filters(evsel_list, &counter)) {
 469                error("failed to set filter \"%s\" on event %s with %d (%s)\n",
 470                        counter->filter, perf_evsel__name(counter), errno,
 471                        strerror_r(errno, msg, sizeof(msg)));
 472                return -1;
 473        }
 474
 475        /*
 476         * Enable counters and exec the command:
 477         */
 478        t0 = rdclock();
 479        clock_gettime(CLOCK_MONOTONIC, &ref_time);
 480
 481        if (forks) {
 482                perf_evlist__start_workload(evsel_list);
 483                handle_initial_delay();
 484
 485                if (interval) {
 486                        while (!waitpid(child_pid, &status, WNOHANG)) {
 487                                nanosleep(&ts, NULL);
 488                                process_interval();
 489                        }
 490                }
 491                wait(&status);
 492
 493                if (workload_exec_errno) {
 494                        const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
 495                        pr_err("Workload failed: %s\n", emsg);
 496                        return -1;
 497                }
 498
 499                if (WIFSIGNALED(status))
 500                        psignal(WTERMSIG(status), argv[0]);
 501        } else {
 502                handle_initial_delay();
 503                while (!done) {
 504                        nanosleep(&ts, NULL);
 505                        if (interval)
 506                                process_interval();
 507                }
 508        }
 509
 510        t1 = rdclock();
 511
 512        update_stats(&walltime_nsecs_stats, t1 - t0);
 513
 514        read_counters(true);
 515
 516        return WEXITSTATUS(status);
 517}
 518
 519static int run_perf_stat(int argc, const char **argv)
 520{
 521        int ret;
 522
 523        if (pre_cmd) {
 524                ret = system(pre_cmd);
 525                if (ret)
 526                        return ret;
 527        }
 528
 529        if (sync_run)
 530                sync();
 531
 532        ret = __run_perf_stat(argc, argv);
 533        if (ret)
 534                return ret;
 535
 536        if (post_cmd) {
 537                ret = system(post_cmd);
 538                if (ret)
 539                        return ret;
 540        }
 541
 542        return ret;
 543}
 544
 545static void print_running(u64 run, u64 ena)
 546{
 547        if (csv_output) {
 548                fprintf(output, "%s%" PRIu64 "%s%.2f",
 549                                        csv_sep,
 550                                        run,
 551                                        csv_sep,
 552                                        ena ? 100.0 * run / ena : 100.0);
 553        } else if (run != ena) {
 554                fprintf(output, "  (%.2f%%)", 100.0 * run / ena);
 555        }
 556}
 557
 558static void print_noise_pct(double total, double avg)
 559{
 560        double pct = rel_stddev_stats(total, avg);
 561
 562        if (csv_output)
 563                fprintf(output, "%s%.2f%%", csv_sep, pct);
 564        else if (pct)
 565                fprintf(output, "  ( +-%6.2f%% )", pct);
 566}
 567
 568static void print_noise(struct perf_evsel *evsel, double avg)
 569{
 570        struct perf_stat *ps;
 571
 572        if (run_count == 1)
 573                return;
 574
 575        ps = evsel->priv;
 576        print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
 577}
 578
 579static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
 580{
 581        switch (aggr_mode) {
 582        case AGGR_CORE:
 583                fprintf(output, "S%d-C%*d%s%*d%s",
 584                        cpu_map__id_to_socket(id),
 585                        csv_output ? 0 : -8,
 586                        cpu_map__id_to_cpu(id),
 587                        csv_sep,
 588                        csv_output ? 0 : 4,
 589                        nr,
 590                        csv_sep);
 591                break;
 592        case AGGR_SOCKET:
 593                fprintf(output, "S%*d%s%*d%s",
 594                        csv_output ? 0 : -5,
 595                        id,
 596                        csv_sep,
 597                        csv_output ? 0 : 4,
 598                        nr,
 599                        csv_sep);
 600                        break;
 601        case AGGR_NONE:
 602                fprintf(output, "CPU%*d%s",
 603                        csv_output ? 0 : -4,
 604                        perf_evsel__cpus(evsel)->map[id], csv_sep);
 605                break;
 606        case AGGR_THREAD:
 607                fprintf(output, "%*s-%*d%s",
 608                        csv_output ? 0 : 16,
 609                        thread_map__comm(evsel->threads, id),
 610                        csv_output ? 0 : -8,
 611                        thread_map__pid(evsel->threads, id),
 612                        csv_sep);
 613                break;
 614        case AGGR_GLOBAL:
 615        default:
 616                break;
 617        }
 618}
 619
 620static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 621{
 622        double msecs = avg / 1e6;
 623        const char *fmt_v, *fmt_n;
 624        char name[25];
 625
 626        fmt_v = csv_output ? "%.6f%s" : "%18.6f%s";
 627        fmt_n = csv_output ? "%s" : "%-25s";
 628
 629        aggr_printout(evsel, id, nr);
 630
 631        scnprintf(name, sizeof(name), "%s%s",
 632                  perf_evsel__name(evsel), csv_output ? "" : " (msec)");
 633
 634        fprintf(output, fmt_v, msecs, csv_sep);
 635
 636        if (csv_output)
 637                fprintf(output, "%s%s", evsel->unit, csv_sep);
 638        else
 639                fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep);
 640
 641        fprintf(output, fmt_n, name);
 642
 643        if (evsel->cgrp)
 644                fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 645
 646        if (csv_output || interval)
 647                return;
 648
 649        if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
 650                fprintf(output, " # %8.3f CPUs utilized          ",
 651                        avg / avg_stats(&walltime_nsecs_stats));
 652        else
 653                fprintf(output, "                                   ");
 654}
 655
 656static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 657{
 658        double sc =  evsel->scale;
 659        const char *fmt;
 660        int cpu = cpu_map__id_to_cpu(id);
 661
 662        if (csv_output) {
 663                fmt = sc != 1.0 ?  "%.2f%s" : "%.0f%s";
 664        } else {
 665                if (big_num)
 666                        fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s";
 667                else
 668                        fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s";
 669        }
 670
 671        aggr_printout(evsel, id, nr);
 672
 673        if (aggr_mode == AGGR_GLOBAL)
 674                cpu = 0;
 675
 676        fprintf(output, fmt, avg, csv_sep);
 677
 678        if (evsel->unit)
 679                fprintf(output, "%-*s%s",
 680                        csv_output ? 0 : unit_width,
 681                        evsel->unit, csv_sep);
 682
 683        fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));
 684
 685        if (evsel->cgrp)
 686                fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 687
 688        if (csv_output || interval)
 689                return;
 690
 691        perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode);
 692}
 693
 694static void print_aggr(char *prefix)
 695{
 696        struct perf_evsel *counter;
 697        int cpu, cpu2, s, s2, id, nr;
 698        double uval;
 699        u64 ena, run, val;
 700
 701        if (!(aggr_map || aggr_get_id))
 702                return;
 703
 704        for (s = 0; s < aggr_map->nr; s++) {
 705                id = aggr_map->map[s];
 706                evlist__for_each(evsel_list, counter) {
 707                        val = ena = run = 0;
 708                        nr = 0;
 709                        for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
 710                                cpu2 = perf_evsel__cpus(counter)->map[cpu];
 711                                s2 = aggr_get_id(evsel_list->cpus, cpu2);
 712                                if (s2 != id)
 713                                        continue;
 714                                val += perf_counts(counter->counts, cpu, 0)->val;
 715                                ena += perf_counts(counter->counts, cpu, 0)->ena;
 716                                run += perf_counts(counter->counts, cpu, 0)->run;
 717                                nr++;
 718                        }
 719                        if (prefix)
 720                                fprintf(output, "%s", prefix);
 721
 722                        if (run == 0 || ena == 0) {
 723                                aggr_printout(counter, id, nr);
 724
 725                                fprintf(output, "%*s%s",
 726                                        csv_output ? 0 : 18,
 727                                        counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
 728                                        csv_sep);
 729
 730                                fprintf(output, "%-*s%s",
 731                                        csv_output ? 0 : unit_width,
 732                                        counter->unit, csv_sep);
 733
 734                                fprintf(output, "%*s",
 735                                        csv_output ? 0 : -25,
 736                                        perf_evsel__name(counter));
 737
 738                                if (counter->cgrp)
 739                                        fprintf(output, "%s%s",
 740                                                csv_sep, counter->cgrp->name);
 741
 742                                print_running(run, ena);
 743                                fputc('\n', output);
 744                                continue;
 745                        }
 746                        uval = val * counter->scale;
 747
 748                        if (nsec_counter(counter))
 749                                nsec_printout(id, nr, counter, uval);
 750                        else
 751                                abs_printout(id, nr, counter, uval);
 752
 753                        if (!csv_output)
 754                                print_noise(counter, 1.0);
 755
 756                        print_running(run, ena);
 757                        fputc('\n', output);
 758                }
 759        }
 760}
 761
 762static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
 763{
 764        int nthreads = thread_map__nr(counter->threads);
 765        int ncpus = cpu_map__nr(counter->cpus);
 766        int cpu, thread;
 767        double uval;
 768
 769        for (thread = 0; thread < nthreads; thread++) {
 770                u64 ena = 0, run = 0, val = 0;
 771
 772                for (cpu = 0; cpu < ncpus; cpu++) {
 773                        val += perf_counts(counter->counts, cpu, thread)->val;
 774                        ena += perf_counts(counter->counts, cpu, thread)->ena;
 775                        run += perf_counts(counter->counts, cpu, thread)->run;
 776                }
 777
 778                if (prefix)
 779                        fprintf(output, "%s", prefix);
 780
 781                uval = val * counter->scale;
 782
 783                if (nsec_counter(counter))
 784                        nsec_printout(thread, 0, counter, uval);
 785                else
 786                        abs_printout(thread, 0, counter, uval);
 787
 788                if (!csv_output)
 789                        print_noise(counter, 1.0);
 790
 791                print_running(run, ena);
 792                fputc('\n', output);
 793        }
 794}
 795
 796/*
 797 * Print out the results of a single counter:
 798 * aggregated counts in system-wide mode
 799 */
 800static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
 801{
 802        struct perf_stat *ps = counter->priv;
 803        double avg = avg_stats(&ps->res_stats[0]);
 804        int scaled = counter->counts->scaled;
 805        double uval;
 806        double avg_enabled, avg_running;
 807
 808        avg_enabled = avg_stats(&ps->res_stats[1]);
 809        avg_running = avg_stats(&ps->res_stats[2]);
 810
 811        if (prefix)
 812                fprintf(output, "%s", prefix);
 813
 814        if (scaled == -1 || !counter->supported) {
 815                fprintf(output, "%*s%s",
 816                        csv_output ? 0 : 18,
 817                        counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
 818                        csv_sep);
 819                fprintf(output, "%-*s%s",
 820                        csv_output ? 0 : unit_width,
 821                        counter->unit, csv_sep);
 822                fprintf(output, "%*s",
 823                        csv_output ? 0 : -25,
 824                        perf_evsel__name(counter));
 825
 826                if (counter->cgrp)
 827                        fprintf(output, "%s%s", csv_sep, counter->cgrp->name);
 828
 829                print_running(avg_running, avg_enabled);
 830                fputc('\n', output);
 831                return;
 832        }
 833
 834        uval = avg * counter->scale;
 835
 836        if (nsec_counter(counter))
 837                nsec_printout(-1, 0, counter, uval);
 838        else
 839                abs_printout(-1, 0, counter, uval);
 840
 841        print_noise(counter, avg);
 842
 843        print_running(avg_running, avg_enabled);
 844        fprintf(output, "\n");
 845}
 846
 847/*
 848 * Print out the results of a single counter:
 849 * does not use aggregated count in system-wide
 850 */
 851static void print_counter(struct perf_evsel *counter, char *prefix)
 852{
 853        u64 ena, run, val;
 854        double uval;
 855        int cpu;
 856
 857        for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
 858                val = perf_counts(counter->counts, cpu, 0)->val;
 859                ena = perf_counts(counter->counts, cpu, 0)->ena;
 860                run = perf_counts(counter->counts, cpu, 0)->run;
 861
 862                if (prefix)
 863                        fprintf(output, "%s", prefix);
 864
 865                if (run == 0 || ena == 0) {
 866                        fprintf(output, "CPU%*d%s%*s%s",
 867                                csv_output ? 0 : -4,
 868                                perf_evsel__cpus(counter)->map[cpu], csv_sep,
 869                                csv_output ? 0 : 18,
 870                                counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
 871                                csv_sep);
 872
 873                                fprintf(output, "%-*s%s",
 874                                        csv_output ? 0 : unit_width,
 875                                        counter->unit, csv_sep);
 876
 877                                fprintf(output, "%*s",
 878                                        csv_output ? 0 : -25,
 879                                        perf_evsel__name(counter));
 880
 881                        if (counter->cgrp)
 882                                fprintf(output, "%s%s",
 883                                        csv_sep, counter->cgrp->name);
 884
 885                        print_running(run, ena);
 886                        fputc('\n', output);
 887                        continue;
 888                }
 889
 890                uval = val * counter->scale;
 891
 892                if (nsec_counter(counter))
 893                        nsec_printout(cpu, 0, counter, uval);
 894                else
 895                        abs_printout(cpu, 0, counter, uval);
 896
 897                if (!csv_output)
 898                        print_noise(counter, 1.0);
 899                print_running(run, ena);
 900
 901                fputc('\n', output);
 902        }
 903}
 904
 905static void print_interval(char *prefix, struct timespec *ts)
 906{
 907        static int num_print_interval;
 908
 909        sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
 910
 911        if (num_print_interval == 0 && !csv_output) {
 912                switch (aggr_mode) {
 913                case AGGR_SOCKET:
 914                        fprintf(output, "#           time socket cpus             counts %*s events\n", unit_width, "unit");
 915                        break;
 916                case AGGR_CORE:
 917                        fprintf(output, "#           time core         cpus             counts %*s events\n", unit_width, "unit");
 918                        break;
 919                case AGGR_NONE:
 920                        fprintf(output, "#           time CPU                counts %*s events\n", unit_width, "unit");
 921                        break;
 922                case AGGR_THREAD:
 923                        fprintf(output, "#           time             comm-pid                  counts %*s events\n", unit_width, "unit");
 924                        break;
 925                case AGGR_GLOBAL:
 926                default:
 927                        fprintf(output, "#           time             counts %*s events\n", unit_width, "unit");
 928                }
 929        }
 930
 931        if (++num_print_interval == 25)
 932                num_print_interval = 0;
 933}
 934
 935static void print_header(int argc, const char **argv)
 936{
 937        int i;
 938
 939        fflush(stdout);
 940
 941        if (!csv_output) {
 942                fprintf(output, "\n");
 943                fprintf(output, " Performance counter stats for ");
 944                if (target.system_wide)
 945                        fprintf(output, "\'system wide");
 946                else if (target.cpu_list)
 947                        fprintf(output, "\'CPU(s) %s", target.cpu_list);
 948                else if (!target__has_task(&target)) {
 949                        fprintf(output, "\'%s", argv[0]);
 950                        for (i = 1; i < argc; i++)
 951                                fprintf(output, " %s", argv[i]);
 952                } else if (target.pid)
 953                        fprintf(output, "process id \'%s", target.pid);
 954                else
 955                        fprintf(output, "thread id \'%s", target.tid);
 956
 957                fprintf(output, "\'");
 958                if (run_count > 1)
 959                        fprintf(output, " (%d runs)", run_count);
 960                fprintf(output, ":\n\n");
 961        }
 962}
 963
 964static void print_footer(void)
 965{
 966        if (!null_run)
 967                fprintf(output, "\n");
 968        fprintf(output, " %17.9f seconds time elapsed",
 969                        avg_stats(&walltime_nsecs_stats)/1e9);
 970        if (run_count > 1) {
 971                fprintf(output, "                                        ");
 972                print_noise_pct(stddev_stats(&walltime_nsecs_stats),
 973                                avg_stats(&walltime_nsecs_stats));
 974        }
 975        fprintf(output, "\n\n");
 976}
 977
 978static void print_counters(struct timespec *ts, int argc, const char **argv)
 979{
 980        struct perf_evsel *counter;
 981        char buf[64], *prefix = NULL;
 982
 983        if (interval)
 984                print_interval(prefix = buf, ts);
 985        else
 986                print_header(argc, argv);
 987
 988        switch (aggr_mode) {
 989        case AGGR_CORE:
 990        case AGGR_SOCKET:
 991                print_aggr(prefix);
 992                break;
 993        case AGGR_THREAD:
 994                evlist__for_each(evsel_list, counter)
 995                        print_aggr_thread(counter, prefix);
 996                break;
 997        case AGGR_GLOBAL:
 998                evlist__for_each(evsel_list, counter)
 999                        print_counter_aggr(counter, prefix);
1000                break;
1001        case AGGR_NONE:
1002                evlist__for_each(evsel_list, counter)
1003                        print_counter(counter, prefix);
1004                break;
1005        default:
1006                break;
1007        }
1008
1009        if (!interval && !csv_output)
1010                print_footer();
1011
1012        fflush(output);
1013}
1014
1015static volatile int signr = -1;
1016
1017static void skip_signal(int signo)
1018{
1019        if ((child_pid == -1) || interval)
1020                done = 1;
1021
1022        signr = signo;
1023        /*
1024         * render child_pid harmless
1025         * won't send SIGTERM to a random
1026         * process in case of race condition
1027         * and fast PID recycling
1028         */
1029        child_pid = -1;
1030}
1031
1032static void sig_atexit(void)
1033{
1034        sigset_t set, oset;
1035
1036        /*
1037         * avoid race condition with SIGCHLD handler
1038         * in skip_signal() which is modifying child_pid
1039         * goal is to avoid send SIGTERM to a random
1040         * process
1041         */
1042        sigemptyset(&set);
1043        sigaddset(&set, SIGCHLD);
1044        sigprocmask(SIG_BLOCK, &set, &oset);
1045
1046        if (child_pid != -1)
1047                kill(child_pid, SIGTERM);
1048
1049        sigprocmask(SIG_SETMASK, &oset, NULL);
1050
1051        if (signr == -1)
1052                return;
1053
1054        signal(signr, SIG_DFL);
1055        kill(getpid(), signr);
1056}
1057
1058static int stat__set_big_num(const struct option *opt __maybe_unused,
1059                             const char *s __maybe_unused, int unset)
1060{
1061        big_num_opt = unset ? 0 : 1;
1062        return 0;
1063}
1064
1065static int perf_stat_init_aggr_mode(void)
1066{
1067        switch (aggr_mode) {
1068        case AGGR_SOCKET:
1069                if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
1070                        perror("cannot build socket map");
1071                        return -1;
1072                }
1073                aggr_get_id = cpu_map__get_socket;
1074                break;
1075        case AGGR_CORE:
1076                if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
1077                        perror("cannot build core map");
1078                        return -1;
1079                }
1080                aggr_get_id = cpu_map__get_core;
1081                break;
1082        case AGGR_NONE:
1083        case AGGR_GLOBAL:
1084        case AGGR_THREAD:
1085        default:
1086                break;
1087        }
1088        return 0;
1089}
1090
1091/*
1092 * Add default attributes, if there were no attributes specified or
1093 * if -d/--detailed, -d -d or -d -d -d is used:
1094 */
1095static int add_default_attributes(void)
1096{
1097        struct perf_event_attr default_attrs[] = {
1098
1099  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK              },
1100  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES        },
1101  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS          },
1102  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS             },
1103
1104  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES              },
1105  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
1106  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND  },
1107  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS            },
1108  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS     },
1109  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES           },
1110
1111};
1112
1113/*
1114 * Detailed stats (-d), covering the L1 and last level data caches:
1115 */
1116        struct perf_event_attr detailed_attrs[] = {
1117
1118  { .type = PERF_TYPE_HW_CACHE,
1119    .config =
1120         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1121        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1122        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1123
1124  { .type = PERF_TYPE_HW_CACHE,
1125    .config =
1126         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1127        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1128        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1129
1130  { .type = PERF_TYPE_HW_CACHE,
1131    .config =
1132         PERF_COUNT_HW_CACHE_LL                 <<  0  |
1133        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1134        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1135
1136  { .type = PERF_TYPE_HW_CACHE,
1137    .config =
1138         PERF_COUNT_HW_CACHE_LL                 <<  0  |
1139        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1140        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1141};
1142
1143/*
1144 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
1145 */
1146        struct perf_event_attr very_detailed_attrs[] = {
1147
1148  { .type = PERF_TYPE_HW_CACHE,
1149    .config =
1150         PERF_COUNT_HW_CACHE_L1I                <<  0  |
1151        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1152        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1153
1154  { .type = PERF_TYPE_HW_CACHE,
1155    .config =
1156         PERF_COUNT_HW_CACHE_L1I                <<  0  |
1157        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1158        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1159
1160  { .type = PERF_TYPE_HW_CACHE,
1161    .config =
1162         PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1163        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1164        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1165
1166  { .type = PERF_TYPE_HW_CACHE,
1167    .config =
1168         PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1169        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1170        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1171
1172  { .type = PERF_TYPE_HW_CACHE,
1173    .config =
1174         PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1175        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1176        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1177
1178  { .type = PERF_TYPE_HW_CACHE,
1179    .config =
1180         PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1181        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1182        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1183
1184};
1185
1186/*
1187 * Very, very detailed stats (-d -d -d), adding prefetch events:
1188 */
1189        struct perf_event_attr very_very_detailed_attrs[] = {
1190
1191  { .type = PERF_TYPE_HW_CACHE,
1192    .config =
1193         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1194        (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1195        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1196
1197  { .type = PERF_TYPE_HW_CACHE,
1198    .config =
1199         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1200        (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1201        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1202};
1203
1204        /* Set attrs if no event is selected and !null_run: */
1205        if (null_run)
1206                return 0;
1207
1208        if (transaction_run) {
1209                int err;
1210                if (pmu_have_event("cpu", "cycles-ct") &&
1211                    pmu_have_event("cpu", "el-start"))
1212                        err = parse_events(evsel_list, transaction_attrs, NULL);
1213                else
1214                        err = parse_events(evsel_list, transaction_limited_attrs, NULL);
1215                if (err) {
1216                        fprintf(stderr, "Cannot set up transaction events\n");
1217                        return -1;
1218                }
1219                return 0;
1220        }
1221
1222        if (!evsel_list->nr_entries) {
1223                if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
1224                        return -1;
1225        }
1226
1227        /* Detailed events get appended to the event list: */
1228
1229        if (detailed_run <  1)
1230                return 0;
1231
1232        /* Append detailed run extra attributes: */
1233        if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1234                return -1;
1235
1236        if (detailed_run < 2)
1237                return 0;
1238
1239        /* Append very detailed run extra attributes: */
1240        if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1241                return -1;
1242
1243        if (detailed_run < 3)
1244                return 0;
1245
1246        /* Append very, very detailed run extra attributes: */
1247        return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1248}
1249
1250int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
1251{
1252        bool append_file = false;
1253        int output_fd = 0;
1254        const char *output_name = NULL;
1255        const struct option options[] = {
1256        OPT_BOOLEAN('T', "transaction", &transaction_run,
1257                    "hardware transaction statistics"),
1258        OPT_CALLBACK('e', "event", &evsel_list, "event",
1259                     "event selector. use 'perf list' to list available events",
1260                     parse_events_option),
1261        OPT_CALLBACK(0, "filter", &evsel_list, "filter",
1262                     "event filter", parse_filter),
1263        OPT_BOOLEAN('i', "no-inherit", &no_inherit,
1264                    "child tasks do not inherit counters"),
1265        OPT_STRING('p', "pid", &target.pid, "pid",
1266                   "stat events on existing process id"),
1267        OPT_STRING('t', "tid", &target.tid, "tid",
1268                   "stat events on existing thread id"),
1269        OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
1270                    "system-wide collection from all CPUs"),
1271        OPT_BOOLEAN('g', "group", &group,
1272                    "put the counters into a counter group"),
1273        OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"),
1274        OPT_INCR('v', "verbose", &verbose,
1275                    "be more verbose (show counter open errors, etc)"),
1276        OPT_INTEGER('r', "repeat", &run_count,
1277                    "repeat command and print average + stddev (max: 100, forever: 0)"),
1278        OPT_BOOLEAN('n', "null", &null_run,
1279                    "null run - dont start any counters"),
1280        OPT_INCR('d', "detailed", &detailed_run,
1281                    "detailed run - start a lot of events"),
1282        OPT_BOOLEAN('S', "sync", &sync_run,
1283                    "call sync() before starting a run"),
1284        OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
1285                           "print large numbers with thousands\' separators",
1286                           stat__set_big_num),
1287        OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
1288                    "list of cpus to monitor in system-wide"),
1289        OPT_SET_UINT('A', "no-aggr", &aggr_mode,
1290                    "disable CPU count aggregation", AGGR_NONE),
1291        OPT_STRING('x', "field-separator", &csv_sep, "separator",
1292                   "print counts with custom separator"),
1293        OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
1294                     "monitor event in cgroup name only", parse_cgroups),
1295        OPT_STRING('o', "output", &output_name, "file", "output file name"),
1296        OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
1297        OPT_INTEGER(0, "log-fd", &output_fd,
1298                    "log output to fd, instead of stderr"),
1299        OPT_STRING(0, "pre", &pre_cmd, "command",
1300                        "command to run prior to the measured command"),
1301        OPT_STRING(0, "post", &post_cmd, "command",
1302                        "command to run after to the measured command"),
1303        OPT_UINTEGER('I', "interval-print", &interval,
1304                    "print counts at regular interval in ms (>= 100)"),
1305        OPT_SET_UINT(0, "per-socket", &aggr_mode,
1306                     "aggregate counts per processor socket", AGGR_SOCKET),
1307        OPT_SET_UINT(0, "per-core", &aggr_mode,
1308                     "aggregate counts per physical processor core", AGGR_CORE),
1309        OPT_SET_UINT(0, "per-thread", &aggr_mode,
1310                     "aggregate counts per thread", AGGR_THREAD),
1311        OPT_UINTEGER('D', "delay", &initial_delay,
1312                     "ms to wait before starting measurement after program start"),
1313        OPT_END()
1314        };
1315        const char * const stat_usage[] = {
1316                "perf stat [<options>] [<command>]",
1317                NULL
1318        };
1319        int status = -EINVAL, run_idx;
1320        const char *mode;
1321
1322        setlocale(LC_ALL, "");
1323
1324        evsel_list = perf_evlist__new();
1325        if (evsel_list == NULL)
1326                return -ENOMEM;
1327
1328        argc = parse_options(argc, argv, options, stat_usage,
1329                PARSE_OPT_STOP_AT_NON_OPTION);
1330
1331        output = stderr;
1332        if (output_name && strcmp(output_name, "-"))
1333                output = NULL;
1334
1335        if (output_name && output_fd) {
1336                fprintf(stderr, "cannot use both --output and --log-fd\n");
1337                parse_options_usage(stat_usage, options, "o", 1);
1338                parse_options_usage(NULL, options, "log-fd", 0);
1339                goto out;
1340        }
1341
1342        if (output_fd < 0) {
1343                fprintf(stderr, "argument to --log-fd must be a > 0\n");
1344                parse_options_usage(stat_usage, options, "log-fd", 0);
1345                goto out;
1346        }
1347
1348        if (!output) {
1349                struct timespec tm;
1350                mode = append_file ? "a" : "w";
1351
1352                output = fopen(output_name, mode);
1353                if (!output) {
1354                        perror("failed to create output file");
1355                        return -1;
1356                }
1357                clock_gettime(CLOCK_REALTIME, &tm);
1358                fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
1359        } else if (output_fd > 0) {
1360                mode = append_file ? "a" : "w";
1361                output = fdopen(output_fd, mode);
1362                if (!output) {
1363                        perror("Failed opening logfd");
1364                        return -errno;
1365                }
1366        }
1367
1368        if (csv_sep) {
1369                csv_output = true;
1370                if (!strcmp(csv_sep, "\\t"))
1371                        csv_sep = "\t";
1372        } else
1373                csv_sep = DEFAULT_SEPARATOR;
1374
1375        /*
1376         * let the spreadsheet do the pretty-printing
1377         */
1378        if (csv_output) {
1379                /* User explicitly passed -B? */
1380                if (big_num_opt == 1) {
1381                        fprintf(stderr, "-B option not supported with -x\n");
1382                        parse_options_usage(stat_usage, options, "B", 1);
1383                        parse_options_usage(NULL, options, "x", 1);
1384                        goto out;
1385                } else /* Nope, so disable big number formatting */
1386                        big_num = false;
1387        } else if (big_num_opt == 0) /* User passed --no-big-num */
1388                big_num = false;
1389
1390        if (!argc && target__none(&target))
1391                usage_with_options(stat_usage, options);
1392
1393        if (run_count < 0) {
1394                pr_err("Run count must be a positive number\n");
1395                parse_options_usage(stat_usage, options, "r", 1);
1396                goto out;
1397        } else if (run_count == 0) {
1398                forever = true;
1399                run_count = 1;
1400        }
1401
1402        if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) {
1403                fprintf(stderr, "The --per-thread option is only available "
1404                        "when monitoring via -p -t options.\n");
1405                parse_options_usage(NULL, options, "p", 1);
1406                parse_options_usage(NULL, options, "t", 1);
1407                goto out;
1408        }
1409
1410        /*
1411         * no_aggr, cgroup are for system-wide only
1412         * --per-thread is aggregated per thread, we dont mix it with cpu mode
1413         */
1414        if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) &&
1415            !target__has_cpu(&target)) {
1416                fprintf(stderr, "both cgroup and no-aggregation "
1417                        "modes only available in system-wide mode\n");
1418
1419                parse_options_usage(stat_usage, options, "G", 1);
1420                parse_options_usage(NULL, options, "A", 1);
1421                parse_options_usage(NULL, options, "a", 1);
1422                goto out;
1423        }
1424
1425        if (add_default_attributes())
1426                goto out;
1427
1428        target__validate(&target);
1429
1430        if (perf_evlist__create_maps(evsel_list, &target) < 0) {
1431                if (target__has_task(&target)) {
1432                        pr_err("Problems finding threads of monitor\n");
1433                        parse_options_usage(stat_usage, options, "p", 1);
1434                        parse_options_usage(NULL, options, "t", 1);
1435                } else if (target__has_cpu(&target)) {
1436                        perror("failed to parse CPUs map");
1437                        parse_options_usage(stat_usage, options, "C", 1);
1438                        parse_options_usage(NULL, options, "a", 1);
1439                }
1440                goto out;
1441        }
1442
1443        /*
1444         * Initialize thread_map with comm names,
1445         * so we could print it out on output.
1446         */
1447        if (aggr_mode == AGGR_THREAD)
1448                thread_map__read_comms(evsel_list->threads);
1449
1450        if (interval && interval < 100) {
1451                pr_err("print interval must be >= 100ms\n");
1452                parse_options_usage(stat_usage, options, "I", 1);
1453                goto out;
1454        }
1455
1456        if (perf_evlist__alloc_stats(evsel_list, interval))
1457                goto out;
1458
1459        if (perf_stat_init_aggr_mode())
1460                goto out;
1461
1462        /*
1463         * We dont want to block the signals - that would cause
1464         * child tasks to inherit that and Ctrl-C would not work.
1465         * What we want is for Ctrl-C to work in the exec()-ed
1466         * task, but being ignored by perf stat itself:
1467         */
1468        atexit(sig_atexit);
1469        if (!forever)
1470                signal(SIGINT,  skip_signal);
1471        signal(SIGCHLD, skip_signal);
1472        signal(SIGALRM, skip_signal);
1473        signal(SIGABRT, skip_signal);
1474
1475        status = 0;
1476        for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
1477                if (run_count != 1 && verbose)
1478                        fprintf(output, "[ perf stat: executing run #%d ... ]\n",
1479                                run_idx + 1);
1480
1481                status = run_perf_stat(argc, argv);
1482                if (forever && status != -1) {
1483                        print_counters(NULL, argc, argv);
1484                        perf_stat__reset_stats();
1485                }
1486        }
1487
1488        if (!forever && status != -1 && !interval)
1489                print_counters(NULL, argc, argv);
1490
1491        perf_evlist__free_stats(evsel_list);
1492out:
1493        perf_evlist__delete(evsel_list);
1494        return status;
1495}
1496