linux/tools/perf/builtin-stat.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * builtin-stat.c
   4 *
   5 * Builtin stat command: Give a precise performance counters summary
   6 * overview about any workload, CPU or specific PID.
   7 *
   8 * Sample output:
   9
  10   $ perf stat ./hackbench 10
  11
  12  Time: 0.118
  13
  14  Performance counter stats for './hackbench 10':
  15
  16       1708.761321 task-clock                #   11.037 CPUs utilized
  17            41,190 context-switches          #    0.024 M/sec
  18             6,735 CPU-migrations            #    0.004 M/sec
  19            17,318 page-faults               #    0.010 M/sec
  20     5,205,202,243 cycles                    #    3.046 GHz
  21     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
  22     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
  23     2,603,501,247 instructions              #    0.50  insns per cycle
  24                                             #    1.48  stalled cycles per insn
  25       484,357,498 branches                  #  283.455 M/sec
  26         6,388,934 branch-misses             #    1.32% of all branches
  27
  28        0.154822978  seconds time elapsed
  29
  30 *
  31 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
  32 *
  33 * Improvements and fixes by:
  34 *
  35 *   Arjan van de Ven <arjan@linux.intel.com>
  36 *   Yanmin Zhang <yanmin.zhang@intel.com>
  37 *   Wu Fengguang <fengguang.wu@intel.com>
  38 *   Mike Galbraith <efault@gmx.de>
  39 *   Paul Mackerras <paulus@samba.org>
  40 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
  41 */
  42
  43#include "builtin.h"
  44#include "perf.h"
  45#include "util/cgroup.h"
  46#include <subcmd/parse-options.h>
  47#include "util/parse-events.h"
  48#include "util/pmu.h"
  49#include "util/event.h"
  50#include "util/evlist.h"
  51#include "util/evlist-hybrid.h"
  52#include "util/evsel.h"
  53#include "util/debug.h"
  54#include "util/color.h"
  55#include "util/stat.h"
  56#include "util/header.h"
  57#include "util/cpumap.h"
  58#include "util/thread_map.h"
  59#include "util/counts.h"
  60#include "util/topdown.h"
  61#include "util/session.h"
  62#include "util/tool.h"
  63#include "util/string2.h"
  64#include "util/metricgroup.h"
  65#include "util/synthetic-events.h"
  66#include "util/target.h"
  67#include "util/time-utils.h"
  68#include "util/top.h"
  69#include "util/affinity.h"
  70#include "util/pfm.h"
  71#include "util/bpf_counter.h"
  72#include "util/iostat.h"
  73#include "util/pmu-hybrid.h"
  74#include "asm/bug.h"
  75
  76#include <linux/time64.h>
  77#include <linux/zalloc.h>
  78#include <api/fs/fs.h>
  79#include <errno.h>
  80#include <signal.h>
  81#include <stdlib.h>
  82#include <sys/prctl.h>
  83#include <inttypes.h>
  84#include <locale.h>
  85#include <math.h>
  86#include <sys/types.h>
  87#include <sys/stat.h>
  88#include <sys/wait.h>
  89#include <unistd.h>
  90#include <sys/time.h>
  91#include <sys/resource.h>
  92#include <linux/err.h>
  93
  94#include <linux/ctype.h>
  95#include <perf/evlist.h>
  96
  97#define DEFAULT_SEPARATOR       " "
  98#define FREEZE_ON_SMI_PATH      "devices/cpu/freeze_on_smi"
  99
 100static void print_counters(struct timespec *ts, int argc, const char **argv);
 101
 102/* Default events used for perf stat -T */
 103static const char *transaction_attrs = {
 104        "task-clock,"
 105        "{"
 106        "instructions,"
 107        "cycles,"
 108        "cpu/cycles-t/,"
 109        "cpu/tx-start/,"
 110        "cpu/el-start/,"
 111        "cpu/cycles-ct/"
 112        "}"
 113};
 114
 115/* More limited version when the CPU does not have all events. */
 116static const char * transaction_limited_attrs = {
 117        "task-clock,"
 118        "{"
 119        "instructions,"
 120        "cycles,"
 121        "cpu/cycles-t/,"
 122        "cpu/tx-start/"
 123        "}"
 124};
 125
 126static const char * topdown_attrs[] = {
 127        "topdown-total-slots",
 128        "topdown-slots-retired",
 129        "topdown-recovery-bubbles",
 130        "topdown-fetch-bubbles",
 131        "topdown-slots-issued",
 132        NULL,
 133};
 134
 135static const char *topdown_metric_attrs[] = {
 136        "slots",
 137        "topdown-retiring",
 138        "topdown-bad-spec",
 139        "topdown-fe-bound",
 140        "topdown-be-bound",
 141        NULL,
 142};
 143
 144static const char *topdown_metric_L2_attrs[] = {
 145        "slots",
 146        "topdown-retiring",
 147        "topdown-bad-spec",
 148        "topdown-fe-bound",
 149        "topdown-be-bound",
 150        "topdown-heavy-ops",
 151        "topdown-br-mispredict",
 152        "topdown-fetch-lat",
 153        "topdown-mem-bound",
 154        NULL,
 155};
 156
 157#define TOPDOWN_MAX_LEVEL                       2
 158
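/* Events read as a group for perf stat --smi-cost: APERF, SMI count and cycles. */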
 159static const char *smi_cost_attrs = {
 160        "{"
 161        "msr/aperf/,"
 162        "msr/smi/,"
 163        "cycles"
 164        "}"
 165};
 166
 167static struct evlist    *evsel_list;
 168static bool all_counters_use_bpf = true;
 169
 170static struct target target = {
 171        .uid    = UINT_MAX,
 172};
 173
 174#define METRIC_ONLY_LEN 20
 175
 176static volatile pid_t           child_pid                       = -1;
 177static int                      detailed_run                    =  0;
 178static bool                     transaction_run;
 179static bool                     topdown_run                     = false;
 180static bool                     smi_cost                        = false;
 181static bool                     smi_reset                       = false;
 182static int                      big_num_opt                     =  -1;
 183static bool                     group                           = false;
 184static const char               *pre_cmd                        = NULL;
 185static const char               *post_cmd                       = NULL;
 186static bool                     sync_run                        = false;
 187static bool                     forever                         = false;
 188static bool                     force_metric_only               = false;
 189static struct timespec          ref_time;
 190static bool                     append_file;
 191static bool                     interval_count;
 192static const char               *output_name;
 193static int                      output_fd;
 194
 195struct perf_stat {
 196        bool                     record;
 197        struct perf_data         data;
 198        struct perf_session     *session;
 199        u64                      bytes_written;
 200        struct perf_tool         tool;
 201        bool                     maps_allocated;
 202        struct perf_cpu_map     *cpus;
 203        struct perf_thread_map *threads;
 204        enum aggr_mode           aggr_mode;
 205};
 206
 207static struct perf_stat         perf_stat;
 208#define STAT_RECORD             perf_stat.record
 209
 210static volatile int done = 0;
 211
 212static struct perf_stat_config stat_config = {
 213        .aggr_mode              = AGGR_GLOBAL,
 214        .scale                  = true,
 215        .unit_width             = 4, /* strlen("unit") */
 216        .run_count              = 1,
 217        .metric_only_len        = METRIC_ONLY_LEN,
 218        .walltime_nsecs_stats   = &walltime_nsecs_stats,
 219        .big_num                = true,
 220        .ctl_fd                 = -1,
 221        .ctl_fd_ack             = -1,
 222        .iostat_run             = false,
 223};
 224
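/*
 * Return true when both evsels have identical user-requested cpu maps
 * (two missing maps also count as a match).
 */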
 225static bool cpus_map_matched(struct evsel *a, struct evsel *b)
 226{
 227        if (!a->core.cpus && !b->core.cpus)
 228                return true;
 229
 230        if (!a->core.cpus || !b->core.cpus)
 231                return false;
 232
 233        if (a->core.cpus->nr != b->core.cpus->nr)
 234                return false;
 235
 236        for (int i = 0; i < a->core.cpus->nr; i++) {
 237                if (a->core.cpus->map[i] != b->core.cpus->map[i])
 238                        return false;
 239        }
 240
 241        return true;
 242}
 243
 244static void evlist__check_cpu_maps(struct evlist *evlist)
 245{
 246        struct evsel *evsel, *pos, *leader;
 247        char buf[1024];
 248
 249        if (evlist__has_hybrid(evlist))
 250                evlist__warn_hybrid_group(evlist);
 251
 252        evlist__for_each_entry(evlist, evsel) {
 253                leader = evsel__leader(evsel);
 254
                /* Check that the leader's cpu map matches each member's. */
 256                if (leader == evsel)
 257                        continue;
 258                if (cpus_map_matched(leader, evsel))
 259                        continue;
 260
                /* If there's a mismatch, disable the group and warn the user. */
 262                WARN_ONCE(1, "WARNING: grouped events cpus do not match, disabling group:\n");
 263                evsel__group_desc(leader, buf, sizeof(buf));
 264                pr_warning("  %s\n", buf);
 265
 266                if (verbose) {
 267                        cpu_map__snprint(leader->core.cpus, buf, sizeof(buf));
 268                        pr_warning("     %s: %s\n", leader->name, buf);
 269                        cpu_map__snprint(evsel->core.cpus, buf, sizeof(buf));
 270                        pr_warning("     %s: %s\n", evsel->name, buf);
 271                }
 272
 273                for_each_group_evsel(pos, leader) {
 274                        evsel__set_leader(pos, pos);
 275                        pos->core.nr_members = 0;
 276                }
 277                evsel->core.leader->nr_members = 0;
 278        }
 279}
 280
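/*
 * r = a - b, borrowing a second when the nanosecond field underflows;
 * callers are expected to pass a >= b (e.g. "now" minus ref_time).
 */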
 281static inline void diff_timespec(struct timespec *r, struct timespec *a,
 282                                 struct timespec *b)
 283{
 284        r->tv_sec = a->tv_sec - b->tv_sec;
 285        if (a->tv_nsec < b->tv_nsec) {
 286                r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
 287                r->tv_sec--;
 288        } else {
                r->tv_nsec = a->tv_nsec - b->tv_nsec;
 290        }
 291}
 292
 293static void perf_stat__reset_stats(void)
 294{
 295        int i;
 296
 297        evlist__reset_stats(evsel_list);
 298        perf_stat__reset_shadow_stats();
 299
 300        for (i = 0; i < stat_config.stats_num; i++)
 301                perf_stat__reset_shadow_per_stat(&stat_config.stats[i]);
 302}
 303
 304static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
 305                                     union perf_event *event,
 306                                     struct perf_sample *sample __maybe_unused,
 307                                     struct machine *machine __maybe_unused)
 308{
 309        if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) {
 310                pr_err("failed to write perf data, error: %m\n");
 311                return -1;
 312        }
 313
 314        perf_stat.bytes_written += event->header.size;
 315        return 0;
 316}
 317
 318static int write_stat_round_event(u64 tm, u64 type)
 319{
 320        return perf_event__synthesize_stat_round(NULL, tm, type,
 321                                                 process_synthesized_event,
 322                                                 NULL);
 323}
 324
 325#define WRITE_STAT_ROUND_EVENT(time, interval) \
 326        write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
 327
 328#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
 329
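/*
 * Write one (counter, cpu, thread) reading into the perf.data stream as a
 * synthesized stat event; only used when 'perf stat record' is active.
 */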
 330static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
 331                                   struct perf_counts_values *count)
 332{
 333        struct perf_sample_id *sid = SID(counter, cpu, thread);
 334
 335        return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
 336                                           process_synthesized_event, NULL);
 337}
 338
 339static int read_single_counter(struct evsel *counter, int cpu,
 340                               int thread, struct timespec *rs)
 341{
 342        if (counter->tool_event == PERF_TOOL_DURATION_TIME) {
 343                u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
 344                struct perf_counts_values *count =
 345                        perf_counts(counter->counts, cpu, thread);
 346                count->ena = count->run = val;
 347                count->val = val;
 348                return 0;
 349        }
 350        return evsel__read_counter(counter, cpu, thread);
 351}
 352
 353/*
 354 * Read out the results of a single counter:
 355 * do not aggregate counts across CPUs in system-wide mode
 356 */
 357static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
 358{
 359        int nthreads = perf_thread_map__nr(evsel_list->core.threads);
 360        int thread;
 361
 362        if (!counter->supported)
 363                return -ENOENT;
 364
 365        if (counter->core.system_wide)
 366                nthreads = 1;
 367
 368        for (thread = 0; thread < nthreads; thread++) {
 369                struct perf_counts_values *count;
 370
 371                count = perf_counts(counter->counts, cpu, thread);
 372
 373                /*
 374                 * The leader's group read loads data into its group members
 375                 * (via evsel__read_counter()) and sets their count->loaded.
 376                 */
 377                if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
 378                    read_single_counter(counter, cpu, thread, rs)) {
 379                        counter->counts->scaled = -1;
 380                        perf_counts(counter->counts, cpu, thread)->ena = 0;
 381                        perf_counts(counter->counts, cpu, thread)->run = 0;
 382                        return -1;
 383                }
 384
 385                perf_counts__set_loaded(counter->counts, cpu, thread, false);
 386
 387                if (STAT_RECORD) {
 388                        if (evsel__write_stat_event(counter, cpu, thread, count)) {
 389                                pr_err("failed to write stat event\n");
 390                                return -1;
 391                        }
 392                }
 393
 394                if (verbose > 1) {
 395                        fprintf(stat_config.output,
 396                                "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
 397                                        evsel__name(counter),
 398                                        cpu,
 399                                        count->val, count->ena, count->run);
 400                }
 401        }
 402
 403        return 0;
 404}
 405
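/*
 * Read all non-BPF counters, iterating cpu-major and pinning this thread to
 * each cpu in turn via struct affinity, presumably to keep the reads local
 * to the counting cpu.  Skipped entirely when every counter is BPF-based.
 */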
 406static int read_affinity_counters(struct timespec *rs)
 407{
 408        struct evsel *counter;
 409        struct affinity affinity;
 410        int i, ncpus, cpu;
 411
 412        if (all_counters_use_bpf)
 413                return 0;
 414
 415        if (affinity__setup(&affinity) < 0)
 416                return -1;
 417
 418        ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
 419        if (!target__has_cpu(&target) || target__has_per_thread(&target))
 420                ncpus = 1;
 421        evlist__for_each_cpu(evsel_list, i, cpu) {
 422                if (i >= ncpus)
 423                        break;
 424                affinity__set(&affinity, cpu);
 425
 426                evlist__for_each_entry(evsel_list, counter) {
 427                        if (evsel__cpu_iter_skip(counter, cpu))
 428                                continue;
 429                        if (evsel__is_bpf(counter))
 430                                continue;
 431                        if (!counter->err) {
 432                                counter->err = read_counter_cpu(counter, rs,
 433                                                                counter->cpu_iter - 1);
 434                        }
 435                }
 436        }
 437        affinity__cleanup(&affinity);
 438        return 0;
 439}
 440
 441static int read_bpf_map_counters(void)
 442{
 443        struct evsel *counter;
 444        int err;
 445
 446        evlist__for_each_entry(evsel_list, counter) {
 447                if (!evsel__is_bpf(counter))
 448                        continue;
 449
 450                err = bpf_counter__read(counter);
 451                if (err)
 452                        return err;
 453        }
 454        return 0;
 455}
 456
 457static void read_counters(struct timespec *rs)
 458{
 459        struct evsel *counter;
 460
 461        if (!stat_config.stop_read_counter) {
 462                if (read_bpf_map_counters() ||
 463                    read_affinity_counters(rs))
 464                        return;
 465        }
 466
 467        evlist__for_each_entry(evsel_list, counter) {
 468                if (counter->err)
 469                        pr_debug("failed to read counter %s\n", counter->name);
 470                if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
 471                        pr_warning("failed to process counter %s\n", counter->name);
 472                counter->err = 0;
 473        }
 474}
 475
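/*
 * Allocate one runtime_stat (shadow metric state) per monitored thread so
 * that derived metrics can be computed separately per thread.
 */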
 476static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
 477{
 478        int i;
 479
 480        config->stats = calloc(nthreads, sizeof(struct runtime_stat));
 481        if (!config->stats)
 482                return -1;
 483
 484        config->stats_num = nthreads;
 485
 486        for (i = 0; i < nthreads; i++)
 487                runtime_stat__init(&config->stats[i]);
 488
 489        return 0;
 490}
 491
 492static void runtime_stat_delete(struct perf_stat_config *config)
 493{
 494        int i;
 495
 496        if (!config->stats)
 497                return;
 498
 499        for (i = 0; i < config->stats_num; i++)
 500                runtime_stat__exit(&config->stats[i]);
 501
 502        zfree(&config->stats);
 503}
 504
 505static void runtime_stat_reset(struct perf_stat_config *config)
 506{
 507        int i;
 508
 509        if (!config->stats)
 510                return;
 511
 512        for (i = 0; i < config->stats_num; i++)
 513                perf_stat__reset_shadow_per_stat(&config->stats[i]);
 514}
 515
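/*
 * One -I interval tick: reset the shadow stats, read all counters, emit a
 * stat round event when recording, then print the interval counts.
 */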
 516static void process_interval(void)
 517{
 518        struct timespec ts, rs;
 519
 520        clock_gettime(CLOCK_MONOTONIC, &ts);
 521        diff_timespec(&rs, &ts, &ref_time);
 522
 523        perf_stat__reset_shadow_per_stat(&rt_stat);
 524        runtime_stat_reset(&stat_config);
 525        read_counters(&rs);
 526
 527        if (STAT_RECORD) {
 528                if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
 529                        pr_err("failed to write stat round event\n");
 530        }
 531
 532        init_stats(&walltime_nsecs_stats);
 533        update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000ULL);
 534        print_counters(&rs, 0, NULL);
 535}
 536
 537static bool handle_interval(unsigned int interval, int *times)
 538{
 539        if (interval) {
 540                process_interval();
 541                if (interval_count && !(--(*times)))
 542                        return true;
 543        }
 544        return false;
 545}
 546
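/*
 * Start counting.  BPF counters are enabled individually first; for the
 * rest, a negative --delay leaves everything disabled (to be enabled later,
 * e.g. via --control), while a positive --delay sleeps before enabling.
 */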
 547static int enable_counters(void)
 548{
 549        struct evsel *evsel;
 550        int err;
 551
 552        evlist__for_each_entry(evsel_list, evsel) {
 553                if (!evsel__is_bpf(evsel))
 554                        continue;
 555
 556                err = bpf_counter__enable(evsel);
 557                if (err)
 558                        return err;
 559        }
 560
 561        if (stat_config.initial_delay < 0) {
 562                pr_info(EVLIST_DISABLED_MSG);
 563                return 0;
 564        }
 565
 566        if (stat_config.initial_delay > 0) {
 567                pr_info(EVLIST_DISABLED_MSG);
 568                usleep(stat_config.initial_delay * USEC_PER_MSEC);
 569        }
 570
        /*
         * Enable the counters here only if:
         * - we are attaching to already running tasks or CPUs (i.e. we did
         *   not fork the workload ourselves), or
         * - an initial delay was configured, so the events were created
         *   disabled and have to be enabled now.
         */
 576        if (!target__none(&target) || stat_config.initial_delay) {
 577                if (!all_counters_use_bpf)
 578                        evlist__enable(evsel_list);
 579                if (stat_config.initial_delay > 0)
 580                        pr_info(EVLIST_ENABLED_MSG);
 581        }
 582        return 0;
 583}
 584
 585static void disable_counters(void)
 586{
 587        struct evsel *counter;
 588
        /*
         * If we did not fork a workload of our own (i.e. we attached to
         * existing tasks or CPUs), the counters may still be running here.
         * To get accurate group ratios, we must stop the groups from
         * counting before reading their constituent counters.
         */
 594        if (!target__none(&target)) {
 595                evlist__for_each_entry(evsel_list, counter)
 596                        bpf_counter__disable(counter);
 597                if (!all_counters_use_bpf)
 598                        evlist__disable(evsel_list);
 599        }
 600}
 601
 602static volatile int workload_exec_errno;
 603
/*
 * evlist__prepare_workload() will send a SIGUSR1 if exec'ing the forked
 * workload fails, because we asked for it by setting want_signal to true.
 */
 609static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
 610                                        void *ucontext __maybe_unused)
 611{
 612        workload_exec_errno = info->si_value.sival_int;
 613}
 614
 615static bool evsel__should_store_id(struct evsel *counter)
 616{
 617        return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID;
 618}
 619
 620static bool is_target_alive(struct target *_target,
 621                            struct perf_thread_map *threads)
 622{
 623        struct stat st;
 624        int i;
 625
 626        if (!target__has_task(_target))
 627                return true;
 628
 629        for (i = 0; i < threads->nr; i++) {
 630                char path[PATH_MAX];
 631
 632                scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(),
 633                          threads->map[i].pid);
 634
 635                if (!stat(path, &st))
 636                        return true;
 637        }
 638
 639        return false;
 640}
 641
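/*
 * Drain one command from the control fd, if any; enable and disable
 * commands additionally trigger an interval printout when -I is active.
 */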
 642static void process_evlist(struct evlist *evlist, unsigned int interval)
 643{
 644        enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
 645
 646        if (evlist__ctlfd_process(evlist, &cmd) > 0) {
 647                switch (cmd) {
 648                case EVLIST_CTL_CMD_ENABLE:
 649                        if (interval)
 650                                process_interval();
 651                        break;
 652                case EVLIST_CTL_CMD_DISABLE:
 653                        if (interval)
 654                                process_interval();
 655                        break;
 656                case EVLIST_CTL_CMD_SNAPSHOT:
 657                case EVLIST_CTL_CMD_ACK:
 658                case EVLIST_CTL_CMD_UNSUPPORTED:
 659                case EVLIST_CTL_CMD_EVLIST:
 660                case EVLIST_CTL_CMD_STOP:
 661                case EVLIST_CTL_CMD_PING:
 662                default:
 663                        break;
 664                }
 665        }
 666}
 667
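/*
 * Subtract the time spent handling events (time_stop - time_start) from the
 * remaining sleep budget in milliseconds, clamping the result at zero.
 */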
 668static void compute_tts(struct timespec *time_start, struct timespec *time_stop,
 669                        int *time_to_sleep)
 670{
 671        int tts = *time_to_sleep;
 672        struct timespec time_diff;
 673
 674        diff_timespec(&time_diff, time_stop, time_start);
 675
 676        tts -= time_diff.tv_sec * MSEC_PER_SEC +
 677               time_diff.tv_nsec / NSEC_PER_MSEC;
 678
 679        if (tts < 0)
 680                tts = 0;
 681
 682        *time_to_sleep = tts;
 683}
 684
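/*
 * Main wait loop: poll the evlist with a timeout derived from -I/--timeout,
 * handle interval printing and control-fd commands, and stop once the forked
 * workload exits (or the attached target disappears) or 'done' is set by a
 * signal handler.
 */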
 685static int dispatch_events(bool forks, int timeout, int interval, int *times)
 686{
 687        int child_exited = 0, status = 0;
 688        int time_to_sleep, sleep_time;
 689        struct timespec time_start, time_stop;
 690
 691        if (interval)
 692                sleep_time = interval;
 693        else if (timeout)
 694                sleep_time = timeout;
 695        else
 696                sleep_time = 1000;
 697
 698        time_to_sleep = sleep_time;
 699
 700        while (!done) {
 701                if (forks)
 702                        child_exited = waitpid(child_pid, &status, WNOHANG);
 703                else
 704                        child_exited = !is_target_alive(&target, evsel_list->core.threads) ? 1 : 0;
 705
 706                if (child_exited)
 707                        break;
 708
 709                clock_gettime(CLOCK_MONOTONIC, &time_start);
 710                if (!(evlist__poll(evsel_list, time_to_sleep) > 0)) { /* poll timeout or EINTR */
 711                        if (timeout || handle_interval(interval, times))
 712                                break;
 713                        time_to_sleep = sleep_time;
 714                } else { /* fd revent */
 715                        process_evlist(evsel_list, interval);
 716                        clock_gettime(CLOCK_MONOTONIC, &time_stop);
 717                        compute_tts(&time_start, &time_stop, &time_to_sleep);
 718                }
 719        }
 720
 721        return status;
 722}
 723
 724enum counter_recovery {
 725        COUNTER_SKIP,
 726        COUNTER_RETRY,
 727        COUNTER_FATAL,
 728};
 729
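/*
 * Classify a counter-open failure: decide whether the counter can simply be
 * skipped, whether the open should be retried (e.g. after an event fallback
 * or after dropping a dead thread), or whether the whole run must abort.
 */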
 730static enum counter_recovery stat_handle_error(struct evsel *counter)
 731{
 732        char msg[BUFSIZ];
 733        /*
 734         * PPC returns ENXIO for HW counters until 2.6.37
 735         * (behavior changed with commit b0a873e).
 736         */
 737        if (errno == EINVAL || errno == ENOSYS ||
 738            errno == ENOENT || errno == EOPNOTSUPP ||
 739            errno == ENXIO) {
 740                if (verbose > 0)
 741                        ui__warning("%s event is not supported by the kernel.\n",
 742                                    evsel__name(counter));
 743                counter->supported = false;
                /*
                 * errored is a sticky flag that means one of the counter's
                 * per-cpu events had a problem and the counter needs to be
                 * reexamined.
                 */
 748                counter->errored = true;
 749
 750                if ((evsel__leader(counter) != counter) ||
 751                    !(counter->core.leader->nr_members > 1))
 752                        return COUNTER_SKIP;
 753        } else if (evsel__fallback(counter, errno, msg, sizeof(msg))) {
 754                if (verbose > 0)
 755                        ui__warning("%s\n", msg);
 756                return COUNTER_RETRY;
 757        } else if (target__has_per_thread(&target) &&
 758                   evsel_list->core.threads &&
 759                   evsel_list->core.threads->err_thread != -1) {
 760                /*
 761                 * For global --per-thread case, skip current
 762                 * error thread.
 763                 */
 764                if (!thread_map__remove(evsel_list->core.threads,
 765                                        evsel_list->core.threads->err_thread)) {
 766                        evsel_list->core.threads->err_thread = -1;
 767                        return COUNTER_RETRY;
 768                }
 769        }
 770
 771        evsel__open_strerror(counter, &target, errno, msg, sizeof(msg));
 772        ui__error("%s\n", msg);
 773
 774        if (child_pid != -1)
 775                kill(child_pid, SIGTERM);
 776        return COUNTER_FATAL;
 777}
 778
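/*
 * One measurement run: prepare the workload (if any), open all counters with
 * a second pass that reopens broken weak groups, enable counting, wait for
 * the workload/timeout/interval events, then read and (unless recording)
 * close the counters.
 */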
 779static int __run_perf_stat(int argc, const char **argv, int run_idx)
 780{
 781        int interval = stat_config.interval;
 782        int times = stat_config.times;
 783        int timeout = stat_config.timeout;
 784        char msg[BUFSIZ];
 785        unsigned long long t0, t1;
 786        struct evsel *counter;
 787        size_t l;
 788        int status = 0;
 789        const bool forks = (argc > 0);
 790        bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
 791        struct affinity affinity;
 792        int i, cpu, err;
 793        bool second_pass = false;
 794
 795        if (forks) {
 796                if (evlist__prepare_workload(evsel_list, &target, argv, is_pipe, workload_exec_failed_signal) < 0) {
 797                        perror("failed to prepare workload");
 798                        return -1;
 799                }
 800                child_pid = evsel_list->workload.pid;
 801        }
 802
 803        if (group)
 804                evlist__set_leader(evsel_list);
 805
 806        if (affinity__setup(&affinity) < 0)
 807                return -1;
 808
 809        evlist__for_each_entry(evsel_list, counter) {
 810                if (bpf_counter__load(counter, &target))
 811                        return -1;
 812                if (!evsel__is_bpf(counter))
 813                        all_counters_use_bpf = false;
 814        }
 815
 816        evlist__for_each_cpu (evsel_list, i, cpu) {
 817                /*
 818                 * bperf calls evsel__open_per_cpu() in bperf__load(), so
 819                 * no need to call it again here.
 820                 */
 821                if (target.use_bpf)
 822                        break;
 823                affinity__set(&affinity, cpu);
 824
 825                evlist__for_each_entry(evsel_list, counter) {
 826                        if (evsel__cpu_iter_skip(counter, cpu))
 827                                continue;
 828                        if (counter->reset_group || counter->errored)
 829                                continue;
 830                        if (evsel__is_bpf(counter))
 831                                continue;
 832try_again:
 833                        if (create_perf_stat_counter(counter, &stat_config, &target,
 834                                                     counter->cpu_iter - 1) < 0) {
 835
 836                                /*
 837                                 * Weak group failed. We cannot just undo this here
 838                                 * because earlier CPUs might be in group mode, and the kernel
 839                                 * doesn't support mixing group and non group reads. Defer
 840                                 * it to later.
 841                                 * Don't close here because we're in the wrong affinity.
 842                                 */
 843                                if ((errno == EINVAL || errno == EBADF) &&
 844                                    evsel__leader(counter) != counter &&
 845                                    counter->weak_group) {
 846                                        evlist__reset_weak_group(evsel_list, counter, false);
 847                                        assert(counter->reset_group);
 848                                        second_pass = true;
 849                                        continue;
 850                                }
 851
 852                                switch (stat_handle_error(counter)) {
 853                                case COUNTER_FATAL:
 854                                        return -1;
 855                                case COUNTER_RETRY:
 856                                        goto try_again;
 857                                case COUNTER_SKIP:
 858                                        continue;
 859                                default:
 860                                        break;
 861                                }
 862
 863                        }
 864                        counter->supported = true;
 865                }
 866        }
 867
 868        if (second_pass) {
                /*
                 * Second pass: close the counters that errored or belonged
                 * to a broken weak group, then reopen the weak groups.
                 */
 873
 874                evlist__for_each_cpu(evsel_list, i, cpu) {
 875                        affinity__set(&affinity, cpu);
 876                        /* First close errored or weak retry */
 877                        evlist__for_each_entry(evsel_list, counter) {
 878                                if (!counter->reset_group && !counter->errored)
 879                                        continue;
 880                                if (evsel__cpu_iter_skip_no_inc(counter, cpu))
 881                                        continue;
 882                                perf_evsel__close_cpu(&counter->core, counter->cpu_iter);
 883                        }
 884                        /* Now reopen weak */
 885                        evlist__for_each_entry(evsel_list, counter) {
 886                                if (!counter->reset_group && !counter->errored)
 887                                        continue;
 888                                if (evsel__cpu_iter_skip(counter, cpu))
 889                                        continue;
 890                                if (!counter->reset_group)
 891                                        continue;
 892try_again_reset:
 893                                pr_debug2("reopening weak %s\n", evsel__name(counter));
 894                                if (create_perf_stat_counter(counter, &stat_config, &target,
 895                                                             counter->cpu_iter - 1) < 0) {
 896
 897                                        switch (stat_handle_error(counter)) {
 898                                        case COUNTER_FATAL:
 899                                                return -1;
 900                                        case COUNTER_RETRY:
 901                                                goto try_again_reset;
 902                                        case COUNTER_SKIP:
 903                                                continue;
 904                                        default:
 905                                                break;
 906                                        }
 907                                }
 908                                counter->supported = true;
 909                        }
 910                }
 911        }
 912        affinity__cleanup(&affinity);
 913
 914        evlist__for_each_entry(evsel_list, counter) {
 915                if (!counter->supported) {
 916                        perf_evsel__free_fd(&counter->core);
 917                        continue;
 918                }
 919
 920                l = strlen(counter->unit);
 921                if (l > stat_config.unit_width)
 922                        stat_config.unit_width = l;
 923
 924                if (evsel__should_store_id(counter) &&
 925                    evsel__store_ids(counter, evsel_list))
 926                        return -1;
 927        }
 928
 929        if (evlist__apply_filters(evsel_list, &counter)) {
 930                pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
 931                        counter->filter, evsel__name(counter), errno,
 932                        str_error_r(errno, msg, sizeof(msg)));
 933                return -1;
 934        }
 935
 936        if (STAT_RECORD) {
 937                int fd = perf_data__fd(&perf_stat.data);
 938
 939                if (is_pipe) {
 940                        err = perf_header__write_pipe(perf_data__fd(&perf_stat.data));
 941                } else {
 942                        err = perf_session__write_header(perf_stat.session, evsel_list,
 943                                                         fd, false);
 944                }
 945
 946                if (err < 0)
 947                        return err;
 948
 949                err = perf_event__synthesize_stat_events(&stat_config, NULL, evsel_list,
 950                                                         process_synthesized_event, is_pipe);
 951                if (err < 0)
 952                        return err;
 953        }
 954
 955        /*
 956         * Enable counters and exec the command:
 957         */
 958        if (forks) {
 959                evlist__start_workload(evsel_list);
 960                err = enable_counters();
 961                if (err)
 962                        return -1;
 963
 964                t0 = rdclock();
 965                clock_gettime(CLOCK_MONOTONIC, &ref_time);
 966
 967                if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
 968                        status = dispatch_events(forks, timeout, interval, &times);
 969                if (child_pid != -1) {
 970                        if (timeout)
 971                                kill(child_pid, SIGTERM);
 972                        wait4(child_pid, &status, 0, &stat_config.ru_data);
 973                }
 974
 975                if (workload_exec_errno) {
 976                        const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
 977                        pr_err("Workload failed: %s\n", emsg);
 978                        return -1;
 979                }
 980
 981                if (WIFSIGNALED(status))
 982                        psignal(WTERMSIG(status), argv[0]);
 983        } else {
 984                err = enable_counters();
 985                if (err)
 986                        return -1;
 987
 988                t0 = rdclock();
 989                clock_gettime(CLOCK_MONOTONIC, &ref_time);
 990
 991                status = dispatch_events(forks, timeout, interval, &times);
 992        }
 993
 994        disable_counters();
 995
 996        t1 = rdclock();
 997
 998        if (stat_config.walltime_run_table)
 999                stat_config.walltime_run[run_idx] = t1 - t0;
1000
1001        if (interval && stat_config.summary) {
1002                stat_config.interval = 0;
1003                stat_config.stop_read_counter = true;
1004                init_stats(&walltime_nsecs_stats);
1005                update_stats(&walltime_nsecs_stats, t1 - t0);
1006
1007                if (stat_config.aggr_mode == AGGR_GLOBAL)
1008                        evlist__save_aggr_prev_raw_counts(evsel_list);
1009
1010                evlist__copy_prev_raw_counts(evsel_list);
1011                evlist__reset_prev_raw_counts(evsel_list);
1012                runtime_stat_reset(&stat_config);
1013                perf_stat__reset_shadow_per_stat(&rt_stat);
1014        } else
1015                update_stats(&walltime_nsecs_stats, t1 - t0);
1016
1017        /*
1018         * Closing a group leader splits the group, and as we only disable
1019         * group leaders, results in remaining events becoming enabled. To
1020         * avoid arbitrary skew, we must read all counters before closing any
1021         * group leaders.
1022         */
1023        read_counters(&(struct timespec) { .tv_nsec = t1-t0 });
1024
        /*
         * When recording (STAT_RECORD), keep evsel_list open: it is still
         * processed later and will be closed afterwards.
         */
1029        if (!STAT_RECORD)
1030                evlist__close(evsel_list);
1031
1032        return WEXITSTATUS(status);
1033}
1034
1035static int run_perf_stat(int argc, const char **argv, int run_idx)
1036{
1037        int ret;
1038
1039        if (pre_cmd) {
1040                ret = system(pre_cmd);
1041                if (ret)
1042                        return ret;
1043        }
1044
1045        if (sync_run)
1046                sync();
1047
1048        ret = __run_perf_stat(argc, argv, run_idx);
1049        if (ret)
1050                return ret;
1051
1052        if (post_cmd) {
1053                ret = system(post_cmd);
1054                if (ret)
1055                        return ret;
1056        }
1057
1058        return ret;
1059}
1060
1061static void print_counters(struct timespec *ts, int argc, const char **argv)
1062{
1063        /* Do not print anything if we record to the pipe. */
1064        if (STAT_RECORD && perf_stat.data.is_pipe)
1065                return;
1066        if (stat_config.quiet)
1067                return;
1068
1069        evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv);
1070}
1071
1072static volatile int signr = -1;
1073
1074static void skip_signal(int signo)
1075{
1076        if ((child_pid == -1) || stat_config.interval)
1077                done = 1;
1078
1079        signr = signo;
        /*
         * Render child_pid harmless so that, in case of a race with fast
         * PID recycling, we won't send SIGTERM to a random process.
         */
1086        child_pid = -1;
1087}
1088
1089static void sig_atexit(void)
1090{
1091        sigset_t set, oset;
1092
        /*
         * Avoid a race with the SIGCHLD handler in skip_signal(), which
         * modifies child_pid; the goal is to never send SIGTERM to a
         * random process.
         */
1099        sigemptyset(&set);
1100        sigaddset(&set, SIGCHLD);
1101        sigprocmask(SIG_BLOCK, &set, &oset);
1102
1103        if (child_pid != -1)
1104                kill(child_pid, SIGTERM);
1105
1106        sigprocmask(SIG_SETMASK, &oset, NULL);
1107
1108        if (signr == -1)
1109                return;
1110
1111        signal(signr, SIG_DFL);
1112        kill(getpid(), signr);
1113}
1114
1115void perf_stat__set_big_num(int set)
1116{
1117        stat_config.big_num = (set != 0);
1118}
1119
1120void perf_stat__set_no_csv_summary(int set)
1121{
1122        stat_config.no_csv_summary = (set != 0);
1123}
1124
1125static int stat__set_big_num(const struct option *opt __maybe_unused,
1126                             const char *s __maybe_unused, int unset)
1127{
1128        big_num_opt = unset ? 0 : 1;
1129        perf_stat__set_big_num(!unset);
1130        return 0;
1131}
1132
1133static int enable_metric_only(const struct option *opt __maybe_unused,
1134                              const char *s __maybe_unused, int unset)
1135{
1136        force_metric_only = true;
1137        stat_config.metric_only = !unset;
1138        return 0;
1139}
1140
1141static int parse_metric_groups(const struct option *opt,
1142                               const char *str,
1143                               int unset __maybe_unused)
1144{
1145        return metricgroup__parse_groups(opt, str,
1146                                         stat_config.metric_no_group,
1147                                         stat_config.metric_no_merge,
1148                                         &stat_config.metric_events);
1149}
1150
1151static int parse_control_option(const struct option *opt,
1152                                const char *str,
1153                                int unset __maybe_unused)
1154{
1155        struct perf_stat_config *config = opt->value;
1156
1157        return evlist__parse_control(str, &config->ctl_fd, &config->ctl_fd_ack, &config->ctl_fd_close);
1158}
1159
1160static int parse_stat_cgroups(const struct option *opt,
1161                              const char *str, int unset)
1162{
1163        if (stat_config.cgroup_list) {
1164                pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
1165                return -1;
1166        }
1167
1168        return parse_cgroups(opt, str, unset);
1169}
1170
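/*
 * Command line options.  A few illustrative invocations (not exhaustive):
 *
 *   perf stat -- ./workload                     # count default events for a command
 *   perf stat -e cycles,instructions -p 1234    # attach to an existing pid
 *   perf stat -a -I 1000 --interval-count 5     # system-wide, five 1s intervals
 *   perf stat -a --per-core -- sleep 1          # aggregate counts per physical core
 */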
1171static struct option stat_options[] = {
1172        OPT_BOOLEAN('T', "transaction", &transaction_run,
1173                    "hardware transaction statistics"),
1174        OPT_CALLBACK('e', "event", &evsel_list, "event",
1175                     "event selector. use 'perf list' to list available events",
1176                     parse_events_option),
1177        OPT_CALLBACK(0, "filter", &evsel_list, "filter",
1178                     "event filter", parse_filter),
1179        OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit,
1180                    "child tasks do not inherit counters"),
1181        OPT_STRING('p', "pid", &target.pid, "pid",
1182                   "stat events on existing process id"),
1183        OPT_STRING('t', "tid", &target.tid, "tid",
1184                   "stat events on existing thread id"),
1185#ifdef HAVE_BPF_SKEL
1186        OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id",
1187                   "stat events on existing bpf program id"),
1188        OPT_BOOLEAN(0, "bpf-counters", &target.use_bpf,
1189                    "use bpf program to count events"),
1190        OPT_STRING(0, "bpf-attr-map", &target.attr_map, "attr-map-path",
1191                   "path to perf_event_attr map"),
1192#endif
1193        OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
1194                    "system-wide collection from all CPUs"),
1195        OPT_BOOLEAN('g', "group", &group,
1196                    "put the counters into a counter group"),
1197        OPT_BOOLEAN(0, "scale", &stat_config.scale,
1198                    "Use --no-scale to disable counter scaling for multiplexing"),
1199        OPT_INCR('v', "verbose", &verbose,
1200                    "be more verbose (show counter open errors, etc)"),
1201        OPT_INTEGER('r', "repeat", &stat_config.run_count,
1202                    "repeat command and print average + stddev (max: 100, forever: 0)"),
1203        OPT_BOOLEAN(0, "table", &stat_config.walltime_run_table,
1204                    "display details about each run (only with -r option)"),
1205        OPT_BOOLEAN('n', "null", &stat_config.null_run,
                    "null run - don't start any counters"),
1207        OPT_INCR('d', "detailed", &detailed_run,
1208                    "detailed run - start a lot of events"),
1209        OPT_BOOLEAN('S', "sync", &sync_run,
1210                    "call sync() before starting a run"),
1211        OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
1212                           "print large numbers with thousands\' separators",
1213                           stat__set_big_num),
1214        OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
                    "list of cpus to monitor in system-wide mode"),
1216        OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
1217                    "disable CPU count aggregation", AGGR_NONE),
1218        OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
1219        OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
1220                   "print counts with custom separator"),
1221        OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
1222                     "monitor event in cgroup name only", parse_stat_cgroups),
1223        OPT_STRING(0, "for-each-cgroup", &stat_config.cgroup_list, "name",
1224                    "expand events for each cgroup"),
1225        OPT_STRING('o', "output", &output_name, "file", "output file name"),
1226        OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
1227        OPT_INTEGER(0, "log-fd", &output_fd,
1228                    "log output to fd, instead of stderr"),
1229        OPT_STRING(0, "pre", &pre_cmd, "command",
1230                        "command to run prior to the measured command"),
1231        OPT_STRING(0, "post", &post_cmd, "command",
                        "command to run after the measured command"),
1233        OPT_UINTEGER('I', "interval-print", &stat_config.interval,
1234                    "print counts at regular interval in ms "
1235                    "(overhead is possible for values <= 100ms)"),
1236        OPT_INTEGER(0, "interval-count", &stat_config.times,
1237                    "print counts for fixed number of times"),
1238        OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear,
1239                    "clear screen in between new interval"),
1240        OPT_UINTEGER(0, "timeout", &stat_config.timeout,
1241                    "stop workload and print counts after a timeout period in ms (>= 10ms)"),
1242        OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
1243                     "aggregate counts per processor socket", AGGR_SOCKET),
1244        OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode,
1245                     "aggregate counts per processor die", AGGR_DIE),
1246        OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
1247                     "aggregate counts per physical processor core", AGGR_CORE),
1248        OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
1249                     "aggregate counts per thread", AGGR_THREAD),
1250        OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
1251                     "aggregate counts per numa node", AGGR_NODE),
1252        OPT_INTEGER('D', "delay", &stat_config.initial_delay,
1253                    "ms to wait before starting measurement after program start (-1: start with events disabled)"),
1254        OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
1255                        "Only print computed metrics. No raw values", enable_metric_only),
1256        OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group,
1257                       "don't group metric events, impacts multiplexing"),
1258        OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge,
1259                       "don't try to share events between metrics in a group"),
1260        OPT_BOOLEAN(0, "topdown", &topdown_run,
1261                        "measure top-down statistics"),
1262        OPT_UINTEGER(0, "td-level", &stat_config.topdown_level,
1263                        "Set the metrics level for the top-down statistics (0: max level)"),
1264        OPT_BOOLEAN(0, "smi-cost", &smi_cost,
1265                        "measure SMI cost"),
1266        OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
1267                     "monitor specified metrics or metric groups (separated by ,)",
1268                     parse_metric_groups),
1269        OPT_BOOLEAN_FLAG(0, "all-kernel", &stat_config.all_kernel,
1270                         "Configure all used events to run in kernel space.",
1271                         PARSE_OPT_EXCLUSIVE),
1272        OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
1273                         "Configure all used events to run in user space.",
1274                         PARSE_OPT_EXCLUSIVE),
1275        OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
                    "Use with the 'percore' event qualifier to show the event "
                    "counts of one hardware thread as the sum over all hardware "
                    "threads of the same physical core"),
1279        OPT_BOOLEAN(0, "summary", &stat_config.summary,
1280                       "print summary for interval mode"),
1281        OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary,
1282                       "don't print 'summary' for CSV summary output"),
1283        OPT_BOOLEAN(0, "quiet", &stat_config.quiet,
1284                        "don't print output (useful with record)"),
1285#ifdef HAVE_LIBPFM
1286        OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
1287                "libpfm4 event selector. use 'perf list' to list available events",
1288                parse_libpfm_events_option),
1289#endif
1290        OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
1291                     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
1292                     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
1293                     "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
1294                      parse_control_option),
1295        OPT_CALLBACK_OPTARG(0, "iostat", &evsel_list, &stat_config, "default",
1296                            "measure I/O performance metrics provided by arch/platform",
1297                            iostat_parse),
1298        OPT_END()
1299};
1300
1301static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
1302                                 struct perf_cpu_map *map, int cpu)
1303{
1304        return cpu_map__get_socket(map, cpu, NULL);
1305}
1306
1307static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
1308                              struct perf_cpu_map *map, int cpu)
1309{
1310        return cpu_map__get_die(map, cpu, NULL);
1311}
1312
1313static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
1314                               struct perf_cpu_map *map, int cpu)
1315{
1316        return cpu_map__get_core(map, cpu, NULL);
1317}
1318
1319static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused,
1320                               struct perf_cpu_map *map, int cpu)
1321{
1322        return cpu_map__get_node(map, cpu, NULL);
1323}
1324
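/*
 * Map the cpu at index 'idx' to its aggregation id, caching the result in
 * config->cpus_aggr_map so the topology lookup is done only once per cpu.
 */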
1325static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config,
1326                               aggr_get_id_t get_id, struct perf_cpu_map *map, int idx)
1327{
1328        int cpu;
1329        struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
1330
1331        if (idx >= map->nr)
1332                return id;
1333
1334        cpu = map->map[idx];
1335
1336        if (cpu_map__aggr_cpu_id_is_empty(config->cpus_aggr_map->map[cpu]))
1337                config->cpus_aggr_map->map[cpu] = get_id(config, map, idx);
1338
1339        id = config->cpus_aggr_map->map[cpu];
1340        return id;
1341}
1342
1343static struct aggr_cpu_id perf_stat__get_socket_cached(struct perf_stat_config *config,
1344                                        struct perf_cpu_map *map, int idx)
1345{
1346        return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
1347}
1348
1349static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *config,
1350                                        struct perf_cpu_map *map, int idx)
1351{
1352        return perf_stat__get_aggr(config, perf_stat__get_die, map, idx);
1353}
1354
1355static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config,
1356                                      struct perf_cpu_map *map, int idx)
1357{
1358        return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
1359}
1360
1361static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *config,
1362                                      struct perf_cpu_map *map, int idx)
1363{
1364        return perf_stat__get_aggr(config, perf_stat__get_node, map, idx);
1365}
1366
1367static bool term_percore_set(void)
1368{
1369        struct evsel *counter;
1370
1371        evlist__for_each_entry(evsel_list, counter) {
1372                if (counter->percore)
1373                        return true;
1374        }
1375
1376        return false;
1377}
1378
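/*
 * Build the aggregation map for the selected --per-* mode and allocate the
 * per-cpu id cache used by the *_cached lookups above.
 */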
1379static int perf_stat_init_aggr_mode(void)
1380{
1381        int nr;
1382
1383        switch (stat_config.aggr_mode) {
1384        case AGGR_SOCKET:
1385                if (cpu_map__build_socket_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
1386                        perror("cannot build socket map");
1387                        return -1;
1388                }
1389                stat_config.aggr_get_id = perf_stat__get_socket_cached;
1390                break;
1391        case AGGR_DIE:
1392                if (cpu_map__build_die_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
1393                        perror("cannot build die map");
1394                        return -1;
1395                }
1396                stat_config.aggr_get_id = perf_stat__get_die_cached;
1397                break;
1398        case AGGR_CORE:
1399                if (cpu_map__build_core_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
1400                        perror("cannot build core map");
1401                        return -1;
1402                }
1403                stat_config.aggr_get_id = perf_stat__get_core_cached;
1404                break;
1405        case AGGR_NODE:
1406                if (cpu_map__build_node_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
                        perror("cannot build node map");
1408                        return -1;
1409                }
1410                stat_config.aggr_get_id = perf_stat__get_node_cached;
1411                break;
1412        case AGGR_NONE:
1413                if (term_percore_set()) {
1414                        if (cpu_map__build_core_map(evsel_list->core.cpus,
1415                                                    &stat_config.aggr_map)) {
1416                                perror("cannot build core map");
1417                                return -1;
1418                        }
1419                        stat_config.aggr_get_id = perf_stat__get_core_cached;
1420                }
1421                break;
1422        case AGGR_GLOBAL:
1423        case AGGR_THREAD:
1424        case AGGR_UNSET:
1425        default:
1426                break;
1427        }
1428
1429        /*
1430         * The evsel_list->cpus is the base we operate on,
1431         * taking the highest cpu number (plus one) as the size of
1432         * the aggregation translation cpumap.
1433         */
1434        nr = perf_cpu_map__max(evsel_list->core.cpus);
1435        stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr + 1);
1436        return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
1437}
1438
1439static void cpu_aggr_map__delete(struct cpu_aggr_map *map)
1440{
1441        if (map) {
1442                WARN_ONCE(refcount_read(&map->refcnt) != 0,
1443                          "cpu_aggr_map refcnt unbalanced\n");
1444                free(map);
1445        }
1446}
1447
1448static void cpu_aggr_map__put(struct cpu_aggr_map *map)
1449{
1450        if (map && refcount_dec_and_test(&map->refcnt))
1451                cpu_aggr_map__delete(map);
1452}
1453
1454static void perf_stat__exit_aggr_mode(void)
1455{
1456        cpu_aggr_map__put(stat_config.aggr_map);
1457        cpu_aggr_map__put(stat_config.cpus_aggr_map);
1458        stat_config.aggr_map = NULL;
1459        stat_config.cpus_aggr_map = NULL;
1460}
1461
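/*
 * The perf_env__* helpers below resolve CPU topology from the perf_env
 * recorded in a perf.data header rather than from the running system.
 * They back the *_file aggregation callbacks used by 'perf stat report'
 * (see perf_stat_init_aggr_mode_file()).
 */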
1462static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *map, int idx)
1463{
1464        int cpu;
1465
1466        if (idx >= map->nr)
1467                return -1;
1468
1469        cpu = map->map[idx];
1470
1471        if (cpu >= env->nr_cpus_avail)
1472                return -1;
1473
1474        return cpu;
1475}
1476
1477static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data)
1478{
1479        struct perf_env *env = data;
1480        int cpu = perf_env__get_cpu(env, map, idx);
1481        struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
1482
1483        if (cpu != -1)
1484                id.socket = env->cpu[cpu].socket_id;
1485
1486        return id;
1487}
1488
1489static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, void *data)
1490{
1491        struct perf_env *env = data;
1492        struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
1493        int cpu = perf_env__get_cpu(env, map, idx);
1494
1495        if (cpu != -1) {
1496                /*
1497                 * die_id is relative to socket, so start
1498                 * with the socket ID and then add die to
1499                 * make a unique ID.
1500                 */
1501                id.socket = env->cpu[cpu].socket_id;
1502                id.die = env->cpu[cpu].die_id;
1503        }
1504
1505        return id;
1506}
1507
1508static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, void *data)
1509{
1510        struct perf_env *env = data;
1511        struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
1512        int cpu = perf_env__get_cpu(env, map, idx);
1513
1514        if (cpu != -1) {
1515                /*
1516                 * core_id is relative to socket and die, but
1517                 * we need a globally unique id, so set the
1518                 * socket, die and core ids.
1519                 */
1520                id.socket = env->cpu[cpu].socket_id;
1521                id.die = env->cpu[cpu].die_id;
1522                id.core = env->cpu[cpu].core_id;
1523        }
1524
1525        return id;
1526}
1527
1528static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, void *data)
1529{
1530        int cpu = perf_env__get_cpu(data, map, idx);
1531        struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
1532
1533        id.node = perf_env__numa_node(data, cpu);
1534        return id;
1535}
1536
1537static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus,
1538                                      struct cpu_aggr_map **sockp)
1539{
1540        return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
1541}
1542
1543static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus,
1544                                   struct cpu_aggr_map **diep)
1545{
1546        return cpu_map__build_map(cpus, diep, perf_env__get_die, env);
1547}
1548
1549static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus,
1550                                    struct cpu_aggr_map **corep)
1551{
1552        return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
1553}
1554
1555static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus,
1556                                    struct cpu_aggr_map **nodep)
1557{
1558        return cpu_map__build_map(cpus, nodep, perf_env__get_node, env);
1559}
1560
1561static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
1562                                      struct perf_cpu_map *map, int idx)
1563{
1564        return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
1565}
1566static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
1567                                   struct perf_cpu_map *map, int idx)
1568{
1569        return perf_env__get_die(map, idx, &perf_stat.session->header.env);
1570}
1571
1572static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
1573                                    struct perf_cpu_map *map, int idx)
1574{
1575        return perf_env__get_core(map, idx, &perf_stat.session->header.env);
1576}
1577
1578static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
1579                                    struct perf_cpu_map *map, int idx)
1580{
1581        return perf_env__get_node(map, idx, &perf_stat.session->header.env);
1582}
1583
1584static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
1585{
1586        struct perf_env *env = &st->session->header.env;
1587
1588        switch (stat_config.aggr_mode) {
1589        case AGGR_SOCKET:
1590                if (perf_env__build_socket_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
1591                        perror("cannot build socket map");
1592                        return -1;
1593                }
1594                stat_config.aggr_get_id = perf_stat__get_socket_file;
1595                break;
1596        case AGGR_DIE:
1597                if (perf_env__build_die_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
1598                        perror("cannot build die map");
1599                        return -1;
1600                }
1601                stat_config.aggr_get_id = perf_stat__get_die_file;
1602                break;
1603        case AGGR_CORE:
1604                if (perf_env__build_core_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
1605                        perror("cannot build core map");
1606                        return -1;
1607                }
1608                stat_config.aggr_get_id = perf_stat__get_core_file;
1609                break;
1610        case AGGR_NODE:
1611                if (perf_env__build_node_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
1612                        perror("cannot build node map");
1613                        return -1;
1614                }
1615                stat_config.aggr_get_id = perf_stat__get_node_file;
1616                break;
1617        case AGGR_NONE:
1618        case AGGR_GLOBAL:
1619        case AGGR_THREAD:
1620        case AGGR_UNSET:
1621        default:
1622                break;
1623        }
1624
1625        return 0;
1626}
1627
1628/*
1629 * Add default attributes, if there were no attributes specified or
1630 * if -d/--detailed, -d -d or -d -d -d is used:
1631 */
1632static int add_default_attributes(void)
1633{
1634        int err;
1635        struct perf_event_attr default_attrs0[] = {
1636
1637  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK              },
1638  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES        },
1639  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS          },
1640  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS             },
1641
1642  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES              },
1643};
1644        struct perf_event_attr frontend_attrs[] = {
1645  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
1646};
1647        struct perf_event_attr backend_attrs[] = {
1648  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND  },
1649};
1650        struct perf_event_attr default_attrs1[] = {
1651  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS            },
1652  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS     },
1653  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES           },
1654
1655};
1656        struct perf_event_attr default_sw_attrs[] = {
1657  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK              },
1658  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES        },
1659  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS          },
1660  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS             },
1661};
1662
1663/*
1664 * Detailed stats (-d), covering the L1 and last level data caches:
1665 */
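/*
 * PERF_TYPE_HW_CACHE events encode their config as
 *   (cache id) | (op id << 8) | (result id << 16),
 * which is what the shifts in the tables below spell out.
 */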
1666        struct perf_event_attr detailed_attrs[] = {
1667
1668  { .type = PERF_TYPE_HW_CACHE,
1669    .config =
1670         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1671        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1672        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1673
1674  { .type = PERF_TYPE_HW_CACHE,
1675    .config =
1676         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1677        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1678        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1679
1680  { .type = PERF_TYPE_HW_CACHE,
1681    .config =
1682         PERF_COUNT_HW_CACHE_LL                 <<  0  |
1683        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1684        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1685
1686  { .type = PERF_TYPE_HW_CACHE,
1687    .config =
1688         PERF_COUNT_HW_CACHE_LL                 <<  0  |
1689        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1690        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1691};
1692
1693/*
1694 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
1695 */
1696        struct perf_event_attr very_detailed_attrs[] = {
1697
1698  { .type = PERF_TYPE_HW_CACHE,
1699    .config =
1700         PERF_COUNT_HW_CACHE_L1I                <<  0  |
1701        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1702        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1703
1704  { .type = PERF_TYPE_HW_CACHE,
1705    .config =
1706         PERF_COUNT_HW_CACHE_L1I                <<  0  |
1707        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1708        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1709
1710  { .type = PERF_TYPE_HW_CACHE,
1711    .config =
1712         PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1713        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1714        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1715
1716  { .type = PERF_TYPE_HW_CACHE,
1717    .config =
1718         PERF_COUNT_HW_CACHE_DTLB               <<  0  |
1719        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1720        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1721
1722  { .type = PERF_TYPE_HW_CACHE,
1723    .config =
1724         PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1725        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1726        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1727
1728  { .type = PERF_TYPE_HW_CACHE,
1729    .config =
1730         PERF_COUNT_HW_CACHE_ITLB               <<  0  |
1731        (PERF_COUNT_HW_CACHE_OP_READ            <<  8) |
1732        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1733
1734};
1735
1736/*
1737 * Very, very detailed stats (-d -d -d), adding prefetch events:
1738 */
1739        struct perf_event_attr very_very_detailed_attrs[] = {
1740
1741  { .type = PERF_TYPE_HW_CACHE,
1742    .config =
1743         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1744        (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1745        (PERF_COUNT_HW_CACHE_RESULT_ACCESS      << 16)                          },
1746
1747  { .type = PERF_TYPE_HW_CACHE,
1748    .config =
1749         PERF_COUNT_HW_CACHE_L1D                <<  0  |
1750        (PERF_COUNT_HW_CACHE_OP_PREFETCH        <<  8) |
1751        (PERF_COUNT_HW_CACHE_RESULT_MISS        << 16)                          },
1752};
1753        struct parse_events_error errinfo;
1754
1755        /* Set attrs if no event is selected and !null_run: */
1756        if (stat_config.null_run)
1757                return 0;
1758
1759        bzero(&errinfo, sizeof(errinfo));
1760        if (transaction_run) {
1761                /* Handle -T as -M transaction. Once platform-specific metrics
1762                 * support has been added to the json files, all architectures
1763                 * will use this approach. To determine transaction support
1764                 * on an architecture, test for such a metric name.
1765                 */
1766                if (metricgroup__has_metric("transaction")) {
1767                        struct option opt = { .value = &evsel_list };
1768
1769                        return metricgroup__parse_groups(&opt, "transaction",
1770                                                         stat_config.metric_no_group,
1771                                                        stat_config.metric_no_merge,
1772                                                         &stat_config.metric_events);
1773                }
1774
1775                if (pmu_have_event("cpu", "cycles-ct") &&
1776                    pmu_have_event("cpu", "el-start"))
1777                        err = parse_events(evsel_list, transaction_attrs,
1778                                           &errinfo);
1779                else
1780                        err = parse_events(evsel_list,
1781                                           transaction_limited_attrs,
1782                                           &errinfo);
1783                if (err) {
1784                        fprintf(stderr, "Cannot set up transaction events\n");
1785                        parse_events_print_error(&errinfo, transaction_attrs);
1786                        return -1;
1787                }
1788                return 0;
1789        }
1790
1791        if (smi_cost) {
1792                int smi;
1793
1794                if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
1795                        fprintf(stderr, "freeze_on_smi is not supported.\n");
1796                        return -1;
1797                }
1798
1799                if (!smi) {
1800                        if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
1801                                fprintf(stderr, "Failed to set freeze_on_smi.\n");
1802                                return -1;
1803                        }
1804                        smi_reset = true;
1805                }
1806
1807                if (pmu_have_event("msr", "aperf") &&
1808                    pmu_have_event("msr", "smi")) {
1809                        if (!force_metric_only)
1810                                stat_config.metric_only = true;
1811                        err = parse_events(evsel_list, smi_cost_attrs, &errinfo);
1812                } else {
1813                        fprintf(stderr, "Measuring SMI cost requires "
1814                                "msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
1815                        parse_events_print_error(&errinfo, smi_cost_attrs);
1816                        return -1;
1817                }
1818                if (err) {
1819                        parse_events_print_error(&errinfo, smi_cost_attrs);
1820                        fprintf(stderr, "Cannot set up SMI cost events\n");
1821                        return -1;
1822                }
1823                return 0;
1824        }
1825
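        /*
         * --topdown: prefer the self-describing topdown metric events when
         * the PMU exposes them; otherwise fall back to the grouped topdown
         * events, which require system-wide, --per-core counting.
         */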
1826        if (topdown_run) {
1827                const char **metric_attrs = topdown_metric_attrs;
1828                unsigned int max_level = 1;
1829                char *str = NULL;
1830                bool warn = false;
1831
1832                if (!force_metric_only)
1833                        stat_config.metric_only = true;
1834
1835                if (pmu_have_event("cpu", topdown_metric_L2_attrs[5])) {
1836                        metric_attrs = topdown_metric_L2_attrs;
1837                        max_level = 2;
1838                }
1839
1840                if (stat_config.topdown_level > max_level) {
1841                        pr_err("Invalid top-down metrics level. The max level is %u.\n", max_level);
1842                        return -1;
1843                } else if (!stat_config.topdown_level)
1844                        stat_config.topdown_level = max_level;
1845
1846                if (topdown_filter_events(metric_attrs, &str, 1) < 0) {
1847                        pr_err("Out of memory\n");
1848                        return -1;
1849                }
1850                if (metric_attrs[0] && str) {
1851                        if (!stat_config.interval && !stat_config.metric_only) {
1852                                fprintf(stat_config.output,
1853                                        "Topdown accuracy may decrease when measuring long periods.\n"
1854                                        "Please print the result regularly, e.g. -I1000\n");
1855                        }
1856                        goto setup_metrics;
1857                }
1858
1859                zfree(&str);
1860
1861                if (stat_config.aggr_mode != AGGR_GLOBAL &&
1862                    stat_config.aggr_mode != AGGR_CORE) {
1863                        pr_err("top down event configuration requires --per-core mode\n");
1864                        return -1;
1865                }
1866                stat_config.aggr_mode = AGGR_CORE;
1867                if (nr_cgroups || !target__has_cpu(&target)) {
1868                        pr_err("top down event configuration requires system-wide mode (-a)\n");
1869                        return -1;
1870                }
1871
1872                if (topdown_filter_events(topdown_attrs, &str,
1873                                arch_topdown_check_group(&warn)) < 0) {
1874                        pr_err("Out of memory\n");
1875                        return -1;
1876                }
1877                if (topdown_attrs[0] && str) {
1878                        if (warn)
1879                                arch_topdown_group_warn();
1880setup_metrics:
1881                        err = parse_events(evsel_list, str, &errinfo);
1882                        if (err) {
1883                                fprintf(stderr,
1884                                        "Cannot set up top down events %s: %d\n",
1885                                        str, err);
1886                                parse_events_print_error(&errinfo, str);
1887                                free(str);
1888                                return -1;
1889                        }
1890                } else {
1891                        fprintf(stderr, "System does not support topdown\n");
1892                        return -1;
1893                }
1894                free(str);
1895        }
1896
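        /*
         * No events were specified on the command line: install the default
         * set. On hybrid systems the software events are added as attrs and
         * the hardware events are parsed by name, so each core PMU gets its
         * own copy.
         */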
1897        if (!evsel_list->core.nr_entries) {
1898                if (perf_pmu__has_hybrid()) {
1899                        const char *hybrid_str = "cycles,instructions,branches,branch-misses";
1900
1901                        if (target__has_cpu(&target))
1902                                default_sw_attrs[0].config = PERF_COUNT_SW_CPU_CLOCK;
1903
1904                        if (evlist__add_default_attrs(evsel_list,
1905                                                      default_sw_attrs) < 0) {
1906                                return -1;
1907                        }
1908
1909                        err = parse_events(evsel_list, hybrid_str, &errinfo);
1910                        if (err) {
1911                                fprintf(stderr,
1912                                        "Cannot set up hybrid events %s: %d\n",
1913                                        hybrid_str, err);
1914                                parse_events_print_error(&errinfo, hybrid_str);
1915                                return -1;
1916                        }
1917                        return err;
1918                }
1919
1920                if (target__has_cpu(&target))
1921                        default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
1922
1923                if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
1924                        return -1;
1925                if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
1926                        if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0)
1927                                return -1;
1928                }
1929                if (pmu_have_event("cpu", "stalled-cycles-backend")) {
1930                        if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0)
1931                                return -1;
1932                }
1933                if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
1934                        return -1;
1935
1936                stat_config.topdown_level = TOPDOWN_MAX_LEVEL;
1937                if (arch_evlist__add_default_attrs(evsel_list) < 0)
1938                        return -1;
1939        }
1940
1941        /* Detailed events get appended to the event list: */
1942
1943        if (detailed_run <  1)
1944                return 0;
1945
1946        /* Append detailed run extra attributes: */
1947        if (evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1948                return -1;
1949
1950        if (detailed_run < 2)
1951                return 0;
1952
1953        /* Append very detailed run extra attributes: */
1954        if (evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1955                return -1;
1956
1957        if (detailed_run < 3)
1958                return 0;
1959
1960        /* Append very, very detailed run extra attributes: */
1961        return evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1962}
1963
1964static const char * const stat_record_usage[] = {
1965        "perf stat record [<options>]",
1966        NULL,
1967};
1968
1969static void init_features(struct perf_session *session)
1970{
1971        int feat;
1972
1973        for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1974                perf_header__set_feat(&session->header, feat);
1975
1976        perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1977        perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1978        perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1979        perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1980        perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1981}
1982
1983static int __cmd_record(int argc, const char **argv)
1984{
1985        struct perf_session *session;
1986        struct perf_data *data = &perf_stat.data;
1987
1988        argc = parse_options(argc, argv, stat_options, stat_record_usage,
1989                             PARSE_OPT_STOP_AT_NON_OPTION);
1990
1991        if (output_name)
1992                data->path = output_name;
1993
1994        if (stat_config.run_count != 1 || forever) {
1995                pr_err("Cannot use -r option with perf stat record.\n");
1996                return -1;
1997        }
1998
1999        session = perf_session__new(data, NULL);
2000        if (IS_ERR(session)) {
2001                pr_err("Perf session creation failed\n");
2002                return PTR_ERR(session);
2003        }
2004
2005        init_features(session);
2006
2007        session->evlist   = evsel_list;
2008        perf_stat.session = session;
2009        perf_stat.record  = true;
2010        return argc;
2011}
2012
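/*
 * 'perf stat report': a PERF_RECORD_STAT_ROUND marks the end of one
 * counting interval in the recorded file, so aggregate the counters and
 * print them, reusing the recorded command line for the header.
 */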
2013static int process_stat_round_event(struct perf_session *session,
2014                                    union perf_event *event)
2015{
2016        struct perf_record_stat_round *stat_round = &event->stat_round;
2017        struct evsel *counter;
2018        struct timespec tsh, *ts = NULL;
2019        const char **argv = session->header.env.cmdline_argv;
2020        int argc = session->header.env.nr_cmdline;
2021
2022        evlist__for_each_entry(evsel_list, counter)
2023                perf_stat_process_counter(&stat_config, counter);
2024
2025        if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
2026                update_stats(&walltime_nsecs_stats, stat_round->time);
2027
2028        if (stat_config.interval && stat_round->time) {
2029                tsh.tv_sec  = stat_round->time / NSEC_PER_SEC;
2030                tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
2031                ts = &tsh;
2032        }
2033
2034        print_counters(ts, argc, argv);
2035        return 0;
2036}
2037
2038static
2039int process_stat_config_event(struct perf_session *session,
2040                              union perf_event *event)
2041{
2042        struct perf_tool *tool = session->tool;
2043        struct perf_stat *st = container_of(tool, struct perf_stat, tool);
2044
2045        perf_event__read_stat_config(&stat_config, &event->stat_config);
2046
2047        if (perf_cpu_map__empty(st->cpus)) {
2048                if (st->aggr_mode != AGGR_UNSET)
2049                        pr_warning("warning: processing task data, aggregation mode not set\n");
2050                return 0;
2051        }
2052
2053        if (st->aggr_mode != AGGR_UNSET)
2054                stat_config.aggr_mode = st->aggr_mode;
2055
2056        if (perf_stat.data.is_pipe)
2057                perf_stat_init_aggr_mode();
2058        else
2059                perf_stat_init_aggr_mode_file(st);
2060
2061        return 0;
2062}
2063
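/*
 * The CPU map and the thread map arrive as separate events from the
 * recorded file; only once both are present are the evlist maps set and
 * the per-counter stats allocated (exactly once).
 */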
2064static int set_maps(struct perf_stat *st)
2065{
2066        if (!st->cpus || !st->threads)
2067                return 0;
2068
2069        if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
2070                return -EINVAL;
2071
2072        perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads);
2073
2074        if (evlist__alloc_stats(evsel_list, true))
2075                return -ENOMEM;
2076
2077        st->maps_allocated = true;
2078        return 0;
2079}
2080
2081static
2082int process_thread_map_event(struct perf_session *session,
2083                             union perf_event *event)
2084{
2085        struct perf_tool *tool = session->tool;
2086        struct perf_stat *st = container_of(tool, struct perf_stat, tool);
2087
2088        if (st->threads) {
2089                pr_warning("Extra thread map event, ignoring.\n");
2090                return 0;
2091        }
2092
2093        st->threads = thread_map__new_event(&event->thread_map);
2094        if (!st->threads)
2095                return -ENOMEM;
2096
2097        return set_maps(st);
2098}
2099
2100static
2101int process_cpu_map_event(struct perf_session *session,
2102                          union perf_event *event)
2103{
2104        struct perf_tool *tool = session->tool;
2105        struct perf_stat *st = container_of(tool, struct perf_stat, tool);
2106        struct perf_cpu_map *cpus;
2107
2108        if (st->cpus) {
2109                pr_warning("Extra cpu map event, ignoring.\n");
2110                return 0;
2111        }
2112
2113        cpus = cpu_map__new_data(&event->cpu_map.data);
2114        if (!cpus)
2115                return -ENOMEM;
2116
2117        st->cpus = cpus;
2118        return set_maps(st);
2119}
2120
2121static const char * const stat_report_usage[] = {
2122        "perf stat report [<options>]",
2123        NULL,
2124};
2125
2126static struct perf_stat perf_stat = {
2127        .tool = {
2128                .attr           = perf_event__process_attr,
2129                .event_update   = perf_event__process_event_update,
2130                .thread_map     = process_thread_map_event,
2131                .cpu_map        = process_cpu_map_event,
2132                .stat_config    = process_stat_config_event,
2133                .stat           = perf_event__process_stat_event,
2134                .stat_round     = process_stat_round_event,
2135        },
2136        .aggr_mode = AGGR_UNSET,
2137};
2138
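/*
 * Read back a file written by 'perf stat record', e.g.:
 *
 *   perf stat record -o stat.data -- ./workload
 *   perf stat report -i stat.data --per-core
 */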
2139static int __cmd_report(int argc, const char **argv)
2140{
2141        struct perf_session *session;
2142        const struct option options[] = {
2143        OPT_STRING('i', "input", &input_name, "file", "input file name"),
2144        OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
2145                     "aggregate counts per processor socket", AGGR_SOCKET),
2146        OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
2147                     "aggregate counts per processor die", AGGR_DIE),
2148        OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
2149                     "aggregate counts per physical processor core", AGGR_CORE),
2150        OPT_SET_UINT(0, "per-node", &perf_stat.aggr_mode,
2151                     "aggregate counts per numa node", AGGR_NODE),
2152        OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
2153                     "disable CPU count aggregation", AGGR_NONE),
2154        OPT_END()
2155        };
2156        struct stat st;
2157        int ret;
2158
2159        argc = parse_options(argc, argv, options, stat_report_usage, 0);
2160
2161        if (!input_name || !strlen(input_name)) {
2162                if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
2163                        input_name = "-";
2164                else
2165                        input_name = "perf.data";
2166        }
2167
2168        perf_stat.data.path = input_name;
2169        perf_stat.data.mode = PERF_DATA_MODE_READ;
2170
2171        session = perf_session__new(&perf_stat.data, &perf_stat.tool);
2172        if (IS_ERR(session))
2173                return PTR_ERR(session);
2174
2175        perf_stat.session  = session;
2176        stat_config.output = stderr;
2177        evsel_list         = session->evlist;
2178
2179        ret = perf_session__process_events(session);
2180        if (ret)
2181                return ret;
2182
2183        perf_session__delete(session);
2184        return 0;
2185}
2186
2187static void setup_system_wide(int forks)
2188{
2189        /*
2190         * Make system wide (-a) the default target if
2191         * no target was specified and one of the following
2192         * conditions is met:
2193         *
2194         *   - there's no workload specified
2195         *   - there is a workload specified but all requested
2196         *     events are system-wide events
2197         */
2198        if (!target__none(&target))
2199                return;
2200
2201        if (!forks)
2202                target.system_wide = true;
2203        else {
2204                struct evsel *counter;
2205
2206                evlist__for_each_entry(evsel_list, counter) {
2207                        if (!counter->core.system_wide &&
2208                            strcmp(counter->name, "duration_time")) {
2209                                return;
2210                        }
2211                }
2212
2213                if (evsel_list->core.nr_entries)
2214                        target.system_wide = true;
2215        }
2216}
2217
2218int cmd_stat(int argc, const char **argv)
2219{
2220        const char * const stat_usage[] = {
2221                "perf stat [<options>] [<command>]",
2222                NULL
2223        };
2224        int status = -EINVAL, run_idx, err;
2225        const char *mode;
2226        FILE *output = stderr;
2227        unsigned int interval, timeout;
2228        const char * const stat_subcommands[] = { "record", "report" };
2229        char errbuf[BUFSIZ];
2230
2231        setlocale(LC_ALL, "");
2232
2233        evsel_list = evlist__new();
2234        if (evsel_list == NULL)
2235                return -ENOMEM;
2236
2237        parse_events__shrink_config_terms();
2238
2239        /* String-parsing callback-based options would segfault when negated */
2240        set_option_flag(stat_options, 'e', "event", PARSE_OPT_NONEG);
2241        set_option_flag(stat_options, 'M', "metrics", PARSE_OPT_NONEG);
2242        set_option_flag(stat_options, 'G', "cgroup", PARSE_OPT_NONEG);
2243
2244        argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
2245                                        (const char **) stat_usage,
2246                                        PARSE_OPT_STOP_AT_NON_OPTION);
2247        perf_stat__collect_metric_expr(evsel_list);
2248        perf_stat__init_shadow_stats();
2249
2250        if (stat_config.csv_sep) {
2251                stat_config.csv_output = true;
2252                if (!strcmp(stat_config.csv_sep, "\\t"))
2253                        stat_config.csv_sep = "\t";
2254        } else
2255                stat_config.csv_sep = DEFAULT_SEPARATOR;
2256
2257        if (argc && !strncmp(argv[0], "rec", 3)) {
2258                argc = __cmd_record(argc, argv);
2259                if (argc < 0)
2260                        return -1;
2261        } else if (argc && !strncmp(argv[0], "rep", 3))
2262                return __cmd_report(argc, argv);
2263
2264        interval = stat_config.interval;
2265        timeout = stat_config.timeout;
2266
2267        /*
2268         * For record command the -o is already taken care of.
2269         */
2270        if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
2271                output = NULL;
2272
2273        if (output_name && output_fd) {
2274                fprintf(stderr, "cannot use both --output and --log-fd\n");
2275                parse_options_usage(stat_usage, stat_options, "o", 1);
2276                parse_options_usage(NULL, stat_options, "log-fd", 0);
2277                goto out;
2278        }
2279
2280        if (stat_config.metric_only && stat_config.aggr_mode == AGGR_THREAD) {
2281                fprintf(stderr, "--metric-only is not supported with --per-thread\n");
2282                goto out;
2283        }
2284
2285        if (stat_config.metric_only && stat_config.run_count > 1) {
2286                fprintf(stderr, "--metric-only is not supported with -r\n");
2287                goto out;
2288        }
2289
2290        if (stat_config.walltime_run_table && stat_config.run_count <= 1) {
2291                fprintf(stderr, "--table is only supported with -r\n");
2292                parse_options_usage(stat_usage, stat_options, "r", 1);
2293                parse_options_usage(NULL, stat_options, "table", 0);
2294                goto out;
2295        }
2296
2297        if (output_fd < 0) {
2298                fprintf(stderr, "argument to --log-fd must be > 0\n");
2299                parse_options_usage(stat_usage, stat_options, "log-fd", 0);
2300                goto out;
2301        }
2302
2303        if (!output && !stat_config.quiet) {
2304                struct timespec tm;
2305                mode = append_file ? "a" : "w";
2306
2307                output = fopen(output_name, mode);
2308                if (!output) {
2309                        perror("failed to create output file");
2310                        return -1;
2311                }
2312                clock_gettime(CLOCK_REALTIME, &tm);
2313                fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
2314        } else if (output_fd > 0) {
2315                mode = append_file ? "a" : "w";
2316                output = fdopen(output_fd, mode);
2317                if (!output) {
2318                        perror("Failed opening logfd");
2319                        return -errno;
2320                }
2321        }
2322
2323        stat_config.output = output;
2324
2325        /*
2326         * let the spreadsheet do the pretty-printing
2327         */
2328        if (stat_config.csv_output) {
2329                /* User explicitly passed -B? */
2330                if (big_num_opt == 1) {
2331                        fprintf(stderr, "-B option not supported with -x\n");
2332                        parse_options_usage(stat_usage, stat_options, "B", 1);
2333                        parse_options_usage(NULL, stat_options, "x", 1);
2334                        goto out;
2335                } else /* Nope, so disable big number formatting */
2336                        stat_config.big_num = false;
2337        } else if (big_num_opt == 0) /* User passed --no-big-num */
2338                stat_config.big_num = false;
2339
2340        err = target__validate(&target);
2341        if (err) {
2342                target__strerror(&target, err, errbuf, BUFSIZ);
2343                pr_warning("%s\n", errbuf);
2344        }
2345
2346        setup_system_wide(argc);
2347
2348        /*
2349         * Display user/system times only for a single
2350         * run and when there's a specified tracee.
2351         */
2352        if ((stat_config.run_count == 1) && target__none(&target))
2353                stat_config.ru_display = true;
2354
2355        if (stat_config.run_count < 0) {
2356                pr_err("Run count must be a positive number\n");
2357                parse_options_usage(stat_usage, stat_options, "r", 1);
2358                goto out;
2359        } else if (stat_config.run_count == 0) {
2360                forever = true;
2361                stat_config.run_count = 1;
2362        }
2363
2364        if (stat_config.walltime_run_table) {
2365                stat_config.walltime_run = zalloc(stat_config.run_count * sizeof(stat_config.walltime_run[0]));
2366                if (!stat_config.walltime_run) {
2367                        pr_err("failed to setup -r option");
2368                        goto out;
2369                }
2370        }
2371
2372        if ((stat_config.aggr_mode == AGGR_THREAD) &&
2373                !target__has_task(&target)) {
2374                if (!target.system_wide || target.cpu_list) {
2375                        fprintf(stderr, "The --per-thread option is only "
2376                                "available when monitoring via the -p, -t or -a "
2377                                "options, or with --per-thread alone.\n");
2378                        parse_options_usage(NULL, stat_options, "p", 1);
2379                        parse_options_usage(NULL, stat_options, "t", 1);
2380                        goto out;
2381                }
2382        }
2383
2384        /*
2385         * no_aggr and cgroup modes are for system-wide only;
2386         * --per-thread aggregates per thread, so we don't mix it with CPU mode.
2387         */
2388        if (((stat_config.aggr_mode != AGGR_GLOBAL &&
2389              stat_config.aggr_mode != AGGR_THREAD) ||
2390             (nr_cgroups || stat_config.cgroup_list)) &&
2391            !target__has_cpu(&target)) {
2392                fprintf(stderr, "both cgroup and no-aggregation "
2393                        "modes are only available in system-wide mode\n");
2394
2395                parse_options_usage(stat_usage, stat_options, "G", 1);
2396                parse_options_usage(NULL, stat_options, "A", 1);
2397                parse_options_usage(NULL, stat_options, "a", 1);
2398                parse_options_usage(NULL, stat_options, "for-each-cgroup", 0);
2399                goto out;
2400        }
2401
2402        if (stat_config.iostat_run) {
2403                status = iostat_prepare(evsel_list, &stat_config);
2404                if (status)
2405                        goto out;
2406                if (iostat_mode == IOSTAT_LIST) {
2407                        iostat_list(evsel_list, &stat_config);
2408                        goto out;
2409                } else if (verbose)
2410                        iostat_list(evsel_list, &stat_config);
2411                if (iostat_mode == IOSTAT_RUN && !target__has_cpu(&target))
2412                        target.system_wide = true;
2413        }
2414
2415        if (add_default_attributes())
2416                goto out;
2417
2418        if (stat_config.cgroup_list) {
2419                if (nr_cgroups > 0) {
2420                        pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
2421                        parse_options_usage(stat_usage, stat_options, "G", 1);
2422                        parse_options_usage(NULL, stat_options, "for-each-cgroup", 0);
2423                        goto out;
2424                }
2425
2426                if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list,
2427                                          &stat_config.metric_events, true) < 0) {
2428                        parse_options_usage(stat_usage, stat_options,
2429                                            "for-each-cgroup", 0);
2430                        goto out;
2431                }
2432        }
2433
2434        if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
2435                target.per_thread = true;
2436
2437        if (evlist__fix_hybrid_cpus(evsel_list, target.cpu_list)) {
2438                pr_err("failed to use cpu list %s\n", target.cpu_list);
2439                goto out;
2440        }
2441
2442        target.hybrid = perf_pmu__has_hybrid();
2443        if (evlist__create_maps(evsel_list, &target) < 0) {
2444                if (target__has_task(&target)) {
2445                        pr_err("Problems finding threads to monitor\n");
2446                        parse_options_usage(stat_usage, stat_options, "p", 1);
2447                        parse_options_usage(NULL, stat_options, "t", 1);
2448                } else if (target__has_cpu(&target)) {
2449                        perror("failed to parse CPUs map");
2450                        parse_options_usage(stat_usage, stat_options, "C", 1);
2451                        parse_options_usage(NULL, stat_options, "a", 1);
2452                }
2453                goto out;
2454        }
2455
2456        evlist__check_cpu_maps(evsel_list);
2457
2458        /*
2459         * Initialize thread_map with comm names,
2460         * so we can print them in the output.
2461         */
2462        if (stat_config.aggr_mode == AGGR_THREAD) {
2463                thread_map__read_comms(evsel_list->core.threads);
2464                if (target.system_wide) {
2465                        if (runtime_stat_new(&stat_config,
2466                                perf_thread_map__nr(evsel_list->core.threads))) {
2467                                goto out;
2468                        }
2469                }
2470        }
2471
2472        if (stat_config.aggr_mode == AGGR_NODE)
2473                cpu__setup_cpunode_map();
2474
2475        if (stat_config.times && interval)
2476                interval_count = true;
2477        else if (stat_config.times && !interval) {
2478                pr_err("interval-count option should be used together with "
2479                                "interval-print.\n");
2480                parse_options_usage(stat_usage, stat_options, "interval-count", 0);
2481                parse_options_usage(stat_usage, stat_options, "I", 1);
2482                goto out;
2483        }
2484
2485        if (timeout && timeout < 100) {
2486                if (timeout < 10) {
2487                        pr_err("timeout must be >= 10ms.\n");
2488                        parse_options_usage(stat_usage, stat_options, "timeout", 0);
2489                        goto out;
2490                } else
2491                        pr_warning("timeout < 100ms. "
2492                                   "The overhead percentage could be high in some cases. "
2493                                   "Please proceed with caution.\n");
2494        }
2495        if (timeout && interval) {
2496                pr_err("timeout option is not supported with interval-print.\n");
2497                parse_options_usage(stat_usage, stat_options, "timeout", 0);
2498                parse_options_usage(stat_usage, stat_options, "I", 1);
2499                goto out;
2500        }
2501
2502        if (evlist__alloc_stats(evsel_list, interval))
2503                goto out;
2504
2505        if (perf_stat_init_aggr_mode())
2506                goto out;
2507
2508        /*
2509         * Set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
2510         * while avoiding that older tools show confusing messages.
2511         *
2512         * However for pipe sessions we need to keep it zero,
2513         * because script's perf_evsel__check_attr is triggered
2514         * by attr->sample_type != 0, and we can't run it on
2515         * stat sessions.
2516         */
2517        stat_config.identifier = !(STAT_RECORD && perf_stat.data.is_pipe);
2518
2519        /*
2520         * We don't want to block the signals - that would cause
2521         * child tasks to inherit the blocked signals and Ctrl-C would
2522         * not work. What we want is for Ctrl-C to work in the exec()-ed
2523         * task, while being ignored by perf stat itself:
2524         */
2525        atexit(sig_atexit);
2526        if (!forever)
2527                signal(SIGINT,  skip_signal);
2528        signal(SIGCHLD, skip_signal);
2529        signal(SIGALRM, skip_signal);
2530        signal(SIGABRT, skip_signal);
2531
2532        if (evlist__initialize_ctlfd(evsel_list, stat_config.ctl_fd, stat_config.ctl_fd_ack))
2533                goto out;
2534
2535        status = 0;
2536        for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) {
2537                if (stat_config.run_count != 1 && verbose > 0)
2538                        fprintf(output, "[ perf stat: executing run #%d ... ]\n",
2539                                run_idx + 1);
2540
2541                if (run_idx != 0)
2542                        evlist__reset_prev_raw_counts(evsel_list);
2543
2544                status = run_perf_stat(argc, argv, run_idx);
2545                if (forever && status != -1 && !interval) {
2546                        print_counters(NULL, argc, argv);
2547                        perf_stat__reset_stats();
2548                }
2549        }
2550
2551        if (!forever && status != -1 && (!interval || stat_config.summary))
2552                print_counters(NULL, argc, argv);
2553
2554        evlist__finalize_ctlfd(evsel_list);
2555
2556        if (STAT_RECORD) {
2557                /*
2558                 * We synthesize the kernel mmap record just so that older tools
2559                 * don't emit warnings about not being able to resolve symbols
2560                 * due to /proc/sys/kernel/kptr_restrict settings and instead provide
2561                 * a saner message about no samples being in the perf.data file.
2562                 *
2563                 * This also serves to suppress a warning about f_header.data.size == 0
2564                 * in header.c at the moment 'perf stat record' gets introduced, which
2565                 * is not really needed once we start adding the stat specific PERF_RECORD_
2566                 * records, but the need to suppress the kptr_restrict messages in older
2567                 * tools remains. -acme
2568                 */
2569                int fd = perf_data__fd(&perf_stat.data);
2570
2571                err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
2572                                                         process_synthesized_event,
2573                                                         &perf_stat.session->machines.host);
2574                if (err) {
2575                        pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
2576                                   "older tools may produce warnings about this file.\n");
2577                }
2578
2579                if (!interval) {
2580                        if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
2581                                pr_err("failed to write stat round event\n");
2582                }
2583
2584                if (!perf_stat.data.is_pipe) {
2585                        perf_stat.session->header.data_size += perf_stat.bytes_written;
2586                        perf_session__write_header(perf_stat.session, evsel_list, fd, true);
2587                }
2588
2589                evlist__close(evsel_list);
2590                perf_session__delete(perf_stat.session);
2591        }
2592
2593        perf_stat__exit_aggr_mode();
2594        evlist__free_stats(evsel_list);
2595out:
2596        if (stat_config.iostat_run)
2597                iostat_release(evsel_list);
2598
2599        zfree(&stat_config.walltime_run);
2600
2601        if (smi_cost && smi_reset)
2602                sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
2603
2604        evlist__delete(evsel_list);
2605
2606        metricgroup__rblist_exit(&stat_config.metric_events);
2607        runtime_stat_delete(&stat_config);
2608        evlist__close_control(stat_config.ctl_fd, stat_config.ctl_fd_ack, &stat_config.ctl_fd_close);
2609
2610        return status;
2611}
2612