linux/tools/perf/util/stat.c
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include <math.h>
#include <string.h>
#include "counts.h"
#include "cpumap.h"
#include "debug.h"
#include "header.h"
#include "stat.h"
#include "session.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "thread_map.h"
#include "hashmap.h"
#include <linux/zalloc.h>

void update_stats(struct stats *stats, u64 val)
{
        double delta;

        stats->n++;
        delta = val - stats->mean;
        stats->mean += delta / stats->n;
        stats->M2 += delta * (val - stats->mean);

        if (val > stats->max)
                stats->max = val;

        if (val < stats->min)
                stats->min = val;
}

double avg_stats(struct stats *stats)
{
        return stats->mean;
}

/*
 * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
 *
 *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
 * s^2 = -------------------------------
 *                  n - 1
 *
 * http://en.wikipedia.org/wiki/Stddev
 *
 * The std dev of the mean is related to the std dev by:
 *
 *             s
 * s_mean = -------
 *          sqrt(n)
 *
 */
double stddev_stats(struct stats *stats)
{
        double variance, variance_mean;

        if (stats->n < 2)
                return 0.0;

        variance = stats->M2 / (stats->n - 1);
        variance_mean = variance / stats->n;

        return sqrt(variance_mean);
}
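
/*
 * Usage sketch for the helpers above (illustrative only; 'samples' and
 * 'nr_samples' are hypothetical, not part of this file):
 *
 *        struct stats st;
 *        u64 i;
 *
 *        init_stats(&st);
 *        for (i = 0; i < nr_samples; i++)
 *                update_stats(&st, samples[i]);
 *        pr_debug("avg %f +- %f\n", avg_stats(&st), stddev_stats(&st));
 *
 * update_stats() is Welford's online algorithm: it keeps a running mean
 * and M2 (the sum of squared deltas), so mean and variance come out of a
 * single pass without storing the samples themselves.
 */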

double rel_stddev_stats(double stddev, double avg)
{
        double pct = 0.0;

        if (avg)
                pct = 100.0 * stddev / avg;

        return pct;
}
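
/*
 * Worked example: with avg = 1000 and stddev = 15,
 * rel_stddev_stats(15, 1000) = 100.0 * 15 / 1000 = 1.50, i.e. the
 * standard deviation is 1.5% of the mean.
 */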

bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id)
{
        struct perf_stat_evsel *ps = evsel->stats;

        return ps->id == id;
}

#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
        ID(NONE,                x),
        ID(CYCLES_IN_TX,        cpu/cycles-t/),
        ID(TRANSACTION_START,   cpu/tx-start/),
        ID(ELISION_START,       cpu/el-start/),
        ID(CYCLES_IN_TX_CP,     cpu/cycles-ct/),
        ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
        ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
        ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
        ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
        ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
        ID(TOPDOWN_RETIRING, topdown-retiring),
        ID(TOPDOWN_BAD_SPEC, topdown-bad-spec),
        ID(TOPDOWN_FE_BOUND, topdown-fe-bound),
        ID(TOPDOWN_BE_BOUND, topdown-be-bound),
        ID(TOPDOWN_HEAVY_OPS, topdown-heavy-ops),
        ID(TOPDOWN_BR_MISPREDICT, topdown-br-mispredict),
        ID(TOPDOWN_FETCH_LAT, topdown-fetch-lat),
        ID(TOPDOWN_MEM_BOUND, topdown-mem-bound),
        ID(SMI_NUM, msr/smi/),
        ID(APERF, msr/aperf/),
};
#undef ID
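
/*
 * For illustration, ID(SMI_NUM, msr/smi/) expands (the second argument is
 * stringified by #name) to the designated initializer:
 *
 *        [PERF_STAT_EVSEL_ID__SMI_NUM] = "msr/smi/",
 *
 * so each id_str[] slot holds exactly the event name string that
 * perf_stat_evsel_id_init() below compares against evsel__name().
 */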

static void perf_stat_evsel_id_init(struct evsel *evsel)
{
        struct perf_stat_evsel *ps = evsel->stats;
        int i;

        /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */

        for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
                if (!strcmp(evsel__name(evsel), id_str[i])) {
                        ps->id = i;
                        break;
                }
        }
}

static void evsel__reset_stat_priv(struct evsel *evsel)
{
        int i;
        struct perf_stat_evsel *ps = evsel->stats;

        for (i = 0; i < 3; i++)
                init_stats(&ps->res_stats[i]);

        perf_stat_evsel_id_init(evsel);
}

static int evsel__alloc_stat_priv(struct evsel *evsel)
{
        evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
        if (evsel->stats == NULL)
                return -ENOMEM;
        evsel__reset_stat_priv(evsel);
        return 0;
}

static void evsel__free_stat_priv(struct evsel *evsel)
{
        struct perf_stat_evsel *ps = evsel->stats;

        if (ps)
                zfree(&ps->group_data);
        zfree(&evsel->stats);
}

static int evsel__alloc_prev_raw_counts(struct evsel *evsel, int ncpus, int nthreads)
{
        struct perf_counts *counts;

        counts = perf_counts__new(ncpus, nthreads);
        if (counts)
                evsel->prev_raw_counts = counts;

        return counts ? 0 : -ENOMEM;
}

static void evsel__free_prev_raw_counts(struct evsel *evsel)
{
        perf_counts__delete(evsel->prev_raw_counts);
        evsel->prev_raw_counts = NULL;
}

static void evsel__reset_prev_raw_counts(struct evsel *evsel)
{
        if (evsel->prev_raw_counts)
                perf_counts__reset(evsel->prev_raw_counts);
}

static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
{
        int ncpus = evsel__nr_cpus(evsel);
        int nthreads = perf_thread_map__nr(evsel->core.threads);

        if (evsel__alloc_stat_priv(evsel) < 0 ||
            evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
            (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
                return -ENOMEM;

        return 0;
}

int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                if (evsel__alloc_stats(evsel, alloc_raw))
                        goto out_free;
        }

        return 0;

out_free:
        evlist__free_stats(evlist);
        return -1;
}

void evlist__free_stats(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                evsel__free_stat_priv(evsel);
                evsel__free_counts(evsel);
                evsel__free_prev_raw_counts(evsel);
        }
}

void evlist__reset_stats(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                evsel__reset_stat_priv(evsel);
                evsel__reset_counts(evsel);
        }
}

void evlist__reset_prev_raw_counts(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel)
                evsel__reset_prev_raw_counts(evsel);
}

static void evsel__copy_prev_raw_counts(struct evsel *evsel)
{
        int ncpus = evsel__nr_cpus(evsel);
        int nthreads = perf_thread_map__nr(evsel->core.threads);

        for (int thread = 0; thread < nthreads; thread++) {
                for (int cpu = 0; cpu < ncpus; cpu++) {
                        *perf_counts(evsel->counts, cpu, thread) =
                                *perf_counts(evsel->prev_raw_counts, cpu,
                                             thread);
                }
        }

        evsel->counts->aggr = evsel->prev_raw_counts->aggr;
}

void evlist__copy_prev_raw_counts(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel)
                evsel__copy_prev_raw_counts(evsel);
}

void evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
{
        struct evsel *evsel;

        /*
         * To collect the overall statistics for interval mode,
         * we copy the counts from evsel->prev_raw_counts to
         * evsel->counts. perf_stat_process_counter() creates
         * aggr values from per-cpu values, but the per-cpu values
         * are 0 for AGGR_GLOBAL. So we use a trick: save the
         * previous aggr value to the first member of perf_counts,
         * so that the aggr calculation in process_counter_values()
         * works correctly.
         */
        evlist__for_each_entry(evlist, evsel) {
                *perf_counts(evsel->prev_raw_counts, 0, 0) =
                        evsel->prev_raw_counts->aggr;
        }
}

static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused)
{
        uint64_t *key = (uint64_t *) __key;

        return *key & 0xffffffff;
}

static bool pkg_id_equal(const void *__key1, const void *__key2,
                         void *ctx __maybe_unused)
{
        uint64_t *key1 = (uint64_t *) __key1;
        uint64_t *key2 = (uint64_t *) __key2;

        return *key1 == *key2;
}

static int check_per_pkg(struct evsel *counter,
                         struct perf_counts_values *vals, int cpu, bool *skip)
{
        struct hashmap *mask = counter->per_pkg_mask;
        struct perf_cpu_map *cpus = evsel__cpus(counter);
        int s, d, ret = 0;
        uint64_t *key;

        *skip = false;

        if (!counter->per_pkg)
                return 0;

        if (perf_cpu_map__empty(cpus))
                return 0;

        if (!mask) {
                mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL);
                if (!mask)
                        return -ENOMEM;

                counter->per_pkg_mask = mask;
        }

        /*
         * We do not consider an event that has not run to be a good
         * instance to mark a package as used (*skip = true). Otherwise
         * we may run into a situation where the first CPU in a package
         * is not running anything, yet the second one is, and this
         * function would mark the package as used after the first CPU
         * and would not read the values from the second one.
         */
        if (!(vals->run && vals->ena))
                return 0;

        s = cpu_map__get_socket(cpus, cpu, NULL).socket;
        if (s < 0)
                return -1;

        /*
         * On a multi-die system, die_id > 0; on a single-die system,
         * die_id = 0. We use a hashmap keyed on (socket, die) to track
         * which socket+die pairs have already been counted.
         */
        d = cpu_map__get_die(cpus, cpu, NULL).die;
        if (d < 0)
                return -1;

        key = malloc(sizeof(*key));
        if (!key)
                return -ENOMEM;

        *key = (uint64_t)d << 32 | s;
        if (hashmap__find(mask, (void *)key, NULL))
                *skip = true;
        else
                ret = hashmap__add(mask, (void *)key, (void *)1);

        return ret;
}
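
/*
 * Key layout sketch: check_per_pkg() packs the die id into the upper 32
 * bits and the socket id into the lower 32. For example, socket 1 on
 * die 0 gives the key 0x0000000000000001 and socket 1 on die 2 gives
 * 0x0000000200000001, so every (socket, die) pair lands in a distinct
 * per_pkg_mask entry.
 */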

static int
process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
                       int cpu, int thread,
                       struct perf_counts_values *count)
{
        struct perf_counts_values *aggr = &evsel->counts->aggr;
        static struct perf_counts_values zero;
        bool skip = false;

        if (check_per_pkg(evsel, count, cpu, &skip)) {
                pr_err("failed to read per-pkg counter\n");
                return -1;
        }

        if (skip)
                count = &zero;

        switch (config->aggr_mode) {
        case AGGR_THREAD:
        case AGGR_CORE:
        case AGGR_DIE:
        case AGGR_SOCKET:
        case AGGR_NODE:
        case AGGR_NONE:
                if (!evsel->snapshot)
                        evsel__compute_deltas(evsel, cpu, thread, count);
                perf_counts_values__scale(count, config->scale, NULL);
                if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
                        perf_stat__update_shadow_stats(evsel, count->val,
                                                       cpu, &rt_stat);
                }

                if (config->aggr_mode == AGGR_THREAD) {
                        if (config->stats)
                                perf_stat__update_shadow_stats(evsel,
                                        count->val, 0, &config->stats[thread]);
                        else
                                perf_stat__update_shadow_stats(evsel,
                                        count->val, 0, &rt_stat);
                }
                break;
        case AGGR_GLOBAL:
                aggr->val += count->val;
                aggr->ena += count->ena;
                aggr->run += count->run;
                break;
        case AGGR_UNSET:
        default:
                break;
        }

        return 0;
}

static int process_counter_maps(struct perf_stat_config *config,
                                struct evsel *counter)
{
        int nthreads = perf_thread_map__nr(counter->core.threads);
        int ncpus = evsel__nr_cpus(counter);
        int cpu, thread;

        if (counter->core.system_wide)
                nthreads = 1;

        for (thread = 0; thread < nthreads; thread++) {
                for (cpu = 0; cpu < ncpus; cpu++) {
                        if (process_counter_values(config, counter, cpu, thread,
                                                   perf_counts(counter->counts, cpu, thread)))
                                return -1;
                }
        }

        return 0;
}

int perf_stat_process_counter(struct perf_stat_config *config,
                              struct evsel *counter)
{
        struct perf_counts_values *aggr = &counter->counts->aggr;
        struct perf_stat_evsel *ps = counter->stats;
        u64 *count = counter->counts->aggr.values;
        int i, ret;

        aggr->val = aggr->ena = aggr->run = 0;

        if (counter->per_pkg)
                evsel__zero_per_pkg(counter);

        ret = process_counter_maps(config, counter);
        if (ret)
                return ret;

        if (config->aggr_mode != AGGR_GLOBAL)
                return 0;

        if (!counter->snapshot)
                evsel__compute_deltas(counter, -1, -1, aggr);
        perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);

        for (i = 0; i < 3; i++)
                update_stats(&ps->res_stats[i], count[i]);

        if (verbose > 0) {
                fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
                        evsel__name(counter), count[0], count[1], count[2]);
        }

        /*
         * Save the full run time, to allow normalization during printout:
         */
        perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat);

        return 0;
}
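
/*
 * Scaling sketch: perf_counts_values__scale() (from libperf) extrapolates
 * a multiplexed counter by ena/run. For example, val = 4000 with
 * ena = 1000000 and run = 250000 scales to 4000 * 1000000 / 250000 =
 * 16000; a counter that never ran (run == 0) is zeroed and marked via
 * the scaled flag. The resulting aggr triple {val, ena, run} is what
 * feeds update_stats() and the verbose dump above.
 */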

int perf_event__process_stat_event(struct perf_session *session,
                                   union perf_event *event)
{
        struct perf_counts_values count;
        struct perf_record_stat *st = &event->stat;
        struct evsel *counter;

        count.val = st->val;
        count.ena = st->ena;
        count.run = st->run;

        counter = evlist__id2evsel(session->evlist, st->id);
        if (!counter) {
                pr_err("Failed to resolve counter for stat event.\n");
                return -EINVAL;
        }

        *perf_counts(counter->counts, st->cpu, st->thread) = count;
        counter->supported = true;
        return 0;
}

size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
{
        struct perf_record_stat *st = (struct perf_record_stat *)event;
        size_t ret;

        ret  = fprintf(fp, "\n... id %" PRI_lu64 ", cpu %d, thread %d\n",
                       st->id, st->cpu, st->thread);
        ret += fprintf(fp, "... value %" PRI_lu64 ", enabled %" PRI_lu64 ", running %" PRI_lu64 "\n",
                       st->val, st->ena, st->run);

        return ret;
}

size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
{
        struct perf_record_stat_round *rd = (struct perf_record_stat_round *)event;
        size_t ret;

        ret = fprintf(fp, "\n... time %" PRI_lu64 ", type %s\n", rd->time,
                      rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");

        return ret;
}

size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
{
        struct perf_stat_config sc;
        size_t ret;

        perf_event__read_stat_config(&sc, &event->stat_config);

        ret  = fprintf(fp, "\n");
        ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
        ret += fprintf(fp, "... scale     %d\n", sc.scale);
        ret += fprintf(fp, "... interval  %u\n", sc.interval);

        return ret;
}

int create_perf_stat_counter(struct evsel *evsel,
                             struct perf_stat_config *config,
                             struct target *target,
                             int cpu)
{
        struct perf_event_attr *attr = &evsel->core.attr;
        struct evsel *leader = evsel__leader(evsel);

        attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
                            PERF_FORMAT_TOTAL_TIME_RUNNING;

        /*
         * The event is part of a non-trivial group; enable the group
         * read (for the leader) and ID retrieval for all members.
         */
        if (leader->core.nr_members > 1)
                attr->read_format |= PERF_FORMAT_ID | PERF_FORMAT_GROUP;

        attr->inherit = !config->no_inherit && list_empty(&evsel->bpf_counter_list);

        /*
         * Some events get initialized with sample_(period/type) set,
         * like tracepoints. Clear them for counting.
         */
        attr->sample_period = 0;

        if (config->identifier)
                attr->sample_type = PERF_SAMPLE_IDENTIFIER;

        if (config->all_user) {
                attr->exclude_kernel = 1;
                attr->exclude_user   = 0;
        }

        if (config->all_kernel) {
                attr->exclude_kernel = 0;
                attr->exclude_user   = 1;
        }

        /*
         * Disable all counters initially; they will be enabled either
         * manually by us or by the kernel via enable_on_exec set later.
         */
        if (evsel__is_group_leader(evsel)) {
                attr->disabled = 1;

                /*
                 * When initial_delay is used, we enable the tracee's
                 * events manually.
                 */
                if (target__none(target) && !config->initial_delay)
                        attr->enable_on_exec = 1;
        }

        if (target__has_cpu(target) && !target__has_per_thread(target))
                return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);

        return evsel__open_per_thread(evsel, evsel->core.threads);
}
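
/*
 * Call sketch (hypothetical caller; 'evsel' and 'target' are assumed to
 * be set up elsewhere, error handling omitted):
 *
 *        struct perf_stat_config config = {
 *                .aggr_mode = AGGR_GLOBAL,
 *                .scale     = true,
 *        };
 *
 *        if (create_perf_stat_counter(evsel, &config, &target, -1) < 0)
 *                pr_err("failed to open counter\n");
 *
 * With PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING set
 * in read_format, each read() of the counter yields {value, time_enabled,
 * time_running}, which is exactly what the delta and scaling logic in
 * perf_stat_process_counter() consumes.
 */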