linux/tools/perf/util/stat.c
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include <math.h>
#include <string.h>
#include "counts.h"
#include "cpumap.h"
#include "debug.h"
#include "header.h"
#include "stat.h"
#include "session.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "thread_map.h"
#include <linux/zalloc.h>

void update_stats(struct stats *stats, u64 val)
{
        double delta;

        stats->n++;
        delta = val - stats->mean;
        stats->mean += delta / stats->n;
        stats->M2 += delta*(val - stats->mean);

        if (val > stats->max)
                stats->max = val;

        if (val < stats->min)
                stats->min = val;
}

double avg_stats(struct stats *stats)
{
        return stats->mean;
}

/*
 * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
 *
 *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
 * s^2 = -------------------------------
 *                  n - 1
 *
 * http://en.wikipedia.org/wiki/Stddev
 *
 * The std dev of the mean is related to the std dev by:
 *
 *             s
 * s_mean = -------
 *          sqrt(n)
 *
 */
double stddev_stats(struct stats *stats)
{
        double variance, variance_mean;

        if (stats->n < 2)
                return 0.0;

        variance = stats->M2 / (stats->n - 1);
        variance_mean = variance / stats->n;

        return sqrt(variance_mean);
}

double rel_stddev_stats(double stddev, double avg)
{
        double pct = 0.0;

        if (avg)
                pct = 100.0 * stddev/avg;

        return pct;
}
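
/*
 * Illustrative usage sketch (not part of the original file): the helpers
 * above implement Welford's online algorithm, so a caller feeds one value
 * per measurement and then reads the mean and the std dev of the mean.
 * init_stats() is the inline initializer from util/stat.h:
 *
 *        struct stats st;
 *        u64 runs[] = { 100, 104, 98 };
 *
 *        init_stats(&st);
 *        for (size_t i = 0; i < ARRAY_SIZE(runs); i++)
 *                update_stats(&st, runs[i]);
 *        printf("%.2f +- %.2f\n", avg_stats(&st), stddev_stats(&st));
 */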

bool __perf_evsel_stat__is(struct evsel *evsel,
                           enum perf_stat_evsel_id id)
{
        struct perf_stat_evsel *ps = evsel->stats;

        return ps->id == id;
}

#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
        ID(NONE,                x),
        ID(CYCLES_IN_TX,        cpu/cycles-t/),
        ID(TRANSACTION_START,   cpu/tx-start/),
        ID(ELISION_START,       cpu/el-start/),
        ID(CYCLES_IN_TX_CP,     cpu/cycles-ct/),
        ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
        ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
        ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
        ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
        ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
        ID(SMI_NUM, msr/smi/),
        ID(APERF, msr/aperf/),
};
#undef ID

static void perf_stat_evsel_id_init(struct evsel *evsel)
{
        struct perf_stat_evsel *ps = evsel->stats;
        int i;

        /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */

        for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
                if (!strcmp(evsel__name(evsel), id_str[i])) {
                        ps->id = i;
                        break;
                }
        }
}

static void evsel__reset_stat_priv(struct evsel *evsel)
{
        int i;
        struct perf_stat_evsel *ps = evsel->stats;

        for (i = 0; i < 3; i++)
                init_stats(&ps->res_stats[i]);

        perf_stat_evsel_id_init(evsel);
}

static int evsel__alloc_stat_priv(struct evsel *evsel)
{
        evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
        if (evsel->stats == NULL)
                return -ENOMEM;
        evsel__reset_stat_priv(evsel);
        return 0;
}

static void evsel__free_stat_priv(struct evsel *evsel)
{
        struct perf_stat_evsel *ps = evsel->stats;

        if (ps)
                zfree(&ps->group_data);
        zfree(&evsel->stats);
}

static int evsel__alloc_prev_raw_counts(struct evsel *evsel, int ncpus, int nthreads)
{
        struct perf_counts *counts;

        counts = perf_counts__new(ncpus, nthreads);
        if (counts)
                evsel->prev_raw_counts = counts;

        return counts ? 0 : -ENOMEM;
}

static void evsel__free_prev_raw_counts(struct evsel *evsel)
{
        perf_counts__delete(evsel->prev_raw_counts);
        evsel->prev_raw_counts = NULL;
}

static void evsel__reset_prev_raw_counts(struct evsel *evsel)
{
        if (evsel->prev_raw_counts)
                perf_counts__reset(evsel->prev_raw_counts);
}

static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
{
        int ncpus = evsel__nr_cpus(evsel);
        int nthreads = perf_thread_map__nr(evsel->core.threads);

        if (evsel__alloc_stat_priv(evsel) < 0 ||
            evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
            (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
                return -ENOMEM;

        return 0;
}

int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                if (evsel__alloc_stats(evsel, alloc_raw))
                        goto out_free;
        }

        return 0;

out_free:
        perf_evlist__free_stats(evlist);
        return -1;
}

void perf_evlist__free_stats(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                evsel__free_stat_priv(evsel);
                evsel__free_counts(evsel);
                evsel__free_prev_raw_counts(evsel);
        }
}

void perf_evlist__reset_stats(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                evsel__reset_stat_priv(evsel);
                evsel__reset_counts(evsel);
        }
}

void perf_evlist__reset_prev_raw_counts(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel)
                evsel__reset_prev_raw_counts(evsel);
}
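
/*
 * Illustrative lifecycle sketch (not part of the original file; the caller
 * shape is assumed): the alloc/reset/free helpers above bracket a counting
 * session, roughly:
 *
 *        if (perf_evlist__alloc_stats(evlist, alloc_raw))
 *                return -ENOMEM;
 *        ...                                      // open, read, process counters
 *        perf_evlist__reset_stats(evlist);        // e.g. between repeated runs
 *        ...
 *        perf_evlist__free_stats(evlist);
 *
 * Passing alloc_raw == true also allocates prev_raw_counts, which interval
 * counting uses to turn cumulative reads into per-interval deltas.
 */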

static void perf_evsel__copy_prev_raw_counts(struct evsel *evsel)
{
        int ncpus = evsel__nr_cpus(evsel);
        int nthreads = perf_thread_map__nr(evsel->core.threads);

        for (int thread = 0; thread < nthreads; thread++) {
                for (int cpu = 0; cpu < ncpus; cpu++) {
                        *perf_counts(evsel->counts, cpu, thread) =
                                *perf_counts(evsel->prev_raw_counts, cpu,
                                             thread);
                }
        }

        evsel->counts->aggr = evsel->prev_raw_counts->aggr;
}

void perf_evlist__copy_prev_raw_counts(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel)
                perf_evsel__copy_prev_raw_counts(evsel);
}

void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
{
        struct evsel *evsel;

        /*
         * To collect the overall statistics for interval mode,
         * we copy the counts from evsel->prev_raw_counts to
         * evsel->counts. The perf_stat_process_counter creates
         * aggr values from per cpu values, but the per cpu values
         * are 0 for AGGR_GLOBAL. So we use a trick that saves the
         * previous aggr value to the first member of perf_counts,
         * then aggr calculation in process_counter_values can work
         * correctly.
         */
        evlist__for_each_entry(evlist, evsel) {
                *perf_counts(evsel->prev_raw_counts, 0, 0) =
                        evsel->prev_raw_counts->aggr;
        }
}
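
/*
 * Illustrative sketch of the assumed caller order (not part of the original
 * file): for AGGR_GLOBAL interval mode the aggregate is stashed before the
 * previous counts are copied back into evsel->counts, so that
 * process_counter_values() sees it again at (cpu 0, thread 0):
 *
 *        if (config->aggr_mode == AGGR_GLOBAL)
 *                perf_evlist__save_aggr_prev_raw_counts(evlist);
 *        perf_evlist__copy_prev_raw_counts(evlist);
 *        perf_evlist__reset_prev_raw_counts(evlist);
 */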

static void zero_per_pkg(struct evsel *counter)
{
        if (counter->per_pkg_mask)
                memset(counter->per_pkg_mask, 0, cpu__max_cpu());
}

static int check_per_pkg(struct evsel *counter,
                         struct perf_counts_values *vals, int cpu, bool *skip)
{
        unsigned long *mask = counter->per_pkg_mask;
        struct perf_cpu_map *cpus = evsel__cpus(counter);
        int s;

        *skip = false;

        if (!counter->per_pkg)
                return 0;

        if (perf_cpu_map__empty(cpus))
                return 0;

        if (!mask) {
                mask = zalloc(cpu__max_cpu());
                if (!mask)
                        return -ENOMEM;

                counter->per_pkg_mask = mask;
        }

        /*
         * we do not consider an event that has not run as a good
         * instance to mark a package as used (skip=1). Otherwise
         * we may run into a situation where the first CPU in a package
         * is not running anything, yet the second is, and this function
         * would mark the package as used after the first CPU and would
         * not read the values from the second CPU.
         */
        if (!(vals->run && vals->ena))
                return 0;

        s = cpu_map__get_socket(cpus, cpu, NULL);
        if (s < 0)
                return -1;

        *skip = test_and_set_bit(s, mask) == 1;
        return 0;
}
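
/*
 * Example (a sketch, not part of the original file, assuming the event's
 * cpu map is simply CPUs 0-7 with CPUs 0-3 on socket 0 and CPUs 4-7 on
 * socket 1): the first running CPU of each package claims the socket bit,
 * so a package-wide count is accumulated only once:
 *
 *        bool skip;
 *
 *        check_per_pkg(counter, vals, 0, &skip);  // skip == false, bit 0 set
 *        check_per_pkg(counter, vals, 1, &skip);  // skip == true, socket 0 seen
 *        check_per_pkg(counter, vals, 4, &skip);  // skip == false, bit 1 set
 */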

static int
process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
                       int cpu, int thread,
                       struct perf_counts_values *count)
{
        struct perf_counts_values *aggr = &evsel->counts->aggr;
        static struct perf_counts_values zero;
        bool skip = false;

        if (check_per_pkg(evsel, count, cpu, &skip)) {
                pr_err("failed to read per-pkg counter\n");
                return -1;
        }

        if (skip)
                count = &zero;

        switch (config->aggr_mode) {
        case AGGR_THREAD:
        case AGGR_CORE:
        case AGGR_DIE:
        case AGGR_SOCKET:
        case AGGR_NODE:
        case AGGR_NONE:
                if (!evsel->snapshot)
                        evsel__compute_deltas(evsel, cpu, thread, count);
                perf_counts_values__scale(count, config->scale, NULL);
                if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
                        perf_stat__update_shadow_stats(evsel, count->val,
                                                       cpu, &rt_stat);
                }

                if (config->aggr_mode == AGGR_THREAD) {
                        if (config->stats)
                                perf_stat__update_shadow_stats(evsel,
                                        count->val, 0, &config->stats[thread]);
                        else
                                perf_stat__update_shadow_stats(evsel,
                                        count->val, 0, &rt_stat);
                }
                break;
        case AGGR_GLOBAL:
                aggr->val += count->val;
                aggr->ena += count->ena;
                aggr->run += count->run;
        case AGGR_UNSET:
        default:
                break;
        }

        return 0;
}

static int process_counter_maps(struct perf_stat_config *config,
                                struct evsel *counter)
{
        int nthreads = perf_thread_map__nr(counter->core.threads);
        int ncpus = evsel__nr_cpus(counter);
        int cpu, thread;

        if (counter->core.system_wide)
                nthreads = 1;

        for (thread = 0; thread < nthreads; thread++) {
                for (cpu = 0; cpu < ncpus; cpu++) {
                        if (process_counter_values(config, counter, cpu, thread,
                                                   perf_counts(counter->counts, cpu, thread)))
                                return -1;
                }
        }

        return 0;
}

int perf_stat_process_counter(struct perf_stat_config *config,
                              struct evsel *counter)
{
        struct perf_counts_values *aggr = &counter->counts->aggr;
        struct perf_stat_evsel *ps = counter->stats;
        u64 *count = counter->counts->aggr.values;
        int i, ret;

        aggr->val = aggr->ena = aggr->run = 0;

        /*
         * We calculate counter's data every interval,
         * and the display code shows ps->res_stats
         * avg value. We need to zero the stats for
         * interval mode, otherwise overall avg running
         * averages will be shown for each interval.
         */
        if (config->interval || config->summary) {
                for (i = 0; i < 3; i++)
                        init_stats(&ps->res_stats[i]);
        }

        if (counter->per_pkg)
                zero_per_pkg(counter);

        ret = process_counter_maps(config, counter);
        if (ret)
                return ret;

        if (config->aggr_mode != AGGR_GLOBAL)
                return 0;

        if (!counter->snapshot)
                evsel__compute_deltas(counter, -1, -1, aggr);
        perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);

        for (i = 0; i < 3; i++)
                update_stats(&ps->res_stats[i], count[i]);

        if (verbose > 0) {
                fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
                        evsel__name(counter), count[0], count[1], count[2]);
        }

        /*
         * Save the full runtime - to allow normalization during printout:
         */
        perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat);

        return 0;
}
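
/*
 * Illustrative usage sketch (not part of the original file; the caller
 * shape is assumed): after each read of the raw counts, every evsel is
 * folded into its res_stats and the shadow stats through this function:
 *
 *        struct evsel *counter;
 *
 *        evlist__for_each_entry(evlist, counter) {
 *                if (perf_stat_process_counter(&stat_config, counter))
 *                        pr_warning("failed to process %s\n",
 *                                   evsel__name(counter));
 *        }
 */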

int perf_event__process_stat_event(struct perf_session *session,
                                   union perf_event *event)
{
        struct perf_counts_values count;
        struct perf_record_stat *st = &event->stat;
        struct evsel *counter;

        count.val = st->val;
        count.ena = st->ena;
        count.run = st->run;

        counter = perf_evlist__id2evsel(session->evlist, st->id);
        if (!counter) {
                pr_err("Failed to resolve counter for stat event.\n");
                return -EINVAL;
        }

        *perf_counts(counter->counts, st->cpu, st->thread) = count;
        counter->supported = true;
        return 0;
}

size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
{
        struct perf_record_stat *st = (struct perf_record_stat *)event;
        size_t ret;

        ret  = fprintf(fp, "\n... id %" PRI_lu64 ", cpu %d, thread %d\n",
                       st->id, st->cpu, st->thread);
        ret += fprintf(fp, "... value %" PRI_lu64 ", enabled %" PRI_lu64 ", running %" PRI_lu64 "\n",
                       st->val, st->ena, st->run);

        return ret;
}

size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
{
        struct perf_record_stat_round *rd = (struct perf_record_stat_round *)event;
        size_t ret;

        ret = fprintf(fp, "\n... time %" PRI_lu64 ", type %s\n", rd->time,
                      rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");

        return ret;
}

size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
{
        struct perf_stat_config sc;
        size_t ret;

        perf_event__read_stat_config(&sc, &event->stat_config);

        ret  = fprintf(fp, "\n");
        ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
        ret += fprintf(fp, "... scale     %d\n", sc.scale);
        ret += fprintf(fp, "... interval  %u\n", sc.interval);

        return ret;
}

int create_perf_stat_counter(struct evsel *evsel,
                             struct perf_stat_config *config,
                             struct target *target,
                             int cpu)
{
        struct perf_event_attr *attr = &evsel->core.attr;
        struct evsel *leader = evsel->leader;

        attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
                            PERF_FORMAT_TOTAL_TIME_RUNNING;

        /*
         * The event is part of non trivial group, let's enable
         * the group read (for leader) and ID retrieval for all
         * members.
         */
        if (leader->core.nr_members > 1)
                attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;

        attr->inherit = !config->no_inherit;

        /*
         * Some events get initialized with sample_(period/type) set,
         * like tracepoints. Clear it up for counting.
         */
        attr->sample_period = 0;

        if (config->identifier)
                attr->sample_type = PERF_SAMPLE_IDENTIFIER;

        if (config->all_user) {
                attr->exclude_kernel = 1;
                attr->exclude_user   = 0;
        }

        if (config->all_kernel) {
                attr->exclude_kernel = 0;
                attr->exclude_user   = 1;
        }

        /*
         * Disabling all counters initially, they will be enabled
         * either manually by us or by kernel via enable_on_exec
         * set later.
         */
        if (evsel__is_group_leader(evsel)) {
                attr->disabled = 1;

                /*
                 * In case of initial_delay we enable tracee
                 * events manually.
                 */
                if (target__none(target) && !config->initial_delay)
                        attr->enable_on_exec = 1;
        }

        if (target__has_cpu(target) && !target__has_per_thread(target))
                return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);

        return evsel__open_per_thread(evsel, evsel->core.threads);
}
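
/*
 * Illustrative usage sketch (not part of the original file; the caller
 * shape is assumed): perf stat opens one counter per evsel, either pinned
 * to a single aggregation cpu or, with cpu == -1, across the evsel's whole
 * cpu map:
 *
 *        struct evsel *counter;
 *
 *        evlist__for_each_entry(evlist, counter) {
 *                if (create_perf_stat_counter(counter, &stat_config, &target,
 *                                             cpu) < 0)
 *                        return -1;
 *        }
 */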