linux/tools/perf/util/stat.c
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include <math.h>
#include <string.h>
#include "counts.h"
#include "cpumap.h"
#include "debug.h"
#include "header.h"
#include "stat.h"
#include "session.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "thread_map.h"
#include <linux/zalloc.h>

void update_stats(struct stats *stats, u64 val)
{
        double delta;

        stats->n++;
        delta = val - stats->mean;
        stats->mean += delta / stats->n;
        stats->M2 += delta*(val - stats->mean);

        if (val > stats->max)
                stats->max = val;

        if (val < stats->min)
                stats->min = val;
}
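
/*
 * Editor's note: update_stats() implements Welford's online algorithm;
 * mean and M2 are updated incrementally per sample, so no sample buffer
 * is needed. A minimal usage sketch with hypothetical values (not part
 * of the original file):
 *
 *      struct stats st;
 *
 *      init_stats(&st);        // zeroes n/mean/M2, resets min/max
 *      update_stats(&st, 10);
 *      update_stats(&st, 12);
 *      update_stats(&st, 14);
 *      // st.n == 3, st.mean == 12.0, st.M2 == 8.0
 *
 * M2 is the running sum of squared deviations from the mean:
 * (10-12)^2 + (12-12)^2 + (14-12)^2 == 8.
 */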

double avg_stats(struct stats *stats)
{
        return stats->mean;
}

/*
 * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
 *
 *       (\Sum n_i^2) - ((\Sum n_i)^2)/n
 * s^2 = -------------------------------
 *                  n - 1
 *
 * http://en.wikipedia.org/wiki/Stddev
 *
 * The std dev of the mean is related to the std dev by:
 *
 *             s
 * s_mean = -------
 *          sqrt(n)
 *
 */
double stddev_stats(struct stats *stats)
{
        double variance, variance_mean;

        if (stats->n < 2)
                return 0.0;

        variance = stats->M2 / (stats->n - 1);
        variance_mean = variance / stats->n;

        return sqrt(variance_mean);
}

double rel_stddev_stats(double stddev, double avg)
{
        double pct = 0.0;

        if (avg)
                pct = 100.0 * stddev/avg;

        return pct;
}
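
/*
 * Editor's note: a worked example for the two helpers above, continuing
 * the hypothetical sketch after update_stats() (samples 10, 12, 14):
 *
 *      variance      = M2 / (n - 1)  = 8 / 2 = 4
 *      variance_mean = variance / n  = 4 / 3
 *      stddev_stats()                = sqrt(4/3)            ~= 1.155
 *      rel_stddev_stats(1.155, 12.0) = 100 * 1.155 / 12     ~= 9.6
 *
 * i.e. the standard deviation of the mean, expressed as a percentage of
 * the average - the basis of the "+- x.xx%" noise figure perf stat
 * prints next to averaged counters.
 */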

bool __perf_evsel_stat__is(struct evsel *evsel,
                           enum perf_stat_evsel_id id)
{
        struct perf_stat_evsel *ps = evsel->stats;

        return ps->id == id;
}

#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
        ID(NONE,                x),
        ID(CYCLES_IN_TX,        cpu/cycles-t/),
        ID(TRANSACTION_START,   cpu/tx-start/),
        ID(ELISION_START,       cpu/el-start/),
        ID(CYCLES_IN_TX_CP,     cpu/cycles-ct/),
        ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
        ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
        ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
        ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
        ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
        ID(SMI_NUM, msr/smi/),
        ID(APERF, msr/aperf/),
};
#undef ID

static void perf_stat_evsel_id_init(struct evsel *evsel)
{
        struct perf_stat_evsel *ps = evsel->stats;
        int i;

        /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */

        for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
                if (!strcmp(perf_evsel__name(evsel), id_str[i])) {
                        ps->id = i;
                        break;
                }
        }
}
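
/*
 * Editor's note: the ID() macro stringifies the event name, so id_str[]
 * maps each special event's canonical name to its enum slot. E.g. an
 * evsel whose perf_evsel__name() is "msr/smi/" ends up with ps->id ==
 * PERF_STAT_EVSEL_ID__SMI_NUM; anything unmatched keeps
 * PERF_STAT_EVSEL_ID__NONE. Callers can then do a cheap type test via
 * the perf_stat_evsel__is() wrapper declared in stat.h, e.g.:
 *
 *      if (perf_stat_evsel__is(counter, SMI_NUM))
 *              ...
 */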

static void perf_evsel__reset_stat_priv(struct evsel *evsel)
{
        int i;
        struct perf_stat_evsel *ps = evsel->stats;

        for (i = 0; i < 3; i++)
                init_stats(&ps->res_stats[i]);

        perf_stat_evsel_id_init(evsel);
}

static int perf_evsel__alloc_stat_priv(struct evsel *evsel)
{
        evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
        if (evsel->stats == NULL)
                return -ENOMEM;
        perf_evsel__reset_stat_priv(evsel);
        return 0;
}

static void perf_evsel__free_stat_priv(struct evsel *evsel)
{
        struct perf_stat_evsel *ps = evsel->stats;

        if (ps)
                zfree(&ps->group_data);
        zfree(&evsel->stats);
}

static int perf_evsel__alloc_prev_raw_counts(struct evsel *evsel,
                                             int ncpus, int nthreads)
{
        struct perf_counts *counts;

        counts = perf_counts__new(ncpus, nthreads);
        if (counts)
                evsel->prev_raw_counts = counts;

        return counts ? 0 : -ENOMEM;
}

static void perf_evsel__free_prev_raw_counts(struct evsel *evsel)
{
        perf_counts__delete(evsel->prev_raw_counts);
        evsel->prev_raw_counts = NULL;
}

static void perf_evsel__reset_prev_raw_counts(struct evsel *evsel)
{
        if (evsel->prev_raw_counts) {
                evsel->prev_raw_counts->aggr.val = 0;
                evsel->prev_raw_counts->aggr.ena = 0;
                evsel->prev_raw_counts->aggr.run = 0;
        }
}

static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
{
        int ncpus = perf_evsel__nr_cpus(evsel);
        int nthreads = perf_thread_map__nr(evsel->core.threads);

        if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
            perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
            (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
                return -ENOMEM;

        return 0;
}

int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                if (perf_evsel__alloc_stats(evsel, alloc_raw))
                        goto out_free;
        }

        return 0;

out_free:
        perf_evlist__free_stats(evlist);
        return -1;
}

void perf_evlist__free_stats(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                perf_evsel__free_stat_priv(evsel);
                perf_evsel__free_counts(evsel);
                perf_evsel__free_prev_raw_counts(evsel);
        }
}

void perf_evlist__reset_stats(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                perf_evsel__reset_stat_priv(evsel);
                perf_evsel__reset_counts(evsel);
        }
}

void perf_evlist__reset_prev_raw_counts(struct evlist *evlist)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel)
                perf_evsel__reset_prev_raw_counts(evsel);
}

static void zero_per_pkg(struct evsel *counter)
{
        if (counter->per_pkg_mask)
                memset(counter->per_pkg_mask, 0, cpu__max_cpu());
}

static int check_per_pkg(struct evsel *counter,
                         struct perf_counts_values *vals, int cpu, bool *skip)
{
        unsigned long *mask = counter->per_pkg_mask;
        struct perf_cpu_map *cpus = evsel__cpus(counter);
        int s;

        *skip = false;

        if (!counter->per_pkg)
                return 0;

        if (perf_cpu_map__empty(cpus))
                return 0;

        if (!mask) {
                mask = zalloc(cpu__max_cpu());
                if (!mask)
                        return -ENOMEM;

                counter->per_pkg_mask = mask;
        }

        /*
         * We do not consider an event that has not run to be a good
         * instance to mark a package as used (skip=1). Otherwise we
         * may run into a situation where the first CPU in a package
         * is not running anything, yet the second one is, and this
         * function would mark the package as used after the first CPU
         * and would never read the values from the second one.
         */
        if (!(vals->run && vals->ena))
                return 0;

        s = cpu_map__get_socket(cpus, cpu, NULL);
        if (s < 0)
                return -1;

        *skip = test_and_set_bit(s, mask) == 1;
        return 0;
}
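
/*
 * Editor's note: an illustration of the per-pkg logic above, assuming a
 * hypothetical 2-socket box with counter->per_pkg set and all counts
 * running. For a counter read on CPUs 0-3, where CPUs 0-1 sit on
 * socket 0 and CPUs 2-3 on socket 1, the first read per socket claims
 * the socket's bit in per_pkg_mask and the rest are skipped:
 *
 *      CPU 0 -> socket 0, bit was clear -> *skip = false  (counted)
 *      CPU 1 -> socket 0, bit was set   -> *skip = true   (dropped)
 *      CPU 2 -> socket 1, bit was clear -> *skip = false  (counted)
 *      CPU 3 -> socket 1, bit was set   -> *skip = true   (dropped)
 *
 * So a package-wide event (e.g. an uncore counter) is counted once per
 * package instead of once per CPU.
 */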

static int
process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
                       int cpu, int thread,
                       struct perf_counts_values *count)
{
        struct perf_counts_values *aggr = &evsel->counts->aggr;
        static struct perf_counts_values zero;
        bool skip = false;

        if (check_per_pkg(evsel, count, cpu, &skip)) {
                pr_err("failed to read per-pkg counter\n");
                return -1;
        }

        if (skip)
                count = &zero;

        switch (config->aggr_mode) {
        case AGGR_THREAD:
        case AGGR_CORE:
        case AGGR_DIE:
        case AGGR_SOCKET:
        case AGGR_NODE:
        case AGGR_NONE:
                if (!evsel->snapshot)
                        perf_evsel__compute_deltas(evsel, cpu, thread, count);
                perf_counts_values__scale(count, config->scale, NULL);
                if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
                        perf_stat__update_shadow_stats(evsel, count->val,
                                                       cpu, &rt_stat);
                }

                if (config->aggr_mode == AGGR_THREAD) {
                        if (config->stats)
                                perf_stat__update_shadow_stats(evsel,
                                        count->val, 0, &config->stats[thread]);
                        else
                                perf_stat__update_shadow_stats(evsel,
                                        count->val, 0, &rt_stat);
                }
                break;
        case AGGR_GLOBAL:
                aggr->val += count->val;
                aggr->ena += count->ena;
                aggr->run += count->run;
                /* fall through */
        case AGGR_UNSET:
        default:
                break;
        }

        return 0;
}
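
/*
 * Editor's note: a minimal sketch of the AGGR_GLOBAL path above, with
 * hypothetical raw readings from two CPUs:
 *
 *      cpu0: { val = 1000, ena = 500, run = 500 }
 *      cpu1: { val = 2000, ena = 500, run = 250 }
 *
 * process_counter_values() leaves the per-CPU values untouched in this
 * mode and only accumulates them into evsel->counts->aggr:
 *
 *      aggr = { val = 3000, ena = 1000, run = 750 }
 *
 * The delta and scaling steps for the aggregate are applied later, in
 * perf_stat_process_counter(), once all CPUs/threads have been folded
 * in. The other modes scale each per-CPU/per-thread value in place.
 */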

static int process_counter_maps(struct perf_stat_config *config,
                                struct evsel *counter)
{
        int nthreads = perf_thread_map__nr(counter->core.threads);
        int ncpus = perf_evsel__nr_cpus(counter);
        int cpu, thread;

        if (counter->core.system_wide)
                nthreads = 1;

        for (thread = 0; thread < nthreads; thread++) {
                for (cpu = 0; cpu < ncpus; cpu++) {
                        if (process_counter_values(config, counter, cpu, thread,
                                                   perf_counts(counter->counts, cpu, thread)))
                                return -1;
                }
        }

        return 0;
}

int perf_stat_process_counter(struct perf_stat_config *config,
                              struct evsel *counter)
{
        struct perf_counts_values *aggr = &counter->counts->aggr;
        struct perf_stat_evsel *ps = counter->stats;
        u64 *count = counter->counts->aggr.values;
        int i, ret;

        aggr->val = aggr->ena = aggr->run = 0;

        /*
         * We calculate the counter's data every interval, and the
         * display code shows the ps->res_stats average. We need to
         * zero the stats in interval mode; otherwise, overall running
         * averages would be shown for each interval. Note that all
         * three slots (val, ena, run) must be reset, not just the
         * first one.
         */
        if (config->interval) {
                for (i = 0; i < 3; i++)
                        init_stats(&ps->res_stats[i]);
        }

        if (counter->per_pkg)
                zero_per_pkg(counter);

        ret = process_counter_maps(config, counter);
        if (ret)
                return ret;

        if (config->aggr_mode != AGGR_GLOBAL)
                return 0;

        if (!counter->snapshot)
                perf_evsel__compute_deltas(counter, -1, -1, aggr);
        perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);

        for (i = 0; i < 3; i++)
                update_stats(&ps->res_stats[i], count[i]);      /* val, ena, run */

        if (verbose > 0) {
                fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
                        perf_evsel__name(counter), count[0], count[1], count[2]);
        }

        /*
         * Save the full runtime to allow normalization during printout:
         */
        perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat);

        return 0;
}
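
/*
 * Editor's note: a worked example of the scale step above, continuing
 * the hypothetical AGGR_GLOBAL sketch with aggr = { val = 3000,
 * ena = 1000, run = 750 }. The counter was scheduled in for only 75%
 * of its enabled time (multiplexing), so perf_counts_values__scale()
 * extrapolates:
 *
 *      val = val * ena / run = 3000 * 1000 / 750 = 4000
 *
 * The three res_stats[] slots then accumulate val/ena/run across
 * intervals or repeated runs; their mean and standard deviation are
 * what "perf stat -r N" reports as the average and "+- x.xx%" noise.
 */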

int perf_event__process_stat_event(struct perf_session *session,
                                   union perf_event *event)
{
        struct perf_counts_values count;
        struct perf_record_stat *st = &event->stat;
        struct evsel *counter;

        count.val = st->val;
        count.ena = st->ena;
        count.run = st->run;

        counter = perf_evlist__id2evsel(session->evlist, st->id);
        if (!counter) {
                pr_err("Failed to resolve counter for stat event.\n");
                return -EINVAL;
        }

        *perf_counts(counter->counts, st->cpu, st->thread) = count;
        counter->supported = true;
        return 0;
}

size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
{
        struct perf_record_stat *st = (struct perf_record_stat *)event;
        size_t ret;

        ret  = fprintf(fp, "\n... id %" PRI_lu64 ", cpu %d, thread %d\n",
                       st->id, st->cpu, st->thread);
        ret += fprintf(fp, "... value %" PRI_lu64 ", enabled %" PRI_lu64 ", running %" PRI_lu64 "\n",
                       st->val, st->ena, st->run);

        return ret;
}

size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
{
        struct perf_record_stat_round *rd = (struct perf_record_stat_round *)event;
        size_t ret;

        ret = fprintf(fp, "\n... time %" PRI_lu64 ", type %s\n", rd->time,
                      rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");

        return ret;
}

size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
{
        struct perf_stat_config sc;
        size_t ret;

        perf_event__read_stat_config(&sc, &event->stat_config);

        ret  = fprintf(fp, "\n");
        ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
        ret += fprintf(fp, "... scale     %d\n", sc.scale);
        ret += fprintf(fp, "... interval  %u\n", sc.interval);

        return ret;
}

int create_perf_stat_counter(struct evsel *evsel,
                             struct perf_stat_config *config,
                             struct target *target,
                             int cpu)
{
        struct perf_event_attr *attr = &evsel->core.attr;
        struct evsel *leader = evsel->leader;

        attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
                            PERF_FORMAT_TOTAL_TIME_RUNNING;

        /*
         * If the event is part of a non-trivial group, enable group
         * reads (for the leader) and ID retrieval for all members.
         */
        if (leader->core.nr_members > 1)
                attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;

        attr->inherit = !config->no_inherit;

        /*
         * Some events get initialized with sample_(period/type) set,
         * like tracepoints. Clear them for counting.
         */
        attr->sample_period = 0;

        if (config->identifier)
                attr->sample_type = PERF_SAMPLE_IDENTIFIER;

        if (config->all_user) {
                attr->exclude_kernel = 1;
                attr->exclude_user   = 0;
        }

        if (config->all_kernel) {
                attr->exclude_kernel = 0;
                attr->exclude_user   = 1;
        }

        /*
         * Disable all counters initially; they will be enabled either
         * manually by us or by the kernel via enable_on_exec, set
         * later.
         */
        if (perf_evsel__is_group_leader(evsel)) {
                attr->disabled = 1;

                /*
                 * In case of initial_delay, we enable tracee events
                 * manually.
                 */
                if (target__none(target) && !config->initial_delay)
                        attr->enable_on_exec = 1;
        }

        if (target__has_cpu(target) && !target__has_per_thread(target))
                return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);

        return perf_evsel__open_per_thread(evsel, evsel->core.threads);
}
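
/*
 * Editor's note: a sketch of what the read_format setup above implies
 * on the read() side, per the layouts documented in
 * include/uapi/linux/perf_event.h (hypothetical illustration, not part
 * of the original file). A lone counter returns:
 *
 *      struct {
 *              u64 value;
 *              u64 time_enabled;   // PERF_FORMAT_TOTAL_TIME_ENABLED
 *              u64 time_running;   // PERF_FORMAT_TOTAL_TIME_RUNNING
 *      };
 *
 * while a group leader with PERF_FORMAT_GROUP|PERF_FORMAT_ID returns
 * the whole group in one read, keeping the members' values coherent:
 *
 *      struct {
 *              u64 nr;             // number of events in the group
 *              u64 time_enabled;
 *              u64 time_running;
 *              struct { u64 value; u64 id; } values[nr];
 *      };
 *
 * time_enabled/time_running feed the ena/run fields used for the
 * multiplexing scale-up shown earlier.
 */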