linux/tools/perf/bench/synthesize.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Benchmark synthesis of perf events such as at the start of a 'perf
   4 * record'. Synthesis is done on the current process and the 'dummy' event
   5 * handlers are invoked that support dump_trace but otherwise do nothing.
   6 *
   7 * Copyright 2019 Google LLC.
   8 */
   9#include <stdio.h>
  10#include "bench.h"
  11#include "../util/debug.h"
  12#include "../util/session.h"
  13#include "../util/stat.h"
  14#include "../util/synthetic-events.h"
  15#include "../util/target.h"
  16#include "../util/thread_map.h"
  17#include "../util/tool.h"
  18#include "../util/util.h"
  19#include <linux/atomic.h>
  20#include <linux/err.h>
  21#include <linux/time64.h>
  22#include <subcmd/parse-options.h>
  23
/*
 * Benchmark tunables, set from the command line via the options[] table.
 *
 * min_threads/max_threads bound the synthesis thread counts tried by the
 * multi-threaded benchmark. max_threads == UINT_MAX is a sentinel meaning
 * "not specified"; run_multi_threaded() replaces it with the number of
 * online CPUs.
 */
static unsigned int min_threads = 1;
static unsigned int max_threads = UINT_MAX;
/* Number of timed synthesis rounds for the single-threaded benchmark. */
static unsigned int single_iterations = 10000;
/* Number of timed synthesis rounds per thread count in the multi-threaded benchmark. */
static unsigned int multi_iterations = 10;
/* Which benchmarks to run; if neither is set, bench_synthesize() enables run_st. */
static bool run_st;
static bool run_mt;
  30
/*
 * Command line options understood by 'perf bench internals synthesize'.
 * Each entry stores its parsed value directly into one of the file-scope
 * tunables above; the table is consumed by parse_options() in
 * bench_synthesize().
 */
static const struct option options[] = {
        OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"),
        OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"),
        OPT_UINTEGER('m', "min-threads", &min_threads,
                "Minimum number of threads in multithreaded bench"),
        OPT_UINTEGER('M', "max-threads", &max_threads,
                "Maximum number of threads in multithreaded bench"),
        OPT_UINTEGER('i', "single-iterations", &single_iterations,
                "Number of iterations used to compute single-threaded average"),
        OPT_UINTEGER('I', "multi-iterations", &multi_iterations,
                "Number of iterations used to compute multi-threaded average"),
        OPT_END()
};
  44
/*
 * NULL-terminated usage strings handed to parse_options() and
 * usage_with_options() by bench_synthesize().
 */
static const char *const bench_usage[] = {
        "perf bench internals synthesize <options>",
        NULL
};
  49
/*
 * Number of events delivered to process_synthesized_event() during the
 * current timed round. The benchmark loops reset it to 0 before each round
 * and read it back afterwards.
 */
static atomic_t event_count;
  51
  52static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
  53                                     union perf_event *event __maybe_unused,
  54                                     struct perf_sample *sample __maybe_unused,
  55                                     struct machine *machine __maybe_unused)
  56{
  57        atomic_inc(&event_count);
  58        return 0;
  59}
  60
  61static int do_run_single_threaded(struct perf_session *session,
  62                                struct perf_thread_map *threads,
  63                                struct target *target, bool data_mmap)
  64{
  65        const unsigned int nr_threads_synthesize = 1;
  66        struct timeval start, end, diff;
  67        u64 runtime_us;
  68        unsigned int i;
  69        double time_average, time_stddev, event_average, event_stddev;
  70        int err;
  71        struct stats time_stats, event_stats;
  72
  73        init_stats(&time_stats);
  74        init_stats(&event_stats);
  75
  76        for (i = 0; i < single_iterations; i++) {
  77                atomic_set(&event_count, 0);
  78                gettimeofday(&start, NULL);
  79                err = __machine__synthesize_threads(&session->machines.host,
  80                                                NULL,
  81                                                target, threads,
  82                                                process_synthesized_event,
  83                                                data_mmap,
  84                                                nr_threads_synthesize);
  85                if (err)
  86                        return err;
  87
  88                gettimeofday(&end, NULL);
  89                timersub(&end, &start, &diff);
  90                runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
  91                update_stats(&time_stats, runtime_us);
  92                update_stats(&event_stats, atomic_read(&event_count));
  93        }
  94
  95        time_average = avg_stats(&time_stats);
  96        time_stddev = stddev_stats(&time_stats);
  97        printf("  Average %ssynthesis took: %.3f usec (+- %.3f usec)\n",
  98                data_mmap ? "data " : "", time_average, time_stddev);
  99
 100        event_average = avg_stats(&event_stats);
 101        event_stddev = stddev_stats(&event_stats);
 102        printf("  Average num. events: %.3f (+- %.3f)\n",
 103                event_average, event_stddev);
 104
 105        printf("  Average time per event %.3f usec\n",
 106                time_average / event_average);
 107        return 0;
 108}
 109
 110static int run_single_threaded(void)
 111{
 112        struct perf_session *session;
 113        struct target target = {
 114                .pid = "self",
 115        };
 116        struct perf_thread_map *threads;
 117        int err;
 118
 119        perf_set_singlethreaded();
 120        session = perf_session__new(NULL, NULL);
 121        if (IS_ERR(session)) {
 122                pr_err("Session creation failed.\n");
 123                return PTR_ERR(session);
 124        }
 125        threads = thread_map__new_by_pid(getpid());
 126        if (!threads) {
 127                pr_err("Thread map creation failed.\n");
 128                err = -ENOMEM;
 129                goto err_out;
 130        }
 131
 132        puts(
 133"Computing performance of single threaded perf event synthesis by\n"
 134"synthesizing events on the perf process itself:");
 135
 136        err = do_run_single_threaded(session, threads, &target, false);
 137        if (err)
 138                goto err_out;
 139
 140        err = do_run_single_threaded(session, threads, &target, true);
 141
 142err_out:
 143        if (threads)
 144                perf_thread_map__put(threads);
 145
 146        perf_session__delete(session);
 147        return err;
 148}
 149
 150static int do_run_multi_threaded(struct target *target,
 151                                unsigned int nr_threads_synthesize)
 152{
 153        struct timeval start, end, diff;
 154        u64 runtime_us;
 155        unsigned int i;
 156        double time_average, time_stddev, event_average, event_stddev;
 157        int err;
 158        struct stats time_stats, event_stats;
 159        struct perf_session *session;
 160
 161        init_stats(&time_stats);
 162        init_stats(&event_stats);
 163        for (i = 0; i < multi_iterations; i++) {
 164                session = perf_session__new(NULL, NULL);
 165                if (IS_ERR(session))
 166                        return PTR_ERR(session);
 167
 168                atomic_set(&event_count, 0);
 169                gettimeofday(&start, NULL);
 170                err = __machine__synthesize_threads(&session->machines.host,
 171                                                NULL,
 172                                                target, NULL,
 173                                                process_synthesized_event,
 174                                                false,
 175                                                nr_threads_synthesize);
 176                if (err) {
 177                        perf_session__delete(session);
 178                        return err;
 179                }
 180
 181                gettimeofday(&end, NULL);
 182                timersub(&end, &start, &diff);
 183                runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
 184                update_stats(&time_stats, runtime_us);
 185                update_stats(&event_stats, atomic_read(&event_count));
 186                perf_session__delete(session);
 187        }
 188
 189        time_average = avg_stats(&time_stats);
 190        time_stddev = stddev_stats(&time_stats);
 191        printf("    Average synthesis took: %.3f usec (+- %.3f usec)\n",
 192                time_average, time_stddev);
 193
 194        event_average = avg_stats(&event_stats);
 195        event_stddev = stddev_stats(&event_stats);
 196        printf("    Average num. events: %.3f (+- %.3f)\n",
 197                event_average, event_stddev);
 198
 199        printf("    Average time per event %.3f usec\n",
 200                time_average / event_average);
 201        return 0;
 202}
 203
 204static int run_multi_threaded(void)
 205{
 206        struct target target = {
 207                .cpu_list = "0"
 208        };
 209        unsigned int nr_threads_synthesize;
 210        int err;
 211
 212        if (max_threads == UINT_MAX)
 213                max_threads = sysconf(_SC_NPROCESSORS_ONLN);
 214
 215        puts(
 216"Computing performance of multi threaded perf event synthesis by\n"
 217"synthesizing events on CPU 0:");
 218
 219        for (nr_threads_synthesize = min_threads;
 220             nr_threads_synthesize <= max_threads;
 221             nr_threads_synthesize++) {
 222                if (nr_threads_synthesize == 1)
 223                        perf_set_singlethreaded();
 224                else
 225                        perf_set_multithreaded();
 226
 227                printf("  Number of synthesis threads: %u\n",
 228                        nr_threads_synthesize);
 229
 230                err = do_run_multi_threaded(&target, nr_threads_synthesize);
 231                if (err)
 232                        return err;
 233        }
 234        perf_set_singlethreaded();
 235        return 0;
 236}
 237
 238int bench_synthesize(int argc, const char **argv)
 239{
 240        int err = 0;
 241
 242        argc = parse_options(argc, argv, options, bench_usage, 0);
 243        if (argc) {
 244                usage_with_options(bench_usage, options);
 245                exit(EXIT_FAILURE);
 246        }
 247
 248        /*
 249         * If neither single threaded or multi-threaded are specified, default
 250         * to running just single threaded.
 251         */
 252        if (!run_st && !run_mt)
 253                run_st = true;
 254
 255        if (run_st)
 256                err = run_single_threaded();
 257
 258        if (!err && run_mt)
 259                err = run_multi_threaded();
 260
 261        return err;
 262}
 263