linux/kernel/trace/trace_benchmark.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   #include <linux/delay.h>
   #include <linux/kthread.h>
   #include <linux/math64.h>
   #include <linux/module.h>
   #include <linux/trace_clock.h>
   6
   7#define CREATE_TRACE_POINTS
   8#include "trace_benchmark.h"
   9
  10static struct task_struct *bm_event_thread;
  11
  12static char bm_str[BENCHMARK_EVENT_STRLEN] = "START";
  13
  14static u64 bm_total;
  15static u64 bm_totalsq;
  16static u64 bm_last;
  17static u64 bm_max;
  18static u64 bm_min;
  19static u64 bm_first;
  20static u64 bm_cnt;
  21static u64 bm_stddev;
  22static unsigned int bm_avg;
  23static unsigned int bm_std;
  24
  25static bool ok_to_run;
  26
  27/*
  28 * This gets called in a loop recording the time it took to write
  29 * the tracepoint. What it writes is the time statistics of the last
  30 * tracepoint write. As there is nothing to write the first time
  31 * it simply writes "START". As the first write is cold cache and
  32 * the rest is hot, we save off that time in bm_first and it is
  33 * reported as "first", which is shown in the second write to the
  34 * tracepoint. The "first" field is writen within the statics from
  35 * then on but never changes.
  36 */
  37static void trace_do_benchmark(void)
  38{
  39        u64 start;
  40        u64 stop;
  41        u64 delta;
  42        u64 stddev;
  43        u64 seed;
  44        u64 last_seed;
  45        unsigned int avg;
  46        unsigned int std = 0;
  47
  48        /* Only run if the tracepoint is actually active */
  49        if (!trace_benchmark_event_enabled() || !tracing_is_on())
  50                return;
  51
  52        local_irq_disable();
  53        start = trace_clock_local();
  54        trace_benchmark_event(bm_str);
  55        stop = trace_clock_local();
  56        local_irq_enable();
  57
  58        bm_cnt++;
  59
  60        delta = stop - start;
  61
  62        /*
  63         * The first read is cold cached, keep it separate from the
  64         * other calculations.
  65         */
  66        if (bm_cnt == 1) {
  67                bm_first = delta;
  68                scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
  69                          "first=%llu [COLD CACHED]", bm_first);
  70                return;
  71        }
  72
  73        bm_last = delta;
  74
  75        if (delta > bm_max)
  76                bm_max = delta;
  77        if (!bm_min || delta < bm_min)
  78                bm_min = delta;
  79
  80        /*
  81         * When bm_cnt is greater than UINT_MAX, it breaks the statistics
  82         * accounting. Freeze the statistics when that happens.
  83         * We should have enough data for the avg and stddev anyway.
  84         */
  85        if (bm_cnt > UINT_MAX) {
  86                scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
  87                    "last=%llu first=%llu max=%llu min=%llu ** avg=%u std=%d std^2=%lld",
  88                          bm_last, bm_first, bm_max, bm_min, bm_avg, bm_std, bm_stddev);
  89                return;
  90        }
  91
  92        bm_total += delta;
  93        bm_totalsq += delta * delta;
  94
  95
  96        if (bm_cnt > 1) {
  97                /*
  98                 * Apply Welford's method to calculate standard deviation:
  99                 * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2)
 100                 */
 101                stddev = (u64)bm_cnt * bm_totalsq - bm_total * bm_total;
 102                do_div(stddev, (u32)bm_cnt);
 103                do_div(stddev, (u32)bm_cnt - 1);
 104        } else
 105                stddev = 0;
 106
 107        delta = bm_total;
 108        do_div(delta, bm_cnt);
 109        avg = delta;
 110
 111        if (stddev > 0) {
 112                int i = 0;
 113                /*
 114                 * stddev is the square of standard deviation but
 115                 * we want the actualy number. Use the average
 116                 * as our seed to find the std.
 117                 *
 118                 * The next try is:
 119                 *  x = (x + N/x) / 2
 120                 *
 121                 * Where N is the squared number to find the square
 122                 * root of.
 123                 */
 124                seed = avg;
 125                do {
 126                        last_seed = seed;
 127                        seed = stddev;
 128                        if (!last_seed)
 129                                break;
 130                        do_div(seed, last_seed);
 131                        seed += last_seed;
 132                        do_div(seed, 2);
 133                } while (i++ < 10 && last_seed != seed);
 134
 135                std = seed;
 136        }
 137
 138        scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
 139                  "last=%llu first=%llu max=%llu min=%llu avg=%u std=%d std^2=%lld",
 140                  bm_last, bm_first, bm_max, bm_min, avg, std, stddev);
 141
 142        bm_std = std;
 143        bm_avg = avg;
 144        bm_stddev = stddev;
 145}
 146
 /*
  * Thread body: fire the benchmark tracepoint over and over until the
  * thread is asked to stop. @arg is unused. Always returns 0.
  */
 static int benchmark_event_kthread(void *arg)
 {
         /* sleep a bit to make sure the tracepoint gets activated */
         msleep(100);

         for (;;) {
                 if (kthread_should_stop())
                         break;

                 trace_do_benchmark();

                 /*
                  * We never go to sleep between rounds. This is basically
                  * a "yield()": any task that wants to run may schedule
                  * in, but if the CPU is otherwise idle we keep burning
                  * cycles.
                  *
                  * NOTE(review): because this thread never sleeps
                  * voluntarily, synchronize_rcu_tasks() depends on the
                  * call below reporting a quiescent state for rcu_tasks.
                  * Plain cond_resched() is used here rather than a
                  * *_rcu_qs() variant -- confirm that is sufficient for
                  * this kernel version and preemption model.
                  */
                 cond_resched();
         }

         return 0;
 }
 173
 174/*
 175 * When the benchmark tracepoint is enabled, it calls this
 176 * function and the thread that calls the tracepoint is created.
 177 */
 178int trace_benchmark_reg(void)
 179{
 180        if (!ok_to_run) {
 181                pr_warning("trace benchmark cannot be started via kernel command line\n");
 182                return -EBUSY;
 183        }
 184
 185        bm_event_thread = kthread_run(benchmark_event_kthread,
 186                                      NULL, "event_benchmark");
 187        if (IS_ERR(bm_event_thread)) {
 188                pr_warning("trace benchmark failed to create kernel thread\n");
 189                return PTR_ERR(bm_event_thread);
 190        }
 191
 192        return 0;
 193}
 194
 195/*
 196 * When the benchmark tracepoint is disabled, it calls this
 197 * function and the thread that calls the tracepoint is deleted
 198 * and all the numbers are reset.
 199 */
 200void trace_benchmark_unreg(void)
 201{
 202        if (!bm_event_thread)
 203                return;
 204
 205        kthread_stop(bm_event_thread);
 206        bm_event_thread = NULL;
 207
 208        strcpy(bm_str, "START");
 209        bm_total = 0;
 210        bm_totalsq = 0;
 211        bm_last = 0;
 212        bm_max = 0;
 213        bm_min = 0;
 214        bm_cnt = 0;
 215        /* These don't need to be reset but reset them anyway */
 216        bm_first = 0;
 217        bm_std = 0;
 218        bm_avg = 0;
 219        bm_stddev = 0;
 220}
 221
 222static __init int ok_to_run_trace_benchmark(void)
 223{
 224        ok_to_run = true;
 225
 226        return 0;
 227}
 228
 229early_initcall(ok_to_run_trace_benchmark);
 230