linux/tools/perf/bench/futex-hash.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2013  Davidlohr Bueso <davidlohr@hp.com>
   4 *
   5 * futex-hash: Stress the hell out of the Linux kernel futex uaddr hashing.
   6 *
   7 * This program is particularly useful for measuring the kernel's futex hash
   8 * table/function implementation. In order for it to make sense, use with as
   9 * many threads and futexes as possible.
  10 */
  11
/* For the CPU_*() cpuset macros (CPU_ZERO, CPU_SET) */
  13#include <string.h>
  14#include <pthread.h>
  15
  16#include <errno.h>
  17#include <signal.h>
  18#include <stdlib.h>
  19#include <linux/compiler.h>
  20#include <linux/kernel.h>
  21#include <linux/zalloc.h>
  22#include <sys/time.h>
  23#include <perf/cpumap.h>
  24
  25#include "../util/stat.h"
  26#include <subcmd/parse-options.h>
  27#include "bench.h"
  28#include "futex.h"
  29
  30#include <err.h>
  31
  32static unsigned int nthreads = 0;
  33static unsigned int nsecs    = 10;
  34/* amount of futexes per thread */
  35static unsigned int nfutexes = 1024;
  36static bool fshared = false, done = false, silent = false;
  37static int futex_flag = 0;
  38
  39struct timeval bench__start, bench__end, bench__runtime;
  40static pthread_mutex_t thread_lock;
  41static unsigned int threads_starting;
  42static struct stats throughput_stats;
  43static pthread_cond_t thread_parent, thread_worker;
  44
  45struct worker {
  46        int tid;
  47        u_int32_t *futex;
  48        pthread_t thread;
  49        unsigned long ops;
  50};
  51
  52static const struct option options[] = {
  53        OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
  54        OPT_UINTEGER('r', "runtime", &nsecs,    "Specify runtime (in seconds)"),
  55        OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"),
  56        OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
  57        OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
  58        OPT_END()
  59};
  60
  61static const char * const bench_futex_hash_usage[] = {
  62        "perf bench futex hash <options>",
  63        NULL
  64};
  65
  66static void *workerfn(void *arg)
  67{
  68        int ret;
  69        struct worker *w = (struct worker *) arg;
  70        unsigned int i;
  71        unsigned long ops = w->ops; /* avoid cacheline bouncing */
  72
  73        pthread_mutex_lock(&thread_lock);
  74        threads_starting--;
  75        if (!threads_starting)
  76                pthread_cond_signal(&thread_parent);
  77        pthread_cond_wait(&thread_worker, &thread_lock);
  78        pthread_mutex_unlock(&thread_lock);
  79
  80        do {
  81                for (i = 0; i < nfutexes; i++, ops++) {
  82                        /*
  83                         * We want the futex calls to fail in order to stress
  84                         * the hashing of uaddr and not measure other steps,
  85                         * such as internal waitqueue handling, thus enlarging
  86                         * the critical region protected by hb->lock.
  87                         */
  88                        ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag);
  89                        if (!silent &&
  90                            (!ret || errno != EAGAIN || errno != EWOULDBLOCK))
  91                                warn("Non-expected futex return call");
  92                }
  93        }  while (!done);
  94
  95        w->ops = ops;
  96        return NULL;
  97}
  98
  99static void toggle_done(int sig __maybe_unused,
 100                        siginfo_t *info __maybe_unused,
 101                        void *uc __maybe_unused)
 102{
 103        /* inform all threads that we're done for the day */
 104        done = true;
 105        gettimeofday(&bench__end, NULL);
 106        timersub(&bench__end, &bench__start, &bench__runtime);
 107}
 108
 109static void print_summary(void)
 110{
 111        unsigned long avg = avg_stats(&throughput_stats);
 112        double stddev = stddev_stats(&throughput_stats);
 113
 114        printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
 115               !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
 116               (int)bench__runtime.tv_sec);
 117}
 118
 119int bench_futex_hash(int argc, const char **argv)
 120{
 121        int ret = 0;
 122        cpu_set_t cpuset;
 123        struct sigaction act;
 124        unsigned int i;
 125        pthread_attr_t thread_attr;
 126        struct worker *worker = NULL;
 127        struct perf_cpu_map *cpu;
 128
 129        argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
 130        if (argc) {
 131                usage_with_options(bench_futex_hash_usage, options);
 132                exit(EXIT_FAILURE);
 133        }
 134
 135        cpu = perf_cpu_map__new(NULL);
 136        if (!cpu)
 137                goto errmem;
 138
 139        memset(&act, 0, sizeof(act));
 140        sigfillset(&act.sa_mask);
 141        act.sa_sigaction = toggle_done;
 142        sigaction(SIGINT, &act, NULL);
 143
 144        if (!nthreads) /* default to the number of CPUs */
 145                nthreads = cpu->nr;
 146
 147        worker = calloc(nthreads, sizeof(*worker));
 148        if (!worker)
 149                goto errmem;
 150
 151        if (!fshared)
 152                futex_flag = FUTEX_PRIVATE_FLAG;
 153
 154        printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
 155               getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);
 156
 157        init_stats(&throughput_stats);
 158        pthread_mutex_init(&thread_lock, NULL);
 159        pthread_cond_init(&thread_parent, NULL);
 160        pthread_cond_init(&thread_worker, NULL);
 161
 162        threads_starting = nthreads;
 163        pthread_attr_init(&thread_attr);
 164        gettimeofday(&bench__start, NULL);
 165        for (i = 0; i < nthreads; i++) {
 166                worker[i].tid = i;
 167                worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex));
 168                if (!worker[i].futex)
 169                        goto errmem;
 170
 171                CPU_ZERO(&cpuset);
 172                CPU_SET(cpu->map[i % cpu->nr], &cpuset);
 173
 174                ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
 175                if (ret)
 176                        err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
 177
 178                ret = pthread_create(&worker[i].thread, &thread_attr, workerfn,
 179                                     (void *)(struct worker *) &worker[i]);
 180                if (ret)
 181                        err(EXIT_FAILURE, "pthread_create");
 182
 183        }
 184        pthread_attr_destroy(&thread_attr);
 185
 186        pthread_mutex_lock(&thread_lock);
 187        while (threads_starting)
 188                pthread_cond_wait(&thread_parent, &thread_lock);
 189        pthread_cond_broadcast(&thread_worker);
 190        pthread_mutex_unlock(&thread_lock);
 191
 192        sleep(nsecs);
 193        toggle_done(0, NULL, NULL);
 194
 195        for (i = 0; i < nthreads; i++) {
 196                ret = pthread_join(worker[i].thread, NULL);
 197                if (ret)
 198                        err(EXIT_FAILURE, "pthread_join");
 199        }
 200
 201        /* cleanup & report results */
 202        pthread_cond_destroy(&thread_parent);
 203        pthread_cond_destroy(&thread_worker);
 204        pthread_mutex_destroy(&thread_lock);
 205
 206        for (i = 0; i < nthreads; i++) {
 207                unsigned long t = bench__runtime.tv_sec > 0 ?
 208                        worker[i].ops / bench__runtime.tv_sec : 0;
 209                update_stats(&throughput_stats, t);
 210                if (!silent) {
 211                        if (nfutexes == 1)
 212                                printf("[thread %2d] futex: %p [ %ld ops/sec ]\n",
 213                                       worker[i].tid, &worker[i].futex[0], t);
 214                        else
 215                                printf("[thread %2d] futexes: %p ... %p [ %ld ops/sec ]\n",
 216                                       worker[i].tid, &worker[i].futex[0],
 217                                       &worker[i].futex[nfutexes-1], t);
 218                }
 219
 220                zfree(&worker[i].futex);
 221        }
 222
 223        print_summary();
 224
 225        free(worker);
 226        free(cpu);
 227        return ret;
 228errmem:
 229        err(EXIT_FAILURE, "calloc");
 230}
 231