linux/tools/perf/bench/mem-functions.c
<<
>>
Prefs
   1/*
   2 * mem-functions.c
   3 *
   4 * Simple memcpy() and memset() benchmarks
   5 *
   6 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
   7 */
   8
   #include "debug.h"
   #include "../perf.h"
   #include "../util/util.h"
   #include <subcmd/parse-options.h>
   #include "../util/header.h"
   #include "../util/cloexec.h"
   #include "../util/string2.h"
   #include "bench.h"
   #include "mem-memcpy-arch.h"
   #include "mem-memset-arch.h"

   #include <stdio.h>
   #include <stdlib.h>
   #include <string.h>
   #include <unistd.h>
   #include <sys/time.h>
   #include <errno.h>
   #include <linux/time64.h>
  26
  /* Step between successive size units: K bytes per KB, K * K bytes per MB, ... */
  #define K 1024

  /*
   * Benchmark settings. The initializers are the defaults; options[] stores
   * command-line overrides into these variables.
   */
  static const char *size_str = "1MB";     /* buffer size as typed by the user */
  static const char *function_str = "all"; /* function name selected with -f */
  static int nr_loops = 1;                 /* timed calls per function */
  static bool use_cycles;                  /* measure with the cycles event */

  /* Descriptor of the cycles counter: set by init_cycles(), read by get_cycles(). */
  static int cycles_fd;
  34
  35static const struct option options[] = {
  36        OPT_STRING('s', "size", &size_str, "1MB",
  37                    "Specify the size of the memory buffers. "
  38                    "Available units: B, KB, MB, GB and TB (case insensitive)"),
  39
  40        OPT_STRING('f', "function", &function_str, "all",
  41                    "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
  42
  43        OPT_INTEGER('l', "nr_loops", &nr_loops,
  44                    "Specify the number of loops to run. (default: 1)"),
  45
  46        OPT_BOOLEAN('c', "cycles", &use_cycles,
  47                    "Use a cycles event instead of gettimeofday() to measure performance"),
  48
  49        OPT_END()
  50};
  51
  /* Signatures of the routines under test; they match libc memcpy() and memset(). */
  typedef void *(*memcpy_t)(void *, const void *, size_t);
  typedef void *(*memset_t)(void *, int, size_t);

  /*
   * One benchmarkable routine.
   *
   * Tables of these are terminated by an entry whose name is NULL. Which
   * union member is valid depends on the table the entry lives in: memcpy
   * tables set fn.memcpy, memset tables set fn.memset.
   */
  struct function {
          const char *name;       /* identifier matched against the -f option */
          const char *desc;       /* human-readable description for listings */
          union {
                  memcpy_t memcpy;
                  memset_t memset;
          } fn;
  };
  63
  64static struct perf_event_attr cycle_attr = {
  65        .type           = PERF_TYPE_HARDWARE,
  66        .config         = PERF_COUNT_HW_CPU_CYCLES
  67};
  68
  69static int init_cycles(void)
  70{
  71        cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
  72
  73        if (cycles_fd < 0 && errno == ENOSYS) {
  74                pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
  75                return -1;
  76        }
  77
  78        return cycles_fd;
  79}
  80
  81static u64 get_cycles(void)
  82{
  83        int ret;
  84        u64 clk;
  85
  86        ret = read(cycles_fd, &clk, sizeof(u64));
  87        BUG_ON(ret != sizeof(u64));
  88
  89        return clk;
  90}
  91
  92static double timeval2double(struct timeval *ts)
  93{
  94        return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
  95}
  96
  /*
   * Print a throughput given in bytes per second, scaled to the largest unit
   * (bytes, KB, MB or GB per second) that keeps the value at or above 1.
   *
   * The argument is converted to double and evaluated exactly once, so any
   * arithmetic expression is safe to pass.
   */
  #define print_bps(x) do {                                               \
                  double bps_ = (x);                                      \
                                                                          \
                  if (bps_ < K)                                           \
                          printf(" %14lf bytes/sec\n", bps_);             \
                  else if (bps_ < K * K)                                  \
                          printf(" %14lf KB/sec\n", bps_ / K);            \
                  else if (bps_ < K * K * K)                              \
                          printf(" %14lf MB/sec\n", bps_ / K / K);        \
                  else                                                    \
                          printf(" %14lf GB/sec\n", bps_ / K / K / K);    \
          } while (0)
 107
 108struct bench_mem_info {
 109        const struct function *functions;
 110        u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
 111        double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
 112        const char *const *usage;
 113        bool alloc_src;
 114};
 115
 116static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
 117{
 118        const struct function *r = &info->functions[r_idx];
 119        double result_bps = 0.0;
 120        u64 result_cycles = 0;
 121        void *src = NULL, *dst = zalloc(size);
 122
 123        printf("# function '%s' (%s)\n", r->name, r->desc);
 124
 125        if (dst == NULL)
 126                goto out_alloc_failed;
 127
 128        if (info->alloc_src) {
 129                src = zalloc(size);
 130                if (src == NULL)
 131                        goto out_alloc_failed;
 132        }
 133
 134        if (bench_format == BENCH_FORMAT_DEFAULT)
 135                printf("# Copying %s bytes ...\n\n", size_str);
 136
 137        if (use_cycles) {
 138                result_cycles = info->do_cycles(r, size, src, dst);
 139        } else {
 140                result_bps = info->do_gettimeofday(r, size, src, dst);
 141        }
 142
 143        switch (bench_format) {
 144        case BENCH_FORMAT_DEFAULT:
 145                if (use_cycles) {
 146                        printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
 147                } else {
 148                        print_bps(result_bps);
 149                }
 150                break;
 151
 152        case BENCH_FORMAT_SIMPLE:
 153                if (use_cycles) {
 154                        printf("%lf\n", (double)result_cycles/size_total);
 155                } else {
 156                        printf("%lf\n", result_bps);
 157                }
 158                break;
 159
 160        default:
 161                BUG_ON(1);
 162                break;
 163        }
 164
 165out_free:
 166        free(src);
 167        free(dst);
 168        return;
 169out_alloc_failed:
 170        printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
 171        goto out_free;
 172}
 173
 174static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
 175{
 176        int i;
 177        size_t size;
 178        double size_total;
 179
 180        argc = parse_options(argc, argv, options, info->usage, 0);
 181
 182        if (use_cycles) {
 183                i = init_cycles();
 184                if (i < 0) {
 185                        fprintf(stderr, "Failed to open cycles counter\n");
 186                        return i;
 187                }
 188        }
 189
 190        size = (size_t)perf_atoll((char *)size_str);
 191        size_total = (double)size * nr_loops;
 192
 193        if ((s64)size <= 0) {
 194                fprintf(stderr, "Invalid size:%s\n", size_str);
 195                return 1;
 196        }
 197
 198        if (!strncmp(function_str, "all", 3)) {
 199                for (i = 0; info->functions[i].name; i++)
 200                        __bench_mem_function(info, i, size, size_total);
 201                return 0;
 202        }
 203
 204        for (i = 0; info->functions[i].name; i++) {
 205                if (!strcmp(info->functions[i].name, function_str))
 206                        break;
 207        }
 208        if (!info->functions[i].name) {
 209                if (strcmp(function_str, "help") && strcmp(function_str, "h"))
 210                        printf("Unknown function: %s\n", function_str);
 211                printf("Available functions:\n");
 212                for (i = 0; info->functions[i].name; i++) {
 213                        printf("\t%s ... %s\n",
 214                               info->functions[i].name, info->functions[i].desc);
 215                }
 216                return 1;
 217        }
 218
 219        __bench_mem_function(info, i, size, size_total);
 220
 221        return 0;
 222}
 223
 224static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
 225{
 226        u64 cycle_start = 0ULL, cycle_end = 0ULL;
 227        memcpy_t fn = r->fn.memcpy;
 228        int i;
 229
 230        /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
 231        memset(src, 0, size);
 232
 233        /*
 234         * We prefault the freshly allocated memory range here,
 235         * to not measure page fault overhead:
 236         */
 237        fn(dst, src, size);
 238
 239        cycle_start = get_cycles();
 240        for (i = 0; i < nr_loops; ++i)
 241                fn(dst, src, size);
 242        cycle_end = get_cycles();
 243
 244        return cycle_end - cycle_start;
 245}
 246
 247static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
 248{
 249        struct timeval tv_start, tv_end, tv_diff;
 250        memcpy_t fn = r->fn.memcpy;
 251        int i;
 252
 253        /*
 254         * We prefault the freshly allocated memory range here,
 255         * to not measure page fault overhead:
 256         */
 257        fn(dst, src, size);
 258
 259        BUG_ON(gettimeofday(&tv_start, NULL));
 260        for (i = 0; i < nr_loops; ++i)
 261                fn(dst, src, size);
 262        BUG_ON(gettimeofday(&tv_end, NULL));
 263
 264        timersub(&tv_end, &tv_start, &tv_diff);
 265
 266        return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
 267}
 268
 269struct function memcpy_functions[] = {
 270        { .name         = "default",
 271          .desc         = "Default memcpy() provided by glibc",
 272          .fn.memcpy    = memcpy },
 273
 274#ifdef HAVE_ARCH_X86_64_SUPPORT
 275# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
 276# include "mem-memcpy-x86-64-asm-def.h"
 277# undef MEMCPY_FN
 278#endif
 279
 280        { .name = NULL, }
 281};
 282
 /* NULL-terminated usage text of the memcpy benchmark, handed to parse_options(). */
 static const char * const bench_mem_memcpy_usage[] = {
         "perf bench mem memcpy <options>",
         NULL,
 };
 287
 288int bench_mem_memcpy(int argc, const char **argv)
 289{
 290        struct bench_mem_info info = {
 291                .functions              = memcpy_functions,
 292                .do_cycles              = do_memcpy_cycles,
 293                .do_gettimeofday        = do_memcpy_gettimeofday,
 294                .usage                  = bench_mem_memcpy_usage,
 295                .alloc_src              = true,
 296        };
 297
 298        return bench_mem_common(argc, argv, &info);
 299}
 300
 301static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
 302{
 303        u64 cycle_start = 0ULL, cycle_end = 0ULL;
 304        memset_t fn = r->fn.memset;
 305        int i;
 306
 307        /*
 308         * We prefault the freshly allocated memory range here,
 309         * to not measure page fault overhead:
 310         */
 311        fn(dst, -1, size);
 312
 313        cycle_start = get_cycles();
 314        for (i = 0; i < nr_loops; ++i)
 315                fn(dst, i, size);
 316        cycle_end = get_cycles();
 317
 318        return cycle_end - cycle_start;
 319}
 320
 321static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
 322{
 323        struct timeval tv_start, tv_end, tv_diff;
 324        memset_t fn = r->fn.memset;
 325        int i;
 326
 327        /*
 328         * We prefault the freshly allocated memory range here,
 329         * to not measure page fault overhead:
 330         */
 331        fn(dst, -1, size);
 332
 333        BUG_ON(gettimeofday(&tv_start, NULL));
 334        for (i = 0; i < nr_loops; ++i)
 335                fn(dst, i, size);
 336        BUG_ON(gettimeofday(&tv_end, NULL));
 337
 338        timersub(&tv_end, &tv_start, &tv_diff);
 339
 340        return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
 341}
 342
 /* NULL-terminated usage text of the memset benchmark, handed to parse_options(). */
 static const char * const bench_mem_memset_usage[] = {
         "perf bench mem memset <options>",
         NULL,
 };
 347
 348static const struct function memset_functions[] = {
 349        { .name         = "default",
 350          .desc         = "Default memset() provided by glibc",
 351          .fn.memset    = memset },
 352
 353#ifdef HAVE_ARCH_X86_64_SUPPORT
 354# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
 355# include "mem-memset-x86-64-asm-def.h"
 356# undef MEMSET_FN
 357#endif
 358
 359        { .name = NULL, }
 360};
 361
 362int bench_mem_memset(int argc, const char **argv)
 363{
 364        struct bench_mem_info info = {
 365                .functions              = memset_functions,
 366                .do_cycles              = do_memset_cycles,
 367                .do_gettimeofday        = do_memset_gettimeofday,
 368                .usage                  = bench_mem_memset_usage,
 369        };
 370
 371        return bench_mem_common(argc, argv, &info);
 372}
 373