linux/tools/perf/bench/mem-functions.c
<<
>>
Prefs
/*
 * mem-functions.c
 *
 * Simple memcpy() and memset() benchmarks
 *
 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
 */
   8
   9#include "debug.h"
  10#include "../perf.h"
  11#include "../util/util.h"
  12#include <subcmd/parse-options.h>
  13#include "../util/header.h"
  14#include "../util/cloexec.h"
  15#include "bench.h"
  16#include "mem-memcpy-arch.h"
  17#include "mem-memset-arch.h"
  18
  19#include <stdio.h>
  20#include <stdlib.h>
  21#include <string.h>
  22#include <sys/time.h>
  23#include <errno.h>
  24#include <linux/time64.h>
  25
  26#define K 1024
  27
  28static const char       *size_str       = "1MB";
  29static const char       *function_str   = "all";
  30static int              nr_loops        = 1;
  31static bool             use_cycles;
  32static int              cycles_fd;
  33
  34static const struct option options[] = {
  35        OPT_STRING('s', "size", &size_str, "1MB",
  36                    "Specify the size of the memory buffers. "
  37                    "Available units: B, KB, MB, GB and TB (case insensitive)"),
  38
  39        OPT_STRING('f', "function", &function_str, "all",
  40                    "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
  41
  42        OPT_INTEGER('l', "nr_loops", &nr_loops,
  43                    "Specify the number of loops to run. (default: 1)"),
  44
  45        OPT_BOOLEAN('c', "cycles", &use_cycles,
  46                    "Use a cycles event instead of gettimeofday() to measure performance"),
  47
  48        OPT_END()
  49};
  50
  51typedef void *(*memcpy_t)(void *, const void *, size_t);
  52typedef void *(*memset_t)(void *, int, size_t);
  53
  54struct function {
  55        const char *name;
  56        const char *desc;
  57        union {
  58                memcpy_t memcpy;
  59                memset_t memset;
  60        } fn;
  61};
  62
  63static struct perf_event_attr cycle_attr = {
  64        .type           = PERF_TYPE_HARDWARE,
  65        .config         = PERF_COUNT_HW_CPU_CYCLES
  66};
  67
  68static int init_cycles(void)
  69{
  70        cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
  71
  72        if (cycles_fd < 0 && errno == ENOSYS) {
  73                pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
  74                return -1;
  75        }
  76
  77        return cycles_fd;
  78}
  79
  80static u64 get_cycles(void)
  81{
  82        int ret;
  83        u64 clk;
  84
  85        ret = read(cycles_fd, &clk, sizeof(u64));
  86        BUG_ON(ret != sizeof(u64));
  87
  88        return clk;
  89}
  90
  91static double timeval2double(struct timeval *ts)
  92{
  93        return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
  94}
  95
  96#define print_bps(x) do {                                               \
  97                if (x < K)                                              \
  98                        printf(" %14lf bytes/sec\n", x);                \
  99                else if (x < K * K)                                     \
 100                        printf(" %14lfd KB/sec\n", x / K);              \
 101                else if (x < K * K * K)                                 \
 102                        printf(" %14lf MB/sec\n", x / K / K);           \
 103                else                                                    \
 104                        printf(" %14lf GB/sec\n", x / K / K / K);       \
 105        } while (0)
 106
 107struct bench_mem_info {
 108        const struct function *functions;
 109        u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
 110        double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
 111        const char *const *usage;
 112        bool alloc_src;
 113};
 114
 115static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
 116{
 117        const struct function *r = &info->functions[r_idx];
 118        double result_bps = 0.0;
 119        u64 result_cycles = 0;
 120        void *src = NULL, *dst = zalloc(size);
 121
 122        printf("# function '%s' (%s)\n", r->name, r->desc);
 123
 124        if (dst == NULL)
 125                goto out_alloc_failed;
 126
 127        if (info->alloc_src) {
 128                src = zalloc(size);
 129                if (src == NULL)
 130                        goto out_alloc_failed;
 131        }
 132
 133        if (bench_format == BENCH_FORMAT_DEFAULT)
 134                printf("# Copying %s bytes ...\n\n", size_str);
 135
 136        if (use_cycles) {
 137                result_cycles = info->do_cycles(r, size, src, dst);
 138        } else {
 139                result_bps = info->do_gettimeofday(r, size, src, dst);
 140        }
 141
 142        switch (bench_format) {
 143        case BENCH_FORMAT_DEFAULT:
 144                if (use_cycles) {
 145                        printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
 146                } else {
 147                        print_bps(result_bps);
 148                }
 149                break;
 150
 151        case BENCH_FORMAT_SIMPLE:
 152                if (use_cycles) {
 153                        printf("%lf\n", (double)result_cycles/size_total);
 154                } else {
 155                        printf("%lf\n", result_bps);
 156                }
 157                break;
 158
 159        default:
 160                BUG_ON(1);
 161                break;
 162        }
 163
 164out_free:
 165        free(src);
 166        free(dst);
 167        return;
 168out_alloc_failed:
 169        printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
 170        goto out_free;
 171}
 172
 173static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
 174{
 175        int i;
 176        size_t size;
 177        double size_total;
 178
 179        argc = parse_options(argc, argv, options, info->usage, 0);
 180
 181        if (use_cycles) {
 182                i = init_cycles();
 183                if (i < 0) {
 184                        fprintf(stderr, "Failed to open cycles counter\n");
 185                        return i;
 186                }
 187        }
 188
 189        size = (size_t)perf_atoll((char *)size_str);
 190        size_total = (double)size * nr_loops;
 191
 192        if ((s64)size <= 0) {
 193                fprintf(stderr, "Invalid size:%s\n", size_str);
 194                return 1;
 195        }
 196
 197        if (!strncmp(function_str, "all", 3)) {
 198                for (i = 0; info->functions[i].name; i++)
 199                        __bench_mem_function(info, i, size, size_total);
 200                return 0;
 201        }
 202
 203        for (i = 0; info->functions[i].name; i++) {
 204                if (!strcmp(info->functions[i].name, function_str))
 205                        break;
 206        }
 207        if (!info->functions[i].name) {
 208                if (strcmp(function_str, "help") && strcmp(function_str, "h"))
 209                        printf("Unknown function: %s\n", function_str);
 210                printf("Available functions:\n");
 211                for (i = 0; info->functions[i].name; i++) {
 212                        printf("\t%s ... %s\n",
 213                               info->functions[i].name, info->functions[i].desc);
 214                }
 215                return 1;
 216        }
 217
 218        __bench_mem_function(info, i, size, size_total);
 219
 220        return 0;
 221}
 222
 223static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
 224{
 225        u64 cycle_start = 0ULL, cycle_end = 0ULL;
 226        memcpy_t fn = r->fn.memcpy;
 227        int i;
 228
 229        /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
 230        memset(src, 0, size);
 231
 232        /*
 233         * We prefault the freshly allocated memory range here,
 234         * to not measure page fault overhead:
 235         */
 236        fn(dst, src, size);
 237
 238        cycle_start = get_cycles();
 239        for (i = 0; i < nr_loops; ++i)
 240                fn(dst, src, size);
 241        cycle_end = get_cycles();
 242
 243        return cycle_end - cycle_start;
 244}
 245
 246static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
 247{
 248        struct timeval tv_start, tv_end, tv_diff;
 249        memcpy_t fn = r->fn.memcpy;
 250        int i;
 251
 252        /*
 253         * We prefault the freshly allocated memory range here,
 254         * to not measure page fault overhead:
 255         */
 256        fn(dst, src, size);
 257
 258        BUG_ON(gettimeofday(&tv_start, NULL));
 259        for (i = 0; i < nr_loops; ++i)
 260                fn(dst, src, size);
 261        BUG_ON(gettimeofday(&tv_end, NULL));
 262
 263        timersub(&tv_end, &tv_start, &tv_diff);
 264
 265        return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
 266}
 267
 268struct function memcpy_functions[] = {
 269        { .name         = "default",
 270          .desc         = "Default memcpy() provided by glibc",
 271          .fn.memcpy    = memcpy },
 272
 273#ifdef HAVE_ARCH_X86_64_SUPPORT
 274# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
 275# include "mem-memcpy-x86-64-asm-def.h"
 276# undef MEMCPY_FN
 277#endif
 278
 279        { .name = NULL, }
 280};
 281
 282static const char * const bench_mem_memcpy_usage[] = {
 283        "perf bench mem memcpy <options>",
 284        NULL
 285};
 286
 287int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused)
 288{
 289        struct bench_mem_info info = {
 290                .functions              = memcpy_functions,
 291                .do_cycles              = do_memcpy_cycles,
 292                .do_gettimeofday        = do_memcpy_gettimeofday,
 293                .usage                  = bench_mem_memcpy_usage,
 294                .alloc_src              = true,
 295        };
 296
 297        return bench_mem_common(argc, argv, &info);
 298}
 299
 300static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
 301{
 302        u64 cycle_start = 0ULL, cycle_end = 0ULL;
 303        memset_t fn = r->fn.memset;
 304        int i;
 305
 306        /*
 307         * We prefault the freshly allocated memory range here,
 308         * to not measure page fault overhead:
 309         */
 310        fn(dst, -1, size);
 311
 312        cycle_start = get_cycles();
 313        for (i = 0; i < nr_loops; ++i)
 314                fn(dst, i, size);
 315        cycle_end = get_cycles();
 316
 317        return cycle_end - cycle_start;
 318}
 319
 320static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
 321{
 322        struct timeval tv_start, tv_end, tv_diff;
 323        memset_t fn = r->fn.memset;
 324        int i;
 325
 326        /*
 327         * We prefault the freshly allocated memory range here,
 328         * to not measure page fault overhead:
 329         */
 330        fn(dst, -1, size);
 331
 332        BUG_ON(gettimeofday(&tv_start, NULL));
 333        for (i = 0; i < nr_loops; ++i)
 334                fn(dst, i, size);
 335        BUG_ON(gettimeofday(&tv_end, NULL));
 336
 337        timersub(&tv_end, &tv_start, &tv_diff);
 338
 339        return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
 340}
 341
 342static const char * const bench_mem_memset_usage[] = {
 343        "perf bench mem memset <options>",
 344        NULL
 345};
 346
 347static const struct function memset_functions[] = {
 348        { .name         = "default",
 349          .desc         = "Default memset() provided by glibc",
 350          .fn.memset    = memset },
 351
 352#ifdef HAVE_ARCH_X86_64_SUPPORT
 353# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
 354# include "mem-memset-x86-64-asm-def.h"
 355# undef MEMSET_FN
 356#endif
 357
 358        { .name = NULL, }
 359};
 360
 361int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused)
 362{
 363        struct bench_mem_info info = {
 364                .functions              = memset_functions,
 365                .do_cycles              = do_memset_cycles,
 366                .do_gettimeofday        = do_memset_gettimeofday,
 367                .usage                  = bench_mem_memset_usage,
 368        };
 369
 370        return bench_mem_common(argc, argv, &info);
 371}
 372