linux/tools/perf/bench/mem-functions.c
<<
>>
Prefs
   /*
    * mem-functions.c
    *
    * Simple memcpy() and memset() benchmarks
    *
    * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
    */
   8
   9#include "../perf.h"
  10#include "../util/util.h"
  11#include <subcmd/parse-options.h>
  12#include "../util/header.h"
  13#include "../util/cloexec.h"
  14#include "bench.h"
  15#include "mem-memcpy-arch.h"
  16#include "mem-memset-arch.h"
  17
  18#include <stdio.h>
  19#include <stdlib.h>
  20#include <string.h>
  21#include <sys/time.h>
  22#include <errno.h>
  23
  /* Step between adjacent size units (bytes -> KB -> MB -> GB) when printing. */
  #define K 1024

  /*
   * Benchmark settings. The option table in this file stores the parsed
   * command-line values directly into these variables.
   */
  static const char *size_str = "1MB";          /* -s/--size: buffer size, as typed */
  static const char *function_str = "all";      /* -f/--function: routine to run */
  static int nr_loops = 1;                      /* -l/--nr_loops: timed iterations */
  static bool use_cycles;                       /* -c/--cycles: count cycles, not time */

  /* Descriptor of the cycles counter; assigned by init_cycles(). */
  static int cycles_fd;
  31
  32static const struct option options[] = {
  33        OPT_STRING('s', "size", &size_str, "1MB",
  34                    "Specify the size of the memory buffers. "
  35                    "Available units: B, KB, MB, GB and TB (case insensitive)"),
  36
  37        OPT_STRING('f', "function", &function_str, "all",
  38                    "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
  39
  40        OPT_INTEGER('l', "nr_loops", &nr_loops,
  41                    "Specify the number of loops to run. (default: 1)"),
  42
  43        OPT_BOOLEAN('c', "cycles", &use_cycles,
  44                    "Use a cycles event instead of gettimeofday() to measure performance"),
  45
  46        OPT_END()
  47};
  48
  /* Pointer to a routine with the signature of memcpy(). */
  typedef void *(*memcpy_t)(void *dst, const void *src, size_t len);

  /* Pointer to a routine with the signature of memset(). */
  typedef void *(*memset_t)(void *dst, int value, size_t len);

  /*
   * One benchmarkable routine.
   *
   * Tables of these are terminated by an entry whose name is NULL.
   */
  struct function {
          const char *name;       /* identifier matched against -f/--function */
          const char *desc;       /* human readable description */
          union {
                  memcpy_t memcpy;        /* used by the memcpy benchmark */
                  memset_t memset;        /* used by the memset benchmark */
          } fn;
  };
  60
  61static struct perf_event_attr cycle_attr = {
  62        .type           = PERF_TYPE_HARDWARE,
  63        .config         = PERF_COUNT_HW_CPU_CYCLES
  64};
  65
  66static void init_cycles(void)
  67{
  68        cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
  69
  70        if (cycles_fd < 0 && errno == ENOSYS)
  71                die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
  72        else
  73                BUG_ON(cycles_fd < 0);
  74}
  75
  76static u64 get_cycles(void)
  77{
  78        int ret;
  79        u64 clk;
  80
  81        ret = read(cycles_fd, &clk, sizeof(u64));
  82        BUG_ON(ret != sizeof(u64));
  83
  84        return clk;
  85}
  86
  /* Convert a struct timeval into seconds, expressed as a double. */
  static double timeval2double(struct timeval *ts)
  {
          double whole_seconds = (double)ts->tv_sec;
          double microseconds = (double)ts->tv_usec;

          return whole_seconds + microseconds / (double)1000000;
  }
  91
  /*
   * print_bps(x) - print a throughput given in bytes per second.
   *
   * The value is scaled to the largest unit (bytes, KB, MB or GB per second,
   * in steps of K) that keeps it at or above 1, and printed on one line.
   * The argument is evaluated exactly once and converted to double, so any
   * arithmetic expression may be passed.
   */
  #define print_bps(x) do {                                               \
                  double bps_ = (x);                                      \
                                                                          \
                  if (bps_ < K)                                           \
                          printf(" %14lf bytes/sec\n", bps_);             \
                  else if (bps_ < K * K)                                  \
                          printf(" %14lf KB/sec\n", bps_ / K);            \
                  else if (bps_ < K * K * K)                              \
                          printf(" %14lf MB/sec\n", bps_ / K / K);        \
                  else                                                    \
                          printf(" %14lf GB/sec\n", bps_ / K / K / K);    \
          } while (0)
 102
 103struct bench_mem_info {
 104        const struct function *functions;
 105        u64 (*do_cycles)(const struct function *r, size_t size);
 106        double (*do_gettimeofday)(const struct function *r, size_t size);
 107        const char *const *usage;
 108};
 109
 110static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
 111{
 112        const struct function *r = &info->functions[r_idx];
 113        double result_bps = 0.0;
 114        u64 result_cycles = 0;
 115
 116        printf("# function '%s' (%s)\n", r->name, r->desc);
 117
 118        if (bench_format == BENCH_FORMAT_DEFAULT)
 119                printf("# Copying %s bytes ...\n\n", size_str);
 120
 121        if (use_cycles) {
 122                result_cycles = info->do_cycles(r, size);
 123        } else {
 124                result_bps = info->do_gettimeofday(r, size);
 125        }
 126
 127        switch (bench_format) {
 128        case BENCH_FORMAT_DEFAULT:
 129                if (use_cycles) {
 130                        printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
 131                } else {
 132                        print_bps(result_bps);
 133                }
 134                break;
 135
 136        case BENCH_FORMAT_SIMPLE:
 137                if (use_cycles) {
 138                        printf("%lf\n", (double)result_cycles/size_total);
 139                } else {
 140                        printf("%lf\n", result_bps);
 141                }
 142                break;
 143
 144        default:
 145                BUG_ON(1);
 146                break;
 147        }
 148}
 149
 150static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
 151{
 152        int i;
 153        size_t size;
 154        double size_total;
 155
 156        argc = parse_options(argc, argv, options, info->usage, 0);
 157
 158        if (use_cycles)
 159                init_cycles();
 160
 161        size = (size_t)perf_atoll((char *)size_str);
 162        size_total = (double)size * nr_loops;
 163
 164        if ((s64)size <= 0) {
 165                fprintf(stderr, "Invalid size:%s\n", size_str);
 166                return 1;
 167        }
 168
 169        if (!strncmp(function_str, "all", 3)) {
 170                for (i = 0; info->functions[i].name; i++)
 171                        __bench_mem_function(info, i, size, size_total);
 172                return 0;
 173        }
 174
 175        for (i = 0; info->functions[i].name; i++) {
 176                if (!strcmp(info->functions[i].name, function_str))
 177                        break;
 178        }
 179        if (!info->functions[i].name) {
 180                if (strcmp(function_str, "help") && strcmp(function_str, "h"))
 181                        printf("Unknown function: %s\n", function_str);
 182                printf("Available functions:\n");
 183                for (i = 0; info->functions[i].name; i++) {
 184                        printf("\t%s ... %s\n",
 185                               info->functions[i].name, info->functions[i].desc);
 186                }
 187                return 1;
 188        }
 189
 190        __bench_mem_function(info, i, size, size_total);
 191
 192        return 0;
 193}
 194
 /*
  * Allocate the destination and source buffers of a memcpy run.
  *
  * @dst, @src: receive the two buffers, each of @size bytes, obtained from
  *             zalloc()
  *
  * A failed allocation is reported through die().
  */
 static void memcpy_alloc_mem(void **dst, void **src, size_t size)
 {
         void *buf;

         buf = zalloc(size);
         *dst = buf;
         if (!buf)
                 die("memory allocation failed - maybe size is too large?\n");

         buf = zalloc(size);
         *src = buf;
         if (!buf)
                 die("memory allocation failed - maybe size is too large?\n");

         /* Always prefault the source's zero pages, even if MMAP_THRESH is crossed: */
         memset(buf, 0, size);
 }
 208
 209static u64 do_memcpy_cycles(const struct function *r, size_t size)
 210{
 211        u64 cycle_start = 0ULL, cycle_end = 0ULL;
 212        void *src = NULL, *dst = NULL;
 213        memcpy_t fn = r->fn.memcpy;
 214        int i;
 215
 216        memcpy_alloc_mem(&dst, &src, size);
 217
 218        /*
 219         * We prefault the freshly allocated memory range here,
 220         * to not measure page fault overhead:
 221         */
 222        fn(dst, src, size);
 223
 224        cycle_start = get_cycles();
 225        for (i = 0; i < nr_loops; ++i)
 226                fn(dst, src, size);
 227        cycle_end = get_cycles();
 228
 229        free(src);
 230        free(dst);
 231        return cycle_end - cycle_start;
 232}
 233
 234static double do_memcpy_gettimeofday(const struct function *r, size_t size)
 235{
 236        struct timeval tv_start, tv_end, tv_diff;
 237        memcpy_t fn = r->fn.memcpy;
 238        void *src = NULL, *dst = NULL;
 239        int i;
 240
 241        memcpy_alloc_mem(&dst, &src, size);
 242
 243        /*
 244         * We prefault the freshly allocated memory range here,
 245         * to not measure page fault overhead:
 246         */
 247        fn(dst, src, size);
 248
 249        BUG_ON(gettimeofday(&tv_start, NULL));
 250        for (i = 0; i < nr_loops; ++i)
 251                fn(dst, src, size);
 252        BUG_ON(gettimeofday(&tv_end, NULL));
 253
 254        timersub(&tv_end, &tv_start, &tv_diff);
 255
 256        free(src);
 257        free(dst);
 258
 259        return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
 260}
 261
 262struct function memcpy_functions[] = {
 263        { .name         = "default",
 264          .desc         = "Default memcpy() provided by glibc",
 265          .fn.memcpy    = memcpy },
 266
 267#ifdef HAVE_ARCH_X86_64_SUPPORT
 268# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
 269# include "mem-memcpy-x86-64-asm-def.h"
 270# undef MEMCPY_FN
 271#endif
 272
 273        { .name = NULL, }
 274};
 275
 /* NULL-terminated usage lines for "perf bench mem memcpy". */
 static const char * const bench_mem_memcpy_usage[] = {
         "perf bench mem memcpy <options>",
         NULL,
 };
 280
 281int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused)
 282{
 283        struct bench_mem_info info = {
 284                .functions              = memcpy_functions,
 285                .do_cycles              = do_memcpy_cycles,
 286                .do_gettimeofday        = do_memcpy_gettimeofday,
 287                .usage                  = bench_mem_memcpy_usage,
 288        };
 289
 290        return bench_mem_common(argc, argv, &info);
 291}
 292
 /*
  * Allocate the destination buffer of a memset run.
  *
  * @dst receives a buffer of @size bytes obtained from zalloc(); a failed
  * allocation is reported through die().
  */
 static void memset_alloc_mem(void **dst, size_t size)
 {
         void *buf = zalloc(size);

         *dst = buf;
         if (buf == NULL)
                 die("memory allocation failed - maybe size is too large?\n");
 }
 299
 300static u64 do_memset_cycles(const struct function *r, size_t size)
 301{
 302        u64 cycle_start = 0ULL, cycle_end = 0ULL;
 303        memset_t fn = r->fn.memset;
 304        void *dst = NULL;
 305        int i;
 306
 307        memset_alloc_mem(&dst, size);
 308
 309        /*
 310         * We prefault the freshly allocated memory range here,
 311         * to not measure page fault overhead:
 312         */
 313        fn(dst, -1, size);
 314
 315        cycle_start = get_cycles();
 316        for (i = 0; i < nr_loops; ++i)
 317                fn(dst, i, size);
 318        cycle_end = get_cycles();
 319
 320        free(dst);
 321        return cycle_end - cycle_start;
 322}
 323
 324static double do_memset_gettimeofday(const struct function *r, size_t size)
 325{
 326        struct timeval tv_start, tv_end, tv_diff;
 327        memset_t fn = r->fn.memset;
 328        void *dst = NULL;
 329        int i;
 330
 331        memset_alloc_mem(&dst, size);
 332
 333        /*
 334         * We prefault the freshly allocated memory range here,
 335         * to not measure page fault overhead:
 336         */
 337        fn(dst, -1, size);
 338
 339        BUG_ON(gettimeofday(&tv_start, NULL));
 340        for (i = 0; i < nr_loops; ++i)
 341                fn(dst, i, size);
 342        BUG_ON(gettimeofday(&tv_end, NULL));
 343
 344        timersub(&tv_end, &tv_start, &tv_diff);
 345
 346        free(dst);
 347        return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
 348}
 349
 /* NULL-terminated usage lines for "perf bench mem memset". */
 static const char * const bench_mem_memset_usage[] = {
         "perf bench mem memset <options>",
         NULL,
 };
 354
 355static const struct function memset_functions[] = {
 356        { .name         = "default",
 357          .desc         = "Default memset() provided by glibc",
 358          .fn.memset    = memset },
 359
 360#ifdef HAVE_ARCH_X86_64_SUPPORT
 361# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
 362# include "mem-memset-x86-64-asm-def.h"
 363# undef MEMSET_FN
 364#endif
 365
 366        { .name = NULL, }
 367};
 368
 369int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused)
 370{
 371        struct bench_mem_info info = {
 372                .functions              = memset_functions,
 373                .do_cycles              = do_memset_cycles,
 374                .do_gettimeofday        = do_memset_gettimeofday,
 375                .usage                  = bench_mem_memset_usage,
 376        };
 377
 378        return bench_mem_common(argc, argv, &info);
 379}
 380