linux/tools/perf/bench/mem-memcpy.c
<<
>>
Prefs
   1/*
   2 * mem-memcpy.c
   3 *
   4 * memcpy: Simple memory copy in various ways
   5 *
   6 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
   7 */
   8
   9#include "../perf.h"
  10#include "../util/util.h"
  11#include "../util/parse-options.h"
  12#include "../util/header.h"
  13#include "bench.h"
  14#include "mem-memcpy-arch.h"
  15
  16#include <stdio.h>
  17#include <stdlib.h>
  18#include <string.h>
  19#include <sys/time.h>
  20#include <errno.h>
  21
  22#define K 1024
  23
  24static const char       *length_str     = "1MB";
  25static const char       *routine        = "default";
  26static int              iterations      = 1;
  27static bool             use_cycle;
  28static int              cycle_fd;
  29static bool             only_prefault;
  30static bool             no_prefault;
  31
  32static const struct option options[] = {
  33        OPT_STRING('l', "length", &length_str, "1MB",
  34                    "Specify length of memory to copy. "
  35                    "Available units: B, KB, MB, GB and TB (upper and lower)"),
  36        OPT_STRING('r', "routine", &routine, "default",
  37                    "Specify routine to copy"),
  38        OPT_INTEGER('i', "iterations", &iterations,
  39                    "repeat memcpy() invocation this number of times"),
  40        OPT_BOOLEAN('c', "cycle", &use_cycle,
  41                    "Use cycles event instead of gettimeofday() for measuring"),
  42        OPT_BOOLEAN('o', "only-prefault", &only_prefault,
  43                    "Show only the result with page faults before memcpy()"),
  44        OPT_BOOLEAN('n', "no-prefault", &no_prefault,
  45                    "Show only the result without page faults before memcpy()"),
  46        OPT_END()
  47};
  48
  49typedef void *(*memcpy_t)(void *, const void *, size_t);
  50
  51struct routine {
  52        const char *name;
  53        const char *desc;
  54        memcpy_t fn;
  55};
  56
  57struct routine routines[] = {
  58        { "default",
  59          "Default memcpy() provided by glibc",
  60          memcpy },
  61#ifdef ARCH_X86_64
  62
  63#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
  64#include "mem-memcpy-x86-64-asm-def.h"
  65#undef MEMCPY_FN
  66
  67#endif
  68
  69        { NULL,
  70          NULL,
  71          NULL   }
  72};
  73
  74static const char * const bench_mem_memcpy_usage[] = {
  75        "perf bench mem memcpy <options>",
  76        NULL
  77};
  78
  79static struct perf_event_attr cycle_attr = {
  80        .type           = PERF_TYPE_HARDWARE,
  81        .config         = PERF_COUNT_HW_CPU_CYCLES
  82};
  83
  84static void init_cycle(void)
  85{
  86        cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 0);
  87
  88        if (cycle_fd < 0 && errno == ENOSYS)
  89                die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
  90        else
  91                BUG_ON(cycle_fd < 0);
  92}
  93
  94static u64 get_cycle(void)
  95{
  96        int ret;
  97        u64 clk;
  98
  99        ret = read(cycle_fd, &clk, sizeof(u64));
 100        BUG_ON(ret != sizeof(u64));
 101
 102        return clk;
 103}
 104
 105static double timeval2double(struct timeval *ts)
 106{
 107        return (double)ts->tv_sec +
 108                (double)ts->tv_usec / (double)1000000;
 109}
 110
 111static void alloc_mem(void **dst, void **src, size_t length)
 112{
 113        *dst = zalloc(length);
 114        if (!*dst)
 115                die("memory allocation failed - maybe length is too large?\n");
 116
 117        *src = zalloc(length);
 118        if (!*src)
 119                die("memory allocation failed - maybe length is too large?\n");
 120        /* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */
 121        memset(*src, 0, length);
 122}
 123
 124static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault)
 125{
 126        u64 cycle_start = 0ULL, cycle_end = 0ULL;
 127        void *src = NULL, *dst = NULL;
 128        int i;
 129
 130        alloc_mem(&src, &dst, len);
 131
 132        if (prefault)
 133                fn(dst, src, len);
 134
 135        cycle_start = get_cycle();
 136        for (i = 0; i < iterations; ++i)
 137                fn(dst, src, len);
 138        cycle_end = get_cycle();
 139
 140        free(src);
 141        free(dst);
 142        return cycle_end - cycle_start;
 143}
 144
 145static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
 146{
 147        struct timeval tv_start, tv_end, tv_diff;
 148        void *src = NULL, *dst = NULL;
 149        int i;
 150
 151        alloc_mem(&src, &dst, len);
 152
 153        if (prefault)
 154                fn(dst, src, len);
 155
 156        BUG_ON(gettimeofday(&tv_start, NULL));
 157        for (i = 0; i < iterations; ++i)
 158                fn(dst, src, len);
 159        BUG_ON(gettimeofday(&tv_end, NULL));
 160
 161        timersub(&tv_end, &tv_start, &tv_diff);
 162
 163        free(src);
 164        free(dst);
 165        return (double)((double)len / timeval2double(&tv_diff));
 166}
 167
 168#define pf (no_prefault ? 0 : 1)
 169
 170#define print_bps(x) do {                                       \
 171                if (x < K)                                      \
 172                        printf(" %14lf B/Sec", x);              \
 173                else if (x < K * K)                             \
 174                        printf(" %14lfd KB/Sec", x / K);        \
 175                else if (x < K * K * K)                         \
 176                        printf(" %14lf MB/Sec", x / K / K);     \
 177                else                                            \
 178                        printf(" %14lf GB/Sec", x / K / K / K); \
 179        } while (0)
 180
 181int bench_mem_memcpy(int argc, const char **argv,
 182                     const char *prefix __maybe_unused)
 183{
 184        int i;
 185        size_t len;
 186        double result_bps[2];
 187        u64 result_cycle[2];
 188
 189        argc = parse_options(argc, argv, options,
 190                             bench_mem_memcpy_usage, 0);
 191
 192        if (use_cycle)
 193                init_cycle();
 194
 195        len = (size_t)perf_atoll((char *)length_str);
 196
 197        result_cycle[0] = result_cycle[1] = 0ULL;
 198        result_bps[0] = result_bps[1] = 0.0;
 199
 200        if ((s64)len <= 0) {
 201                fprintf(stderr, "Invalid length:%s\n", length_str);
 202                return 1;
 203        }
 204
 205        /* same to without specifying either of prefault and no-prefault */
 206        if (only_prefault && no_prefault)
 207                only_prefault = no_prefault = false;
 208
 209        for (i = 0; routines[i].name; i++) {
 210                if (!strcmp(routines[i].name, routine))
 211                        break;
 212        }
 213        if (!routines[i].name) {
 214                printf("Unknown routine:%s\n", routine);
 215                printf("Available routines...\n");
 216                for (i = 0; routines[i].name; i++) {
 217                        printf("\t%s ... %s\n",
 218                               routines[i].name, routines[i].desc);
 219                }
 220                return 1;
 221        }
 222
 223        if (bench_format == BENCH_FORMAT_DEFAULT)
 224                printf("# Copying %s Bytes ...\n\n", length_str);
 225
 226        if (!only_prefault && !no_prefault) {
 227                /* show both of results */
 228                if (use_cycle) {
 229                        result_cycle[0] =
 230                                do_memcpy_cycle(routines[i].fn, len, false);
 231                        result_cycle[1] =
 232                                do_memcpy_cycle(routines[i].fn, len, true);
 233                } else {
 234                        result_bps[0] =
 235                                do_memcpy_gettimeofday(routines[i].fn,
 236                                                len, false);
 237                        result_bps[1] =
 238                                do_memcpy_gettimeofday(routines[i].fn,
 239                                                len, true);
 240                }
 241        } else {
 242                if (use_cycle) {
 243                        result_cycle[pf] =
 244                                do_memcpy_cycle(routines[i].fn,
 245                                                len, only_prefault);
 246                } else {
 247                        result_bps[pf] =
 248                                do_memcpy_gettimeofday(routines[i].fn,
 249                                                len, only_prefault);
 250                }
 251        }
 252
 253        switch (bench_format) {
 254        case BENCH_FORMAT_DEFAULT:
 255                if (!only_prefault && !no_prefault) {
 256                        if (use_cycle) {
 257                                printf(" %14lf Cycle/Byte\n",
 258                                        (double)result_cycle[0]
 259                                        / (double)len);
 260                                printf(" %14lf Cycle/Byte (with prefault)\n",
 261                                        (double)result_cycle[1]
 262                                        / (double)len);
 263                        } else {
 264                                print_bps(result_bps[0]);
 265                                printf("\n");
 266                                print_bps(result_bps[1]);
 267                                printf(" (with prefault)\n");
 268                        }
 269                } else {
 270                        if (use_cycle) {
 271                                printf(" %14lf Cycle/Byte",
 272                                        (double)result_cycle[pf]
 273                                        / (double)len);
 274                        } else
 275                                print_bps(result_bps[pf]);
 276
 277                        printf("%s\n", only_prefault ? " (with prefault)" : "");
 278                }
 279                break;
 280        case BENCH_FORMAT_SIMPLE:
 281                if (!only_prefault && !no_prefault) {
 282                        if (use_cycle) {
 283                                printf("%lf %lf\n",
 284                                        (double)result_cycle[0] / (double)len,
 285                                        (double)result_cycle[1] / (double)len);
 286                        } else {
 287                                printf("%lf %lf\n",
 288                                        result_bps[0], result_bps[1]);
 289                        }
 290                } else {
 291                        if (use_cycle) {
 292                                printf("%lf\n", (double)result_cycle[pf]
 293                                        / (double)len);
 294                        } else
 295                                printf("%lf\n", result_bps[pf]);
 296                }
 297                break;
 298        default:
 299                /* reaching this means there's some disaster: */
 300                die("unknown format: %d\n", bench_format);
 301                break;
 302        }
 303
 304        return 0;
 305}
 306