/*
 * linux/tools/perf/bench/mem-memcpy.c
 *
 * mem-memcpy.c
 *
 * memcpy: Simple memory copy in various ways
 *
 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
 */
   8#include <ctype.h>
   9
  10#include "../perf.h"
  11#include "../util/util.h"
  12#include "../util/parse-options.h"
  13#include "../util/header.h"
  14#include "bench.h"
  15#include "mem-memcpy-arch.h"
  16
  17#include <stdio.h>
  18#include <stdlib.h>
  19#include <string.h>
  20#include <sys/time.h>
  21#include <errno.h>
  22
  23#define K 1024
  24
  25static const char       *length_str     = "1MB";
  26static const char       *routine        = "default";
  27static bool             use_clock;
  28static int              clock_fd;
  29static bool             only_prefault;
  30static bool             no_prefault;
  31
  32static const struct option options[] = {
  33        OPT_STRING('l', "length", &length_str, "1MB",
  34                    "Specify length of memory to copy. "
  35                    "available unit: B, MB, GB (upper and lower)"),
  36        OPT_STRING('r', "routine", &routine, "default",
  37                    "Specify routine to copy"),
  38        OPT_BOOLEAN('c', "clock", &use_clock,
  39                    "Use CPU clock for measuring"),
  40        OPT_BOOLEAN('o', "only-prefault", &only_prefault,
  41                    "Show only the result with page faults before memcpy()"),
  42        OPT_BOOLEAN('n', "no-prefault", &no_prefault,
  43                    "Show only the result without page faults before memcpy()"),
  44        OPT_END()
  45};
  46
  47typedef void *(*memcpy_t)(void *, const void *, size_t);
  48
  49struct routine {
  50        const char *name;
  51        const char *desc;
  52        memcpy_t fn;
  53};
  54
  55struct routine routines[] = {
  56        { "default",
  57          "Default memcpy() provided by glibc",
  58          memcpy },
  59#ifdef ARCH_X86_64
  60
  61#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
  62#include "mem-memcpy-x86-64-asm-def.h"
  63#undef MEMCPY_FN
  64
  65#endif
  66
  67        { NULL,
  68          NULL,
  69          NULL   }
  70};
  71
  72static const char * const bench_mem_memcpy_usage[] = {
  73        "perf bench mem memcpy <options>",
  74        NULL
  75};
  76
  77static struct perf_event_attr clock_attr = {
  78        .type           = PERF_TYPE_HARDWARE,
  79        .config         = PERF_COUNT_HW_CPU_CYCLES
  80};
  81
  82static void init_clock(void)
  83{
  84        clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
  85
  86        if (clock_fd < 0 && errno == ENOSYS)
  87                die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
  88        else
  89                BUG_ON(clock_fd < 0);
  90}
  91
  92static u64 get_clock(void)
  93{
  94        int ret;
  95        u64 clk;
  96
  97        ret = read(clock_fd, &clk, sizeof(u64));
  98        BUG_ON(ret != sizeof(u64));
  99
 100        return clk;
 101}
 102
 103static double timeval2double(struct timeval *ts)
 104{
 105        return (double)ts->tv_sec +
 106                (double)ts->tv_usec / (double)1000000;
 107}
 108
 109static void alloc_mem(void **dst, void **src, size_t length)
 110{
 111        *dst = zalloc(length);
 112        if (!dst)
 113                die("memory allocation failed - maybe length is too large?\n");
 114
 115        *src = zalloc(length);
 116        if (!src)
 117                die("memory allocation failed - maybe length is too large?\n");
 118}
 119
 120static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
 121{
 122        u64 clock_start = 0ULL, clock_end = 0ULL;
 123        void *src = NULL, *dst = NULL;
 124
 125        alloc_mem(&src, &dst, len);
 126
 127        if (prefault)
 128                fn(dst, src, len);
 129
 130        clock_start = get_clock();
 131        fn(dst, src, len);
 132        clock_end = get_clock();
 133
 134        free(src);
 135        free(dst);
 136        return clock_end - clock_start;
 137}
 138
 139static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
 140{
 141        struct timeval tv_start, tv_end, tv_diff;
 142        void *src = NULL, *dst = NULL;
 143
 144        alloc_mem(&src, &dst, len);
 145
 146        if (prefault)
 147                fn(dst, src, len);
 148
 149        BUG_ON(gettimeofday(&tv_start, NULL));
 150        fn(dst, src, len);
 151        BUG_ON(gettimeofday(&tv_end, NULL));
 152
 153        timersub(&tv_end, &tv_start, &tv_diff);
 154
 155        free(src);
 156        free(dst);
 157        return (double)((double)len / timeval2double(&tv_diff));
 158}
 159
 160#define pf (no_prefault ? 0 : 1)
 161
 162#define print_bps(x) do {                                       \
 163                if (x < K)                                      \
 164                        printf(" %14lf B/Sec", x);              \
 165                else if (x < K * K)                             \
 166                        printf(" %14lfd KB/Sec", x / K);        \
 167                else if (x < K * K * K)                         \
 168                        printf(" %14lf MB/Sec", x / K / K);     \
 169                else                                            \
 170                        printf(" %14lf GB/Sec", x / K / K / K); \
 171        } while (0)
 172
 173int bench_mem_memcpy(int argc, const char **argv,
 174                     const char *prefix __used)
 175{
 176        int i;
 177        size_t len;
 178        double result_bps[2];
 179        u64 result_clock[2];
 180
 181        argc = parse_options(argc, argv, options,
 182                             bench_mem_memcpy_usage, 0);
 183
 184        if (use_clock)
 185                init_clock();
 186
 187        len = (size_t)perf_atoll((char *)length_str);
 188
 189        result_clock[0] = result_clock[1] = 0ULL;
 190        result_bps[0] = result_bps[1] = 0.0;
 191
 192        if ((s64)len <= 0) {
 193                fprintf(stderr, "Invalid length:%s\n", length_str);
 194                return 1;
 195        }
 196
 197        /* same to without specifying either of prefault and no-prefault */
 198        if (only_prefault && no_prefault)
 199                only_prefault = no_prefault = false;
 200
 201        for (i = 0; routines[i].name; i++) {
 202                if (!strcmp(routines[i].name, routine))
 203                        break;
 204        }
 205        if (!routines[i].name) {
 206                printf("Unknown routine:%s\n", routine);
 207                printf("Available routines...\n");
 208                for (i = 0; routines[i].name; i++) {
 209                        printf("\t%s ... %s\n",
 210                               routines[i].name, routines[i].desc);
 211                }
 212                return 1;
 213        }
 214
 215        if (bench_format == BENCH_FORMAT_DEFAULT)
 216                printf("# Copying %s Bytes ...\n\n", length_str);
 217
 218        if (!only_prefault && !no_prefault) {
 219                /* show both of results */
 220                if (use_clock) {
 221                        result_clock[0] =
 222                                do_memcpy_clock(routines[i].fn, len, false);
 223                        result_clock[1] =
 224                                do_memcpy_clock(routines[i].fn, len, true);
 225                } else {
 226                        result_bps[0] =
 227                                do_memcpy_gettimeofday(routines[i].fn,
 228                                                len, false);
 229                        result_bps[1] =
 230                                do_memcpy_gettimeofday(routines[i].fn,
 231                                                len, true);
 232                }
 233        } else {
 234                if (use_clock) {
 235                        result_clock[pf] =
 236                                do_memcpy_clock(routines[i].fn,
 237                                                len, only_prefault);
 238                } else {
 239                        result_bps[pf] =
 240                                do_memcpy_gettimeofday(routines[i].fn,
 241                                                len, only_prefault);
 242                }
 243        }
 244
 245        switch (bench_format) {
 246        case BENCH_FORMAT_DEFAULT:
 247                if (!only_prefault && !no_prefault) {
 248                        if (use_clock) {
 249                                printf(" %14lf Clock/Byte\n",
 250                                        (double)result_clock[0]
 251                                        / (double)len);
 252                                printf(" %14lf Clock/Byte (with prefault)\n",
 253                                        (double)result_clock[1]
 254                                        / (double)len);
 255                        } else {
 256                                print_bps(result_bps[0]);
 257                                printf("\n");
 258                                print_bps(result_bps[1]);
 259                                printf(" (with prefault)\n");
 260                        }
 261                } else {
 262                        if (use_clock) {
 263                                printf(" %14lf Clock/Byte",
 264                                        (double)result_clock[pf]
 265                                        / (double)len);
 266                        } else
 267                                print_bps(result_bps[pf]);
 268
 269                        printf("%s\n", only_prefault ? " (with prefault)" : "");
 270                }
 271                break;
 272        case BENCH_FORMAT_SIMPLE:
 273                if (!only_prefault && !no_prefault) {
 274                        if (use_clock) {
 275                                printf("%lf %lf\n",
 276                                        (double)result_clock[0] / (double)len,
 277                                        (double)result_clock[1] / (double)len);
 278                        } else {
 279                                printf("%lf %lf\n",
 280                                        result_bps[0], result_bps[1]);
 281                        }
 282                } else {
 283                        if (use_clock) {
 284                                printf("%lf\n", (double)result_clock[pf]
 285                                        / (double)len);
 286                        } else
 287                                printf("%lf\n", result_bps[pf]);
 288                }
 289                break;
 290        default:
 291                /* reaching this means there's some disaster: */
 292                die("unknown format: %d\n", bench_format);
 293                break;
 294        }
 295
 296        return 0;
 297}
 298