/* qemu/tests/fp/fp-bench.c */
   1/*
   2 * fp-bench.c - A collection of simple floating point microbenchmarks.
   3 *
   4 * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
   5 *
   6 * License: GNU GPL, version 2 or later.
   7 *   See the COPYING file in the top-level directory.
   8 */
   9#ifndef HW_POISON_H
  10#error Must define HW_POISON_H to work around TARGET_* poisoning
  11#endif
  12
  13#include "qemu/osdep.h"
  14#include <math.h>
  15#include <fenv.h>
  16#include "qemu/timer.h"
  17#include "fpu/softfloat.h"
  18
  19/* amortize the computation of random inputs */
  20#define OPS_PER_ITER     50000
  21
  22#define MAX_OPERANDS 3
  23
  24#define SEED_A 0xdeadfacedeadface
  25#define SEED_B 0xbadc0feebadc0fee
  26#define SEED_C 0xbeefdeadbeefdead
  27
  28enum op {
  29    OP_ADD,
  30    OP_SUB,
  31    OP_MUL,
  32    OP_DIV,
  33    OP_FMA,
  34    OP_SQRT,
  35    OP_CMP,
  36    OP_MAX_NR,
  37};
  38
  39static const char * const op_names[] = {
  40    [OP_ADD] = "add",
  41    [OP_SUB] = "sub",
  42    [OP_MUL] = "mul",
  43    [OP_DIV] = "div",
  44    [OP_FMA] = "mulAdd",
  45    [OP_SQRT] = "sqrt",
  46    [OP_CMP] = "cmp",
  47    [OP_MAX_NR] = NULL,
  48};
  49
  50enum precision {
  51    PREC_SINGLE,
  52    PREC_DOUBLE,
  53    PREC_FLOAT32,
  54    PREC_FLOAT64,
  55    PREC_MAX_NR,
  56};
  57
  58enum rounding {
  59    ROUND_EVEN,
  60    ROUND_ZERO,
  61    ROUND_DOWN,
  62    ROUND_UP,
  63    ROUND_TIEAWAY,
  64    N_ROUND_MODES,
  65};
  66
  67static const char * const round_names[] = {
  68    [ROUND_EVEN] = "even",
  69    [ROUND_ZERO] = "zero",
  70    [ROUND_DOWN] = "down",
  71    [ROUND_UP] = "up",
  72    [ROUND_TIEAWAY] = "tieaway",
  73};
  74
  75enum tester {
  76    TESTER_SOFT,
  77    TESTER_HOST,
  78    TESTER_MAX_NR,
  79};
  80
  81static const char * const tester_names[] = {
  82    [TESTER_SOFT] = "soft",
  83    [TESTER_HOST] = "host",
  84    [TESTER_MAX_NR] = NULL,
  85};
  86
/*
 * One operand or result, viewed through whichever member matches the
 * precision being benchmarked.
 */
union fp {
    float f;        /* used for PREC_SINGLE */
    double d;       /* used for PREC_DOUBLE */
    float32 f32;    /* used for PREC_FLOAT32 */
    float64 f64;    /* used for PREC_FLOAT64 */
    uint64_t u64;   /* integer view; comparison results are stored here */
};
  94
/* Only forward-declared; no definition appears in the code shown here. */
struct op_state;

/*
 * Signatures for per-operation callbacks returning a host float/double.
 * NOTE(review): neither typedef, nor union fp_func below, is referenced
 * elsewhere in the code shown here - confirm before relying on them.
 */
typedef float (*float_func_t)(const struct op_state *s);
typedef double (*double_func_t)(const struct op_state *s);

union fp_func {
    float_func_t float_func;
    double_func_t double_func;
};

/* Type of the generated bench_<op>_<precision>() entry points. */
typedef void (*bench_func_t)(void);

/* NOTE(review): not referenced elsewhere in the code shown here. */
struct op_desc {
    const char * const name;
};
 110
/* Benchmark run time, in seconds, when -d is not given. */
#define DEFAULT_DURATION_SECS 1

/* Current state of each operand's random stream; see update_random_ops(). */
static uint64_t random_ops[MAX_OPERANDS] = {
    SEED_A, SEED_B, SEED_C,
};
/* softfloat status handed to every softfloat call; configured by parse_args() */
static float_status soft_status;
/* the three selections below are set by parse_args() */
static enum precision precision;
static enum op operation;
static enum tester tester;
/* grows by OPS_PER_ITER for every batch that bench() completes */
static uint64_t n_completed_ops;
/* requested run time in seconds (-d) */
static unsigned int duration = DEFAULT_DURATION_SECS;
/* sum of the get_clock() deltas measured around the timed loops in bench() */
static int64_t ns_elapsed;
/* disable optimizations with volatile */
static volatile union fp res;
 125
 126/*
 127 * From: https://en.wikipedia.org/wiki/Xorshift
 128 * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only
 129 * guaranteed to be >= INT_MAX).
 130 */
 131static uint64_t xorshift64star(uint64_t x)
 132{
 133    x ^= x >> 12; /* a */
 134    x ^= x << 25; /* b */
 135    x ^= x >> 27; /* c */
 136    return x * UINT64_C(2685821657736338717);
 137}
 138
 139static void update_random_ops(int n_ops, enum precision prec)
 140{
 141    int i;
 142
 143    for (i = 0; i < n_ops; i++) {
 144        uint64_t r = random_ops[i];
 145
 146        switch (prec) {
 147        case PREC_SINGLE:
 148        case PREC_FLOAT32:
 149            do {
 150                r = xorshift64star(r);
 151            } while (!float32_is_normal(r));
 152            break;
 153        case PREC_DOUBLE:
 154        case PREC_FLOAT64:
 155            do {
 156                r = xorshift64star(r);
 157            } while (!float64_is_normal(r));
 158            break;
 159        default:
 160            g_assert_not_reached();
 161        }
 162        random_ops[i] = r;
 163    }
 164}
 165
 166static void fill_random(union fp *ops, int n_ops, enum precision prec,
 167                        bool no_neg)
 168{
 169    int i;
 170
 171    for (i = 0; i < n_ops; i++) {
 172        switch (prec) {
 173        case PREC_SINGLE:
 174        case PREC_FLOAT32:
 175            ops[i].f32 = make_float32(random_ops[i]);
 176            if (no_neg && float32_is_neg(ops[i].f32)) {
 177                ops[i].f32 = float32_chs(ops[i].f32);
 178            }
 179            break;
 180        case PREC_DOUBLE:
 181        case PREC_FLOAT64:
 182            ops[i].f64 = make_float64(random_ops[i]);
 183            if (no_neg && float64_is_neg(ops[i].f64)) {
 184                ops[i].f64 = float64_chs(ops[i].f64);
 185            }
 186            break;
 187        default:
 188            g_assert_not_reached();
 189        }
 190    }
 191}
 192
 193/*
 194 * The main benchmark function. Instead of (ab)using macros, we rely
 195 * on the compiler to unfold this at compile-time.
 196 */
 197static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
 198{
 199    int64_t tf = get_clock() + duration * 1000000000LL;
 200
 201    while (get_clock() < tf) {
 202        union fp ops[MAX_OPERANDS];
 203        int64_t t0;
 204        int i;
 205
 206        update_random_ops(n_ops, prec);
 207        switch (prec) {
 208        case PREC_SINGLE:
 209            fill_random(ops, n_ops, prec, no_neg);
 210            t0 = get_clock();
 211            for (i = 0; i < OPS_PER_ITER; i++) {
 212                float a = ops[0].f;
 213                float b = ops[1].f;
 214                float c = ops[2].f;
 215
 216                switch (op) {
 217                case OP_ADD:
 218                    res.f = a + b;
 219                    break;
 220                case OP_SUB:
 221                    res.f = a - b;
 222                    break;
 223                case OP_MUL:
 224                    res.f = a * b;
 225                    break;
 226                case OP_DIV:
 227                    res.f = a / b;
 228                    break;
 229                case OP_FMA:
 230                    res.f = fmaf(a, b, c);
 231                    break;
 232                case OP_SQRT:
 233                    res.f = sqrtf(a);
 234                    break;
 235                case OP_CMP:
 236                    res.u64 = isgreater(a, b);
 237                    break;
 238                default:
 239                    g_assert_not_reached();
 240                }
 241            }
 242            break;
 243        case PREC_DOUBLE:
 244            fill_random(ops, n_ops, prec, no_neg);
 245            t0 = get_clock();
 246            for (i = 0; i < OPS_PER_ITER; i++) {
 247                double a = ops[0].d;
 248                double b = ops[1].d;
 249                double c = ops[2].d;
 250
 251                switch (op) {
 252                case OP_ADD:
 253                    res.d = a + b;
 254                    break;
 255                case OP_SUB:
 256                    res.d = a - b;
 257                    break;
 258                case OP_MUL:
 259                    res.d = a * b;
 260                    break;
 261                case OP_DIV:
 262                    res.d = a / b;
 263                    break;
 264                case OP_FMA:
 265                    res.d = fma(a, b, c);
 266                    break;
 267                case OP_SQRT:
 268                    res.d = sqrt(a);
 269                    break;
 270                case OP_CMP:
 271                    res.u64 = isgreater(a, b);
 272                    break;
 273                default:
 274                    g_assert_not_reached();
 275                }
 276            }
 277            break;
 278        case PREC_FLOAT32:
 279            fill_random(ops, n_ops, prec, no_neg);
 280            t0 = get_clock();
 281            for (i = 0; i < OPS_PER_ITER; i++) {
 282                float32 a = ops[0].f32;
 283                float32 b = ops[1].f32;
 284                float32 c = ops[2].f32;
 285
 286                switch (op) {
 287                case OP_ADD:
 288                    res.f32 = float32_add(a, b, &soft_status);
 289                    break;
 290                case OP_SUB:
 291                    res.f32 = float32_sub(a, b, &soft_status);
 292                    break;
 293                case OP_MUL:
 294                    res.f = float32_mul(a, b, &soft_status);
 295                    break;
 296                case OP_DIV:
 297                    res.f32 = float32_div(a, b, &soft_status);
 298                    break;
 299                case OP_FMA:
 300                    res.f32 = float32_muladd(a, b, c, 0, &soft_status);
 301                    break;
 302                case OP_SQRT:
 303                    res.f32 = float32_sqrt(a, &soft_status);
 304                    break;
 305                case OP_CMP:
 306                    res.u64 = float32_compare_quiet(a, b, &soft_status);
 307                    break;
 308                default:
 309                    g_assert_not_reached();
 310                }
 311            }
 312            break;
 313        case PREC_FLOAT64:
 314            fill_random(ops, n_ops, prec, no_neg);
 315            t0 = get_clock();
 316            for (i = 0; i < OPS_PER_ITER; i++) {
 317                float64 a = ops[0].f64;
 318                float64 b = ops[1].f64;
 319                float64 c = ops[2].f64;
 320
 321                switch (op) {
 322                case OP_ADD:
 323                    res.f64 = float64_add(a, b, &soft_status);
 324                    break;
 325                case OP_SUB:
 326                    res.f64 = float64_sub(a, b, &soft_status);
 327                    break;
 328                case OP_MUL:
 329                    res.f = float64_mul(a, b, &soft_status);
 330                    break;
 331                case OP_DIV:
 332                    res.f64 = float64_div(a, b, &soft_status);
 333                    break;
 334                case OP_FMA:
 335                    res.f64 = float64_muladd(a, b, c, 0, &soft_status);
 336                    break;
 337                case OP_SQRT:
 338                    res.f64 = float64_sqrt(a, &soft_status);
 339                    break;
 340                case OP_CMP:
 341                    res.u64 = float64_compare_quiet(a, b, &soft_status);
 342                    break;
 343                default:
 344                    g_assert_not_reached();
 345                }
 346            }
 347            break;
 348        default:
 349            g_assert_not_reached();
 350        }
 351        ns_elapsed += get_clock() - t0;
 352        n_completed_ops += OPS_PER_ITER;
 353    }
 354}
 355
 356#define GEN_BENCH(name, type, prec, op, n_ops)          \
 357    static void __attribute__((flatten)) name(void)     \
 358    {                                                   \
 359        bench(prec, op, n_ops, false);                  \
 360    }
 361
 362#define GEN_BENCH_NO_NEG(name, type, prec, op, n_ops)   \
 363    static void __attribute__((flatten)) name(void)     \
 364    {                                                   \
 365        bench(prec, op, n_ops, true);                   \
 366    }
 367
 368#define GEN_BENCH_ALL_TYPES(opname, op, n_ops)                          \
 369    GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \
 370    GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \
 371    GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \
 372    GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops)
 373
 374GEN_BENCH_ALL_TYPES(add, OP_ADD, 2)
 375GEN_BENCH_ALL_TYPES(sub, OP_SUB, 2)
 376GEN_BENCH_ALL_TYPES(mul, OP_MUL, 2)
 377GEN_BENCH_ALL_TYPES(div, OP_DIV, 2)
 378GEN_BENCH_ALL_TYPES(fma, OP_FMA, 3)
 379GEN_BENCH_ALL_TYPES(cmp, OP_CMP, 2)
 380#undef GEN_BENCH_ALL_TYPES
 381
 382#define GEN_BENCH_ALL_TYPES_NO_NEG(name, op, n)                         \
 383    GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \
 384    GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \
 385    GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \
 386    GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n)
 387
 388GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
 389#undef GEN_BENCH_ALL_TYPES_NO_NEG
 390
 391#undef GEN_BENCH_NO_NEG
 392#undef GEN_BENCH
 393
 394#define GEN_BENCH_FUNCS(opname, op)                             \
 395    [op] = {                                                    \
 396        [PREC_SINGLE]    = bench_ ## opname ## _float,          \
 397        [PREC_DOUBLE]    = bench_ ## opname ## _double,         \
 398        [PREC_FLOAT32]   = bench_ ## opname ## _float32,        \
 399        [PREC_FLOAT64]   = bench_ ## opname ## _float64,        \
 400    }
 401
 402static const bench_func_t bench_funcs[OP_MAX_NR][PREC_MAX_NR] = {
 403    GEN_BENCH_FUNCS(add, OP_ADD),
 404    GEN_BENCH_FUNCS(sub, OP_SUB),
 405    GEN_BENCH_FUNCS(mul, OP_MUL),
 406    GEN_BENCH_FUNCS(div, OP_DIV),
 407    GEN_BENCH_FUNCS(fma, OP_FMA),
 408    GEN_BENCH_FUNCS(sqrt, OP_SQRT),
 409    GEN_BENCH_FUNCS(cmp, OP_CMP),
 410};
 411
 412#undef GEN_BENCH_FUNCS
 413
 414static void run_bench(void)
 415{
 416    bench_func_t f;
 417
 418    f = bench_funcs[operation][precision];
 419    g_assert(f);
 420    f();
 421}
 422
 423/* @arr must be NULL-terminated */
 424static int find_name(const char * const *arr, const char *name)
 425{
 426    int i;
 427
 428    for (i = 0; arr[i] != NULL; i++) {
 429        if (strcmp(name, arr[i]) == 0) {
 430            return i;
 431        }
 432    }
 433    return -1;
 434}
 435
 436static void usage_complete(int argc, char *argv[])
 437{
 438    gchar *op_list = g_strjoinv(", ", (gchar **)op_names);
 439    gchar *tester_list = g_strjoinv(", ", (gchar **)tester_names);
 440
 441    fprintf(stderr, "Usage: %s [options]\n", argv[0]);
 442    fprintf(stderr, "options:\n");
 443    fprintf(stderr, " -d = duration, in seconds. Default: %d\n",
 444            DEFAULT_DURATION_SECS);
 445    fprintf(stderr, " -h = show this help message.\n");
 446    fprintf(stderr, " -o = floating point operation (%s). Default: %s\n",
 447            op_list, op_names[0]);
 448    fprintf(stderr, " -p = floating point precision (single, double). "
 449            "Default: single\n");
 450    fprintf(stderr, " -r = rounding mode (even, zero, down, up, tieaway). "
 451            "Default: even\n");
 452    fprintf(stderr, " -t = tester (%s). Default: %s\n",
 453            tester_list, tester_names[0]);
 454    fprintf(stderr, " -z = flush inputs to zero (soft tester only). "
 455            "Default: disabled\n");
 456    fprintf(stderr, " -Z = flush output to zero (soft tester only). "
 457            "Default: disabled\n");
 458
 459    g_free(tester_list);
 460    g_free(op_list);
 461}
 462
 463static int round_name_to_mode(const char *name)
 464{
 465    int i;
 466
 467    for (i = 0; i < N_ROUND_MODES; i++) {
 468        if (!strcmp(round_names[i], name)) {
 469            return i;
 470        }
 471    }
 472    return -1;
 473}
 474
 475static void QEMU_NORETURN die_host_rounding(enum rounding rounding)
 476{
 477    fprintf(stderr, "fatal: '%s' rounding not supported on this host\n",
 478            round_names[rounding]);
 479    exit(EXIT_FAILURE);
 480}
 481
 482static void set_host_precision(enum rounding rounding)
 483{
 484    int rhost;
 485
 486    switch (rounding) {
 487    case ROUND_EVEN:
 488        rhost = FE_TONEAREST;
 489        break;
 490    case ROUND_ZERO:
 491        rhost = FE_TOWARDZERO;
 492        break;
 493    case ROUND_DOWN:
 494        rhost = FE_DOWNWARD;
 495        break;
 496    case ROUND_UP:
 497        rhost = FE_UPWARD;
 498        break;
 499    case ROUND_TIEAWAY:
 500        die_host_rounding(rounding);
 501        return;
 502    default:
 503        g_assert_not_reached();
 504    }
 505
 506    if (fesetround(rhost)) {
 507        die_host_rounding(rounding);
 508    }
 509}
 510
 511static void set_soft_precision(enum rounding rounding)
 512{
 513    signed char mode;
 514
 515    switch (rounding) {
 516    case ROUND_EVEN:
 517        mode = float_round_nearest_even;
 518        break;
 519    case ROUND_ZERO:
 520        mode = float_round_to_zero;
 521        break;
 522    case ROUND_DOWN:
 523        mode = float_round_down;
 524        break;
 525    case ROUND_UP:
 526        mode = float_round_up;
 527        break;
 528    case ROUND_TIEAWAY:
 529        mode = float_round_ties_away;
 530        break;
 531    default:
 532        g_assert_not_reached();
 533    }
 534    soft_status.float_rounding_mode = mode;
 535}
 536
 537static void parse_args(int argc, char *argv[])
 538{
 539    int c;
 540    int val;
 541    int rounding = ROUND_EVEN;
 542
 543    for (;;) {
 544        c = getopt(argc, argv, "d:ho:p:r:t:zZ");
 545        if (c < 0) {
 546            break;
 547        }
 548        switch (c) {
 549        case 'd':
 550            duration = atoi(optarg);
 551            break;
 552        case 'h':
 553            usage_complete(argc, argv);
 554            exit(EXIT_SUCCESS);
 555        case 'o':
 556            val = find_name(op_names, optarg);
 557            if (val < 0) {
 558                fprintf(stderr, "Unsupported op '%s'\n", optarg);
 559                exit(EXIT_FAILURE);
 560            }
 561            operation = val;
 562            break;
 563        case 'p':
 564            if (!strcmp(optarg, "single")) {
 565                precision = PREC_SINGLE;
 566            } else if (!strcmp(optarg, "double")) {
 567                precision = PREC_DOUBLE;
 568            } else {
 569                fprintf(stderr, "Unsupported precision '%s'\n", optarg);
 570                exit(EXIT_FAILURE);
 571            }
 572            break;
 573        case 'r':
 574            rounding = round_name_to_mode(optarg);
 575            if (rounding < 0) {
 576                fprintf(stderr, "fatal: invalid rounding mode '%s'\n", optarg);
 577                exit(EXIT_FAILURE);
 578            }
 579            break;
 580        case 't':
 581            val = find_name(tester_names, optarg);
 582            if (val < 0) {
 583                fprintf(stderr, "Unsupported tester '%s'\n", optarg);
 584                exit(EXIT_FAILURE);
 585            }
 586            tester = val;
 587            break;
 588        case 'z':
 589            soft_status.flush_inputs_to_zero = 1;
 590            break;
 591        case 'Z':
 592            soft_status.flush_to_zero = 1;
 593            break;
 594        }
 595    }
 596
 597    /* set precision and rounding mode based on the tester */
 598    switch (tester) {
 599    case TESTER_HOST:
 600        set_host_precision(rounding);
 601        break;
 602    case TESTER_SOFT:
 603        set_soft_precision(rounding);
 604        switch (precision) {
 605        case PREC_SINGLE:
 606            precision = PREC_FLOAT32;
 607            break;
 608        case PREC_DOUBLE:
 609            precision = PREC_FLOAT64;
 610            break;
 611        default:
 612            g_assert_not_reached();
 613        }
 614        break;
 615    default:
 616        g_assert_not_reached();
 617    }
 618}
 619
 620static void pr_stats(void)
 621{
 622    printf("%.2f MFlops\n", (double)n_completed_ops / ns_elapsed * 1e3);
 623}
 624
 625int main(int argc, char *argv[])
 626{
 627    parse_args(argc, argv);
 628    run_bench();
 629    pr_stats();
 630    return 0;
 631}
 632