qemu/tests/fp/fp-bench.c
<<
>>
Prefs
   1/*
   2 * fp-bench.c - A collection of simple floating point microbenchmarks.
   3 *
   4 * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
   5 *
   6 * License: GNU GPL, version 2 or later.
   7 *   See the COPYING file in the top-level directory.
   8 */
   9#ifndef HW_POISON_H
  10#error Must define HW_POISON_H to work around TARGET_* poisoning
  11#endif
  12
  13#include "qemu/osdep.h"
  14#include <math.h>
  15#include <fenv.h>
  16#include "qemu/timer.h"
  17#include "qemu/int128.h"
  18#include "fpu/softfloat.h"
  19
  20/* amortize the computation of random inputs */
  21#define OPS_PER_ITER     50000
  22
  23#define MAX_OPERANDS 3
  24
  25#define SEED_A 0xdeadfacedeadface
  26#define SEED_B 0xbadc0feebadc0fee
  27#define SEED_C 0xbeefdeadbeefdead
  28
  29enum op {
  30    OP_ADD,
  31    OP_SUB,
  32    OP_MUL,
  33    OP_DIV,
  34    OP_FMA,
  35    OP_SQRT,
  36    OP_CMP,
  37    OP_MAX_NR,
  38};
  39
  40static const char * const op_names[] = {
  41    [OP_ADD] = "add",
  42    [OP_SUB] = "sub",
  43    [OP_MUL] = "mul",
  44    [OP_DIV] = "div",
  45    [OP_FMA] = "mulAdd",
  46    [OP_SQRT] = "sqrt",
  47    [OP_CMP] = "cmp",
  48    [OP_MAX_NR] = NULL,
  49};
  50
  51enum precision {
  52    PREC_SINGLE,
  53    PREC_DOUBLE,
  54    PREC_QUAD,
  55    PREC_FLOAT32,
  56    PREC_FLOAT64,
  57    PREC_FLOAT128,
  58    PREC_MAX_NR,
  59};
  60
  61enum rounding {
  62    ROUND_EVEN,
  63    ROUND_ZERO,
  64    ROUND_DOWN,
  65    ROUND_UP,
  66    ROUND_TIEAWAY,
  67    N_ROUND_MODES,
  68};
  69
  70static const char * const round_names[] = {
  71    [ROUND_EVEN] = "even",
  72    [ROUND_ZERO] = "zero",
  73    [ROUND_DOWN] = "down",
  74    [ROUND_UP] = "up",
  75    [ROUND_TIEAWAY] = "tieaway",
  76};
  77
  78enum tester {
  79    TESTER_SOFT,
  80    TESTER_HOST,
  81    TESTER_MAX_NR,
  82};
  83
  84static const char * const tester_names[] = {
  85    [TESTER_SOFT] = "soft",
  86    [TESTER_HOST] = "host",
  87    [TESTER_MAX_NR] = NULL,
  88};
  89
  90union fp {
  91    float f;
  92    double d;
  93    float32 f32;
  94    float64 f64;
  95    float128 f128;
  96    uint64_t u64;
  97};
  98
  99struct op_state;
 100
 101typedef float (*float_func_t)(const struct op_state *s);
 102typedef double (*double_func_t)(const struct op_state *s);
 103
 104union fp_func {
 105    float_func_t float_func;
 106    double_func_t double_func;
 107};
 108
 109typedef void (*bench_func_t)(void);
 110
 111struct op_desc {
 112    const char * const name;
 113};
 114
 115#define DEFAULT_DURATION_SECS 1
 116
 117static uint64_t random_ops[MAX_OPERANDS] = {
 118    SEED_A, SEED_B, SEED_C,
 119};
 120
 121static float128 random_quad_ops[MAX_OPERANDS] = {
 122    {SEED_A, SEED_B}, {SEED_B, SEED_C}, {SEED_C, SEED_A},
 123};
 124static float_status soft_status;
 125static enum precision precision;
 126static enum op operation;
 127static enum tester tester;
 128static uint64_t n_completed_ops;
 129static unsigned int duration = DEFAULT_DURATION_SECS;
 130static int64_t ns_elapsed;
 131/* disable optimizations with volatile */
 132static volatile union fp res;
 133
 134/*
 135 * From: https://en.wikipedia.org/wiki/Xorshift
 136 * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only
 137 * guaranteed to be >= INT_MAX).
 138 */
 139static uint64_t xorshift64star(uint64_t x)
 140{
 141    x ^= x >> 12; /* a */
 142    x ^= x << 25; /* b */
 143    x ^= x >> 27; /* c */
 144    return x * UINT64_C(2685821657736338717);
 145}
 146
 147static void update_random_ops(int n_ops, enum precision prec)
 148{
 149    int i;
 150
 151    for (i = 0; i < n_ops; i++) {
 152
 153        switch (prec) {
 154        case PREC_SINGLE:
 155        case PREC_FLOAT32:
 156        {
 157            uint64_t r = random_ops[i];
 158            do {
 159                r = xorshift64star(r);
 160            } while (!float32_is_normal(r));
 161            random_ops[i] = r;
 162            break;
 163        }
 164        case PREC_DOUBLE:
 165        case PREC_FLOAT64:
 166        {
 167            uint64_t r = random_ops[i];
 168            do {
 169                r = xorshift64star(r);
 170            } while (!float64_is_normal(r));
 171            random_ops[i] = r;
 172            break;
 173        }
 174        case PREC_QUAD:
 175        case PREC_FLOAT128:
 176        {
 177            float128 r = random_quad_ops[i];
 178            uint64_t hi = r.high;
 179            uint64_t lo = r.low;
 180            do {
 181                hi = xorshift64star(hi);
 182                lo = xorshift64star(lo);
 183                r = make_float128(hi, lo);
 184            } while (!float128_is_normal(r));
 185            random_quad_ops[i] = r;
 186            break;
 187        }
 188        default:
 189            g_assert_not_reached();
 190        }
 191    }
 192}
 193
 194static void fill_random(union fp *ops, int n_ops, enum precision prec,
 195                        bool no_neg)
 196{
 197    int i;
 198
 199    for (i = 0; i < n_ops; i++) {
 200        switch (prec) {
 201        case PREC_SINGLE:
 202        case PREC_FLOAT32:
 203            ops[i].f32 = make_float32(random_ops[i]);
 204            if (no_neg && float32_is_neg(ops[i].f32)) {
 205                ops[i].f32 = float32_chs(ops[i].f32);
 206            }
 207            break;
 208        case PREC_DOUBLE:
 209        case PREC_FLOAT64:
 210            ops[i].f64 = make_float64(random_ops[i]);
 211            if (no_neg && float64_is_neg(ops[i].f64)) {
 212                ops[i].f64 = float64_chs(ops[i].f64);
 213            }
 214            break;
 215        case PREC_QUAD:
 216        case PREC_FLOAT128:
 217            ops[i].f128 = random_quad_ops[i];
 218            if (no_neg && float128_is_neg(ops[i].f128)) {
 219                ops[i].f128 = float128_chs(ops[i].f128);
 220            }
 221            break;
 222        default:
 223            g_assert_not_reached();
 224        }
 225    }
 226}
 227
 228/*
 229 * The main benchmark function. Instead of (ab)using macros, we rely
 230 * on the compiler to unfold this at compile-time.
 231 */
 232static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
 233{
 234    int64_t tf = get_clock() + duration * 1000000000LL;
 235
 236    while (get_clock() < tf) {
 237        union fp ops[MAX_OPERANDS];
 238        int64_t t0;
 239        int i;
 240
 241        update_random_ops(n_ops, prec);
 242        switch (prec) {
 243        case PREC_SINGLE:
 244            fill_random(ops, n_ops, prec, no_neg);
 245            t0 = get_clock();
 246            for (i = 0; i < OPS_PER_ITER; i++) {
 247                float a = ops[0].f;
 248                float b = ops[1].f;
 249                float c = ops[2].f;
 250
 251                switch (op) {
 252                case OP_ADD:
 253                    res.f = a + b;
 254                    break;
 255                case OP_SUB:
 256                    res.f = a - b;
 257                    break;
 258                case OP_MUL:
 259                    res.f = a * b;
 260                    break;
 261                case OP_DIV:
 262                    res.f = a / b;
 263                    break;
 264                case OP_FMA:
 265                    res.f = fmaf(a, b, c);
 266                    break;
 267                case OP_SQRT:
 268                    res.f = sqrtf(a);
 269                    break;
 270                case OP_CMP:
 271                    res.u64 = isgreater(a, b);
 272                    break;
 273                default:
 274                    g_assert_not_reached();
 275                }
 276            }
 277            break;
 278        case PREC_DOUBLE:
 279            fill_random(ops, n_ops, prec, no_neg);
 280            t0 = get_clock();
 281            for (i = 0; i < OPS_PER_ITER; i++) {
 282                double a = ops[0].d;
 283                double b = ops[1].d;
 284                double c = ops[2].d;
 285
 286                switch (op) {
 287                case OP_ADD:
 288                    res.d = a + b;
 289                    break;
 290                case OP_SUB:
 291                    res.d = a - b;
 292                    break;
 293                case OP_MUL:
 294                    res.d = a * b;
 295                    break;
 296                case OP_DIV:
 297                    res.d = a / b;
 298                    break;
 299                case OP_FMA:
 300                    res.d = fma(a, b, c);
 301                    break;
 302                case OP_SQRT:
 303                    res.d = sqrt(a);
 304                    break;
 305                case OP_CMP:
 306                    res.u64 = isgreater(a, b);
 307                    break;
 308                default:
 309                    g_assert_not_reached();
 310                }
 311            }
 312            break;
 313        case PREC_FLOAT32:
 314            fill_random(ops, n_ops, prec, no_neg);
 315            t0 = get_clock();
 316            for (i = 0; i < OPS_PER_ITER; i++) {
 317                float32 a = ops[0].f32;
 318                float32 b = ops[1].f32;
 319                float32 c = ops[2].f32;
 320
 321                switch (op) {
 322                case OP_ADD:
 323                    res.f32 = float32_add(a, b, &soft_status);
 324                    break;
 325                case OP_SUB:
 326                    res.f32 = float32_sub(a, b, &soft_status);
 327                    break;
 328                case OP_MUL:
 329                    res.f = float32_mul(a, b, &soft_status);
 330                    break;
 331                case OP_DIV:
 332                    res.f32 = float32_div(a, b, &soft_status);
 333                    break;
 334                case OP_FMA:
 335                    res.f32 = float32_muladd(a, b, c, 0, &soft_status);
 336                    break;
 337                case OP_SQRT:
 338                    res.f32 = float32_sqrt(a, &soft_status);
 339                    break;
 340                case OP_CMP:
 341                    res.u64 = float32_compare_quiet(a, b, &soft_status);
 342                    break;
 343                default:
 344                    g_assert_not_reached();
 345                }
 346            }
 347            break;
 348        case PREC_FLOAT64:
 349            fill_random(ops, n_ops, prec, no_neg);
 350            t0 = get_clock();
 351            for (i = 0; i < OPS_PER_ITER; i++) {
 352                float64 a = ops[0].f64;
 353                float64 b = ops[1].f64;
 354                float64 c = ops[2].f64;
 355
 356                switch (op) {
 357                case OP_ADD:
 358                    res.f64 = float64_add(a, b, &soft_status);
 359                    break;
 360                case OP_SUB:
 361                    res.f64 = float64_sub(a, b, &soft_status);
 362                    break;
 363                case OP_MUL:
 364                    res.f = float64_mul(a, b, &soft_status);
 365                    break;
 366                case OP_DIV:
 367                    res.f64 = float64_div(a, b, &soft_status);
 368                    break;
 369                case OP_FMA:
 370                    res.f64 = float64_muladd(a, b, c, 0, &soft_status);
 371                    break;
 372                case OP_SQRT:
 373                    res.f64 = float64_sqrt(a, &soft_status);
 374                    break;
 375                case OP_CMP:
 376                    res.u64 = float64_compare_quiet(a, b, &soft_status);
 377                    break;
 378                default:
 379                    g_assert_not_reached();
 380                }
 381            }
 382            break;
 383        case PREC_FLOAT128:
 384            fill_random(ops, n_ops, prec, no_neg);
 385            t0 = get_clock();
 386            for (i = 0; i < OPS_PER_ITER; i++) {
 387                float128 a = ops[0].f128;
 388                float128 b = ops[1].f128;
 389                float128 c = ops[2].f128;
 390
 391                switch (op) {
 392                case OP_ADD:
 393                    res.f128 = float128_add(a, b, &soft_status);
 394                    break;
 395                case OP_SUB:
 396                    res.f128 = float128_sub(a, b, &soft_status);
 397                    break;
 398                case OP_MUL:
 399                    res.f128 = float128_mul(a, b, &soft_status);
 400                    break;
 401                case OP_DIV:
 402                    res.f128 = float128_div(a, b, &soft_status);
 403                    break;
 404                case OP_FMA:
 405                    res.f128 = float128_muladd(a, b, c, 0, &soft_status);
 406                    break;
 407                case OP_SQRT:
 408                    res.f128 = float128_sqrt(a, &soft_status);
 409                    break;
 410                case OP_CMP:
 411                    res.u64 = float128_compare_quiet(a, b, &soft_status);
 412                    break;
 413                default:
 414                    g_assert_not_reached();
 415                }
 416            }
 417            break;
 418        default:
 419            g_assert_not_reached();
 420        }
 421        ns_elapsed += get_clock() - t0;
 422        n_completed_ops += OPS_PER_ITER;
 423    }
 424}
 425
 426#define GEN_BENCH(name, type, prec, op, n_ops)          \
 427    static void __attribute__((flatten)) name(void)     \
 428    {                                                   \
 429        bench(prec, op, n_ops, false);                  \
 430    }
 431
 432#define GEN_BENCH_NO_NEG(name, type, prec, op, n_ops)   \
 433    static void __attribute__((flatten)) name(void)     \
 434    {                                                   \
 435        bench(prec, op, n_ops, true);                   \
 436    }
 437
 438#define GEN_BENCH_ALL_TYPES(opname, op, n_ops)                          \
 439    GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \
 440    GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \
 441    GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \
 442    GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops) \
 443    GEN_BENCH(bench_ ## opname ## _float128, float128, PREC_FLOAT128, op, n_ops)
 444
 445GEN_BENCH_ALL_TYPES(add, OP_ADD, 2)
 446GEN_BENCH_ALL_TYPES(sub, OP_SUB, 2)
 447GEN_BENCH_ALL_TYPES(mul, OP_MUL, 2)
 448GEN_BENCH_ALL_TYPES(div, OP_DIV, 2)
 449GEN_BENCH_ALL_TYPES(fma, OP_FMA, 3)
 450GEN_BENCH_ALL_TYPES(cmp, OP_CMP, 2)
 451#undef GEN_BENCH_ALL_TYPES
 452
 453#define GEN_BENCH_ALL_TYPES_NO_NEG(name, op, n)                         \
 454    GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \
 455    GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \
 456    GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \
 457    GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n) \
 458    GEN_BENCH_NO_NEG(bench_ ## name ## _float128, float128, PREC_FLOAT128, op, n)
 459
 460GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
 461#undef GEN_BENCH_ALL_TYPES_NO_NEG
 462
 463#undef GEN_BENCH_NO_NEG
 464#undef GEN_BENCH
 465
 466#define GEN_BENCH_FUNCS(opname, op)                             \
 467    [op] = {                                                    \
 468        [PREC_SINGLE]    = bench_ ## opname ## _float,          \
 469        [PREC_DOUBLE]    = bench_ ## opname ## _double,         \
 470        [PREC_FLOAT32]   = bench_ ## opname ## _float32,        \
 471        [PREC_FLOAT64]   = bench_ ## opname ## _float64,        \
 472        [PREC_FLOAT128]   = bench_ ## opname ## _float128,      \
 473    }
 474
 475static const bench_func_t bench_funcs[OP_MAX_NR][PREC_MAX_NR] = {
 476    GEN_BENCH_FUNCS(add, OP_ADD),
 477    GEN_BENCH_FUNCS(sub, OP_SUB),
 478    GEN_BENCH_FUNCS(mul, OP_MUL),
 479    GEN_BENCH_FUNCS(div, OP_DIV),
 480    GEN_BENCH_FUNCS(fma, OP_FMA),
 481    GEN_BENCH_FUNCS(sqrt, OP_SQRT),
 482    GEN_BENCH_FUNCS(cmp, OP_CMP),
 483};
 484
 485#undef GEN_BENCH_FUNCS
 486
 487static void run_bench(void)
 488{
 489    bench_func_t f;
 490
 491    f = bench_funcs[operation][precision];
 492    g_assert(f);
 493    f();
 494}
 495
 496/* @arr must be NULL-terminated */
 497static int find_name(const char * const *arr, const char *name)
 498{
 499    int i;
 500
 501    for (i = 0; arr[i] != NULL; i++) {
 502        if (strcmp(name, arr[i]) == 0) {
 503            return i;
 504        }
 505    }
 506    return -1;
 507}
 508
 509static void usage_complete(int argc, char *argv[])
 510{
 511    gchar *op_list = g_strjoinv(", ", (gchar **)op_names);
 512    gchar *tester_list = g_strjoinv(", ", (gchar **)tester_names);
 513
 514    fprintf(stderr, "Usage: %s [options]\n", argv[0]);
 515    fprintf(stderr, "options:\n");
 516    fprintf(stderr, " -d = duration, in seconds. Default: %d\n",
 517            DEFAULT_DURATION_SECS);
 518    fprintf(stderr, " -h = show this help message.\n");
 519    fprintf(stderr, " -o = floating point operation (%s). Default: %s\n",
 520            op_list, op_names[0]);
 521    fprintf(stderr, " -p = floating point precision (single, double, quad[soft only]). "
 522            "Default: single\n");
 523    fprintf(stderr, " -r = rounding mode (even, zero, down, up, tieaway). "
 524            "Default: even\n");
 525    fprintf(stderr, " -t = tester (%s). Default: %s\n",
 526            tester_list, tester_names[0]);
 527    fprintf(stderr, " -z = flush inputs to zero (soft tester only). "
 528            "Default: disabled\n");
 529    fprintf(stderr, " -Z = flush output to zero (soft tester only). "
 530            "Default: disabled\n");
 531
 532    g_free(tester_list);
 533    g_free(op_list);
 534}
 535
 536static int round_name_to_mode(const char *name)
 537{
 538    int i;
 539
 540    for (i = 0; i < N_ROUND_MODES; i++) {
 541        if (!strcmp(round_names[i], name)) {
 542            return i;
 543        }
 544    }
 545    return -1;
 546}
 547
 548static G_NORETURN
 549void die_host_rounding(enum rounding rounding)
 550{
 551    fprintf(stderr, "fatal: '%s' rounding not supported on this host\n",
 552            round_names[rounding]);
 553    exit(EXIT_FAILURE);
 554}
 555
 556static void set_host_precision(enum rounding rounding)
 557{
 558    int rhost;
 559
 560    switch (rounding) {
 561    case ROUND_EVEN:
 562        rhost = FE_TONEAREST;
 563        break;
 564    case ROUND_ZERO:
 565        rhost = FE_TOWARDZERO;
 566        break;
 567    case ROUND_DOWN:
 568        rhost = FE_DOWNWARD;
 569        break;
 570    case ROUND_UP:
 571        rhost = FE_UPWARD;
 572        break;
 573    case ROUND_TIEAWAY:
 574        die_host_rounding(rounding);
 575        return;
 576    default:
 577        g_assert_not_reached();
 578    }
 579
 580    if (fesetround(rhost)) {
 581        die_host_rounding(rounding);
 582    }
 583}
 584
 585static void set_soft_precision(enum rounding rounding)
 586{
 587    signed char mode;
 588
 589    switch (rounding) {
 590    case ROUND_EVEN:
 591        mode = float_round_nearest_even;
 592        break;
 593    case ROUND_ZERO:
 594        mode = float_round_to_zero;
 595        break;
 596    case ROUND_DOWN:
 597        mode = float_round_down;
 598        break;
 599    case ROUND_UP:
 600        mode = float_round_up;
 601        break;
 602    case ROUND_TIEAWAY:
 603        mode = float_round_ties_away;
 604        break;
 605    default:
 606        g_assert_not_reached();
 607    }
 608    soft_status.float_rounding_mode = mode;
 609}
 610
 611static void parse_args(int argc, char *argv[])
 612{
 613    int c;
 614    int val;
 615    int rounding = ROUND_EVEN;
 616
 617    for (;;) {
 618        c = getopt(argc, argv, "d:ho:p:r:t:zZ");
 619        if (c < 0) {
 620            break;
 621        }
 622        switch (c) {
 623        case 'd':
 624            duration = atoi(optarg);
 625            break;
 626        case 'h':
 627            usage_complete(argc, argv);
 628            exit(EXIT_SUCCESS);
 629        case 'o':
 630            val = find_name(op_names, optarg);
 631            if (val < 0) {
 632                fprintf(stderr, "Unsupported op '%s'\n", optarg);
 633                exit(EXIT_FAILURE);
 634            }
 635            operation = val;
 636            break;
 637        case 'p':
 638            if (!strcmp(optarg, "single")) {
 639                precision = PREC_SINGLE;
 640            } else if (!strcmp(optarg, "double")) {
 641                precision = PREC_DOUBLE;
 642            } else if (!strcmp(optarg, "quad")) {
 643                precision = PREC_QUAD;
 644            } else {
 645                fprintf(stderr, "Unsupported precision '%s'\n", optarg);
 646                exit(EXIT_FAILURE);
 647            }
 648            break;
 649        case 'r':
 650            rounding = round_name_to_mode(optarg);
 651            if (rounding < 0) {
 652                fprintf(stderr, "fatal: invalid rounding mode '%s'\n", optarg);
 653                exit(EXIT_FAILURE);
 654            }
 655            break;
 656        case 't':
 657            val = find_name(tester_names, optarg);
 658            if (val < 0) {
 659                fprintf(stderr, "Unsupported tester '%s'\n", optarg);
 660                exit(EXIT_FAILURE);
 661            }
 662            tester = val;
 663            break;
 664        case 'z':
 665            soft_status.flush_inputs_to_zero = 1;
 666            break;
 667        case 'Z':
 668            soft_status.flush_to_zero = 1;
 669            break;
 670        }
 671    }
 672
 673    /* set precision and rounding mode based on the tester */
 674    switch (tester) {
 675    case TESTER_HOST:
 676        set_host_precision(rounding);
 677        break;
 678    case TESTER_SOFT:
 679        set_soft_precision(rounding);
 680        switch (precision) {
 681        case PREC_SINGLE:
 682            precision = PREC_FLOAT32;
 683            break;
 684        case PREC_DOUBLE:
 685            precision = PREC_FLOAT64;
 686            break;
 687        case PREC_QUAD:
 688            precision = PREC_FLOAT128;
 689            break;
 690        default:
 691            g_assert_not_reached();
 692        }
 693        break;
 694    default:
 695        g_assert_not_reached();
 696    }
 697}
 698
 699static void pr_stats(void)
 700{
 701    printf("%.2f MFlops\n", (double)n_completed_ops / ns_elapsed * 1e3);
 702}
 703
 704int main(int argc, char *argv[])
 705{
 706    parse_args(argc, argv);
 707    run_bench();
 708    pr_stats();
 709    return 0;
 710}
 711