1
2
3
4
5
6
7
8
9#ifndef HW_POISON_H
10#error Must define HW_POISON_H to work around TARGET_* poisoning
11#endif
12
13#include "qemu/osdep.h"
14#include <math.h>
15#include <fenv.h>
16#include "qemu/timer.h"
17#include "fpu/softfloat.h"
18
19
/* Number of operations executed inside each timed inner loop of bench(). */
#define OPS_PER_ITER 50000

/* Size of the operand arrays; bench() loads up to three operands (a, b, c). */
#define MAX_OPERANDS 3

/* Initial values of random_ops[], one per operand slot. */
#define SEED_A 0xdeadfacedeadface
#define SEED_B 0xbadc0feebadc0fee
#define SEED_C 0xbeefdeadbeefdead
27
/*
 * Operations that can be benchmarked.  The values index op_names[] and the
 * first dimension of bench_funcs[].
 */
enum op {
    OP_ADD,
    OP_SUB,
    OP_MUL,
    OP_DIV,
    OP_FMA,
    OP_SQRT,
    OP_CMP,
    OP_MAX_NR, /* number of operations; not an operation itself */
};
38
/*
 * Command-line names of the operations, indexed by enum op.  The table is
 * NULL-terminated (at OP_MAX_NR) because find_name() and g_strjoinv() walk
 * it until they hit NULL.
 */
static const char * const op_names[] = {
    [OP_ADD] = "add",
    [OP_SUB] = "sub",
    [OP_MUL] = "mul",
    [OP_DIV] = "div",
    [OP_FMA] = "mulAdd",
    [OP_SQRT] = "sqrt",
    [OP_CMP] = "cmp",
    [OP_MAX_NR] = NULL,
};
49
/*
 * Operand type used by bench().  PREC_SINGLE/PREC_DOUBLE run on the host's
 * native float/double; PREC_FLOAT32/PREC_FLOAT64 run through the softfloat
 * float32/float64 routines.  The values index the second dimension of
 * bench_funcs[].
 */
enum precision {
    PREC_SINGLE,
    PREC_DOUBLE,
    PREC_FLOAT32,
    PREC_FLOAT64,
    PREC_MAX_NR, /* number of precisions; not a precision itself */
};
57
/*
 * Rounding modes selectable with -r.  The values index round_names[] and are
 * translated to host or softfloat modes by set_host_precision() and
 * set_soft_precision().
 */
enum rounding {
    ROUND_EVEN,
    ROUND_ZERO,
    ROUND_DOWN,
    ROUND_UP,
    ROUND_TIEAWAY,
    N_ROUND_MODES, /* number of rounding modes; not a mode itself */
};
66
/*
 * Command-line names of the rounding modes, indexed by enum rounding.
 * Unlike op_names[] and tester_names[] this table has no NULL terminator;
 * it is iterated with N_ROUND_MODES as the bound.
 */
static const char * const round_names[] = {
    [ROUND_EVEN] = "even",
    [ROUND_ZERO] = "zero",
    [ROUND_DOWN] = "down",
    [ROUND_UP] = "up",
    [ROUND_TIEAWAY] = "tieaway",
};
74
/*
 * Which implementation is benchmarked: softfloat (TESTER_SOFT) or the host's
 * native floating point (TESTER_HOST).  The values index tester_names[].
 */
enum tester {
    TESTER_SOFT,
    TESTER_HOST,
    TESTER_MAX_NR, /* number of testers; not a tester itself */
};
80
/*
 * Command-line names of the testers, indexed by enum tester.  NULL-terminated
 * (at TESTER_MAX_NR) so find_name() and g_strjoinv() can walk it.
 */
static const char * const tester_names[] = {
    [TESTER_SOFT] = "soft",
    [TESTER_HOST] = "host",
    [TESTER_MAX_NR] = NULL,
};
86
/*
 * One operand or result, viewable as a host float/double, a softfloat
 * float32/float64, or a raw 64-bit integer.
 */
union fp {
    float f;
    double d;
    float32 f32;
    float64 f64;
    uint64_t u64;
};
94
/* Only forward-declared here; no definition is visible in this file. */
struct op_state;

/*
 * NOTE(review): float_func_t, double_func_t, union fp_func and struct op_desc
 * are not referenced anywhere else in the visible source -- possibly
 * leftovers; confirm before removing.
 */
typedef float (*float_func_t)(const struct op_state *s);
typedef double (*double_func_t)(const struct op_state *s);

union fp_func {
    float_func_t float_func;
    double_func_t double_func;
};

/* Signature of the generated bench_<op>_<type>() entry points. */
typedef void (*bench_func_t)(void);

struct op_desc {
    const char * const name;
};
110
/* Benchmark duration, in seconds, used when -d is not given. */
#define DEFAULT_DURATION_SECS 1

/*
 * Current state of the per-operand pseudo-random streams; advanced by
 * update_random_ops() and converted to operands by fill_random().
 */
static uint64_t random_ops[MAX_OPERANDS] = {
    SEED_A, SEED_B, SEED_C,
};
/* softfloat status (rounding mode, flush-to-zero flags) used by bench(). */
static float_status soft_status;
/* Selections made by parse_args(). */
static enum precision precision;
static enum op operation;
static enum tester tester;
/* Running totals accumulated by bench() and reported by pr_stats(). */
static uint64_t n_completed_ops;
static unsigned int duration = DEFAULT_DURATION_SECS;
static int64_t ns_elapsed;

/*
 * Sink for every benchmarked result.  It is volatile, so the compiler must
 * actually perform each store made in the timed loops.
 */
static volatile union fp res;
125
126
127
128
129
130
/*
 * Advance a xorshift64* pseudo-random generator by one step.
 *
 * @x: current generator state.
 *
 * Returns the scrambled state multiplied by the generator's constant.  A
 * state of 0 maps to 0.
 */
static uint64_t xorshift64star(uint64_t x)
{
    uint64_t state = x;

    state = state ^ (state >> 12);
    state = state ^ (state << 25);
    state = state ^ (state >> 27);

    return UINT64_C(2685821657736338717) * state;
}
138
139static void update_random_ops(int n_ops, enum precision prec)
140{
141 int i;
142
143 for (i = 0; i < n_ops; i++) {
144 uint64_t r = random_ops[i];
145
146 switch (prec) {
147 case PREC_SINGLE:
148 case PREC_FLOAT32:
149 do {
150 r = xorshift64star(r);
151 } while (!float32_is_normal(r));
152 break;
153 case PREC_DOUBLE:
154 case PREC_FLOAT64:
155 do {
156 r = xorshift64star(r);
157 } while (!float64_is_normal(r));
158 break;
159 default:
160 g_assert_not_reached();
161 }
162 random_ops[i] = r;
163 }
164}
165
166static void fill_random(union fp *ops, int n_ops, enum precision prec,
167 bool no_neg)
168{
169 int i;
170
171 for (i = 0; i < n_ops; i++) {
172 switch (prec) {
173 case PREC_SINGLE:
174 case PREC_FLOAT32:
175 ops[i].f32 = make_float32(random_ops[i]);
176 if (no_neg && float32_is_neg(ops[i].f32)) {
177 ops[i].f32 = float32_chs(ops[i].f32);
178 }
179 break;
180 case PREC_DOUBLE:
181 case PREC_FLOAT64:
182 ops[i].f64 = make_float64(random_ops[i]);
183 if (no_neg && float64_is_neg(ops[i].f64)) {
184 ops[i].f64 = float64_chs(ops[i].f64);
185 }
186 break;
187 default:
188 g_assert_not_reached();
189 }
190 }
191}
192
193
194
195
196
197static void bench(enum precision prec, enum op op, int n_ops, bool no_neg)
198{
199 int64_t tf = get_clock() + duration * 1000000000LL;
200
201 while (get_clock() < tf) {
202 union fp ops[MAX_OPERANDS];
203 int64_t t0;
204 int i;
205
206 update_random_ops(n_ops, prec);
207 switch (prec) {
208 case PREC_SINGLE:
209 fill_random(ops, n_ops, prec, no_neg);
210 t0 = get_clock();
211 for (i = 0; i < OPS_PER_ITER; i++) {
212 float a = ops[0].f;
213 float b = ops[1].f;
214 float c = ops[2].f;
215
216 switch (op) {
217 case OP_ADD:
218 res.f = a + b;
219 break;
220 case OP_SUB:
221 res.f = a - b;
222 break;
223 case OP_MUL:
224 res.f = a * b;
225 break;
226 case OP_DIV:
227 res.f = a / b;
228 break;
229 case OP_FMA:
230 res.f = fmaf(a, b, c);
231 break;
232 case OP_SQRT:
233 res.f = sqrtf(a);
234 break;
235 case OP_CMP:
236 res.u64 = isgreater(a, b);
237 break;
238 default:
239 g_assert_not_reached();
240 }
241 }
242 break;
243 case PREC_DOUBLE:
244 fill_random(ops, n_ops, prec, no_neg);
245 t0 = get_clock();
246 for (i = 0; i < OPS_PER_ITER; i++) {
247 double a = ops[0].d;
248 double b = ops[1].d;
249 double c = ops[2].d;
250
251 switch (op) {
252 case OP_ADD:
253 res.d = a + b;
254 break;
255 case OP_SUB:
256 res.d = a - b;
257 break;
258 case OP_MUL:
259 res.d = a * b;
260 break;
261 case OP_DIV:
262 res.d = a / b;
263 break;
264 case OP_FMA:
265 res.d = fma(a, b, c);
266 break;
267 case OP_SQRT:
268 res.d = sqrt(a);
269 break;
270 case OP_CMP:
271 res.u64 = isgreater(a, b);
272 break;
273 default:
274 g_assert_not_reached();
275 }
276 }
277 break;
278 case PREC_FLOAT32:
279 fill_random(ops, n_ops, prec, no_neg);
280 t0 = get_clock();
281 for (i = 0; i < OPS_PER_ITER; i++) {
282 float32 a = ops[0].f32;
283 float32 b = ops[1].f32;
284 float32 c = ops[2].f32;
285
286 switch (op) {
287 case OP_ADD:
288 res.f32 = float32_add(a, b, &soft_status);
289 break;
290 case OP_SUB:
291 res.f32 = float32_sub(a, b, &soft_status);
292 break;
293 case OP_MUL:
294 res.f = float32_mul(a, b, &soft_status);
295 break;
296 case OP_DIV:
297 res.f32 = float32_div(a, b, &soft_status);
298 break;
299 case OP_FMA:
300 res.f32 = float32_muladd(a, b, c, 0, &soft_status);
301 break;
302 case OP_SQRT:
303 res.f32 = float32_sqrt(a, &soft_status);
304 break;
305 case OP_CMP:
306 res.u64 = float32_compare_quiet(a, b, &soft_status);
307 break;
308 default:
309 g_assert_not_reached();
310 }
311 }
312 break;
313 case PREC_FLOAT64:
314 fill_random(ops, n_ops, prec, no_neg);
315 t0 = get_clock();
316 for (i = 0; i < OPS_PER_ITER; i++) {
317 float64 a = ops[0].f64;
318 float64 b = ops[1].f64;
319 float64 c = ops[2].f64;
320
321 switch (op) {
322 case OP_ADD:
323 res.f64 = float64_add(a, b, &soft_status);
324 break;
325 case OP_SUB:
326 res.f64 = float64_sub(a, b, &soft_status);
327 break;
328 case OP_MUL:
329 res.f = float64_mul(a, b, &soft_status);
330 break;
331 case OP_DIV:
332 res.f64 = float64_div(a, b, &soft_status);
333 break;
334 case OP_FMA:
335 res.f64 = float64_muladd(a, b, c, 0, &soft_status);
336 break;
337 case OP_SQRT:
338 res.f64 = float64_sqrt(a, &soft_status);
339 break;
340 case OP_CMP:
341 res.u64 = float64_compare_quiet(a, b, &soft_status);
342 break;
343 default:
344 g_assert_not_reached();
345 }
346 }
347 break;
348 default:
349 g_assert_not_reached();
350 }
351 ns_elapsed += get_clock() - t0;
352 n_completed_ops += OPS_PER_ITER;
353 }
354}
355
/*
 * Define a void(void) function @name that calls bench(@prec, @op, @n_ops)
 * with no_neg == false.  The @type argument is not used by the expansion.
 * The function carries the compiler's "flatten" attribute.
 */
#define GEN_BENCH(name, type, prec, op, n_ops)                          \
    static void __attribute__((flatten)) name(void)                     \
    {                                                                   \
        bench(prec, op, n_ops, false);                                  \
    }

/* Same as GEN_BENCH, but calls bench() with no_neg == true. */
#define GEN_BENCH_NO_NEG(name, type, prec, op, n_ops)                   \
    static void __attribute__((flatten)) name(void)                     \
    {                                                                   \
        bench(prec, op, n_ops, true);                                   \
    }
367
/*
 * Instantiate GEN_BENCH for all four precisions, producing
 * bench_<opname>_float, _double, _float32 and _float64.
 */
#define GEN_BENCH_ALL_TYPES(opname, op, n_ops)                          \
    GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \
    GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \
    GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \
    GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops)

/* The last argument is the number of operands the operation consumes. */
GEN_BENCH_ALL_TYPES(add, OP_ADD, 2)
GEN_BENCH_ALL_TYPES(sub, OP_SUB, 2)
GEN_BENCH_ALL_TYPES(mul, OP_MUL, 2)
GEN_BENCH_ALL_TYPES(div, OP_DIV, 2)
GEN_BENCH_ALL_TYPES(fma, OP_FMA, 3)
GEN_BENCH_ALL_TYPES(cmp, OP_CMP, 2)
#undef GEN_BENCH_ALL_TYPES
381
/*
 * Instantiate GEN_BENCH_NO_NEG for all four precisions.  Used for sqrt,
 * whose single operand is forced non-negative by fill_random().
 */
#define GEN_BENCH_ALL_TYPES_NO_NEG(name, op, n)                         \
    GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \
    GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \
    GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \
    GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n)

GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1)
#undef GEN_BENCH_ALL_TYPES_NO_NEG

/* The generator macros are not needed past this point. */
#undef GEN_BENCH_NO_NEG
#undef GEN_BENCH
393
/*
 * Expand to one row of bench_funcs[]: the four bench_<opname>_<type>
 * functions for @op, indexed by precision.
 */
#define GEN_BENCH_FUNCS(opname, op)                             \
    [op] = {                                                    \
        [PREC_SINGLE]  = bench_ ## opname ## _float,            \
        [PREC_DOUBLE]  = bench_ ## opname ## _double,           \
        [PREC_FLOAT32] = bench_ ## opname ## _float32,          \
        [PREC_FLOAT64] = bench_ ## opname ## _float64,          \
    }

/* Dispatch table used by run_bench(): bench_funcs[operation][precision]. */
static const bench_func_t bench_funcs[OP_MAX_NR][PREC_MAX_NR] = {
    GEN_BENCH_FUNCS(add, OP_ADD),
    GEN_BENCH_FUNCS(sub, OP_SUB),
    GEN_BENCH_FUNCS(mul, OP_MUL),
    GEN_BENCH_FUNCS(div, OP_DIV),
    GEN_BENCH_FUNCS(fma, OP_FMA),
    GEN_BENCH_FUNCS(sqrt, OP_SQRT),
    GEN_BENCH_FUNCS(cmp, OP_CMP),
};

#undef GEN_BENCH_FUNCS
413
414static void run_bench(void)
415{
416 bench_func_t f;
417
418 f = bench_funcs[operation][precision];
419 g_assert(f);
420 f();
421}
422
423
/*
 * Search a NULL-terminated array of strings for an exact match.
 *
 * @arr:  array of strings; the end is marked by a NULL entry.
 * @name: string to look for.
 *
 * Returns the index of the first entry equal to @name, or -1 if none is.
 */
static int find_name(const char * const *arr, const char *name)
{
    const char * const *cur;

    for (cur = arr; *cur; cur++) {
        if (!strcmp(*cur, name)) {
            return (int)(cur - arr);
        }
    }
    return -1;
}
435
436static void usage_complete(int argc, char *argv[])
437{
438 gchar *op_list = g_strjoinv(", ", (gchar **)op_names);
439 gchar *tester_list = g_strjoinv(", ", (gchar **)tester_names);
440
441 fprintf(stderr, "Usage: %s [options]\n", argv[0]);
442 fprintf(stderr, "options:\n");
443 fprintf(stderr, " -d = duration, in seconds. Default: %d\n",
444 DEFAULT_DURATION_SECS);
445 fprintf(stderr, " -h = show this help message.\n");
446 fprintf(stderr, " -o = floating point operation (%s). Default: %s\n",
447 op_list, op_names[0]);
448 fprintf(stderr, " -p = floating point precision (single, double). "
449 "Default: single\n");
450 fprintf(stderr, " -r = rounding mode (even, zero, down, up, tieaway). "
451 "Default: even\n");
452 fprintf(stderr, " -t = tester (%s). Default: %s\n",
453 tester_list, tester_names[0]);
454 fprintf(stderr, " -z = flush inputs to zero (soft tester only). "
455 "Default: disabled\n");
456 fprintf(stderr, " -Z = flush output to zero (soft tester only). "
457 "Default: disabled\n");
458
459 g_free(tester_list);
460 g_free(op_list);
461}
462
463static int round_name_to_mode(const char *name)
464{
465 int i;
466
467 for (i = 0; i < N_ROUND_MODES; i++) {
468 if (!strcmp(round_names[i], name)) {
469 return i;
470 }
471 }
472 return -1;
473}
474
475static void QEMU_NORETURN die_host_rounding(enum rounding rounding)
476{
477 fprintf(stderr, "fatal: '%s' rounding not supported on this host\n",
478 round_names[rounding]);
479 exit(EXIT_FAILURE);
480}
481
482static void set_host_precision(enum rounding rounding)
483{
484 int rhost;
485
486 switch (rounding) {
487 case ROUND_EVEN:
488 rhost = FE_TONEAREST;
489 break;
490 case ROUND_ZERO:
491 rhost = FE_TOWARDZERO;
492 break;
493 case ROUND_DOWN:
494 rhost = FE_DOWNWARD;
495 break;
496 case ROUND_UP:
497 rhost = FE_UPWARD;
498 break;
499 case ROUND_TIEAWAY:
500 die_host_rounding(rounding);
501 return;
502 default:
503 g_assert_not_reached();
504 }
505
506 if (fesetround(rhost)) {
507 die_host_rounding(rounding);
508 }
509}
510
511static void set_soft_precision(enum rounding rounding)
512{
513 signed char mode;
514
515 switch (rounding) {
516 case ROUND_EVEN:
517 mode = float_round_nearest_even;
518 break;
519 case ROUND_ZERO:
520 mode = float_round_to_zero;
521 break;
522 case ROUND_DOWN:
523 mode = float_round_down;
524 break;
525 case ROUND_UP:
526 mode = float_round_up;
527 break;
528 case ROUND_TIEAWAY:
529 mode = float_round_ties_away;
530 break;
531 default:
532 g_assert_not_reached();
533 }
534 soft_status.float_rounding_mode = mode;
535}
536
537static void parse_args(int argc, char *argv[])
538{
539 int c;
540 int val;
541 int rounding = ROUND_EVEN;
542
543 for (;;) {
544 c = getopt(argc, argv, "d:ho:p:r:t:zZ");
545 if (c < 0) {
546 break;
547 }
548 switch (c) {
549 case 'd':
550 duration = atoi(optarg);
551 break;
552 case 'h':
553 usage_complete(argc, argv);
554 exit(EXIT_SUCCESS);
555 case 'o':
556 val = find_name(op_names, optarg);
557 if (val < 0) {
558 fprintf(stderr, "Unsupported op '%s'\n", optarg);
559 exit(EXIT_FAILURE);
560 }
561 operation = val;
562 break;
563 case 'p':
564 if (!strcmp(optarg, "single")) {
565 precision = PREC_SINGLE;
566 } else if (!strcmp(optarg, "double")) {
567 precision = PREC_DOUBLE;
568 } else {
569 fprintf(stderr, "Unsupported precision '%s'\n", optarg);
570 exit(EXIT_FAILURE);
571 }
572 break;
573 case 'r':
574 rounding = round_name_to_mode(optarg);
575 if (rounding < 0) {
576 fprintf(stderr, "fatal: invalid rounding mode '%s'\n", optarg);
577 exit(EXIT_FAILURE);
578 }
579 break;
580 case 't':
581 val = find_name(tester_names, optarg);
582 if (val < 0) {
583 fprintf(stderr, "Unsupported tester '%s'\n", optarg);
584 exit(EXIT_FAILURE);
585 }
586 tester = val;
587 break;
588 case 'z':
589 soft_status.flush_inputs_to_zero = 1;
590 break;
591 case 'Z':
592 soft_status.flush_to_zero = 1;
593 break;
594 }
595 }
596
597
598 switch (tester) {
599 case TESTER_HOST:
600 set_host_precision(rounding);
601 break;
602 case TESTER_SOFT:
603 set_soft_precision(rounding);
604 switch (precision) {
605 case PREC_SINGLE:
606 precision = PREC_FLOAT32;
607 break;
608 case PREC_DOUBLE:
609 precision = PREC_FLOAT64;
610 break;
611 default:
612 g_assert_not_reached();
613 }
614 break;
615 default:
616 g_assert_not_reached();
617 }
618}
619
620static void pr_stats(void)
621{
622 printf("%.2f MFlops\n", (double)n_completed_ops / ns_elapsed * 1e3);
623}
624
/*
 * Entry point: configure from the command line, run the selected
 * benchmark, then print the throughput.
 */
int main(int argc, char *argv[])
{
    parse_args(argc, argv);

    run_bench();
    pr_stats();

    return EXIT_SUCCESS;
}
632