1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18#include "qemu/osdep.h"
19#include "qemu/int128.h"
20#include "fpu/softfloat.h"
21#include "macros.h"
22#include "fma_emu.h"
23
24#define DF_INF_EXP 0x7ff
25#define DF_BIAS 1023
26#define DF_MANTBITS 52
27#define DF_NAN 0xffffffffffffffffULL
28#define DF_INF 0x7ff0000000000000ULL
29#define DF_MINUS_INF 0xfff0000000000000ULL
30#define DF_MAXF 0x7fefffffffffffffULL
31#define DF_MINUS_MAXF 0xffefffffffffffffULL
32
33#define SF_INF_EXP 0xff
34#define SF_BIAS 127
35#define SF_MANTBITS 23
36#define SF_INF 0x7f800000
37#define SF_MINUS_INF 0xff800000
38#define SF_MAXF 0x7f7fffff
39#define SF_MINUS_MAXF 0xff7fffff
40
41#define HF_INF_EXP 0x1f
42#define HF_BIAS 15
43
44#define WAY_BIG_EXP 4096
45
46typedef union {
47 double f;
48 uint64_t i;
49 struct {
50 uint64_t mant:52;
51 uint64_t exp:11;
52 uint64_t sign:1;
53 };
54} Double;
55
56typedef union {
57 float f;
58 uint32_t i;
59 struct {
60 uint32_t mant:23;
61 uint32_t exp:8;
62 uint32_t sign:1;
63 };
64} Float;
65
66static uint64_t float64_getmant(float64 f64)
67{
68 Double a = { .i = f64 };
69 if (float64_is_normal(f64)) {
70 return a.mant | 1ULL << 52;
71 }
72 if (float64_is_zero(f64)) {
73 return 0;
74 }
75 if (float64_is_denormal(f64)) {
76 return a.mant;
77 }
78 return ~0ULL;
79}
80
81int32_t float64_getexp(float64 f64)
82{
83 Double a = { .i = f64 };
84 if (float64_is_normal(f64)) {
85 return a.exp;
86 }
87 if (float64_is_denormal(f64)) {
88 return a.exp + 1;
89 }
90 return -1;
91}
92
93static uint64_t float32_getmant(float32 f32)
94{
95 Float a = { .i = f32 };
96 if (float32_is_normal(f32)) {
97 return a.mant | 1ULL << 23;
98 }
99 if (float32_is_zero(f32)) {
100 return 0;
101 }
102 if (float32_is_denormal(f32)) {
103 return a.mant;
104 }
105 return ~0ULL;
106}
107
108int32_t float32_getexp(float32 f32)
109{
110 Float a = { .i = f32 };
111 if (float32_is_normal(f32)) {
112 return a.exp;
113 }
114 if (float32_is_denormal(f32)) {
115 return a.exp + 1;
116 }
117 return -1;
118}
119
120static uint32_t int128_getw0(Int128 x)
121{
122 return int128_getlo(x);
123}
124
125static uint32_t int128_getw1(Int128 x)
126{
127 return int128_getlo(x) >> 32;
128}
129
130static Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
131{
132 Int128 a, b;
133 uint64_t pp0, pp1a, pp1b, pp1s, pp2;
134
135 a = int128_make64(ai);
136 b = int128_make64(bi);
137 pp0 = (uint64_t)int128_getw0(a) * (uint64_t)int128_getw0(b);
138 pp1a = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw0(b);
139 pp1b = (uint64_t)int128_getw1(b) * (uint64_t)int128_getw0(a);
140 pp2 = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw1(b);
141
142 pp1s = pp1a + pp1b;
143 if ((pp1s < pp1a) || (pp1s < pp1b)) {
144 pp2 += (1ULL << 32);
145 }
146 uint64_t ret_low = pp0 + (pp1s << 32);
147 if ((ret_low < pp0) || (ret_low < (pp1s << 32))) {
148 pp2 += 1;
149 }
150
151 return int128_make128(ret_low, pp2 + (pp1s >> 32));
152}
153
154static Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow)
155{
156 Int128 ret = int128_sub(a, b);
157 if (borrow != 0) {
158 ret = int128_sub(ret, int128_one());
159 }
160 return ret;
161}
162
163typedef struct {
164 Int128 mant;
165 int32_t exp;
166 uint8_t sign;
167 uint8_t guard;
168 uint8_t round;
169 uint8_t sticky;
170} Accum;
171
172static void accum_init(Accum *p)
173{
174 p->mant = int128_zero();
175 p->exp = 0;
176 p->sign = 0;
177 p->guard = 0;
178 p->round = 0;
179 p->sticky = 0;
180}
181
182static Accum accum_norm_left(Accum a)
183{
184 a.exp--;
185 a.mant = int128_lshift(a.mant, 1);
186 a.mant = int128_or(a.mant, int128_make64(a.guard));
187 a.guard = a.round;
188 a.round = a.sticky;
189 return a;
190}
191
192
193static inline Accum accum_norm_right(Accum a, int amt)
194{
195 if (amt > 130) {
196 a.sticky |=
197 a.round | a.guard | int128_nz(a.mant);
198 a.guard = a.round = 0;
199 a.mant = int128_zero();
200 a.exp += amt;
201 return a;
202
203 }
204 while (amt >= 64) {
205 a.sticky |= a.round | a.guard | (int128_getlo(a.mant) != 0);
206 a.guard = (int128_getlo(a.mant) >> 63) & 1;
207 a.round = (int128_getlo(a.mant) >> 62) & 1;
208 a.mant = int128_make64(int128_gethi(a.mant));
209 a.exp += 64;
210 amt -= 64;
211 }
212 while (amt > 0) {
213 a.exp++;
214 a.sticky |= a.round;
215 a.round = a.guard;
216 a.guard = int128_getlo(a.mant) & 1;
217 a.mant = int128_rshift(a.mant, 1);
218 amt--;
219 }
220 return a;
221}
222
223
224
225
226
227static Accum accum_add(Accum a, Accum b);
228
229static Accum accum_sub(Accum a, Accum b, int negate)
230{
231 Accum ret;
232 accum_init(&ret);
233 int borrow;
234
235 if (a.sign != b.sign) {
236 b.sign = !b.sign;
237 return accum_add(a, b);
238 }
239 if (b.exp > a.exp) {
240
241 return accum_sub(b, a, !negate);
242 }
243 if ((b.exp == a.exp) && (int128_gt(b.mant, a.mant))) {
244
245 return accum_sub(b, a, !negate);
246 }
247
248 while (a.exp > b.exp) {
249
250 if (int128_gethi(a.mant) & (1ULL << 62)) {
251
252 break;
253 } else {
254 a = accum_norm_left(a);
255 }
256 }
257
258 while (a.exp > b.exp) {
259
260
261 b = accum_norm_right(b, a.exp - b.exp);
262 }
263
264 if ((int128_gt(b.mant, a.mant))) {
265 return accum_sub(b, a, !negate);
266 }
267
268
269 ret.sign = a.sign;
270 ret.exp = a.exp;
271 assert(!int128_gt(b.mant, a.mant));
272 borrow = (b.round << 2) | (b.guard << 1) | b.sticky;
273 ret.mant = int128_sub_borrow(a.mant, b.mant, (borrow != 0));
274 borrow = 0 - borrow;
275 ret.guard = (borrow >> 2) & 1;
276 ret.round = (borrow >> 1) & 1;
277 ret.sticky = (borrow >> 0) & 1;
278 if (negate) {
279 ret.sign = !ret.sign;
280 }
281 return ret;
282}
283
284static Accum accum_add(Accum a, Accum b)
285{
286 Accum ret;
287 accum_init(&ret);
288 if (a.sign != b.sign) {
289 b.sign = !b.sign;
290 return accum_sub(a, b, 0);
291 }
292 if (b.exp > a.exp) {
293
294 return accum_add(b, a);
295 }
296 if ((b.exp == a.exp) && int128_gt(b.mant, a.mant)) {
297
298 return accum_add(b, a);
299 }
300
301 while (a.exp > b.exp) {
302
303 if (int128_gethi(a.mant) & (1ULL << 62)) {
304
305 break;
306 } else {
307 a = accum_norm_left(a);
308 }
309 }
310
311 while (a.exp > b.exp) {
312
313
314 b = accum_norm_right(b, a.exp - b.exp);
315 }
316
317
318 if (int128_gt(b.mant, a.mant)) {
319 return accum_add(b, a);
320 };
321 ret.sign = a.sign;
322 ret.exp = a.exp;
323 assert(!int128_gt(b.mant, a.mant));
324 ret.mant = int128_add(a.mant, b.mant);
325 ret.guard = b.guard;
326 ret.round = b.round;
327 ret.sticky = b.sticky;
328 return ret;
329}
330
331
332static float64 infinite_float64(uint8_t sign)
333{
334 if (sign) {
335 return make_float64(DF_MINUS_INF);
336 } else {
337 return make_float64(DF_INF);
338 }
339}
340
341
342static float64 maxfinite_float64(uint8_t sign)
343{
344 if (sign) {
345 return make_float64(DF_MINUS_MAXF);
346 } else {
347 return make_float64(DF_MAXF);
348 }
349}
350
351
352static float64 zero_float64(uint8_t sign)
353{
354 if (sign) {
355 return make_float64(0x8000000000000000);
356 } else {
357 return float64_zero;
358 }
359}
360
361
362float32 infinite_float32(uint8_t sign)
363{
364 if (sign) {
365 return make_float32(SF_MINUS_INF);
366 } else {
367 return make_float32(SF_INF);
368 }
369}
370
371
372static float32 maxfinite_float32(uint8_t sign)
373{
374 if (sign) {
375 return make_float32(SF_MINUS_MAXF);
376 } else {
377 return make_float32(SF_MAXF);
378 }
379}
380
381
382static float32 zero_float32(uint8_t sign)
383{
384 if (sign) {
385 return make_float32(0x80000000);
386 } else {
387 return float32_zero;
388 }
389}
390
391#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
392static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
393{ \
394 if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) \
395 && ((a.guard | a.round | a.sticky) == 0)) { \
396 \
397 switch (fp_status->float_rounding_mode) { \
398 case float_round_down: \
399 return zero_##SUFFIX(1); \
400 default: \
401 return zero_##SUFFIX(0); \
402 } \
403 } \
404 \
405 \
406 \
407
408 \
409 while ((int128_gethi(a.mant) != 0) || \
410 ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0)) { \
411 a = accum_norm_right(a, 1); \
412 } \
413
414
415
416
417
418
419 \
420 while ((int128_getlo(a.mant) & (1ULL << MANTBITS)) == 0) { \
421 a = accum_norm_left(a); \
422 } \
423
424
425
426
427 \
428 while (a.exp <= 0) { \
429 a = accum_norm_right(a, 1 - a.exp); \
430
431
432
433
434 \
435 if (a.guard || a.round || a.sticky) { \
436 float_raise(float_flag_underflow, fp_status); \
437 } \
438 } \
439 \
440 if (a.guard || a.round || a.sticky) { \
441 float_raise(float_flag_inexact, fp_status); \
442 switch (fp_status->float_rounding_mode) { \
443 case float_round_to_zero: \
444 \
445 break; \
446 case float_round_up: \
447 if (a.sign == 0) { \
448 a.mant = int128_add(a.mant, int128_one()); \
449 } \
450 break; \
451 case float_round_down: \
452 if (a.sign != 0) { \
453 a.mant = int128_add(a.mant, int128_one()); \
454 } \
455 break; \
456 default: \
457 if (a.round || a.sticky) { \
458 \
459 a.mant = int128_add(a.mant, int128_make64(a.guard)); \
460 } else if (a.guard) { \
461 \
462 a.mant = int128_add(a.mant, int128_and(a.mant, int128_one())); \
463 } \
464 break; \
465 } \
466 } \
467
468
469
470
471
472 \
473 if ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0) { \
474 a = accum_norm_right(a, 1); \
475 } \
476 \
477 if (a.exp >= INF_EXP) { \
478 \
479 float_raise(float_flag_overflow, fp_status); \
480 float_raise(float_flag_inexact, fp_status); \
481 switch (fp_status->float_rounding_mode) { \
482 case float_round_to_zero: \
483 return maxfinite_##SUFFIX(a.sign); \
484 case float_round_up: \
485 if (a.sign == 0) { \
486 return infinite_##SUFFIX(a.sign); \
487 } else { \
488 return maxfinite_##SUFFIX(a.sign); \
489 } \
490 case float_round_down: \
491 if (a.sign != 0) { \
492 return infinite_##SUFFIX(a.sign); \
493 } else { \
494 return maxfinite_##SUFFIX(a.sign); \
495 } \
496 default: \
497 return infinite_##SUFFIX(a.sign); \
498 } \
499 } \
500 \
501 if (int128_getlo(a.mant) & (1ULL << MANTBITS)) { \
502 \
503 INTERNAL_TYPE ret; \
504 ret.i = 0; \
505 ret.sign = a.sign; \
506 ret.exp = a.exp; \
507 ret.mant = int128_getlo(a.mant); \
508 return ret.i; \
509 } \
510 assert(a.exp == 1); \
511 INTERNAL_TYPE ret; \
512 ret.i = 0; \
513 ret.sign = a.sign; \
514 ret.exp = 0; \
515 ret.mant = int128_getlo(a.mant); \
516 return ret.i; \
517}
518
519GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
520GEN_XF_ROUND(float32, SF_MANTBITS, SF_INF_EXP, Float)
521
522static bool is_inf_prod(float64 a, float64 b)
523{
524 return ((float64_is_infinity(a) && float64_is_infinity(b)) ||
525 (float64_is_infinity(a) && is_finite(b) && (!float64_is_zero(b))) ||
526 (float64_is_infinity(b) && is_finite(a) && (!float64_is_zero(a))));
527}
528
529static float64 special_fma(float64 a, float64 b, float64 c,
530 float_status *fp_status)
531{
532 float64 ret = make_float64(0);
533
534
535
536
537
538 uint8_t a_sign = float64_is_neg(a);
539 uint8_t b_sign = float64_is_neg(b);
540 uint8_t c_sign = float64_is_neg(c);
541 if (is_inf_prod(a, b) && float64_is_infinity(c)) {
542 if ((a_sign ^ b_sign) != c_sign) {
543 ret = make_float64(DF_NAN);
544 float_raise(float_flag_invalid, fp_status);
545 return ret;
546 }
547 }
548 if ((float64_is_infinity(a) && float64_is_zero(b)) ||
549 (float64_is_zero(a) && float64_is_infinity(b))) {
550 ret = make_float64(DF_NAN);
551 float_raise(float_flag_invalid, fp_status);
552 return ret;
553 }
554
555
556
557
558
559 if (float64_is_any_nan(a) ||
560 float64_is_any_nan(b) ||
561 float64_is_any_nan(c)) {
562 if (float64_is_any_nan(a) && (fGETBIT(51, a) == 0)) {
563 float_raise(float_flag_invalid, fp_status);
564 }
565 if (float64_is_any_nan(b) && (fGETBIT(51, b) == 0)) {
566 float_raise(float_flag_invalid, fp_status);
567 }
568 if (float64_is_any_nan(c) && (fGETBIT(51, c) == 0)) {
569 float_raise(float_flag_invalid, fp_status);
570 }
571 ret = make_float64(DF_NAN);
572 return ret;
573 }
574
575
576
577
578 if (float64_is_infinity(c)) {
579 ret = infinite_float64(c_sign);
580 return ret;
581 }
582 if (float64_is_infinity(a) || float64_is_infinity(b)) {
583 ret = infinite_float64(a_sign ^ b_sign);
584 return ret;
585 }
586 g_assert_not_reached();
587}
588
589static float32 special_fmaf(float32 a, float32 b, float32 c,
590 float_status *fp_status)
591{
592 float64 aa, bb, cc;
593 aa = float32_to_float64(a, fp_status);
594 bb = float32_to_float64(b, fp_status);
595 cc = float32_to_float64(c, fp_status);
596 return float64_to_float32(special_fma(aa, bb, cc, fp_status), fp_status);
597}
598
599float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
600 float_status *fp_status)
601{
602 Accum prod;
603 Accum acc;
604 Accum result;
605 accum_init(&prod);
606 accum_init(&acc);
607 accum_init(&result);
608
609 uint8_t a_sign = float32_is_neg(a);
610 uint8_t b_sign = float32_is_neg(b);
611 uint8_t c_sign = float32_is_neg(c);
612 if (float32_is_infinity(a) ||
613 float32_is_infinity(b) ||
614 float32_is_infinity(c)) {
615 return special_fmaf(a, b, c, fp_status);
616 }
617 if (float32_is_any_nan(a) ||
618 float32_is_any_nan(b) ||
619 float32_is_any_nan(c)) {
620 return special_fmaf(a, b, c, fp_status);
621 }
622 if ((scale == 0) && (float32_is_zero(a) || float32_is_zero(b))) {
623 float32 tmp = float32_mul(a, b, fp_status);
624 tmp = float32_add(tmp, c, fp_status);
625 return tmp;
626 }
627
628
629 prod.mant = int128_mul_6464(float32_getmant(a), float32_getmant(b));
630
631
632
633
634
635 prod.exp = float32_getexp(a) + float32_getexp(b) - SF_BIAS - 23;
636 prod.sign = a_sign ^ b_sign;
637 if (float32_is_zero(a) || float32_is_zero(b)) {
638 prod.exp = -2 * WAY_BIG_EXP;
639 }
640 if ((scale > 0) && float32_is_denormal(c)) {
641 acc.mant = int128_mul_6464(0, 0);
642 acc.exp = -WAY_BIG_EXP;
643 acc.sign = c_sign;
644 acc.sticky = 1;
645 result = accum_add(prod, acc);
646 } else if (!float32_is_zero(c)) {
647 acc.mant = int128_mul_6464(float32_getmant(c), 1);
648 acc.exp = float32_getexp(c);
649 acc.sign = c_sign;
650 result = accum_add(prod, acc);
651 } else {
652 result = prod;
653 }
654 result.exp += scale;
655 return accum_round_float32(result, fp_status);
656}
657
658float32 internal_mpyf(float32 a, float32 b, float_status *fp_status)
659{
660 if (float32_is_zero(a) || float32_is_zero(b)) {
661 return float32_mul(a, b, fp_status);
662 }
663 return internal_fmafx(a, b, float32_zero, 0, fp_status);
664}
665
666float64 internal_mpyhh(float64 a, float64 b,
667 unsigned long long int accumulated,
668 float_status *fp_status)
669{
670 Accum x;
671 unsigned long long int prod;
672 unsigned int sticky;
673 uint8_t a_sign, b_sign;
674
675 sticky = accumulated & 1;
676 accumulated >>= 1;
677 accum_init(&x);
678 if (float64_is_zero(a) ||
679 float64_is_any_nan(a) ||
680 float64_is_infinity(a)) {
681 return float64_mul(a, b, fp_status);
682 }
683 if (float64_is_zero(b) ||
684 float64_is_any_nan(b) ||
685 float64_is_infinity(b)) {
686 return float64_mul(a, b, fp_status);
687 }
688 x.mant = int128_mul_6464(accumulated, 1);
689 x.sticky = sticky;
690 prod = fGETUWORD(1, float64_getmant(a)) * fGETUWORD(1, float64_getmant(b));
691 x.mant = int128_add(x.mant, int128_mul_6464(prod, 0x100000000ULL));
692 x.exp = float64_getexp(a) + float64_getexp(b) - DF_BIAS - 20;
693 if (!float64_is_normal(a) || !float64_is_normal(b)) {
694
695 x.sticky = 1;
696 x.exp = -4096;
697 }
698 a_sign = float64_is_neg(a);
699 b_sign = float64_is_neg(b);
700 x.sign = a_sign ^ b_sign;
701 return accum_round_float64(x, fp_status);
702}
703