1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85#include "qemu/osdep.h"
86#include "qemu/bitops.h"
87#include "fpu/softfloat.h"
88
89
90
91
92
93
94
95
96#include "fpu/softfloat-macros.h"
97
98
99
100
101
102static inline uint32_t extractFloat16Frac(float16 a)
103{
104 return float16_val(a) & 0x3ff;
105}
106
107
108
109
110
111static inline int extractFloat16Exp(float16 a)
112{
113 return (float16_val(a) >> 10) & 0x1f;
114}
115
116
117
118
119
120static inline uint32_t extractFloat32Frac(float32 a)
121{
122 return float32_val(a) & 0x007FFFFF;
123}
124
125
126
127
128
129static inline int extractFloat32Exp(float32 a)
130{
131 return (float32_val(a) >> 23) & 0xFF;
132}
133
134
135
136
137
138static inline flag extractFloat32Sign(float32 a)
139{
140 return float32_val(a) >> 31;
141}
142
143
144
145
146
147static inline uint64_t extractFloat64Frac(float64 a)
148{
149 return float64_val(a) & LIT64(0x000FFFFFFFFFFFFF);
150}
151
152
153
154
155
156static inline int extractFloat64Exp(float64 a)
157{
158 return (float64_val(a) >> 52) & 0x7FF;
159}
160
161
162
163
164
165static inline flag extractFloat64Sign(float64 a)
166{
167 return float64_val(a) >> 63;
168}
169
170
171
172
173
174
175typedef enum __attribute__ ((__packed__)) {
176 float_class_unclassified,
177 float_class_zero,
178 float_class_normal,
179 float_class_inf,
180 float_class_qnan,
181 float_class_snan,
182} FloatClass;
183
184
185static inline __attribute__((unused)) bool is_nan(FloatClass c)
186{
187 return unlikely(c >= float_class_qnan);
188}
189
190static inline __attribute__((unused)) bool is_snan(FloatClass c)
191{
192 return c == float_class_snan;
193}
194
195static inline __attribute__((unused)) bool is_qnan(FloatClass c)
196{
197 return c == float_class_qnan;
198}
199
200
201
202
203
204
205
206
207
208
209
210
211typedef struct {
212 uint64_t frac;
213 int32_t exp;
214 FloatClass cls;
215 bool sign;
216} FloatParts;
217
218#define DECOMPOSED_BINARY_POINT (64 - 2)
219#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
220#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1)
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235typedef struct {
236 int exp_size;
237 int exp_bias;
238 int exp_max;
239 int frac_size;
240 int frac_shift;
241 uint64_t frac_lsb;
242 uint64_t frac_lsbm1;
243 uint64_t round_mask;
244 uint64_t roundeven_mask;
245 bool arm_althp;
246} FloatFmt;
247
248
249#define FLOAT_PARAMS(E, F) \
250 .exp_size = E, \
251 .exp_bias = ((1 << E) - 1) >> 1, \
252 .exp_max = (1 << E) - 1, \
253 .frac_size = F, \
254 .frac_shift = DECOMPOSED_BINARY_POINT - F, \
255 .frac_lsb = 1ull << (DECOMPOSED_BINARY_POINT - F), \
256 .frac_lsbm1 = 1ull << ((DECOMPOSED_BINARY_POINT - F) - 1), \
257 .round_mask = (1ull << (DECOMPOSED_BINARY_POINT - F)) - 1, \
258 .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - F)) - 1
259
260static const FloatFmt float16_params = {
261 FLOAT_PARAMS(5, 10)
262};
263
264static const FloatFmt float16_params_ahp = {
265 FLOAT_PARAMS(5, 10),
266 .arm_althp = true
267};
268
269static const FloatFmt float32_params = {
270 FLOAT_PARAMS(8, 23)
271};
272
273static const FloatFmt float64_params = {
274 FLOAT_PARAMS(11, 52)
275};
276
277
278static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
279{
280 const int sign_pos = fmt.frac_size + fmt.exp_size;
281
282 return (FloatParts) {
283 .cls = float_class_unclassified,
284 .sign = extract64(raw, sign_pos, 1),
285 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
286 .frac = extract64(raw, 0, fmt.frac_size),
287 };
288}
289
290static inline FloatParts float16_unpack_raw(float16 f)
291{
292 return unpack_raw(float16_params, f);
293}
294
295static inline FloatParts float32_unpack_raw(float32 f)
296{
297 return unpack_raw(float32_params, f);
298}
299
300static inline FloatParts float64_unpack_raw(float64 f)
301{
302 return unpack_raw(float64_params, f);
303}
304
305
306static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
307{
308 const int sign_pos = fmt.frac_size + fmt.exp_size;
309 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
310 return deposit64(ret, sign_pos, 1, p.sign);
311}
312
313static inline float16 float16_pack_raw(FloatParts p)
314{
315 return make_float16(pack_raw(float16_params, p));
316}
317
318static inline float32 float32_pack_raw(FloatParts p)
319{
320 return make_float32(pack_raw(float32_params, p));
321}
322
323static inline float64 float64_pack_raw(FloatParts p)
324{
325 return make_float64(pack_raw(float64_params, p));
326}
327
328
329
330
331
332
333
334
335
336#include "softfloat-specialize.h"
337
338
339static FloatParts canonicalize(FloatParts part, const FloatFmt *parm,
340 float_status *status)
341{
342 if (part.exp == parm->exp_max && !parm->arm_althp) {
343 if (part.frac == 0) {
344 part.cls = float_class_inf;
345 } else {
346 part.frac <<= parm->frac_shift;
347 part.cls = (parts_is_snan_frac(part.frac, status)
348 ? float_class_snan : float_class_qnan);
349 }
350 } else if (part.exp == 0) {
351 if (likely(part.frac == 0)) {
352 part.cls = float_class_zero;
353 } else if (status->flush_inputs_to_zero) {
354 float_raise(float_flag_input_denormal, status);
355 part.cls = float_class_zero;
356 part.frac = 0;
357 } else {
358 int shift = clz64(part.frac) - 1;
359 part.cls = float_class_normal;
360 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
361 part.frac <<= shift;
362 }
363 } else {
364 part.cls = float_class_normal;
365 part.exp -= parm->exp_bias;
366 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
367 }
368 return part;
369}
370
371
372
373
374
375
376
377static FloatParts round_canonical(FloatParts p, float_status *s,
378 const FloatFmt *parm)
379{
380 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
381 const uint64_t round_mask = parm->round_mask;
382 const uint64_t roundeven_mask = parm->roundeven_mask;
383 const int exp_max = parm->exp_max;
384 const int frac_shift = parm->frac_shift;
385 uint64_t frac, inc;
386 int exp, flags = 0;
387 bool overflow_norm;
388
389 frac = p.frac;
390 exp = p.exp;
391
392 switch (p.cls) {
393 case float_class_normal:
394 switch (s->float_rounding_mode) {
395 case float_round_nearest_even:
396 overflow_norm = false;
397 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
398 break;
399 case float_round_ties_away:
400 overflow_norm = false;
401 inc = frac_lsbm1;
402 break;
403 case float_round_to_zero:
404 overflow_norm = true;
405 inc = 0;
406 break;
407 case float_round_up:
408 inc = p.sign ? 0 : round_mask;
409 overflow_norm = p.sign;
410 break;
411 case float_round_down:
412 inc = p.sign ? round_mask : 0;
413 overflow_norm = !p.sign;
414 break;
415 default:
416 g_assert_not_reached();
417 }
418
419 exp += parm->exp_bias;
420 if (likely(exp > 0)) {
421 if (frac & round_mask) {
422 flags |= float_flag_inexact;
423 frac += inc;
424 if (frac & DECOMPOSED_OVERFLOW_BIT) {
425 frac >>= 1;
426 exp++;
427 }
428 }
429 frac >>= frac_shift;
430
431 if (parm->arm_althp) {
432
433 if (unlikely(exp > exp_max)) {
434
435 flags = float_flag_invalid;
436 exp = exp_max;
437 frac = -1;
438 }
439 } else if (unlikely(exp >= exp_max)) {
440 flags |= float_flag_overflow | float_flag_inexact;
441 if (overflow_norm) {
442 exp = exp_max - 1;
443 frac = -1;
444 } else {
445 p.cls = float_class_inf;
446 goto do_inf;
447 }
448 }
449 } else if (s->flush_to_zero) {
450 flags |= float_flag_output_denormal;
451 p.cls = float_class_zero;
452 goto do_zero;
453 } else {
454 bool is_tiny = (s->float_detect_tininess
455 == float_tininess_before_rounding)
456 || (exp < 0)
457 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
458
459 shift64RightJamming(frac, 1 - exp, &frac);
460 if (frac & round_mask) {
461
462 if (s->float_rounding_mode == float_round_nearest_even) {
463 inc = ((frac & roundeven_mask) != frac_lsbm1
464 ? frac_lsbm1 : 0);
465 }
466 flags |= float_flag_inexact;
467 frac += inc;
468 }
469
470 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
471 frac >>= frac_shift;
472
473 if (is_tiny && (flags & float_flag_inexact)) {
474 flags |= float_flag_underflow;
475 }
476 if (exp == 0 && frac == 0) {
477 p.cls = float_class_zero;
478 }
479 }
480 break;
481
482 case float_class_zero:
483 do_zero:
484 exp = 0;
485 frac = 0;
486 break;
487
488 case float_class_inf:
489 do_inf:
490 assert(!parm->arm_althp);
491 exp = exp_max;
492 frac = 0;
493 break;
494
495 case float_class_qnan:
496 case float_class_snan:
497 assert(!parm->arm_althp);
498 exp = exp_max;
499 frac >>= parm->frac_shift;
500 break;
501
502 default:
503 g_assert_not_reached();
504 }
505
506 float_raise(flags, s);
507 p.exp = exp;
508 p.frac = frac;
509 return p;
510}
511
512
513static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
514 const FloatFmt *params)
515{
516 return canonicalize(float16_unpack_raw(f), params, s);
517}
518
519static FloatParts float16_unpack_canonical(float16 f, float_status *s)
520{
521 return float16a_unpack_canonical(f, s, &float16_params);
522}
523
524static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
525 const FloatFmt *params)
526{
527 return float16_pack_raw(round_canonical(p, s, params));
528}
529
530static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
531{
532 return float16a_round_pack_canonical(p, s, &float16_params);
533}
534
535static FloatParts float32_unpack_canonical(float32 f, float_status *s)
536{
537 return canonicalize(float32_unpack_raw(f), &float32_params, s);
538}
539
540static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
541{
542 return float32_pack_raw(round_canonical(p, s, &float32_params));
543}
544
545static FloatParts float64_unpack_canonical(float64 f, float_status *s)
546{
547 return canonicalize(float64_unpack_raw(f), &float64_params, s);
548}
549
550static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
551{
552 return float64_pack_raw(round_canonical(p, s, &float64_params));
553}
554
555static FloatParts return_nan(FloatParts a, float_status *s)
556{
557 switch (a.cls) {
558 case float_class_snan:
559 s->float_exception_flags |= float_flag_invalid;
560 a = parts_silence_nan(a, s);
561
562 case float_class_qnan:
563 if (s->default_nan_mode) {
564 return parts_default_nan(s);
565 }
566 break;
567
568 default:
569 g_assert_not_reached();
570 }
571 return a;
572}
573
574static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
575{
576 if (is_snan(a.cls) || is_snan(b.cls)) {
577 s->float_exception_flags |= float_flag_invalid;
578 }
579
580 if (s->default_nan_mode) {
581 return parts_default_nan(s);
582 } else {
583 if (pickNaN(a.cls, b.cls,
584 a.frac > b.frac ||
585 (a.frac == b.frac && a.sign < b.sign))) {
586 a = b;
587 }
588 if (is_snan(a.cls)) {
589 return parts_silence_nan(a, s);
590 }
591 }
592 return a;
593}
594
595static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
596 bool inf_zero, float_status *s)
597{
598 int which;
599
600 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
601 s->float_exception_flags |= float_flag_invalid;
602 }
603
604 which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
605
606 if (s->default_nan_mode) {
607
608
609
610 which = 3;
611 }
612
613 switch (which) {
614 case 0:
615 break;
616 case 1:
617 a = b;
618 break;
619 case 2:
620 a = c;
621 break;
622 case 3:
623 return parts_default_nan(s);
624 default:
625 g_assert_not_reached();
626 }
627
628 if (is_snan(a.cls)) {
629 return parts_silence_nan(a, s);
630 }
631 return a;
632}
633
634
635
636
637
638
639
640
641static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
642 float_status *s)
643{
644 bool a_sign = a.sign;
645 bool b_sign = b.sign ^ subtract;
646
647 if (a_sign != b_sign) {
648
649
650 if (a.cls == float_class_normal && b.cls == float_class_normal) {
651 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
652 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
653 a.frac = a.frac - b.frac;
654 } else {
655 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
656 a.frac = b.frac - a.frac;
657 a.exp = b.exp;
658 a_sign ^= 1;
659 }
660
661 if (a.frac == 0) {
662 a.cls = float_class_zero;
663 a.sign = s->float_rounding_mode == float_round_down;
664 } else {
665 int shift = clz64(a.frac) - 1;
666 a.frac = a.frac << shift;
667 a.exp = a.exp - shift;
668 a.sign = a_sign;
669 }
670 return a;
671 }
672 if (is_nan(a.cls) || is_nan(b.cls)) {
673 return pick_nan(a, b, s);
674 }
675 if (a.cls == float_class_inf) {
676 if (b.cls == float_class_inf) {
677 float_raise(float_flag_invalid, s);
678 return parts_default_nan(s);
679 }
680 return a;
681 }
682 if (a.cls == float_class_zero && b.cls == float_class_zero) {
683 a.sign = s->float_rounding_mode == float_round_down;
684 return a;
685 }
686 if (a.cls == float_class_zero || b.cls == float_class_inf) {
687 b.sign = a_sign ^ 1;
688 return b;
689 }
690 if (b.cls == float_class_zero) {
691 return a;
692 }
693 } else {
694
695 if (a.cls == float_class_normal && b.cls == float_class_normal) {
696 if (a.exp > b.exp) {
697 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
698 } else if (a.exp < b.exp) {
699 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
700 a.exp = b.exp;
701 }
702 a.frac += b.frac;
703 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
704 shift64RightJamming(a.frac, 1, &a.frac);
705 a.exp += 1;
706 }
707 return a;
708 }
709 if (is_nan(a.cls) || is_nan(b.cls)) {
710 return pick_nan(a, b, s);
711 }
712 if (a.cls == float_class_inf || b.cls == float_class_zero) {
713 return a;
714 }
715 if (b.cls == float_class_inf || a.cls == float_class_zero) {
716 b.sign = b_sign;
717 return b;
718 }
719 }
720 g_assert_not_reached();
721}
722
723
724
725
726
727
728
729float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
730{
731 FloatParts pa = float16_unpack_canonical(a, status);
732 FloatParts pb = float16_unpack_canonical(b, status);
733 FloatParts pr = addsub_floats(pa, pb, false, status);
734
735 return float16_round_pack_canonical(pr, status);
736}
737
738float32 QEMU_FLATTEN float32_add(float32 a, float32 b, float_status *status)
739{
740 FloatParts pa = float32_unpack_canonical(a, status);
741 FloatParts pb = float32_unpack_canonical(b, status);
742 FloatParts pr = addsub_floats(pa, pb, false, status);
743
744 return float32_round_pack_canonical(pr, status);
745}
746
747float64 QEMU_FLATTEN float64_add(float64 a, float64 b, float_status *status)
748{
749 FloatParts pa = float64_unpack_canonical(a, status);
750 FloatParts pb = float64_unpack_canonical(b, status);
751 FloatParts pr = addsub_floats(pa, pb, false, status);
752
753 return float64_round_pack_canonical(pr, status);
754}
755
756float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
757{
758 FloatParts pa = float16_unpack_canonical(a, status);
759 FloatParts pb = float16_unpack_canonical(b, status);
760 FloatParts pr = addsub_floats(pa, pb, true, status);
761
762 return float16_round_pack_canonical(pr, status);
763}
764
765float32 QEMU_FLATTEN float32_sub(float32 a, float32 b, float_status *status)
766{
767 FloatParts pa = float32_unpack_canonical(a, status);
768 FloatParts pb = float32_unpack_canonical(b, status);
769 FloatParts pr = addsub_floats(pa, pb, true, status);
770
771 return float32_round_pack_canonical(pr, status);
772}
773
774float64 QEMU_FLATTEN float64_sub(float64 a, float64 b, float_status *status)
775{
776 FloatParts pa = float64_unpack_canonical(a, status);
777 FloatParts pb = float64_unpack_canonical(b, status);
778 FloatParts pr = addsub_floats(pa, pb, true, status);
779
780 return float64_round_pack_canonical(pr, status);
781}
782
783
784
785
786
787
788
789static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
790{
791 bool sign = a.sign ^ b.sign;
792
793 if (a.cls == float_class_normal && b.cls == float_class_normal) {
794 uint64_t hi, lo;
795 int exp = a.exp + b.exp;
796
797 mul64To128(a.frac, b.frac, &hi, &lo);
798 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
799 if (lo & DECOMPOSED_OVERFLOW_BIT) {
800 shift64RightJamming(lo, 1, &lo);
801 exp += 1;
802 }
803
804
805 a.exp = exp;
806 a.sign = sign;
807 a.frac = lo;
808 return a;
809 }
810
811 if (is_nan(a.cls) || is_nan(b.cls)) {
812 return pick_nan(a, b, s);
813 }
814
815 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
816 (a.cls == float_class_zero && b.cls == float_class_inf)) {
817 s->float_exception_flags |= float_flag_invalid;
818 return parts_default_nan(s);
819 }
820
821 if (a.cls == float_class_inf || a.cls == float_class_zero) {
822 a.sign = sign;
823 return a;
824 }
825 if (b.cls == float_class_inf || b.cls == float_class_zero) {
826 b.sign = sign;
827 return b;
828 }
829 g_assert_not_reached();
830}
831
832float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
833{
834 FloatParts pa = float16_unpack_canonical(a, status);
835 FloatParts pb = float16_unpack_canonical(b, status);
836 FloatParts pr = mul_floats(pa, pb, status);
837
838 return float16_round_pack_canonical(pr, status);
839}
840
841float32 QEMU_FLATTEN float32_mul(float32 a, float32 b, float_status *status)
842{
843 FloatParts pa = float32_unpack_canonical(a, status);
844 FloatParts pb = float32_unpack_canonical(b, status);
845 FloatParts pr = mul_floats(pa, pb, status);
846
847 return float32_round_pack_canonical(pr, status);
848}
849
850float64 QEMU_FLATTEN float64_mul(float64 a, float64 b, float_status *status)
851{
852 FloatParts pa = float64_unpack_canonical(a, status);
853 FloatParts pb = float64_unpack_canonical(b, status);
854 FloatParts pr = mul_floats(pa, pb, status);
855
856 return float64_round_pack_canonical(pr, status);
857}
858
859
860
861
862
863
864
865
866
867
868
869
870
871static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
872 int flags, float_status *s)
873{
874 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
875 ((1 << float_class_inf) | (1 << float_class_zero));
876 bool p_sign;
877 bool sign_flip = flags & float_muladd_negate_result;
878 FloatClass p_class;
879 uint64_t hi, lo;
880 int p_exp;
881
882
883
884
885
886
887 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
888 return pick_nan_muladd(a, b, c, inf_zero, s);
889 }
890
891 if (inf_zero) {
892 s->float_exception_flags |= float_flag_invalid;
893 return parts_default_nan(s);
894 }
895
896 if (flags & float_muladd_negate_c) {
897 c.sign ^= 1;
898 }
899
900 p_sign = a.sign ^ b.sign;
901
902 if (flags & float_muladd_negate_product) {
903 p_sign ^= 1;
904 }
905
906 if (a.cls == float_class_inf || b.cls == float_class_inf) {
907 p_class = float_class_inf;
908 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
909 p_class = float_class_zero;
910 } else {
911 p_class = float_class_normal;
912 }
913
914 if (c.cls == float_class_inf) {
915 if (p_class == float_class_inf && p_sign != c.sign) {
916 s->float_exception_flags |= float_flag_invalid;
917 return parts_default_nan(s);
918 } else {
919 a.cls = float_class_inf;
920 a.sign = c.sign ^ sign_flip;
921 return a;
922 }
923 }
924
925 if (p_class == float_class_inf) {
926 a.cls = float_class_inf;
927 a.sign = p_sign ^ sign_flip;
928 return a;
929 }
930
931 if (p_class == float_class_zero) {
932 if (c.cls == float_class_zero) {
933 if (p_sign != c.sign) {
934 p_sign = s->float_rounding_mode == float_round_down;
935 }
936 c.sign = p_sign;
937 } else if (flags & float_muladd_halve_result) {
938 c.exp -= 1;
939 }
940 c.sign ^= sign_flip;
941 return c;
942 }
943
944
945 assert(a.cls == float_class_normal &&
946 b.cls == float_class_normal);
947
948 p_exp = a.exp + b.exp;
949
950
951
952
953 mul64To128(a.frac, b.frac, &hi, &lo);
954
955
956
957 if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
958 shift128RightJamming(hi, lo, 1, &hi, &lo);
959 p_exp += 1;
960 }
961
962
963 if (c.cls == float_class_zero) {
964
965 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
966 } else {
967 int exp_diff = p_exp - c.exp;
968 if (p_sign == c.sign) {
969
970 if (exp_diff <= 0) {
971 shift128RightJamming(hi, lo,
972 DECOMPOSED_BINARY_POINT - exp_diff,
973 &hi, &lo);
974 lo += c.frac;
975 p_exp = c.exp;
976 } else {
977 uint64_t c_hi, c_lo;
978
979 c_hi = c.frac >> 2;
980 c_lo = 0;
981 shift128RightJamming(c_hi, c_lo,
982 exp_diff,
983 &c_hi, &c_lo);
984 add128(hi, lo, c_hi, c_lo, &hi, &lo);
985
986 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
987 }
988
989 if (lo & DECOMPOSED_OVERFLOW_BIT) {
990 shift64RightJamming(lo, 1, &lo);
991 p_exp += 1;
992 }
993
994 } else {
995
996 uint64_t c_hi, c_lo;
997
998 c_hi = c.frac >> 2;
999 c_lo = 0;
1000
1001 if (exp_diff <= 0) {
1002 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1003 if (exp_diff == 0
1004 &&
1005 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1006 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1007 } else {
1008 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1009 p_sign ^= 1;
1010 p_exp = c.exp;
1011 }
1012 } else {
1013 shift128RightJamming(c_hi, c_lo,
1014 exp_diff,
1015 &c_hi, &c_lo);
1016 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1017 }
1018
1019 if (hi == 0 && lo == 0) {
1020 a.cls = float_class_zero;
1021 a.sign = s->float_rounding_mode == float_round_down;
1022 a.sign ^= sign_flip;
1023 return a;
1024 } else {
1025 int shift;
1026 if (hi != 0) {
1027 shift = clz64(hi);
1028 } else {
1029 shift = clz64(lo) + 64;
1030 }
1031
1032
1033
1034
1035
1036
1037 shift -= 1;
1038 if (shift >= 64) {
1039 lo = lo << (shift - 64);
1040 } else {
1041 hi = (hi << shift) | (lo >> (64 - shift));
1042 lo = hi | ((lo << shift) != 0);
1043 }
1044 p_exp -= shift - 2;
1045 }
1046 }
1047 }
1048
1049 if (flags & float_muladd_halve_result) {
1050 p_exp -= 1;
1051 }
1052
1053
1054 a.cls = float_class_normal;
1055 a.sign = p_sign ^ sign_flip;
1056 a.exp = p_exp;
1057 a.frac = lo;
1058
1059 return a;
1060}
1061
1062float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1063 int flags, float_status *status)
1064{
1065 FloatParts pa = float16_unpack_canonical(a, status);
1066 FloatParts pb = float16_unpack_canonical(b, status);
1067 FloatParts pc = float16_unpack_canonical(c, status);
1068 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1069
1070 return float16_round_pack_canonical(pr, status);
1071}
1072
1073float32 QEMU_FLATTEN float32_muladd(float32 a, float32 b, float32 c,
1074 int flags, float_status *status)
1075{
1076 FloatParts pa = float32_unpack_canonical(a, status);
1077 FloatParts pb = float32_unpack_canonical(b, status);
1078 FloatParts pc = float32_unpack_canonical(c, status);
1079 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1080
1081 return float32_round_pack_canonical(pr, status);
1082}
1083
1084float64 QEMU_FLATTEN float64_muladd(float64 a, float64 b, float64 c,
1085 int flags, float_status *status)
1086{
1087 FloatParts pa = float64_unpack_canonical(a, status);
1088 FloatParts pb = float64_unpack_canonical(b, status);
1089 FloatParts pc = float64_unpack_canonical(c, status);
1090 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1091
1092 return float64_round_pack_canonical(pr, status);
1093}
1094
1095
1096
1097
1098
1099
1100
1101static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1102{
1103 bool sign = a.sign ^ b.sign;
1104
1105 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1106 uint64_t n0, n1, q, r;
1107 int exp = a.exp - b.exp;
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122 if (a.frac < b.frac) {
1123 exp -= 1;
1124 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
1125 } else {
1126 shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
1127 }
1128 q = udiv_qrnnd(&r, n1, n0, b.frac << 1);
1129
1130
1131
1132
1133
1134
1135
1136
1137 a.frac = q | (r != 0);
1138 a.sign = sign;
1139 a.exp = exp;
1140 return a;
1141 }
1142
1143 if (is_nan(a.cls) || is_nan(b.cls)) {
1144 return pick_nan(a, b, s);
1145 }
1146
1147 if (a.cls == b.cls
1148 &&
1149 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1150 s->float_exception_flags |= float_flag_invalid;
1151 return parts_default_nan(s);
1152 }
1153
1154 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1155 a.sign = sign;
1156 return a;
1157 }
1158
1159 if (b.cls == float_class_zero) {
1160 s->float_exception_flags |= float_flag_divbyzero;
1161 a.cls = float_class_inf;
1162 a.sign = sign;
1163 return a;
1164 }
1165
1166 if (b.cls == float_class_inf) {
1167 a.cls = float_class_zero;
1168 a.sign = sign;
1169 return a;
1170 }
1171 g_assert_not_reached();
1172}
1173
1174float16 float16_div(float16 a, float16 b, float_status *status)
1175{
1176 FloatParts pa = float16_unpack_canonical(a, status);
1177 FloatParts pb = float16_unpack_canonical(b, status);
1178 FloatParts pr = div_floats(pa, pb, status);
1179
1180 return float16_round_pack_canonical(pr, status);
1181}
1182
1183float32 float32_div(float32 a, float32 b, float_status *status)
1184{
1185 FloatParts pa = float32_unpack_canonical(a, status);
1186 FloatParts pb = float32_unpack_canonical(b, status);
1187 FloatParts pr = div_floats(pa, pb, status);
1188
1189 return float32_round_pack_canonical(pr, status);
1190}
1191
1192float64 float64_div(float64 a, float64 b, float_status *status)
1193{
1194 FloatParts pa = float64_unpack_canonical(a, status);
1195 FloatParts pb = float64_unpack_canonical(b, status);
1196 FloatParts pr = div_floats(pa, pb, status);
1197
1198 return float64_round_pack_canonical(pr, status);
1199}
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1213 float_status *s)
1214{
1215 if (dstf->arm_althp) {
1216 switch (a.cls) {
1217 case float_class_qnan:
1218 case float_class_snan:
1219
1220
1221
1222 s->float_exception_flags |= float_flag_invalid;
1223 a.cls = float_class_zero;
1224 a.frac = 0;
1225 a.exp = 0;
1226 break;
1227
1228 case float_class_inf:
1229
1230
1231
1232 s->float_exception_flags |= float_flag_invalid;
1233 a.cls = float_class_normal;
1234 a.exp = dstf->exp_max;
1235 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1236 break;
1237
1238 default:
1239 break;
1240 }
1241 } else if (is_nan(a.cls)) {
1242 if (is_snan(a.cls)) {
1243 s->float_exception_flags |= float_flag_invalid;
1244 a = parts_silence_nan(a, s);
1245 }
1246 if (s->default_nan_mode) {
1247 return parts_default_nan(s);
1248 }
1249 }
1250 return a;
1251}
1252
1253float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1254{
1255 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1256 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1257 FloatParts pr = float_to_float(p, &float32_params, s);
1258 return float32_round_pack_canonical(pr, s);
1259}
1260
1261float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1262{
1263 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1264 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1265 FloatParts pr = float_to_float(p, &float64_params, s);
1266 return float64_round_pack_canonical(pr, s);
1267}
1268
1269float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1270{
1271 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1272 FloatParts p = float32_unpack_canonical(a, s);
1273 FloatParts pr = float_to_float(p, fmt16, s);
1274 return float16a_round_pack_canonical(pr, s, fmt16);
1275}
1276
1277float64 float32_to_float64(float32 a, float_status *s)
1278{
1279 FloatParts p = float32_unpack_canonical(a, s);
1280 FloatParts pr = float_to_float(p, &float64_params, s);
1281 return float64_round_pack_canonical(pr, s);
1282}
1283
1284float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1285{
1286 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1287 FloatParts p = float64_unpack_canonical(a, s);
1288 FloatParts pr = float_to_float(p, fmt16, s);
1289 return float16a_round_pack_canonical(pr, s, fmt16);
1290}
1291
1292float32 float64_to_float32(float64 a, float_status *s)
1293{
1294 FloatParts p = float64_unpack_canonical(a, s);
1295 FloatParts pr = float_to_float(p, &float32_params, s);
1296 return float32_round_pack_canonical(pr, s);
1297}
1298
1299
1300
1301
1302
1303
1304
1305
1306static FloatParts round_to_int(FloatParts a, int rmode,
1307 int scale, float_status *s)
1308{
1309 switch (a.cls) {
1310 case float_class_qnan:
1311 case float_class_snan:
1312 return return_nan(a, s);
1313
1314 case float_class_zero:
1315 case float_class_inf:
1316
1317 break;
1318
1319 case float_class_normal:
1320 scale = MIN(MAX(scale, -0x10000), 0x10000);
1321 a.exp += scale;
1322
1323 if (a.exp >= DECOMPOSED_BINARY_POINT) {
1324
1325 break;
1326 }
1327 if (a.exp < 0) {
1328 bool one;
1329
1330 s->float_exception_flags |= float_flag_inexact;
1331 switch (rmode) {
1332 case float_round_nearest_even:
1333 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
1334 break;
1335 case float_round_ties_away:
1336 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
1337 break;
1338 case float_round_to_zero:
1339 one = false;
1340 break;
1341 case float_round_up:
1342 one = !a.sign;
1343 break;
1344 case float_round_down:
1345 one = a.sign;
1346 break;
1347 default:
1348 g_assert_not_reached();
1349 }
1350
1351 if (one) {
1352 a.frac = DECOMPOSED_IMPLICIT_BIT;
1353 a.exp = 0;
1354 } else {
1355 a.cls = float_class_zero;
1356 }
1357 } else {
1358 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
1359 uint64_t frac_lsbm1 = frac_lsb >> 1;
1360 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
1361 uint64_t rnd_mask = rnd_even_mask >> 1;
1362 uint64_t inc;
1363
1364 switch (rmode) {
1365 case float_round_nearest_even:
1366 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
1367 break;
1368 case float_round_ties_away:
1369 inc = frac_lsbm1;
1370 break;
1371 case float_round_to_zero:
1372 inc = 0;
1373 break;
1374 case float_round_up:
1375 inc = a.sign ? 0 : rnd_mask;
1376 break;
1377 case float_round_down:
1378 inc = a.sign ? rnd_mask : 0;
1379 break;
1380 default:
1381 g_assert_not_reached();
1382 }
1383
1384 if (a.frac & rnd_mask) {
1385 s->float_exception_flags |= float_flag_inexact;
1386 a.frac += inc;
1387 a.frac &= ~rnd_mask;
1388 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
1389 a.frac >>= 1;
1390 a.exp++;
1391 }
1392 }
1393 }
1394 break;
1395 default:
1396 g_assert_not_reached();
1397 }
1398 return a;
1399}
1400
1401float16 float16_round_to_int(float16 a, float_status *s)
1402{
1403 FloatParts pa = float16_unpack_canonical(a, s);
1404 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
1405 return float16_round_pack_canonical(pr, s);
1406}
1407
1408float32 float32_round_to_int(float32 a, float_status *s)
1409{
1410 FloatParts pa = float32_unpack_canonical(a, s);
1411 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
1412 return float32_round_pack_canonical(pr, s);
1413}
1414
1415float64 float64_round_to_int(float64 a, float_status *s)
1416{
1417 FloatParts pa = float64_unpack_canonical(a, s);
1418 FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
1419 return float64_round_pack_canonical(pr, s);
1420}
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale,
1434 int64_t min, int64_t max,
1435 float_status *s)
1436{
1437 uint64_t r;
1438 int orig_flags = get_float_exception_flags(s);
1439 FloatParts p = round_to_int(in, rmode, scale, s);
1440
1441 switch (p.cls) {
1442 case float_class_snan:
1443 case float_class_qnan:
1444 s->float_exception_flags = orig_flags | float_flag_invalid;
1445 return max;
1446 case float_class_inf:
1447 s->float_exception_flags = orig_flags | float_flag_invalid;
1448 return p.sign ? min : max;
1449 case float_class_zero:
1450 return 0;
1451 case float_class_normal:
1452 if (p.exp < DECOMPOSED_BINARY_POINT) {
1453 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1454 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1455 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1456 } else {
1457 r = UINT64_MAX;
1458 }
1459 if (p.sign) {
1460 if (r <= -(uint64_t) min) {
1461 return -r;
1462 } else {
1463 s->float_exception_flags = orig_flags | float_flag_invalid;
1464 return min;
1465 }
1466 } else {
1467 if (r <= max) {
1468 return r;
1469 } else {
1470 s->float_exception_flags = orig_flags | float_flag_invalid;
1471 return max;
1472 }
1473 }
1474 default:
1475 g_assert_not_reached();
1476 }
1477}
1478
1479int16_t float16_to_int16_scalbn(float16 a, int rmode, int scale,
1480 float_status *s)
1481{
1482 return round_to_int_and_pack(float16_unpack_canonical(a, s),
1483 rmode, scale, INT16_MIN, INT16_MAX, s);
1484}
1485
1486int32_t float16_to_int32_scalbn(float16 a, int rmode, int scale,
1487 float_status *s)
1488{
1489 return round_to_int_and_pack(float16_unpack_canonical(a, s),
1490 rmode, scale, INT32_MIN, INT32_MAX, s);
1491}
1492
1493int64_t float16_to_int64_scalbn(float16 a, int rmode, int scale,
1494 float_status *s)
1495{
1496 return round_to_int_and_pack(float16_unpack_canonical(a, s),
1497 rmode, scale, INT64_MIN, INT64_MAX, s);
1498}
1499
1500int16_t float32_to_int16_scalbn(float32 a, int rmode, int scale,
1501 float_status *s)
1502{
1503 return round_to_int_and_pack(float32_unpack_canonical(a, s),
1504 rmode, scale, INT16_MIN, INT16_MAX, s);
1505}
1506
1507int32_t float32_to_int32_scalbn(float32 a, int rmode, int scale,
1508 float_status *s)
1509{
1510 return round_to_int_and_pack(float32_unpack_canonical(a, s),
1511 rmode, scale, INT32_MIN, INT32_MAX, s);
1512}
1513
1514int64_t float32_to_int64_scalbn(float32 a, int rmode, int scale,
1515 float_status *s)
1516{
1517 return round_to_int_and_pack(float32_unpack_canonical(a, s),
1518 rmode, scale, INT64_MIN, INT64_MAX, s);
1519}
1520
1521int16_t float64_to_int16_scalbn(float64 a, int rmode, int scale,
1522 float_status *s)
1523{
1524 return round_to_int_and_pack(float64_unpack_canonical(a, s),
1525 rmode, scale, INT16_MIN, INT16_MAX, s);
1526}
1527
1528int32_t float64_to_int32_scalbn(float64 a, int rmode, int scale,
1529 float_status *s)
1530{
1531 return round_to_int_and_pack(float64_unpack_canonical(a, s),
1532 rmode, scale, INT32_MIN, INT32_MAX, s);
1533}
1534
1535int64_t float64_to_int64_scalbn(float64 a, int rmode, int scale,
1536 float_status *s)
1537{
1538 return round_to_int_and_pack(float64_unpack_canonical(a, s),
1539 rmode, scale, INT64_MIN, INT64_MAX, s);
1540}
1541
1542int16_t float16_to_int16(float16 a, float_status *s)
1543{
1544 return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
1545}
1546
1547int32_t float16_to_int32(float16 a, float_status *s)
1548{
1549 return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
1550}
1551
1552int64_t float16_to_int64(float16 a, float_status *s)
1553{
1554 return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
1555}
1556
1557int16_t float32_to_int16(float32 a, float_status *s)
1558{
1559 return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
1560}
1561
1562int32_t float32_to_int32(float32 a, float_status *s)
1563{
1564 return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
1565}
1566
1567int64_t float32_to_int64(float32 a, float_status *s)
1568{
1569 return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
1570}
1571
1572int16_t float64_to_int16(float64 a, float_status *s)
1573{
1574 return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
1575}
1576
1577int32_t float64_to_int32(float64 a, float_status *s)
1578{
1579 return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
1580}
1581
1582int64_t float64_to_int64(float64 a, float_status *s)
1583{
1584 return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
1585}
1586
1587int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
1588{
1589 return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
1590}
1591
1592int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
1593{
1594 return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
1595}
1596
1597int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
1598{
1599 return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
1600}
1601
1602int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
1603{
1604 return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
1605}
1606
1607int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
1608{
1609 return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
1610}
1611
1612int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
1613{
1614 return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
1615}
1616
1617int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
1618{
1619 return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
1620}
1621
1622int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
1623{
1624 return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
1625}
1626
1627int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
1628{
1629 return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
1630}
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale,
1646 uint64_t max, float_status *s)
1647{
1648 int orig_flags = get_float_exception_flags(s);
1649 FloatParts p = round_to_int(in, rmode, scale, s);
1650 uint64_t r;
1651
1652 switch (p.cls) {
1653 case float_class_snan:
1654 case float_class_qnan:
1655 s->float_exception_flags = orig_flags | float_flag_invalid;
1656 return max;
1657 case float_class_inf:
1658 s->float_exception_flags = orig_flags | float_flag_invalid;
1659 return p.sign ? 0 : max;
1660 case float_class_zero:
1661 return 0;
1662 case float_class_normal:
1663 if (p.sign) {
1664 s->float_exception_flags = orig_flags | float_flag_invalid;
1665 return 0;
1666 }
1667
1668 if (p.exp < DECOMPOSED_BINARY_POINT) {
1669 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1670 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1671 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1672 } else {
1673 s->float_exception_flags = orig_flags | float_flag_invalid;
1674 return max;
1675 }
1676
1677
1678
1679
1680
1681 if (r > max) {
1682 s->float_exception_flags = orig_flags | float_flag_invalid;
1683 return max;
1684 }
1685 return r;
1686 default:
1687 g_assert_not_reached();
1688 }
1689}
1690
1691uint16_t float16_to_uint16_scalbn(float16 a, int rmode, int scale,
1692 float_status *s)
1693{
1694 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
1695 rmode, scale, UINT16_MAX, s);
1696}
1697
1698uint32_t float16_to_uint32_scalbn(float16 a, int rmode, int scale,
1699 float_status *s)
1700{
1701 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
1702 rmode, scale, UINT32_MAX, s);
1703}
1704
1705uint64_t float16_to_uint64_scalbn(float16 a, int rmode, int scale,
1706 float_status *s)
1707{
1708 return round_to_uint_and_pack(float16_unpack_canonical(a, s),
1709 rmode, scale, UINT64_MAX, s);
1710}
1711
1712uint16_t float32_to_uint16_scalbn(float32 a, int rmode, int scale,
1713 float_status *s)
1714{
1715 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
1716 rmode, scale, UINT16_MAX, s);
1717}
1718
1719uint32_t float32_to_uint32_scalbn(float32 a, int rmode, int scale,
1720 float_status *s)
1721{
1722 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
1723 rmode, scale, UINT32_MAX, s);
1724}
1725
1726uint64_t float32_to_uint64_scalbn(float32 a, int rmode, int scale,
1727 float_status *s)
1728{
1729 return round_to_uint_and_pack(float32_unpack_canonical(a, s),
1730 rmode, scale, UINT64_MAX, s);
1731}
1732
1733uint16_t float64_to_uint16_scalbn(float64 a, int rmode, int scale,
1734 float_status *s)
1735{
1736 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
1737 rmode, scale, UINT16_MAX, s);
1738}
1739
1740uint32_t float64_to_uint32_scalbn(float64 a, int rmode, int scale,
1741 float_status *s)
1742{
1743 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
1744 rmode, scale, UINT32_MAX, s);
1745}
1746
1747uint64_t float64_to_uint64_scalbn(float64 a, int rmode, int scale,
1748 float_status *s)
1749{
1750 return round_to_uint_and_pack(float64_unpack_canonical(a, s),
1751 rmode, scale, UINT64_MAX, s);
1752}
1753
1754uint16_t float16_to_uint16(float16 a, float_status *s)
1755{
1756 return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
1757}
1758
1759uint32_t float16_to_uint32(float16 a, float_status *s)
1760{
1761 return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
1762}
1763
1764uint64_t float16_to_uint64(float16 a, float_status *s)
1765{
1766 return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
1767}
1768
1769uint16_t float32_to_uint16(float32 a, float_status *s)
1770{
1771 return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
1772}
1773
1774uint32_t float32_to_uint32(float32 a, float_status *s)
1775{
1776 return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
1777}
1778
1779uint64_t float32_to_uint64(float32 a, float_status *s)
1780{
1781 return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
1782}
1783
1784uint16_t float64_to_uint16(float64 a, float_status *s)
1785{
1786 return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
1787}
1788
1789uint32_t float64_to_uint32(float64 a, float_status *s)
1790{
1791 return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
1792}
1793
1794uint64_t float64_to_uint64(float64 a, float_status *s)
1795{
1796 return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
1797}
1798
1799uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
1800{
1801 return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
1802}
1803
1804uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
1805{
1806 return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
1807}
1808
1809uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
1810{
1811 return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
1812}
1813
1814uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
1815{
1816 return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
1817}
1818
1819uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
1820{
1821 return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
1822}
1823
1824uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
1825{
1826 return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
1827}
1828
1829uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
1830{
1831 return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
1832}
1833
1834uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
1835{
1836 return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
1837}
1838
1839uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
1840{
1841 return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
1842}
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852static FloatParts int_to_float(int64_t a, int scale, float_status *status)
1853{
1854 FloatParts r = { .sign = false };
1855
1856 if (a == 0) {
1857 r.cls = float_class_zero;
1858 } else {
1859 uint64_t f = a;
1860 int shift;
1861
1862 r.cls = float_class_normal;
1863 if (a < 0) {
1864 f = -f;
1865 r.sign = true;
1866 }
1867 shift = clz64(f) - 1;
1868 scale = MIN(MAX(scale, -0x10000), 0x10000);
1869
1870 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
1871 r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
1872 }
1873
1874 return r;
1875}
1876
1877float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
1878{
1879 FloatParts pa = int_to_float(a, scale, status);
1880 return float16_round_pack_canonical(pa, status);
1881}
1882
1883float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
1884{
1885 return int64_to_float16_scalbn(a, scale, status);
1886}
1887
1888float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
1889{
1890 return int64_to_float16_scalbn(a, scale, status);
1891}
1892
1893float16 int64_to_float16(int64_t a, float_status *status)
1894{
1895 return int64_to_float16_scalbn(a, 0, status);
1896}
1897
1898float16 int32_to_float16(int32_t a, float_status *status)
1899{
1900 return int64_to_float16_scalbn(a, 0, status);
1901}
1902
1903float16 int16_to_float16(int16_t a, float_status *status)
1904{
1905 return int64_to_float16_scalbn(a, 0, status);
1906}
1907
1908float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
1909{
1910 FloatParts pa = int_to_float(a, scale, status);
1911 return float32_round_pack_canonical(pa, status);
1912}
1913
1914float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
1915{
1916 return int64_to_float32_scalbn(a, scale, status);
1917}
1918
1919float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
1920{
1921 return int64_to_float32_scalbn(a, scale, status);
1922}
1923
1924float32 int64_to_float32(int64_t a, float_status *status)
1925{
1926 return int64_to_float32_scalbn(a, 0, status);
1927}
1928
1929float32 int32_to_float32(int32_t a, float_status *status)
1930{
1931 return int64_to_float32_scalbn(a, 0, status);
1932}
1933
1934float32 int16_to_float32(int16_t a, float_status *status)
1935{
1936 return int64_to_float32_scalbn(a, 0, status);
1937}
1938
1939float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
1940{
1941 FloatParts pa = int_to_float(a, scale, status);
1942 return float64_round_pack_canonical(pa, status);
1943}
1944
1945float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
1946{
1947 return int64_to_float64_scalbn(a, scale, status);
1948}
1949
1950float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
1951{
1952 return int64_to_float64_scalbn(a, scale, status);
1953}
1954
1955float64 int64_to_float64(int64_t a, float_status *status)
1956{
1957 return int64_to_float64_scalbn(a, 0, status);
1958}
1959
1960float64 int32_to_float64(int32_t a, float_status *status)
1961{
1962 return int64_to_float64_scalbn(a, 0, status);
1963}
1964
1965float64 int16_to_float64(int16_t a, float_status *status)
1966{
1967 return int64_to_float64_scalbn(a, 0, status);
1968}
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
1980{
1981 FloatParts r = { .sign = false };
1982
1983 if (a == 0) {
1984 r.cls = float_class_zero;
1985 } else {
1986 scale = MIN(MAX(scale, -0x10000), 0x10000);
1987 r.cls = float_class_normal;
1988 if ((int64_t)a < 0) {
1989 r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
1990 shift64RightJamming(a, 1, &a);
1991 r.frac = a;
1992 } else {
1993 int shift = clz64(a) - 1;
1994 r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
1995 r.frac = a << shift;
1996 }
1997 }
1998
1999 return r;
2000}
2001
2002float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
2003{
2004 FloatParts pa = uint_to_float(a, scale, status);
2005 return float16_round_pack_canonical(pa, status);
2006}
2007
2008float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
2009{
2010 return uint64_to_float16_scalbn(a, scale, status);
2011}
2012
2013float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
2014{
2015 return uint64_to_float16_scalbn(a, scale, status);
2016}
2017
2018float16 uint64_to_float16(uint64_t a, float_status *status)
2019{
2020 return uint64_to_float16_scalbn(a, 0, status);
2021}
2022
2023float16 uint32_to_float16(uint32_t a, float_status *status)
2024{
2025 return uint64_to_float16_scalbn(a, 0, status);
2026}
2027
2028float16 uint16_to_float16(uint16_t a, float_status *status)
2029{
2030 return uint64_to_float16_scalbn(a, 0, status);
2031}
2032
2033float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
2034{
2035 FloatParts pa = uint_to_float(a, scale, status);
2036 return float32_round_pack_canonical(pa, status);
2037}
2038
2039float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
2040{
2041 return uint64_to_float32_scalbn(a, scale, status);
2042}
2043
2044float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
2045{
2046 return uint64_to_float32_scalbn(a, scale, status);
2047}
2048
2049float32 uint64_to_float32(uint64_t a, float_status *status)
2050{
2051 return uint64_to_float32_scalbn(a, 0, status);
2052}
2053
2054float32 uint32_to_float32(uint32_t a, float_status *status)
2055{
2056 return uint64_to_float32_scalbn(a, 0, status);
2057}
2058
2059float32 uint16_to_float32(uint16_t a, float_status *status)
2060{
2061 return uint64_to_float32_scalbn(a, 0, status);
2062}
2063
2064float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
2065{
2066 FloatParts pa = uint_to_float(a, scale, status);
2067 return float64_round_pack_canonical(pa, status);
2068}
2069
2070float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
2071{
2072 return uint64_to_float64_scalbn(a, scale, status);
2073}
2074
2075float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
2076{
2077 return uint64_to_float64_scalbn(a, scale, status);
2078}
2079
2080float64 uint64_to_float64(uint64_t a, float_status *status)
2081{
2082 return uint64_to_float64_scalbn(a, 0, status);
2083}
2084
2085float64 uint32_to_float64(uint32_t a, float_status *status)
2086{
2087 return uint64_to_float64_scalbn(a, 0, status);
2088}
2089
2090float64 uint16_to_float64(uint16_t a, float_status *status)
2091{
2092 return uint64_to_float64_scalbn(a, 0, status);
2093}
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
2112 bool ieee, bool ismag, float_status *s)
2113{
2114 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
2115 if (ieee) {
2116
2117
2118
2119
2120
2121 if (is_snan(a.cls) || is_snan(b.cls)) {
2122 return pick_nan(a, b, s);
2123 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
2124 return b;
2125 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
2126 return a;
2127 }
2128 }
2129 return pick_nan(a, b, s);
2130 } else {
2131 int a_exp, b_exp;
2132
2133 switch (a.cls) {
2134 case float_class_normal:
2135 a_exp = a.exp;
2136 break;
2137 case float_class_inf:
2138 a_exp = INT_MAX;
2139 break;
2140 case float_class_zero:
2141 a_exp = INT_MIN;
2142 break;
2143 default:
2144 g_assert_not_reached();
2145 break;
2146 }
2147 switch (b.cls) {
2148 case float_class_normal:
2149 b_exp = b.exp;
2150 break;
2151 case float_class_inf:
2152 b_exp = INT_MAX;
2153 break;
2154 case float_class_zero:
2155 b_exp = INT_MIN;
2156 break;
2157 default:
2158 g_assert_not_reached();
2159 break;
2160 }
2161
2162 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
2163 bool a_less = a_exp < b_exp;
2164 if (a_exp == b_exp) {
2165 a_less = a.frac < b.frac;
2166 }
2167 return a_less ^ ismin ? b : a;
2168 }
2169
2170 if (a.sign == b.sign) {
2171 bool a_less = a_exp < b_exp;
2172 if (a_exp == b_exp) {
2173 a_less = a.frac < b.frac;
2174 }
2175 return a.sign ^ a_less ^ ismin ? b : a;
2176 } else {
2177 return a.sign ^ ismin ? b : a;
2178 }
2179 }
2180}
2181
2182#define MINMAX(sz, name, ismin, isiee, ismag) \
2183float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
2184 float_status *s) \
2185{ \
2186 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2187 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2188 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
2189 \
2190 return float ## sz ## _round_pack_canonical(pr, s); \
2191}
2192
2193MINMAX(16, min, true, false, false)
2194MINMAX(16, minnum, true, true, false)
2195MINMAX(16, minnummag, true, true, true)
2196MINMAX(16, max, false, false, false)
2197MINMAX(16, maxnum, false, true, false)
2198MINMAX(16, maxnummag, false, true, true)
2199
2200MINMAX(32, min, true, false, false)
2201MINMAX(32, minnum, true, true, false)
2202MINMAX(32, minnummag, true, true, true)
2203MINMAX(32, max, false, false, false)
2204MINMAX(32, maxnum, false, true, false)
2205MINMAX(32, maxnummag, false, true, true)
2206
2207MINMAX(64, min, true, false, false)
2208MINMAX(64, minnum, true, true, false)
2209MINMAX(64, minnummag, true, true, true)
2210MINMAX(64, max, false, false, false)
2211MINMAX(64, maxnum, false, true, false)
2212MINMAX(64, maxnummag, false, true, true)
2213
2214#undef MINMAX
2215
2216
2217static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
2218 float_status *s)
2219{
2220 if (is_nan(a.cls) || is_nan(b.cls)) {
2221 if (!is_quiet ||
2222 a.cls == float_class_snan ||
2223 b.cls == float_class_snan) {
2224 s->float_exception_flags |= float_flag_invalid;
2225 }
2226 return float_relation_unordered;
2227 }
2228
2229 if (a.cls == float_class_zero) {
2230 if (b.cls == float_class_zero) {
2231 return float_relation_equal;
2232 }
2233 return b.sign ? float_relation_greater : float_relation_less;
2234 } else if (b.cls == float_class_zero) {
2235 return a.sign ? float_relation_less : float_relation_greater;
2236 }
2237
2238
2239
2240
2241 if (a.cls == float_class_inf) {
2242 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
2243 return float_relation_equal;
2244 }
2245 return a.sign ? float_relation_less : float_relation_greater;
2246 } else if (b.cls == float_class_inf) {
2247 return b.sign ? float_relation_greater : float_relation_less;
2248 }
2249
2250 if (a.sign != b.sign) {
2251 return a.sign ? float_relation_less : float_relation_greater;
2252 }
2253
2254 if (a.exp == b.exp) {
2255 if (a.frac == b.frac) {
2256 return float_relation_equal;
2257 }
2258 if (a.sign) {
2259 return a.frac > b.frac ?
2260 float_relation_less : float_relation_greater;
2261 } else {
2262 return a.frac > b.frac ?
2263 float_relation_greater : float_relation_less;
2264 }
2265 } else {
2266 if (a.sign) {
2267 return a.exp > b.exp ? float_relation_less : float_relation_greater;
2268 } else {
2269 return a.exp > b.exp ? float_relation_greater : float_relation_less;
2270 }
2271 }
2272}
2273
2274#define COMPARE(sz) \
2275int float ## sz ## _compare(float ## sz a, float ## sz b, \
2276 float_status *s) \
2277{ \
2278 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2279 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2280 return compare_floats(pa, pb, false, s); \
2281} \
2282int float ## sz ## _compare_quiet(float ## sz a, float ## sz b, \
2283 float_status *s) \
2284{ \
2285 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2286 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2287 return compare_floats(pa, pb, true, s); \
2288}
2289
2290COMPARE(16)
2291COMPARE(32)
2292COMPARE(64)
2293
2294#undef COMPARE
2295
2296
2297static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
2298{
2299 if (unlikely(is_nan(a.cls))) {
2300 return return_nan(a, s);
2301 }
2302 if (a.cls == float_class_normal) {
2303
2304
2305
2306
2307
2308 n = MIN(MAX(n, -0x10000), 0x10000);
2309 a.exp += n;
2310 }
2311 return a;
2312}
2313
2314float16 float16_scalbn(float16 a, int n, float_status *status)
2315{
2316 FloatParts pa = float16_unpack_canonical(a, status);
2317 FloatParts pr = scalbn_decomposed(pa, n, status);
2318 return float16_round_pack_canonical(pr, status);
2319}
2320
2321float32 float32_scalbn(float32 a, int n, float_status *status)
2322{
2323 FloatParts pa = float32_unpack_canonical(a, status);
2324 FloatParts pr = scalbn_decomposed(pa, n, status);
2325 return float32_round_pack_canonical(pr, status);
2326}
2327
2328float64 float64_scalbn(float64 a, int n, float_status *status)
2329{
2330 FloatParts pa = float64_unpack_canonical(a, status);
2331 FloatParts pr = scalbn_decomposed(pa, n, status);
2332 return float64_round_pack_canonical(pr, status);
2333}
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
2348{
2349 uint64_t a_frac, r_frac, s_frac;
2350 int bit, last_bit;
2351
2352 if (is_nan(a.cls)) {
2353 return return_nan(a, s);
2354 }
2355 if (a.cls == float_class_zero) {
2356 return a;
2357 }
2358 if (a.sign) {
2359 s->float_exception_flags |= float_flag_invalid;
2360 return parts_default_nan(s);
2361 }
2362 if (a.cls == float_class_inf) {
2363 return a;
2364 }
2365
2366 assert(a.cls == float_class_normal);
2367
2368
2369
2370
2371
2372
2373 a_frac = a.frac;
2374 if (!(a.exp & 1)) {
2375 a_frac >>= 1;
2376 }
2377 a.exp >>= 1;
2378
2379
2380 r_frac = 0;
2381 s_frac = 0;
2382
2383
2384
2385
2386
2387 bit = DECOMPOSED_BINARY_POINT - 1;
2388 last_bit = MAX(p->frac_shift - 4, 0);
2389 do {
2390 uint64_t q = 1ULL << bit;
2391 uint64_t t_frac = s_frac + q;
2392 if (t_frac <= a_frac) {
2393 s_frac = t_frac + q;
2394 a_frac -= t_frac;
2395 r_frac += q;
2396 }
2397 a_frac <<= 1;
2398 } while (--bit >= last_bit);
2399
2400
2401
2402
2403 a.frac = (r_frac << 1) + (a_frac != 0);
2404
2405 return a;
2406}
2407
2408float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
2409{
2410 FloatParts pa = float16_unpack_canonical(a, status);
2411 FloatParts pr = sqrt_float(pa, status, &float16_params);
2412 return float16_round_pack_canonical(pr, status);
2413}
2414
2415float32 QEMU_FLATTEN float32_sqrt(float32 a, float_status *status)
2416{
2417 FloatParts pa = float32_unpack_canonical(a, status);
2418 FloatParts pr = sqrt_float(pa, status, &float32_params);
2419 return float32_round_pack_canonical(pr, status);
2420}
2421
2422float64 QEMU_FLATTEN float64_sqrt(float64 a, float_status *status)
2423{
2424 FloatParts pa = float64_unpack_canonical(a, status);
2425 FloatParts pr = sqrt_float(pa, status, &float64_params);
2426 return float64_round_pack_canonical(pr, status);
2427}
2428
2429
2430
2431
2432
2433float16 float16_default_nan(float_status *status)
2434{
2435 FloatParts p = parts_default_nan(status);
2436 p.frac >>= float16_params.frac_shift;
2437 return float16_pack_raw(p);
2438}
2439
2440float32 float32_default_nan(float_status *status)
2441{
2442 FloatParts p = parts_default_nan(status);
2443 p.frac >>= float32_params.frac_shift;
2444 return float32_pack_raw(p);
2445}
2446
2447float64 float64_default_nan(float_status *status)
2448{
2449 FloatParts p = parts_default_nan(status);
2450 p.frac >>= float64_params.frac_shift;
2451 return float64_pack_raw(p);
2452}
2453
2454float128 float128_default_nan(float_status *status)
2455{
2456 FloatParts p = parts_default_nan(status);
2457 float128 r;
2458
2459
2460
2461
2462
2463 r.low = -(p.frac & 1);
2464 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
2465 r.high |= LIT64(0x7FFF000000000000);
2466 r.high |= (uint64_t)p.sign << 63;
2467
2468 return r;
2469}
2470
2471
2472
2473
2474
2475float16 float16_silence_nan(float16 a, float_status *status)
2476{
2477 FloatParts p = float16_unpack_raw(a);
2478 p.frac <<= float16_params.frac_shift;
2479 p = parts_silence_nan(p, status);
2480 p.frac >>= float16_params.frac_shift;
2481 return float16_pack_raw(p);
2482}
2483
2484float32 float32_silence_nan(float32 a, float_status *status)
2485{
2486 FloatParts p = float32_unpack_raw(a);
2487 p.frac <<= float32_params.frac_shift;
2488 p = parts_silence_nan(p, status);
2489 p.frac >>= float32_params.frac_shift;
2490 return float32_pack_raw(p);
2491}
2492
2493float64 float64_silence_nan(float64 a, float_status *status)
2494{
2495 FloatParts p = float64_unpack_raw(a);
2496 p.frac <<= float64_params.frac_shift;
2497 p = parts_silence_nan(p, status);
2498 p.frac >>= float64_params.frac_shift;
2499 return float64_pack_raw(p);
2500}
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
2514{
2515 int8_t roundingMode;
2516 flag roundNearestEven;
2517 int8_t roundIncrement, roundBits;
2518 int32_t z;
2519
2520 roundingMode = status->float_rounding_mode;
2521 roundNearestEven = ( roundingMode == float_round_nearest_even );
2522 switch (roundingMode) {
2523 case float_round_nearest_even:
2524 case float_round_ties_away:
2525 roundIncrement = 0x40;
2526 break;
2527 case float_round_to_zero:
2528 roundIncrement = 0;
2529 break;
2530 case float_round_up:
2531 roundIncrement = zSign ? 0 : 0x7f;
2532 break;
2533 case float_round_down:
2534 roundIncrement = zSign ? 0x7f : 0;
2535 break;
2536 default:
2537 abort();
2538 }
2539 roundBits = absZ & 0x7F;
2540 absZ = ( absZ + roundIncrement )>>7;
2541 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2542 z = absZ;
2543 if ( zSign ) z = - z;
2544 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
2545 float_raise(float_flag_invalid, status);
2546 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
2547 }
2548 if (roundBits) {
2549 status->float_exception_flags |= float_flag_inexact;
2550 }
2551 return z;
2552
2553}
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
2568 float_status *status)
2569{
2570 int8_t roundingMode;
2571 flag roundNearestEven, increment;
2572 int64_t z;
2573
2574 roundingMode = status->float_rounding_mode;
2575 roundNearestEven = ( roundingMode == float_round_nearest_even );
2576 switch (roundingMode) {
2577 case float_round_nearest_even:
2578 case float_round_ties_away:
2579 increment = ((int64_t) absZ1 < 0);
2580 break;
2581 case float_round_to_zero:
2582 increment = 0;
2583 break;
2584 case float_round_up:
2585 increment = !zSign && absZ1;
2586 break;
2587 case float_round_down:
2588 increment = zSign && absZ1;
2589 break;
2590 default:
2591 abort();
2592 }
2593 if ( increment ) {
2594 ++absZ0;
2595 if ( absZ0 == 0 ) goto overflow;
2596 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
2597 }
2598 z = absZ0;
2599 if ( zSign ) z = - z;
2600 if ( z && ( ( z < 0 ) ^ zSign ) ) {
2601 overflow:
2602 float_raise(float_flag_invalid, status);
2603 return
2604 zSign ? (int64_t) LIT64( 0x8000000000000000 )
2605 : LIT64( 0x7FFFFFFFFFFFFFFF );
2606 }
2607 if (absZ1) {
2608 status->float_exception_flags |= float_flag_inexact;
2609 }
2610 return z;
2611
2612}
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
2625 uint64_t absZ1, float_status *status)
2626{
2627 int8_t roundingMode;
2628 flag roundNearestEven, increment;
2629
2630 roundingMode = status->float_rounding_mode;
2631 roundNearestEven = (roundingMode == float_round_nearest_even);
2632 switch (roundingMode) {
2633 case float_round_nearest_even:
2634 case float_round_ties_away:
2635 increment = ((int64_t)absZ1 < 0);
2636 break;
2637 case float_round_to_zero:
2638 increment = 0;
2639 break;
2640 case float_round_up:
2641 increment = !zSign && absZ1;
2642 break;
2643 case float_round_down:
2644 increment = zSign && absZ1;
2645 break;
2646 default:
2647 abort();
2648 }
2649 if (increment) {
2650 ++absZ0;
2651 if (absZ0 == 0) {
2652 float_raise(float_flag_invalid, status);
2653 return LIT64(0xFFFFFFFFFFFFFFFF);
2654 }
2655 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
2656 }
2657
2658 if (zSign && absZ0) {
2659 float_raise(float_flag_invalid, status);
2660 return 0;
2661 }
2662
2663 if (absZ1) {
2664 status->float_exception_flags |= float_flag_inexact;
2665 }
2666 return absZ0;
2667}
2668
2669
2670
2671
2672
2673float32 float32_squash_input_denormal(float32 a, float_status *status)
2674{
2675 if (status->flush_inputs_to_zero) {
2676 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
2677 float_raise(float_flag_input_denormal, status);
2678 return make_float32(float32_val(a) & 0x80000000);
2679 }
2680 }
2681 return a;
2682}
2683
2684
2685
2686
2687
2688
2689
2690
2691static void
2692 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
2693{
2694 int8_t shiftCount;
2695
2696 shiftCount = clz32(aSig) - 8;
2697 *zSigPtr = aSig<<shiftCount;
2698 *zExpPtr = 1 - shiftCount;
2699
2700}
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
2725 float_status *status)
2726{
2727 int8_t roundingMode;
2728 flag roundNearestEven;
2729 int8_t roundIncrement, roundBits;
2730 flag isTiny;
2731
2732 roundingMode = status->float_rounding_mode;
2733 roundNearestEven = ( roundingMode == float_round_nearest_even );
2734 switch (roundingMode) {
2735 case float_round_nearest_even:
2736 case float_round_ties_away:
2737 roundIncrement = 0x40;
2738 break;
2739 case float_round_to_zero:
2740 roundIncrement = 0;
2741 break;
2742 case float_round_up:
2743 roundIncrement = zSign ? 0 : 0x7f;
2744 break;
2745 case float_round_down:
2746 roundIncrement = zSign ? 0x7f : 0;
2747 break;
2748 default:
2749 abort();
2750 break;
2751 }
2752 roundBits = zSig & 0x7F;
2753 if ( 0xFD <= (uint16_t) zExp ) {
2754 if ( ( 0xFD < zExp )
2755 || ( ( zExp == 0xFD )
2756 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
2757 ) {
2758 float_raise(float_flag_overflow | float_flag_inexact, status);
2759 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
2760 }
2761 if ( zExp < 0 ) {
2762 if (status->flush_to_zero) {
2763 float_raise(float_flag_output_denormal, status);
2764 return packFloat32(zSign, 0, 0);
2765 }
2766 isTiny =
2767 (status->float_detect_tininess
2768 == float_tininess_before_rounding)
2769 || ( zExp < -1 )
2770 || ( zSig + roundIncrement < 0x80000000 );
2771 shift32RightJamming( zSig, - zExp, &zSig );
2772 zExp = 0;
2773 roundBits = zSig & 0x7F;
2774 if (isTiny && roundBits) {
2775 float_raise(float_flag_underflow, status);
2776 }
2777 }
2778 }
2779 if (roundBits) {
2780 status->float_exception_flags |= float_flag_inexact;
2781 }
2782 zSig = ( zSig + roundIncrement )>>7;
2783 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2784 if ( zSig == 0 ) zExp = 0;
2785 return packFloat32( zSign, zExp, zSig );
2786
2787}
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798static float32
2799 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
2800 float_status *status)
2801{
2802 int8_t shiftCount;
2803
2804 shiftCount = clz32(zSig) - 1;
2805 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
2806 status);
2807
2808}
2809
2810
2811
2812
2813
2814float64 float64_squash_input_denormal(float64 a, float_status *status)
2815{
2816 if (status->flush_inputs_to_zero) {
2817 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
2818 float_raise(float_flag_input_denormal, status);
2819 return make_float64(float64_val(a) & (1ULL << 63));
2820 }
2821 }
2822 return a;
2823}
2824
2825
2826
2827
2828
2829
2830
2831
2832static void
2833 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
2834{
2835 int8_t shiftCount;
2836
2837 shiftCount = clz64(aSig) - 11;
2838 *zSigPtr = aSig<<shiftCount;
2839 *zExpPtr = 1 - shiftCount;
2840
2841}
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
2855{
2856
2857 return make_float64(
2858 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
2859
2860}
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
2885 float_status *status)
2886{
2887 int8_t roundingMode;
2888 flag roundNearestEven;
2889 int roundIncrement, roundBits;
2890 flag isTiny;
2891
2892 roundingMode = status->float_rounding_mode;
2893 roundNearestEven = ( roundingMode == float_round_nearest_even );
2894 switch (roundingMode) {
2895 case float_round_nearest_even:
2896 case float_round_ties_away:
2897 roundIncrement = 0x200;
2898 break;
2899 case float_round_to_zero:
2900 roundIncrement = 0;
2901 break;
2902 case float_round_up:
2903 roundIncrement = zSign ? 0 : 0x3ff;
2904 break;
2905 case float_round_down:
2906 roundIncrement = zSign ? 0x3ff : 0;
2907 break;
2908 case float_round_to_odd:
2909 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2910 break;
2911 default:
2912 abort();
2913 }
2914 roundBits = zSig & 0x3FF;
2915 if ( 0x7FD <= (uint16_t) zExp ) {
2916 if ( ( 0x7FD < zExp )
2917 || ( ( zExp == 0x7FD )
2918 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
2919 ) {
2920 bool overflow_to_inf = roundingMode != float_round_to_odd &&
2921 roundIncrement != 0;
2922 float_raise(float_flag_overflow | float_flag_inexact, status);
2923 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
2924 }
2925 if ( zExp < 0 ) {
2926 if (status->flush_to_zero) {
2927 float_raise(float_flag_output_denormal, status);
2928 return packFloat64(zSign, 0, 0);
2929 }
2930 isTiny =
2931 (status->float_detect_tininess
2932 == float_tininess_before_rounding)
2933 || ( zExp < -1 )
2934 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
2935 shift64RightJamming( zSig, - zExp, &zSig );
2936 zExp = 0;
2937 roundBits = zSig & 0x3FF;
2938 if (isTiny && roundBits) {
2939 float_raise(float_flag_underflow, status);
2940 }
2941 if (roundingMode == float_round_to_odd) {
2942
2943
2944
2945
2946 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2947 }
2948 }
2949 }
2950 if (roundBits) {
2951 status->float_exception_flags |= float_flag_inexact;
2952 }
2953 zSig = ( zSig + roundIncrement )>>10;
2954 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
2955 if ( zSig == 0 ) zExp = 0;
2956 return packFloat64( zSign, zExp, zSig );
2957
2958}
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969static float64
2970 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
2971 float_status *status)
2972{
2973 int8_t shiftCount;
2974
2975 shiftCount = clz64(zSig) - 1;
2976 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
2977 status);
2978
2979}
2980
2981
2982
2983
2984
2985
2986
2987
2988void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
2989 uint64_t *zSigPtr)
2990{
2991 int8_t shiftCount;
2992
2993 shiftCount = clz64(aSig);
2994 *zSigPtr = aSig<<shiftCount;
2995 *zExpPtr = 1 - shiftCount;
2996}
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
3023 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
3024 float_status *status)
3025{
3026 int8_t roundingMode;
3027 flag roundNearestEven, increment, isTiny;
3028 int64_t roundIncrement, roundMask, roundBits;
3029
3030 roundingMode = status->float_rounding_mode;
3031 roundNearestEven = ( roundingMode == float_round_nearest_even );
3032 if ( roundingPrecision == 80 ) goto precision80;
3033 if ( roundingPrecision == 64 ) {
3034 roundIncrement = LIT64( 0x0000000000000400 );
3035 roundMask = LIT64( 0x00000000000007FF );
3036 }
3037 else if ( roundingPrecision == 32 ) {
3038 roundIncrement = LIT64( 0x0000008000000000 );
3039 roundMask = LIT64( 0x000000FFFFFFFFFF );
3040 }
3041 else {
3042 goto precision80;
3043 }
3044 zSig0 |= ( zSig1 != 0 );
3045 switch (roundingMode) {
3046 case float_round_nearest_even:
3047 case float_round_ties_away:
3048 break;
3049 case float_round_to_zero:
3050 roundIncrement = 0;
3051 break;
3052 case float_round_up:
3053 roundIncrement = zSign ? 0 : roundMask;
3054 break;
3055 case float_round_down:
3056 roundIncrement = zSign ? roundMask : 0;
3057 break;
3058 default:
3059 abort();
3060 }
3061 roundBits = zSig0 & roundMask;
3062 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
3063 if ( ( 0x7FFE < zExp )
3064 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
3065 ) {
3066 goto overflow;
3067 }
3068 if ( zExp <= 0 ) {
3069 if (status->flush_to_zero) {
3070 float_raise(float_flag_output_denormal, status);
3071 return packFloatx80(zSign, 0, 0);
3072 }
3073 isTiny =
3074 (status->float_detect_tininess
3075 == float_tininess_before_rounding)
3076 || ( zExp < 0 )
3077 || ( zSig0 <= zSig0 + roundIncrement );
3078 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
3079 zExp = 0;
3080 roundBits = zSig0 & roundMask;
3081 if (isTiny && roundBits) {
3082 float_raise(float_flag_underflow, status);
3083 }
3084 if (roundBits) {
3085 status->float_exception_flags |= float_flag_inexact;
3086 }
3087 zSig0 += roundIncrement;
3088 if ( (int64_t) zSig0 < 0 ) zExp = 1;
3089 roundIncrement = roundMask + 1;
3090 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3091 roundMask |= roundIncrement;
3092 }
3093 zSig0 &= ~ roundMask;
3094 return packFloatx80( zSign, zExp, zSig0 );
3095 }
3096 }
3097 if (roundBits) {
3098 status->float_exception_flags |= float_flag_inexact;
3099 }
3100 zSig0 += roundIncrement;
3101 if ( zSig0 < roundIncrement ) {
3102 ++zExp;
3103 zSig0 = LIT64( 0x8000000000000000 );
3104 }
3105 roundIncrement = roundMask + 1;
3106 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
3107 roundMask |= roundIncrement;
3108 }
3109 zSig0 &= ~ roundMask;
3110 if ( zSig0 == 0 ) zExp = 0;
3111 return packFloatx80( zSign, zExp, zSig0 );
3112 precision80:
3113 switch (roundingMode) {
3114 case float_round_nearest_even:
3115 case float_round_ties_away:
3116 increment = ((int64_t)zSig1 < 0);
3117 break;
3118 case float_round_to_zero:
3119 increment = 0;
3120 break;
3121 case float_round_up:
3122 increment = !zSign && zSig1;
3123 break;
3124 case float_round_down:
3125 increment = zSign && zSig1;
3126 break;
3127 default:
3128 abort();
3129 }
3130 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
3131 if ( ( 0x7FFE < zExp )
3132 || ( ( zExp == 0x7FFE )
3133 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
3134 && increment
3135 )
3136 ) {
3137 roundMask = 0;
3138 overflow:
3139 float_raise(float_flag_overflow | float_flag_inexact, status);
3140 if ( ( roundingMode == float_round_to_zero )
3141 || ( zSign && ( roundingMode == float_round_up ) )
3142 || ( ! zSign && ( roundingMode == float_round_down ) )
3143 ) {
3144 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
3145 }
3146 return packFloatx80(zSign,
3147 floatx80_infinity_high,
3148 floatx80_infinity_low);
3149 }
3150 if ( zExp <= 0 ) {
3151 isTiny =
3152 (status->float_detect_tininess
3153 == float_tininess_before_rounding)
3154 || ( zExp < 0 )
3155 || ! increment
3156 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
3157 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
3158 zExp = 0;
3159 if (isTiny && zSig1) {
3160 float_raise(float_flag_underflow, status);
3161 }
3162 if (zSig1) {
3163 status->float_exception_flags |= float_flag_inexact;
3164 }
3165 switch (roundingMode) {
3166 case float_round_nearest_even:
3167 case float_round_ties_away:
3168 increment = ((int64_t)zSig1 < 0);
3169 break;
3170 case float_round_to_zero:
3171 increment = 0;
3172 break;
3173 case float_round_up:
3174 increment = !zSign && zSig1;
3175 break;
3176 case float_round_down:
3177 increment = zSign && zSig1;
3178 break;
3179 default:
3180 abort();
3181 }
3182 if ( increment ) {
3183 ++zSig0;
3184 zSig0 &=
3185 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
3186 if ( (int64_t) zSig0 < 0 ) zExp = 1;
3187 }
3188 return packFloatx80( zSign, zExp, zSig0 );
3189 }
3190 }
3191 if (zSig1) {
3192 status->float_exception_flags |= float_flag_inexact;
3193 }
3194 if ( increment ) {
3195 ++zSig0;
3196 if ( zSig0 == 0 ) {
3197 ++zExp;
3198 zSig0 = LIT64( 0x8000000000000000 );
3199 }
3200 else {
3201 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
3202 }
3203 }
3204 else {
3205 if ( zSig0 == 0 ) zExp = 0;
3206 }
3207 return packFloatx80( zSign, zExp, zSig0 );
3208
3209}
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
3221 flag zSign, int32_t zExp,
3222 uint64_t zSig0, uint64_t zSig1,
3223 float_status *status)
3224{
3225 int8_t shiftCount;
3226
3227 if ( zSig0 == 0 ) {
3228 zSig0 = zSig1;
3229 zSig1 = 0;
3230 zExp -= 64;
3231 }
3232 shiftCount = clz64(zSig0);
3233 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3234 zExp -= shiftCount;
3235 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
3236 zSig0, zSig1, status);
3237
3238}
3239
3240
3241
3242
3243
3244
3245static inline uint64_t extractFloat128Frac1( float128 a )
3246{
3247
3248 return a.low;
3249
3250}
3251
3252
3253
3254
3255
3256
3257static inline uint64_t extractFloat128Frac0( float128 a )
3258{
3259
3260 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
3261
3262}
3263
3264
3265
3266
3267
3268
3269static inline int32_t extractFloat128Exp( float128 a )
3270{
3271
3272 return ( a.high>>48 ) & 0x7FFF;
3273
3274}
3275
3276
3277
3278
3279
3280static inline flag extractFloat128Sign( float128 a )
3281{
3282
3283 return a.high>>63;
3284
3285}
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297static void
3298 normalizeFloat128Subnormal(
3299 uint64_t aSig0,
3300 uint64_t aSig1,
3301 int32_t *zExpPtr,
3302 uint64_t *zSig0Ptr,
3303 uint64_t *zSig1Ptr
3304 )
3305{
3306 int8_t shiftCount;
3307
3308 if ( aSig0 == 0 ) {
3309 shiftCount = clz64(aSig1) - 15;
3310 if ( shiftCount < 0 ) {
3311 *zSig0Ptr = aSig1>>( - shiftCount );
3312 *zSig1Ptr = aSig1<<( shiftCount & 63 );
3313 }
3314 else {
3315 *zSig0Ptr = aSig1<<shiftCount;
3316 *zSig1Ptr = 0;
3317 }
3318 *zExpPtr = - shiftCount - 63;
3319 }
3320 else {
3321 shiftCount = clz64(aSig0) - 15;
3322 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
3323 *zExpPtr = 1 - shiftCount;
3324 }
3325
3326}
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341static inline float128
3342 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
3343{
3344 float128 z;
3345
3346 z.low = zSig1;
3347 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
3348 return z;
3349
3350}
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
3374 uint64_t zSig0, uint64_t zSig1,
3375 uint64_t zSig2, float_status *status)
3376{
3377 int8_t roundingMode;
3378 flag roundNearestEven, increment, isTiny;
3379
3380 roundingMode = status->float_rounding_mode;
3381 roundNearestEven = ( roundingMode == float_round_nearest_even );
3382 switch (roundingMode) {
3383 case float_round_nearest_even:
3384 case float_round_ties_away:
3385 increment = ((int64_t)zSig2 < 0);
3386 break;
3387 case float_round_to_zero:
3388 increment = 0;
3389 break;
3390 case float_round_up:
3391 increment = !zSign && zSig2;
3392 break;
3393 case float_round_down:
3394 increment = zSign && zSig2;
3395 break;
3396 case float_round_to_odd:
3397 increment = !(zSig1 & 0x1) && zSig2;
3398 break;
3399 default:
3400 abort();
3401 }
3402 if ( 0x7FFD <= (uint32_t) zExp ) {
3403 if ( ( 0x7FFD < zExp )
3404 || ( ( zExp == 0x7FFD )
3405 && eq128(
3406 LIT64( 0x0001FFFFFFFFFFFF ),
3407 LIT64( 0xFFFFFFFFFFFFFFFF ),
3408 zSig0,
3409 zSig1
3410 )
3411 && increment
3412 )
3413 ) {
3414 float_raise(float_flag_overflow | float_flag_inexact, status);
3415 if ( ( roundingMode == float_round_to_zero )
3416 || ( zSign && ( roundingMode == float_round_up ) )
3417 || ( ! zSign && ( roundingMode == float_round_down ) )
3418 || (roundingMode == float_round_to_odd)
3419 ) {
3420 return
3421 packFloat128(
3422 zSign,
3423 0x7FFE,
3424 LIT64( 0x0000FFFFFFFFFFFF ),
3425 LIT64( 0xFFFFFFFFFFFFFFFF )
3426 );
3427 }
3428 return packFloat128( zSign, 0x7FFF, 0, 0 );
3429 }
3430 if ( zExp < 0 ) {
3431 if (status->flush_to_zero) {
3432 float_raise(float_flag_output_denormal, status);
3433 return packFloat128(zSign, 0, 0, 0);
3434 }
3435 isTiny =
3436 (status->float_detect_tininess
3437 == float_tininess_before_rounding)
3438 || ( zExp < -1 )
3439 || ! increment
3440 || lt128(
3441 zSig0,
3442 zSig1,
3443 LIT64( 0x0001FFFFFFFFFFFF ),
3444 LIT64( 0xFFFFFFFFFFFFFFFF )
3445 );
3446 shift128ExtraRightJamming(
3447 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
3448 zExp = 0;
3449 if (isTiny && zSig2) {
3450 float_raise(float_flag_underflow, status);
3451 }
3452 switch (roundingMode) {
3453 case float_round_nearest_even:
3454 case float_round_ties_away:
3455 increment = ((int64_t)zSig2 < 0);
3456 break;
3457 case float_round_to_zero:
3458 increment = 0;
3459 break;
3460 case float_round_up:
3461 increment = !zSign && zSig2;
3462 break;
3463 case float_round_down:
3464 increment = zSign && zSig2;
3465 break;
3466 case float_round_to_odd:
3467 increment = !(zSig1 & 0x1) && zSig2;
3468 break;
3469 default:
3470 abort();
3471 }
3472 }
3473 }
3474 if (zSig2) {
3475 status->float_exception_flags |= float_flag_inexact;
3476 }
3477 if ( increment ) {
3478 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
3479 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
3480 }
3481 else {
3482 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
3483 }
3484 return packFloat128( zSign, zExp, zSig0, zSig1 );
3485
3486}
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
3499 uint64_t zSig0, uint64_t zSig1,
3500 float_status *status)
3501{
3502 int8_t shiftCount;
3503 uint64_t zSig2;
3504
3505 if ( zSig0 == 0 ) {
3506 zSig0 = zSig1;
3507 zSig1 = 0;
3508 zExp -= 64;
3509 }
3510 shiftCount = clz64(zSig0) - 15;
3511 if ( 0 <= shiftCount ) {
3512 zSig2 = 0;
3513 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3514 }
3515 else {
3516 shift128ExtraRightJamming(
3517 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
3518 }
3519 zExp -= shiftCount;
3520 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
3521
3522}
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532floatx80 int32_to_floatx80(int32_t a, float_status *status)
3533{
3534 flag zSign;
3535 uint32_t absA;
3536 int8_t shiftCount;
3537 uint64_t zSig;
3538
3539 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3540 zSign = ( a < 0 );
3541 absA = zSign ? - a : a;
3542 shiftCount = clz32(absA) + 32;
3543 zSig = absA;
3544 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
3545
3546}
3547
3548
3549
3550
3551
3552
3553
3554float128 int32_to_float128(int32_t a, float_status *status)
3555{
3556 flag zSign;
3557 uint32_t absA;
3558 int8_t shiftCount;
3559 uint64_t zSig0;
3560
3561 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3562 zSign = ( a < 0 );
3563 absA = zSign ? - a : a;
3564 shiftCount = clz32(absA) + 17;
3565 zSig0 = absA;
3566 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
3567
3568}
3569
3570
3571
3572
3573
3574
3575
3576
3577floatx80 int64_to_floatx80(int64_t a, float_status *status)
3578{
3579 flag zSign;
3580 uint64_t absA;
3581 int8_t shiftCount;
3582
3583 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3584 zSign = ( a < 0 );
3585 absA = zSign ? - a : a;
3586 shiftCount = clz64(absA);
3587 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
3588
3589}
3590
3591
3592
3593
3594
3595
3596
3597float128 int64_to_float128(int64_t a, float_status *status)
3598{
3599 flag zSign;
3600 uint64_t absA;
3601 int8_t shiftCount;
3602 int32_t zExp;
3603 uint64_t zSig0, zSig1;
3604
3605 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3606 zSign = ( a < 0 );
3607 absA = zSign ? - a : a;
3608 shiftCount = clz64(absA) + 49;
3609 zExp = 0x406E - shiftCount;
3610 if ( 64 <= shiftCount ) {
3611 zSig1 = 0;
3612 zSig0 = absA;
3613 shiftCount -= 64;
3614 }
3615 else {
3616 zSig1 = absA;
3617 zSig0 = 0;
3618 }
3619 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3620 return packFloat128( zSign, zExp, zSig0, zSig1 );
3621
3622}
3623
3624
3625
3626
3627
3628
3629
3630float128 uint64_to_float128(uint64_t a, float_status *status)
3631{
3632 if (a == 0) {
3633 return float128_zero;
3634 }
3635 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
3636}
3637
3638
3639
3640
3641
3642
3643
3644
3645floatx80 float32_to_floatx80(float32 a, float_status *status)
3646{
3647 flag aSign;
3648 int aExp;
3649 uint32_t aSig;
3650
3651 a = float32_squash_input_denormal(a, status);
3652 aSig = extractFloat32Frac( a );
3653 aExp = extractFloat32Exp( a );
3654 aSign = extractFloat32Sign( a );
3655 if ( aExp == 0xFF ) {
3656 if (aSig) {
3657 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
3658 }
3659 return packFloatx80(aSign,
3660 floatx80_infinity_high,
3661 floatx80_infinity_low);
3662 }
3663 if ( aExp == 0 ) {
3664 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3665 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3666 }
3667 aSig |= 0x00800000;
3668 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
3669
3670}
3671
3672
3673
3674
3675
3676
3677
3678
3679float128 float32_to_float128(float32 a, float_status *status)
3680{
3681 flag aSign;
3682 int aExp;
3683 uint32_t aSig;
3684
3685 a = float32_squash_input_denormal(a, status);
3686 aSig = extractFloat32Frac( a );
3687 aExp = extractFloat32Exp( a );
3688 aSign = extractFloat32Sign( a );
3689 if ( aExp == 0xFF ) {
3690 if (aSig) {
3691 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
3692 }
3693 return packFloat128( aSign, 0x7FFF, 0, 0 );
3694 }
3695 if ( aExp == 0 ) {
3696 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3697 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3698 --aExp;
3699 }
3700 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
3701
3702}
3703
3704
3705
3706
3707
3708
3709
3710float32 float32_rem(float32 a, float32 b, float_status *status)
3711{
3712 flag aSign, zSign;
3713 int aExp, bExp, expDiff;
3714 uint32_t aSig, bSig;
3715 uint32_t q;
3716 uint64_t aSig64, bSig64, q64;
3717 uint32_t alternateASig;
3718 int32_t sigMean;
3719 a = float32_squash_input_denormal(a, status);
3720 b = float32_squash_input_denormal(b, status);
3721
3722 aSig = extractFloat32Frac( a );
3723 aExp = extractFloat32Exp( a );
3724 aSign = extractFloat32Sign( a );
3725 bSig = extractFloat32Frac( b );
3726 bExp = extractFloat32Exp( b );
3727 if ( aExp == 0xFF ) {
3728 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
3729 return propagateFloat32NaN(a, b, status);
3730 }
3731 float_raise(float_flag_invalid, status);
3732 return float32_default_nan(status);
3733 }
3734 if ( bExp == 0xFF ) {
3735 if (bSig) {
3736 return propagateFloat32NaN(a, b, status);
3737 }
3738 return a;
3739 }
3740 if ( bExp == 0 ) {
3741 if ( bSig == 0 ) {
3742 float_raise(float_flag_invalid, status);
3743 return float32_default_nan(status);
3744 }
3745 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
3746 }
3747 if ( aExp == 0 ) {
3748 if ( aSig == 0 ) return a;
3749 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3750 }
3751 expDiff = aExp - bExp;
3752 aSig |= 0x00800000;
3753 bSig |= 0x00800000;
3754 if ( expDiff < 32 ) {
3755 aSig <<= 8;
3756 bSig <<= 8;
3757 if ( expDiff < 0 ) {
3758 if ( expDiff < -1 ) return a;
3759 aSig >>= 1;
3760 }
3761 q = ( bSig <= aSig );
3762 if ( q ) aSig -= bSig;
3763 if ( 0 < expDiff ) {
3764 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
3765 q >>= 32 - expDiff;
3766 bSig >>= 2;
3767 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3768 }
3769 else {
3770 aSig >>= 2;
3771 bSig >>= 2;
3772 }
3773 }
3774 else {
3775 if ( bSig <= aSig ) aSig -= bSig;
3776 aSig64 = ( (uint64_t) aSig )<<40;
3777 bSig64 = ( (uint64_t) bSig )<<40;
3778 expDiff -= 64;
3779 while ( 0 < expDiff ) {
3780 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3781 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3782 aSig64 = - ( ( bSig * q64 )<<38 );
3783 expDiff -= 62;
3784 }
3785 expDiff += 64;
3786 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3787 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3788 q = q64>>( 64 - expDiff );
3789 bSig <<= 6;
3790 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
3791 }
3792 do {
3793 alternateASig = aSig;
3794 ++q;
3795 aSig -= bSig;
3796 } while ( 0 <= (int32_t) aSig );
3797 sigMean = aSig + alternateASig;
3798 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3799 aSig = alternateASig;
3800 }
3801 zSign = ( (int32_t) aSig < 0 );
3802 if ( zSign ) aSig = - aSig;
3803 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
3804}
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826static const float64 float32_exp2_coefficients[15] =
3827{
3828 const_float64( 0x3ff0000000000000ll ),
3829 const_float64( 0x3fe0000000000000ll ),
3830 const_float64( 0x3fc5555555555555ll ),
3831 const_float64( 0x3fa5555555555555ll ),
3832 const_float64( 0x3f81111111111111ll ),
3833 const_float64( 0x3f56c16c16c16c17ll ),
3834 const_float64( 0x3f2a01a01a01a01all ),
3835 const_float64( 0x3efa01a01a01a01all ),
3836 const_float64( 0x3ec71de3a556c734ll ),
3837 const_float64( 0x3e927e4fb7789f5cll ),
3838 const_float64( 0x3e5ae64567f544e4ll ),
3839 const_float64( 0x3e21eed8eff8d898ll ),
3840 const_float64( 0x3de6124613a86d09ll ),
3841 const_float64( 0x3da93974a8c07c9dll ),
3842 const_float64( 0x3d6ae7f3e733b81fll ),
3843};
3844
3845float32 float32_exp2(float32 a, float_status *status)
3846{
3847 flag aSign;
3848 int aExp;
3849 uint32_t aSig;
3850 float64 r, x, xn;
3851 int i;
3852 a = float32_squash_input_denormal(a, status);
3853
3854 aSig = extractFloat32Frac( a );
3855 aExp = extractFloat32Exp( a );
3856 aSign = extractFloat32Sign( a );
3857
3858 if ( aExp == 0xFF) {
3859 if (aSig) {
3860 return propagateFloat32NaN(a, float32_zero, status);
3861 }
3862 return (aSign) ? float32_zero : a;
3863 }
3864 if (aExp == 0) {
3865 if (aSig == 0) return float32_one;
3866 }
3867
3868 float_raise(float_flag_inexact, status);
3869
3870
3871
3872
3873 x = float32_to_float64(a, status);
3874 x = float64_mul(x, float64_ln2, status);
3875
3876 xn = x;
3877 r = float64_one;
3878 for (i = 0 ; i < 15 ; i++) {
3879 float64 f;
3880
3881 f = float64_mul(xn, float32_exp2_coefficients[i], status);
3882 r = float64_add(r, f, status);
3883
3884 xn = float64_mul(xn, x, status);
3885 }
3886
3887 return float64_to_float32(r, status);
3888}
3889
3890
3891
3892
3893
3894
3895float32 float32_log2(float32 a, float_status *status)
3896{
3897 flag aSign, zSign;
3898 int aExp;
3899 uint32_t aSig, zSig, i;
3900
3901 a = float32_squash_input_denormal(a, status);
3902 aSig = extractFloat32Frac( a );
3903 aExp = extractFloat32Exp( a );
3904 aSign = extractFloat32Sign( a );
3905
3906 if ( aExp == 0 ) {
3907 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
3908 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3909 }
3910 if ( aSign ) {
3911 float_raise(float_flag_invalid, status);
3912 return float32_default_nan(status);
3913 }
3914 if ( aExp == 0xFF ) {
3915 if (aSig) {
3916 return propagateFloat32NaN(a, float32_zero, status);
3917 }
3918 return a;
3919 }
3920
3921 aExp -= 0x7F;
3922 aSig |= 0x00800000;
3923 zSign = aExp < 0;
3924 zSig = aExp << 23;
3925
3926 for (i = 1 << 22; i > 0; i >>= 1) {
3927 aSig = ( (uint64_t)aSig * aSig ) >> 23;
3928 if ( aSig & 0x01000000 ) {
3929 aSig >>= 1;
3930 zSig |= i;
3931 }
3932 }
3933
3934 if ( zSign )
3935 zSig = -zSig;
3936
3937 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
3938}
3939
3940
3941
3942
3943
3944
3945
3946
3947int float32_eq(float32 a, float32 b, float_status *status)
3948{
3949 uint32_t av, bv;
3950 a = float32_squash_input_denormal(a, status);
3951 b = float32_squash_input_denormal(b, status);
3952
3953 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3954 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3955 ) {
3956 float_raise(float_flag_invalid, status);
3957 return 0;
3958 }
3959 av = float32_val(a);
3960 bv = float32_val(b);
3961 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
3962}
3963
3964
3965
3966
3967
3968
3969
3970
3971int float32_le(float32 a, float32 b, float_status *status)
3972{
3973 flag aSign, bSign;
3974 uint32_t av, bv;
3975 a = float32_squash_input_denormal(a, status);
3976 b = float32_squash_input_denormal(b, status);
3977
3978 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3979 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3980 ) {
3981 float_raise(float_flag_invalid, status);
3982 return 0;
3983 }
3984 aSign = extractFloat32Sign( a );
3985 bSign = extractFloat32Sign( b );
3986 av = float32_val(a);
3987 bv = float32_val(b);
3988 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
3989 return ( av == bv ) || ( aSign ^ ( av < bv ) );
3990
3991}
3992
3993
3994
3995
3996
3997
3998
3999
4000int float32_lt(float32 a, float32 b, float_status *status)
4001{
4002 flag aSign, bSign;
4003 uint32_t av, bv;
4004 a = float32_squash_input_denormal(a, status);
4005 b = float32_squash_input_denormal(b, status);
4006
4007 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4008 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4009 ) {
4010 float_raise(float_flag_invalid, status);
4011 return 0;
4012 }
4013 aSign = extractFloat32Sign( a );
4014 bSign = extractFloat32Sign( b );
4015 av = float32_val(a);
4016 bv = float32_val(b);
4017 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
4018 return ( av != bv ) && ( aSign ^ ( av < bv ) );
4019
4020}
4021
4022
4023
4024
4025
4026
4027
4028
4029int float32_unordered(float32 a, float32 b, float_status *status)
4030{
4031 a = float32_squash_input_denormal(a, status);
4032 b = float32_squash_input_denormal(b, status);
4033
4034 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4035 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4036 ) {
4037 float_raise(float_flag_invalid, status);
4038 return 1;
4039 }
4040 return 0;
4041}
4042
4043
4044
4045
4046
4047
4048
4049
4050int float32_eq_quiet(float32 a, float32 b, float_status *status)
4051{
4052 a = float32_squash_input_denormal(a, status);
4053 b = float32_squash_input_denormal(b, status);
4054
4055 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4056 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4057 ) {
4058 if (float32_is_signaling_nan(a, status)
4059 || float32_is_signaling_nan(b, status)) {
4060 float_raise(float_flag_invalid, status);
4061 }
4062 return 0;
4063 }
4064 return ( float32_val(a) == float32_val(b) ) ||
4065 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
4066}
4067
4068
4069
4070
4071
4072
4073
4074
4075int float32_le_quiet(float32 a, float32 b, float_status *status)
4076{
4077 flag aSign, bSign;
4078 uint32_t av, bv;
4079 a = float32_squash_input_denormal(a, status);
4080 b = float32_squash_input_denormal(b, status);
4081
4082 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4083 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4084 ) {
4085 if (float32_is_signaling_nan(a, status)
4086 || float32_is_signaling_nan(b, status)) {
4087 float_raise(float_flag_invalid, status);
4088 }
4089 return 0;
4090 }
4091 aSign = extractFloat32Sign( a );
4092 bSign = extractFloat32Sign( b );
4093 av = float32_val(a);
4094 bv = float32_val(b);
4095 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
4096 return ( av == bv ) || ( aSign ^ ( av < bv ) );
4097
4098}
4099
4100
4101
4102
4103
4104
4105
4106
4107int float32_lt_quiet(float32 a, float32 b, float_status *status)
4108{
4109 flag aSign, bSign;
4110 uint32_t av, bv;
4111 a = float32_squash_input_denormal(a, status);
4112 b = float32_squash_input_denormal(b, status);
4113
4114 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4115 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4116 ) {
4117 if (float32_is_signaling_nan(a, status)
4118 || float32_is_signaling_nan(b, status)) {
4119 float_raise(float_flag_invalid, status);
4120 }
4121 return 0;
4122 }
4123 aSign = extractFloat32Sign( a );
4124 bSign = extractFloat32Sign( b );
4125 av = float32_val(a);
4126 bv = float32_val(b);
4127 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
4128 return ( av != bv ) && ( aSign ^ ( av < bv ) );
4129
4130}
4131
4132
4133
4134
4135
4136
4137
4138
4139int float32_unordered_quiet(float32 a, float32 b, float_status *status)
4140{
4141 a = float32_squash_input_denormal(a, status);
4142 b = float32_squash_input_denormal(b, status);
4143
4144 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
4145 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
4146 ) {
4147 if (float32_is_signaling_nan(a, status)
4148 || float32_is_signaling_nan(b, status)) {
4149 float_raise(float_flag_invalid, status);
4150 }
4151 return 1;
4152 }
4153 return 0;
4154}
4155
4156
4157
4158
4159
4160float16 float16_squash_input_denormal(float16 a, float_status *status)
4161{
4162 if (status->flush_inputs_to_zero) {
4163 if (extractFloat16Exp(a) == 0 && extractFloat16Frac(a) != 0) {
4164 float_raise(float_flag_input_denormal, status);
4165 return make_float16(float16_val(a) & 0x8000);
4166 }
4167 }
4168 return a;
4169}
4170
4171
4172
4173
4174
4175
4176
4177
4178floatx80 float64_to_floatx80(float64 a, float_status *status)
4179{
4180 flag aSign;
4181 int aExp;
4182 uint64_t aSig;
4183
4184 a = float64_squash_input_denormal(a, status);
4185 aSig = extractFloat64Frac( a );
4186 aExp = extractFloat64Exp( a );
4187 aSign = extractFloat64Sign( a );
4188 if ( aExp == 0x7FF ) {
4189 if (aSig) {
4190 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
4191 }
4192 return packFloatx80(aSign,
4193 floatx80_infinity_high,
4194 floatx80_infinity_low);
4195 }
4196 if ( aExp == 0 ) {
4197 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4198 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4199 }
4200 return
4201 packFloatx80(
4202 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
4203
4204}
4205
4206
4207
4208
4209
4210
4211
4212
4213float128 float64_to_float128(float64 a, float_status *status)
4214{
4215 flag aSign;
4216 int aExp;
4217 uint64_t aSig, zSig0, zSig1;
4218
4219 a = float64_squash_input_denormal(a, status);
4220 aSig = extractFloat64Frac( a );
4221 aExp = extractFloat64Exp( a );
4222 aSign = extractFloat64Sign( a );
4223 if ( aExp == 0x7FF ) {
4224 if (aSig) {
4225 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
4226 }
4227 return packFloat128( aSign, 0x7FFF, 0, 0 );
4228 }
4229 if ( aExp == 0 ) {
4230 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4231 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4232 --aExp;
4233 }
4234 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
4235 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
4236
4237}
4238
4239
4240
4241
4242
4243
4244
4245
4246float64 float64_rem(float64 a, float64 b, float_status *status)
4247{
4248 flag aSign, zSign;
4249 int aExp, bExp, expDiff;
4250 uint64_t aSig, bSig;
4251 uint64_t q, alternateASig;
4252 int64_t sigMean;
4253
4254 a = float64_squash_input_denormal(a, status);
4255 b = float64_squash_input_denormal(b, status);
4256 aSig = extractFloat64Frac( a );
4257 aExp = extractFloat64Exp( a );
4258 aSign = extractFloat64Sign( a );
4259 bSig = extractFloat64Frac( b );
4260 bExp = extractFloat64Exp( b );
4261 if ( aExp == 0x7FF ) {
4262 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
4263 return propagateFloat64NaN(a, b, status);
4264 }
4265 float_raise(float_flag_invalid, status);
4266 return float64_default_nan(status);
4267 }
4268 if ( bExp == 0x7FF ) {
4269 if (bSig) {
4270 return propagateFloat64NaN(a, b, status);
4271 }
4272 return a;
4273 }
4274 if ( bExp == 0 ) {
4275 if ( bSig == 0 ) {
4276 float_raise(float_flag_invalid, status);
4277 return float64_default_nan(status);
4278 }
4279 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4280 }
4281 if ( aExp == 0 ) {
4282 if ( aSig == 0 ) return a;
4283 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4284 }
4285 expDiff = aExp - bExp;
4286 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
4287 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4288 if ( expDiff < 0 ) {
4289 if ( expDiff < -1 ) return a;
4290 aSig >>= 1;
4291 }
4292 q = ( bSig <= aSig );
4293 if ( q ) aSig -= bSig;
4294 expDiff -= 64;
4295 while ( 0 < expDiff ) {
4296 q = estimateDiv128To64( aSig, 0, bSig );
4297 q = ( 2 < q ) ? q - 2 : 0;
4298 aSig = - ( ( bSig>>2 ) * q );
4299 expDiff -= 62;
4300 }
4301 expDiff += 64;
4302 if ( 0 < expDiff ) {
4303 q = estimateDiv128To64( aSig, 0, bSig );
4304 q = ( 2 < q ) ? q - 2 : 0;
4305 q >>= 64 - expDiff;
4306 bSig >>= 2;
4307 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4308 }
4309 else {
4310 aSig >>= 2;
4311 bSig >>= 2;
4312 }
4313 do {
4314 alternateASig = aSig;
4315 ++q;
4316 aSig -= bSig;
4317 } while ( 0 <= (int64_t) aSig );
4318 sigMean = aSig + alternateASig;
4319 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4320 aSig = alternateASig;
4321 }
4322 zSign = ( (int64_t) aSig < 0 );
4323 if ( zSign ) aSig = - aSig;
4324 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
4325
4326}
4327
4328
4329
4330
4331
4332
4333float64 float64_log2(float64 a, float_status *status)
4334{
4335 flag aSign, zSign;
4336 int aExp;
4337 uint64_t aSig, aSig0, aSig1, zSig, i;
4338 a = float64_squash_input_denormal(a, status);
4339
4340 aSig = extractFloat64Frac( a );
4341 aExp = extractFloat64Exp( a );
4342 aSign = extractFloat64Sign( a );
4343
4344 if ( aExp == 0 ) {
4345 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
4346 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4347 }
4348 if ( aSign ) {
4349 float_raise(float_flag_invalid, status);
4350 return float64_default_nan(status);
4351 }
4352 if ( aExp == 0x7FF ) {
4353 if (aSig) {
4354 return propagateFloat64NaN(a, float64_zero, status);
4355 }
4356 return a;
4357 }
4358
4359 aExp -= 0x3FF;
4360 aSig |= LIT64( 0x0010000000000000 );
4361 zSign = aExp < 0;
4362 zSig = (uint64_t)aExp << 52;
4363 for (i = 1LL << 51; i > 0; i >>= 1) {
4364 mul64To128( aSig, aSig, &aSig0, &aSig1 );
4365 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
4366 if ( aSig & LIT64( 0x0020000000000000 ) ) {
4367 aSig >>= 1;
4368 zSig |= i;
4369 }
4370 }
4371
4372 if ( zSign )
4373 zSig = -zSig;
4374 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
4375}
4376
4377
4378
4379
4380
4381
4382
4383
4384int float64_eq(float64 a, float64 b, float_status *status)
4385{
4386 uint64_t av, bv;
4387 a = float64_squash_input_denormal(a, status);
4388 b = float64_squash_input_denormal(b, status);
4389
4390 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4391 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4392 ) {
4393 float_raise(float_flag_invalid, status);
4394 return 0;
4395 }
4396 av = float64_val(a);
4397 bv = float64_val(b);
4398 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4399
4400}
4401
4402
4403
4404
4405
4406
4407
4408
4409int float64_le(float64 a, float64 b, float_status *status)
4410{
4411 flag aSign, bSign;
4412 uint64_t av, bv;
4413 a = float64_squash_input_denormal(a, status);
4414 b = float64_squash_input_denormal(b, status);
4415
4416 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4417 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4418 ) {
4419 float_raise(float_flag_invalid, status);
4420 return 0;
4421 }
4422 aSign = extractFloat64Sign( a );
4423 bSign = extractFloat64Sign( b );
4424 av = float64_val(a);
4425 bv = float64_val(b);
4426 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4427 return ( av == bv ) || ( aSign ^ ( av < bv ) );
4428
4429}
4430
4431
4432
4433
4434
4435
4436
4437
4438int float64_lt(float64 a, float64 b, float_status *status)
4439{
4440 flag aSign, bSign;
4441 uint64_t av, bv;
4442
4443 a = float64_squash_input_denormal(a, status);
4444 b = float64_squash_input_denormal(b, status);
4445 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4446 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4447 ) {
4448 float_raise(float_flag_invalid, status);
4449 return 0;
4450 }
4451 aSign = extractFloat64Sign( a );
4452 bSign = extractFloat64Sign( b );
4453 av = float64_val(a);
4454 bv = float64_val(b);
4455 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
4456 return ( av != bv ) && ( aSign ^ ( av < bv ) );
4457
4458}
4459
4460
4461
4462
4463
4464
4465
4466
4467int float64_unordered(float64 a, float64 b, float_status *status)
4468{
4469 a = float64_squash_input_denormal(a, status);
4470 b = float64_squash_input_denormal(b, status);
4471
4472 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4473 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4474 ) {
4475 float_raise(float_flag_invalid, status);
4476 return 1;
4477 }
4478 return 0;
4479}
4480
4481
4482
4483
4484
4485
4486
4487
4488int float64_eq_quiet(float64 a, float64 b, float_status *status)
4489{
4490 uint64_t av, bv;
4491 a = float64_squash_input_denormal(a, status);
4492 b = float64_squash_input_denormal(b, status);
4493
4494 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4495 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4496 ) {
4497 if (float64_is_signaling_nan(a, status)
4498 || float64_is_signaling_nan(b, status)) {
4499 float_raise(float_flag_invalid, status);
4500 }
4501 return 0;
4502 }
4503 av = float64_val(a);
4504 bv = float64_val(b);
4505 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4506
4507}
4508
4509
4510
4511
4512
4513
4514
4515
4516int float64_le_quiet(float64 a, float64 b, float_status *status)
4517{
4518 flag aSign, bSign;
4519 uint64_t av, bv;
4520 a = float64_squash_input_denormal(a, status);
4521 b = float64_squash_input_denormal(b, status);
4522
4523 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4524 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4525 ) {
4526 if (float64_is_signaling_nan(a, status)
4527 || float64_is_signaling_nan(b, status)) {
4528 float_raise(float_flag_invalid, status);
4529 }
4530 return 0;
4531 }
4532 aSign = extractFloat64Sign( a );
4533 bSign = extractFloat64Sign( b );
4534 av = float64_val(a);
4535 bv = float64_val(b);
4536 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4537 return ( av == bv ) || ( aSign ^ ( av < bv ) );
4538
4539}
4540
4541
4542
4543
4544
4545
4546
4547
4548int float64_lt_quiet(float64 a, float64 b, float_status *status)
4549{
4550 flag aSign, bSign;
4551 uint64_t av, bv;
4552 a = float64_squash_input_denormal(a, status);
4553 b = float64_squash_input_denormal(b, status);
4554
4555 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4556 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4557 ) {
4558 if (float64_is_signaling_nan(a, status)
4559 || float64_is_signaling_nan(b, status)) {
4560 float_raise(float_flag_invalid, status);
4561 }
4562 return 0;
4563 }
4564 aSign = extractFloat64Sign( a );
4565 bSign = extractFloat64Sign( b );
4566 av = float64_val(a);
4567 bv = float64_val(b);
4568 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
4569 return ( av != bv ) && ( aSign ^ ( av < bv ) );
4570
4571}
4572
4573
4574
4575
4576
4577
4578
4579
4580int float64_unordered_quiet(float64 a, float64 b, float_status *status)
4581{
4582 a = float64_squash_input_denormal(a, status);
4583 b = float64_squash_input_denormal(b, status);
4584
4585 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4586 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4587 ) {
4588 if (float64_is_signaling_nan(a, status)
4589 || float64_is_signaling_nan(b, status)) {
4590 float_raise(float_flag_invalid, status);
4591 }
4592 return 1;
4593 }
4594 return 0;
4595}
4596
4597
4598
4599
4600
4601
4602
4603
4604
4605
4606
4607int32_t floatx80_to_int32(floatx80 a, float_status *status)
4608{
4609 flag aSign;
4610 int32_t aExp, shiftCount;
4611 uint64_t aSig;
4612
4613 if (floatx80_invalid_encoding(a)) {
4614 float_raise(float_flag_invalid, status);
4615 return 1 << 31;
4616 }
4617 aSig = extractFloatx80Frac( a );
4618 aExp = extractFloatx80Exp( a );
4619 aSign = extractFloatx80Sign( a );
4620 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
4621 shiftCount = 0x4037 - aExp;
4622 if ( shiftCount <= 0 ) shiftCount = 1;
4623 shift64RightJamming( aSig, shiftCount, &aSig );
4624 return roundAndPackInt32(aSign, aSig, status);
4625
4626}
4627
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637
4638int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
4639{
4640 flag aSign;
4641 int32_t aExp, shiftCount;
4642 uint64_t aSig, savedASig;
4643 int32_t z;
4644
4645 if (floatx80_invalid_encoding(a)) {
4646 float_raise(float_flag_invalid, status);
4647 return 1 << 31;
4648 }
4649 aSig = extractFloatx80Frac( a );
4650 aExp = extractFloatx80Exp( a );
4651 aSign = extractFloatx80Sign( a );
4652 if ( 0x401E < aExp ) {
4653 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
4654 goto invalid;
4655 }
4656 else if ( aExp < 0x3FFF ) {
4657 if (aExp || aSig) {
4658 status->float_exception_flags |= float_flag_inexact;
4659 }
4660 return 0;
4661 }
4662 shiftCount = 0x403E - aExp;
4663 savedASig = aSig;
4664 aSig >>= shiftCount;
4665 z = aSig;
4666 if ( aSign ) z = - z;
4667 if ( ( z < 0 ) ^ aSign ) {
4668 invalid:
4669 float_raise(float_flag_invalid, status);
4670 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
4671 }
4672 if ( ( aSig<<shiftCount ) != savedASig ) {
4673 status->float_exception_flags |= float_flag_inexact;
4674 }
4675 return z;
4676
4677}
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688
4689int64_t floatx80_to_int64(floatx80 a, float_status *status)
4690{
4691 flag aSign;
4692 int32_t aExp, shiftCount;
4693 uint64_t aSig, aSigExtra;
4694
4695 if (floatx80_invalid_encoding(a)) {
4696 float_raise(float_flag_invalid, status);
4697 return 1ULL << 63;
4698 }
4699 aSig = extractFloatx80Frac( a );
4700 aExp = extractFloatx80Exp( a );
4701 aSign = extractFloatx80Sign( a );
4702 shiftCount = 0x403E - aExp;
4703 if ( shiftCount <= 0 ) {
4704 if ( shiftCount ) {
4705 float_raise(float_flag_invalid, status);
4706 if (!aSign || floatx80_is_any_nan(a)) {
4707 return LIT64( 0x7FFFFFFFFFFFFFFF );
4708 }
4709 return (int64_t) LIT64( 0x8000000000000000 );
4710 }
4711 aSigExtra = 0;
4712 }
4713 else {
4714 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4715 }
4716 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
4717
4718}
4719
4720
4721
4722
4723
4724
4725
4726
4727
4728
4729
4730int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
4731{
4732 flag aSign;
4733 int32_t aExp, shiftCount;
4734 uint64_t aSig;
4735 int64_t z;
4736
4737 if (floatx80_invalid_encoding(a)) {
4738 float_raise(float_flag_invalid, status);
4739 return 1ULL << 63;
4740 }
4741 aSig = extractFloatx80Frac( a );
4742 aExp = extractFloatx80Exp( a );
4743 aSign = extractFloatx80Sign( a );
4744 shiftCount = aExp - 0x403E;
4745 if ( 0 <= shiftCount ) {
4746 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4747 if ( ( a.high != 0xC03E ) || aSig ) {
4748 float_raise(float_flag_invalid, status);
4749 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4750 return LIT64( 0x7FFFFFFFFFFFFFFF );
4751 }
4752 }
4753 return (int64_t) LIT64( 0x8000000000000000 );
4754 }
4755 else if ( aExp < 0x3FFF ) {
4756 if (aExp | aSig) {
4757 status->float_exception_flags |= float_flag_inexact;
4758 }
4759 return 0;
4760 }
4761 z = aSig>>( - shiftCount );
4762 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
4763 status->float_exception_flags |= float_flag_inexact;
4764 }
4765 if ( aSign ) z = - z;
4766 return z;
4767
4768}
4769
4770
4771
4772
4773
4774
4775
4776
4777float32 floatx80_to_float32(floatx80 a, float_status *status)
4778{
4779 flag aSign;
4780 int32_t aExp;
4781 uint64_t aSig;
4782
4783 if (floatx80_invalid_encoding(a)) {
4784 float_raise(float_flag_invalid, status);
4785 return float32_default_nan(status);
4786 }
4787 aSig = extractFloatx80Frac( a );
4788 aExp = extractFloatx80Exp( a );
4789 aSign = extractFloatx80Sign( a );
4790 if ( aExp == 0x7FFF ) {
4791 if ( (uint64_t) ( aSig<<1 ) ) {
4792 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
4793 }
4794 return packFloat32( aSign, 0xFF, 0 );
4795 }
4796 shift64RightJamming( aSig, 33, &aSig );
4797 if ( aExp || aSig ) aExp -= 0x3F81;
4798 return roundAndPackFloat32(aSign, aExp, aSig, status);
4799
4800}
4801
4802
4803
4804
4805
4806
4807
4808
4809float64 floatx80_to_float64(floatx80 a, float_status *status)
4810{
4811 flag aSign;
4812 int32_t aExp;
4813 uint64_t aSig, zSig;
4814
4815 if (floatx80_invalid_encoding(a)) {
4816 float_raise(float_flag_invalid, status);
4817 return float64_default_nan(status);
4818 }
4819 aSig = extractFloatx80Frac( a );
4820 aExp = extractFloatx80Exp( a );
4821 aSign = extractFloatx80Sign( a );
4822 if ( aExp == 0x7FFF ) {
4823 if ( (uint64_t) ( aSig<<1 ) ) {
4824 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
4825 }
4826 return packFloat64( aSign, 0x7FF, 0 );
4827 }
4828 shift64RightJamming( aSig, 1, &zSig );
4829 if ( aExp || aSig ) aExp -= 0x3C01;
4830 return roundAndPackFloat64(aSign, aExp, zSig, status);
4831
4832}
4833
4834
4835
4836
4837
4838
4839
4840
4841float128 floatx80_to_float128(floatx80 a, float_status *status)
4842{
4843 flag aSign;
4844 int aExp;
4845 uint64_t aSig, zSig0, zSig1;
4846
4847 if (floatx80_invalid_encoding(a)) {
4848 float_raise(float_flag_invalid, status);
4849 return float128_default_nan(status);
4850 }
4851 aSig = extractFloatx80Frac( a );
4852 aExp = extractFloatx80Exp( a );
4853 aSign = extractFloatx80Sign( a );
4854 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
4855 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
4856 }
4857 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
4858 return packFloat128( aSign, aExp, zSig0, zSig1 );
4859
4860}
4861
4862
4863
4864
4865
4866
4867
4868
4869
4870floatx80 floatx80_round(floatx80 a, float_status *status)
4871{
4872 return roundAndPackFloatx80(status->floatx80_rounding_precision,
4873 extractFloatx80Sign(a),
4874 extractFloatx80Exp(a),
4875 extractFloatx80Frac(a), 0, status);
4876}
4877
4878
4879
4880
4881
4882
4883
4884
4885floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
4886{
4887 flag aSign;
4888 int32_t aExp;
4889 uint64_t lastBitMask, roundBitsMask;
4890 floatx80 z;
4891
4892 if (floatx80_invalid_encoding(a)) {
4893 float_raise(float_flag_invalid, status);
4894 return floatx80_default_nan(status);
4895 }
4896 aExp = extractFloatx80Exp( a );
4897 if ( 0x403E <= aExp ) {
4898 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
4899 return propagateFloatx80NaN(a, a, status);
4900 }
4901 return a;
4902 }
4903 if ( aExp < 0x3FFF ) {
4904 if ( ( aExp == 0 )
4905 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
4906 return a;
4907 }
4908 status->float_exception_flags |= float_flag_inexact;
4909 aSign = extractFloatx80Sign( a );
4910 switch (status->float_rounding_mode) {
4911 case float_round_nearest_even:
4912 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
4913 ) {
4914 return
4915 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
4916 }
4917 break;
4918 case float_round_ties_away:
4919 if (aExp == 0x3FFE) {
4920 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
4921 }
4922 break;
4923 case float_round_down:
4924 return
4925 aSign ?
4926 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
4927 : packFloatx80( 0, 0, 0 );
4928 case float_round_up:
4929 return
4930 aSign ? packFloatx80( 1, 0, 0 )
4931 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
4932 }
4933 return packFloatx80( aSign, 0, 0 );
4934 }
4935 lastBitMask = 1;
4936 lastBitMask <<= 0x403E - aExp;
4937 roundBitsMask = lastBitMask - 1;
4938 z = a;
4939 switch (status->float_rounding_mode) {
4940 case float_round_nearest_even:
4941 z.low += lastBitMask>>1;
4942 if ((z.low & roundBitsMask) == 0) {
4943 z.low &= ~lastBitMask;
4944 }
4945 break;
4946 case float_round_ties_away:
4947 z.low += lastBitMask >> 1;
4948 break;
4949 case float_round_to_zero:
4950 break;
4951 case float_round_up:
4952 if (!extractFloatx80Sign(z)) {
4953 z.low += roundBitsMask;
4954 }
4955 break;
4956 case float_round_down:
4957 if (extractFloatx80Sign(z)) {
4958 z.low += roundBitsMask;
4959 }
4960 break;
4961 default:
4962 abort();
4963 }
4964 z.low &= ~ roundBitsMask;
4965 if ( z.low == 0 ) {
4966 ++z.high;
4967 z.low = LIT64( 0x8000000000000000 );
4968 }
4969 if (z.low != a.low) {
4970 status->float_exception_flags |= float_flag_inexact;
4971 }
4972 return z;
4973
4974}
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
4985 float_status *status)
4986{
4987 int32_t aExp, bExp, zExp;
4988 uint64_t aSig, bSig, zSig0, zSig1;
4989 int32_t expDiff;
4990
4991 aSig = extractFloatx80Frac( a );
4992 aExp = extractFloatx80Exp( a );
4993 bSig = extractFloatx80Frac( b );
4994 bExp = extractFloatx80Exp( b );
4995 expDiff = aExp - bExp;
4996 if ( 0 < expDiff ) {
4997 if ( aExp == 0x7FFF ) {
4998 if ((uint64_t)(aSig << 1)) {
4999 return propagateFloatx80NaN(a, b, status);
5000 }
5001 return a;
5002 }
5003 if ( bExp == 0 ) --expDiff;
5004 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5005 zExp = aExp;
5006 }
5007 else if ( expDiff < 0 ) {
5008 if ( bExp == 0x7FFF ) {
5009 if ((uint64_t)(bSig << 1)) {
5010 return propagateFloatx80NaN(a, b, status);
5011 }
5012 return packFloatx80(zSign,
5013 floatx80_infinity_high,
5014 floatx80_infinity_low);
5015 }
5016 if ( aExp == 0 ) ++expDiff;
5017 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5018 zExp = bExp;
5019 }
5020 else {
5021 if ( aExp == 0x7FFF ) {
5022 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
5023 return propagateFloatx80NaN(a, b, status);
5024 }
5025 return a;
5026 }
5027 zSig1 = 0;
5028 zSig0 = aSig + bSig;
5029 if ( aExp == 0 ) {
5030 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5031 goto roundAndPack;
5032 }
5033 zExp = aExp;
5034 goto shiftRight1;
5035 }
5036 zSig0 = aSig + bSig;
5037 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
5038 shiftRight1:
5039 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
5040 zSig0 |= LIT64( 0x8000000000000000 );
5041 ++zExp;
5042 roundAndPack:
5043 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5044 zSign, zExp, zSig0, zSig1, status);
5045}
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5056 float_status *status)
5057{
5058 int32_t aExp, bExp, zExp;
5059 uint64_t aSig, bSig, zSig0, zSig1;
5060 int32_t expDiff;
5061
5062 aSig = extractFloatx80Frac( a );
5063 aExp = extractFloatx80Exp( a );
5064 bSig = extractFloatx80Frac( b );
5065 bExp = extractFloatx80Exp( b );
5066 expDiff = aExp - bExp;
5067 if ( 0 < expDiff ) goto aExpBigger;
5068 if ( expDiff < 0 ) goto bExpBigger;
5069 if ( aExp == 0x7FFF ) {
5070 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
5071 return propagateFloatx80NaN(a, b, status);
5072 }
5073 float_raise(float_flag_invalid, status);
5074 return floatx80_default_nan(status);
5075 }
5076 if ( aExp == 0 ) {
5077 aExp = 1;
5078 bExp = 1;
5079 }
5080 zSig1 = 0;
5081 if ( bSig < aSig ) goto aBigger;
5082 if ( aSig < bSig ) goto bBigger;
5083 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
5084 bExpBigger:
5085 if ( bExp == 0x7FFF ) {
5086 if ((uint64_t)(bSig << 1)) {
5087 return propagateFloatx80NaN(a, b, status);
5088 }
5089 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5090 floatx80_infinity_low);
5091 }
5092 if ( aExp == 0 ) ++expDiff;
5093 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5094 bBigger:
5095 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5096 zExp = bExp;
5097 zSign ^= 1;
5098 goto normalizeRoundAndPack;
5099 aExpBigger:
5100 if ( aExp == 0x7FFF ) {
5101 if ((uint64_t)(aSig << 1)) {
5102 return propagateFloatx80NaN(a, b, status);
5103 }
5104 return a;
5105 }
5106 if ( bExp == 0 ) --expDiff;
5107 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5108 aBigger:
5109 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5110 zExp = aExp;
5111 normalizeRoundAndPack:
5112 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
5113 zSign, zExp, zSig0, zSig1, status);
5114}
5115
5116
5117
5118
5119
5120
5121
5122floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
5123{
5124 flag aSign, bSign;
5125
5126 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5127 float_raise(float_flag_invalid, status);
5128 return floatx80_default_nan(status);
5129 }
5130 aSign = extractFloatx80Sign( a );
5131 bSign = extractFloatx80Sign( b );
5132 if ( aSign == bSign ) {
5133 return addFloatx80Sigs(a, b, aSign, status);
5134 }
5135 else {
5136 return subFloatx80Sigs(a, b, aSign, status);
5137 }
5138
5139}
5140
5141
5142
5143
5144
5145
5146
5147floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
5148{
5149 flag aSign, bSign;
5150
5151 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5152 float_raise(float_flag_invalid, status);
5153 return floatx80_default_nan(status);
5154 }
5155 aSign = extractFloatx80Sign( a );
5156 bSign = extractFloatx80Sign( b );
5157 if ( aSign == bSign ) {
5158 return subFloatx80Sigs(a, b, aSign, status);
5159 }
5160 else {
5161 return addFloatx80Sigs(a, b, aSign, status);
5162 }
5163
5164}
5165
5166
5167
5168
5169
5170
5171
5172floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
5173{
5174 flag aSign, bSign, zSign;
5175 int32_t aExp, bExp, zExp;
5176 uint64_t aSig, bSig, zSig0, zSig1;
5177
5178 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5179 float_raise(float_flag_invalid, status);
5180 return floatx80_default_nan(status);
5181 }
5182 aSig = extractFloatx80Frac( a );
5183 aExp = extractFloatx80Exp( a );
5184 aSign = extractFloatx80Sign( a );
5185 bSig = extractFloatx80Frac( b );
5186 bExp = extractFloatx80Exp( b );
5187 bSign = extractFloatx80Sign( b );
5188 zSign = aSign ^ bSign;
5189 if ( aExp == 0x7FFF ) {
5190 if ( (uint64_t) ( aSig<<1 )
5191 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
5192 return propagateFloatx80NaN(a, b, status);
5193 }
5194 if ( ( bExp | bSig ) == 0 ) goto invalid;
5195 return packFloatx80(zSign, floatx80_infinity_high,
5196 floatx80_infinity_low);
5197 }
5198 if ( bExp == 0x7FFF ) {
5199 if ((uint64_t)(bSig << 1)) {
5200 return propagateFloatx80NaN(a, b, status);
5201 }
5202 if ( ( aExp | aSig ) == 0 ) {
5203 invalid:
5204 float_raise(float_flag_invalid, status);
5205 return floatx80_default_nan(status);
5206 }
5207 return packFloatx80(zSign, floatx80_infinity_high,
5208 floatx80_infinity_low);
5209 }
5210 if ( aExp == 0 ) {
5211 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5212 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5213 }
5214 if ( bExp == 0 ) {
5215 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5216 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5217 }
5218 zExp = aExp + bExp - 0x3FFE;
5219 mul64To128( aSig, bSig, &zSig0, &zSig1 );
5220 if ( 0 < (int64_t) zSig0 ) {
5221 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5222 --zExp;
5223 }
5224 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5225 zSign, zExp, zSig0, zSig1, status);
5226}
5227
5228
5229
5230
5231
5232
5233
5234floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
5235{
5236 flag aSign, bSign, zSign;
5237 int32_t aExp, bExp, zExp;
5238 uint64_t aSig, bSig, zSig0, zSig1;
5239 uint64_t rem0, rem1, rem2, term0, term1, term2;
5240
5241 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5242 float_raise(float_flag_invalid, status);
5243 return floatx80_default_nan(status);
5244 }
5245 aSig = extractFloatx80Frac( a );
5246 aExp = extractFloatx80Exp( a );
5247 aSign = extractFloatx80Sign( a );
5248 bSig = extractFloatx80Frac( b );
5249 bExp = extractFloatx80Exp( b );
5250 bSign = extractFloatx80Sign( b );
5251 zSign = aSign ^ bSign;
5252 if ( aExp == 0x7FFF ) {
5253 if ((uint64_t)(aSig << 1)) {
5254 return propagateFloatx80NaN(a, b, status);
5255 }
5256 if ( bExp == 0x7FFF ) {
5257 if ((uint64_t)(bSig << 1)) {
5258 return propagateFloatx80NaN(a, b, status);
5259 }
5260 goto invalid;
5261 }
5262 return packFloatx80(zSign, floatx80_infinity_high,
5263 floatx80_infinity_low);
5264 }
5265 if ( bExp == 0x7FFF ) {
5266 if ((uint64_t)(bSig << 1)) {
5267 return propagateFloatx80NaN(a, b, status);
5268 }
5269 return packFloatx80( zSign, 0, 0 );
5270 }
5271 if ( bExp == 0 ) {
5272 if ( bSig == 0 ) {
5273 if ( ( aExp | aSig ) == 0 ) {
5274 invalid:
5275 float_raise(float_flag_invalid, status);
5276 return floatx80_default_nan(status);
5277 }
5278 float_raise(float_flag_divbyzero, status);
5279 return packFloatx80(zSign, floatx80_infinity_high,
5280 floatx80_infinity_low);
5281 }
5282 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5283 }
5284 if ( aExp == 0 ) {
5285 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5286 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5287 }
5288 zExp = aExp - bExp + 0x3FFE;
5289 rem1 = 0;
5290 if ( bSig <= aSig ) {
5291 shift128Right( aSig, 0, 1, &aSig, &rem1 );
5292 ++zExp;
5293 }
5294 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
5295 mul64To128( bSig, zSig0, &term0, &term1 );
5296 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
5297 while ( (int64_t) rem0 < 0 ) {
5298 --zSig0;
5299 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
5300 }
5301 zSig1 = estimateDiv128To64( rem1, 0, bSig );
5302 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
5303 mul64To128( bSig, zSig1, &term1, &term2 );
5304 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5305 while ( (int64_t) rem1 < 0 ) {
5306 --zSig1;
5307 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
5308 }
5309 zSig1 |= ( ( rem1 | rem2 ) != 0 );
5310 }
5311 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5312 zSign, zExp, zSig0, zSig1, status);
5313}
5314
5315
5316
5317
5318
5319
5320
5321floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
5322{
5323 flag aSign, zSign;
5324 int32_t aExp, bExp, expDiff;
5325 uint64_t aSig0, aSig1, bSig;
5326 uint64_t q, term0, term1, alternateASig0, alternateASig1;
5327
5328 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5329 float_raise(float_flag_invalid, status);
5330 return floatx80_default_nan(status);
5331 }
5332 aSig0 = extractFloatx80Frac( a );
5333 aExp = extractFloatx80Exp( a );
5334 aSign = extractFloatx80Sign( a );
5335 bSig = extractFloatx80Frac( b );
5336 bExp = extractFloatx80Exp( b );
5337 if ( aExp == 0x7FFF ) {
5338 if ( (uint64_t) ( aSig0<<1 )
5339 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
5340 return propagateFloatx80NaN(a, b, status);
5341 }
5342 goto invalid;
5343 }
5344 if ( bExp == 0x7FFF ) {
5345 if ((uint64_t)(bSig << 1)) {
5346 return propagateFloatx80NaN(a, b, status);
5347 }
5348 return a;
5349 }
5350 if ( bExp == 0 ) {
5351 if ( bSig == 0 ) {
5352 invalid:
5353 float_raise(float_flag_invalid, status);
5354 return floatx80_default_nan(status);
5355 }
5356 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5357 }
5358 if ( aExp == 0 ) {
5359 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
5360 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5361 }
5362 bSig |= LIT64( 0x8000000000000000 );
5363 zSign = aSign;
5364 expDiff = aExp - bExp;
5365 aSig1 = 0;
5366 if ( expDiff < 0 ) {
5367 if ( expDiff < -1 ) return a;
5368 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5369 expDiff = 0;
5370 }
5371 q = ( bSig <= aSig0 );
5372 if ( q ) aSig0 -= bSig;
5373 expDiff -= 64;
5374 while ( 0 < expDiff ) {
5375 q = estimateDiv128To64( aSig0, aSig1, bSig );
5376 q = ( 2 < q ) ? q - 2 : 0;
5377 mul64To128( bSig, q, &term0, &term1 );
5378 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5379 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5380 expDiff -= 62;
5381 }
5382 expDiff += 64;
5383 if ( 0 < expDiff ) {
5384 q = estimateDiv128To64( aSig0, aSig1, bSig );
5385 q = ( 2 < q ) ? q - 2 : 0;
5386 q >>= 64 - expDiff;
5387 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5388 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5389 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5390 while ( le128( term0, term1, aSig0, aSig1 ) ) {
5391 ++q;
5392 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5393 }
5394 }
5395 else {
5396 term1 = 0;
5397 term0 = bSig;
5398 }
5399 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
5400 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
5401 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
5402 && ( q & 1 ) )
5403 ) {
5404 aSig0 = alternateASig0;
5405 aSig1 = alternateASig1;
5406 zSign = ! zSign;
5407 }
5408 return
5409 normalizeRoundAndPackFloatx80(
5410 80, zSign, bExp + expDiff, aSig0, aSig1, status);
5411
5412}
5413
5414
5415
5416
5417
5418
5419
5420floatx80 floatx80_sqrt(floatx80 a, float_status *status)
5421{
5422 flag aSign;
5423 int32_t aExp, zExp;
5424 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5425 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5426
5427 if (floatx80_invalid_encoding(a)) {
5428 float_raise(float_flag_invalid, status);
5429 return floatx80_default_nan(status);
5430 }
5431 aSig0 = extractFloatx80Frac( a );
5432 aExp = extractFloatx80Exp( a );
5433 aSign = extractFloatx80Sign( a );
5434 if ( aExp == 0x7FFF ) {
5435 if ((uint64_t)(aSig0 << 1)) {
5436 return propagateFloatx80NaN(a, a, status);
5437 }
5438 if ( ! aSign ) return a;
5439 goto invalid;
5440 }
5441 if ( aSign ) {
5442 if ( ( aExp | aSig0 ) == 0 ) return a;
5443 invalid:
5444 float_raise(float_flag_invalid, status);
5445 return floatx80_default_nan(status);
5446 }
5447 if ( aExp == 0 ) {
5448 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5449 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5450 }
5451 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5452 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5453 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5454 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5455 doubleZSig0 = zSig0<<1;
5456 mul64To128( zSig0, zSig0, &term0, &term1 );
5457 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5458 while ( (int64_t) rem0 < 0 ) {
5459 --zSig0;
5460 doubleZSig0 -= 2;
5461 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5462 }
5463 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5464 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5465 if ( zSig1 == 0 ) zSig1 = 1;
5466 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5467 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5468 mul64To128( zSig1, zSig1, &term2, &term3 );
5469 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5470 while ( (int64_t) rem1 < 0 ) {
5471 --zSig1;
5472 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5473 term3 |= 1;
5474 term2 |= doubleZSig0;
5475 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5476 }
5477 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5478 }
5479 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5480 zSig0 |= doubleZSig0;
5481 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5482 0, zExp, zSig0, zSig1, status);
5483}
5484
5485
5486
5487
5488
5489
5490
5491
5492int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
5493{
5494
5495 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5496 || (extractFloatx80Exp(a) == 0x7FFF
5497 && (uint64_t) (extractFloatx80Frac(a) << 1))
5498 || (extractFloatx80Exp(b) == 0x7FFF
5499 && (uint64_t) (extractFloatx80Frac(b) << 1))
5500 ) {
5501 float_raise(float_flag_invalid, status);
5502 return 0;
5503 }
5504 return
5505 ( a.low == b.low )
5506 && ( ( a.high == b.high )
5507 || ( ( a.low == 0 )
5508 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5509 );
5510
5511}
5512
5513
5514
5515
5516
5517
5518
5519
5520
5521int floatx80_le(floatx80 a, floatx80 b, float_status *status)
5522{
5523 flag aSign, bSign;
5524
5525 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5526 || (extractFloatx80Exp(a) == 0x7FFF
5527 && (uint64_t) (extractFloatx80Frac(a) << 1))
5528 || (extractFloatx80Exp(b) == 0x7FFF
5529 && (uint64_t) (extractFloatx80Frac(b) << 1))
5530 ) {
5531 float_raise(float_flag_invalid, status);
5532 return 0;
5533 }
5534 aSign = extractFloatx80Sign( a );
5535 bSign = extractFloatx80Sign( b );
5536 if ( aSign != bSign ) {
5537 return
5538 aSign
5539 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5540 == 0 );
5541 }
5542 return
5543 aSign ? le128( b.high, b.low, a.high, a.low )
5544 : le128( a.high, a.low, b.high, b.low );
5545
5546}
5547
5548
5549
5550
5551
5552
5553
5554
5555int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
5556{
5557 flag aSign, bSign;
5558
5559 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5560 || (extractFloatx80Exp(a) == 0x7FFF
5561 && (uint64_t) (extractFloatx80Frac(a) << 1))
5562 || (extractFloatx80Exp(b) == 0x7FFF
5563 && (uint64_t) (extractFloatx80Frac(b) << 1))
5564 ) {
5565 float_raise(float_flag_invalid, status);
5566 return 0;
5567 }
5568 aSign = extractFloatx80Sign( a );
5569 bSign = extractFloatx80Sign( b );
5570 if ( aSign != bSign ) {
5571 return
5572 aSign
5573 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5574 != 0 );
5575 }
5576 return
5577 aSign ? lt128( b.high, b.low, a.high, a.low )
5578 : lt128( a.high, a.low, b.high, b.low );
5579
5580}
5581
5582
5583
5584
5585
5586
5587
5588int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
5589{
5590 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5591 || (extractFloatx80Exp(a) == 0x7FFF
5592 && (uint64_t) (extractFloatx80Frac(a) << 1))
5593 || (extractFloatx80Exp(b) == 0x7FFF
5594 && (uint64_t) (extractFloatx80Frac(b) << 1))
5595 ) {
5596 float_raise(float_flag_invalid, status);
5597 return 1;
5598 }
5599 return 0;
5600}
5601
5602
5603
5604
5605
5606
5607
5608
5609int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
5610{
5611
5612 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5613 float_raise(float_flag_invalid, status);
5614 return 0;
5615 }
5616 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5617 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5618 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5619 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5620 ) {
5621 if (floatx80_is_signaling_nan(a, status)
5622 || floatx80_is_signaling_nan(b, status)) {
5623 float_raise(float_flag_invalid, status);
5624 }
5625 return 0;
5626 }
5627 return
5628 ( a.low == b.low )
5629 && ( ( a.high == b.high )
5630 || ( ( a.low == 0 )
5631 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5632 );
5633
5634}
5635
5636
5637
5638
5639
5640
5641
5642
5643int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
5644{
5645 flag aSign, bSign;
5646
5647 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5648 float_raise(float_flag_invalid, status);
5649 return 0;
5650 }
5651 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5652 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5653 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5654 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5655 ) {
5656 if (floatx80_is_signaling_nan(a, status)
5657 || floatx80_is_signaling_nan(b, status)) {
5658 float_raise(float_flag_invalid, status);
5659 }
5660 return 0;
5661 }
5662 aSign = extractFloatx80Sign( a );
5663 bSign = extractFloatx80Sign( b );
5664 if ( aSign != bSign ) {
5665 return
5666 aSign
5667 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5668 == 0 );
5669 }
5670 return
5671 aSign ? le128( b.high, b.low, a.high, a.low )
5672 : le128( a.high, a.low, b.high, b.low );
5673
5674}
5675
5676
5677
5678
5679
5680
5681
5682
5683int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
5684{
5685 flag aSign, bSign;
5686
5687 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5688 float_raise(float_flag_invalid, status);
5689 return 0;
5690 }
5691 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5692 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5693 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5694 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5695 ) {
5696 if (floatx80_is_signaling_nan(a, status)
5697 || floatx80_is_signaling_nan(b, status)) {
5698 float_raise(float_flag_invalid, status);
5699 }
5700 return 0;
5701 }
5702 aSign = extractFloatx80Sign( a );
5703 bSign = extractFloatx80Sign( b );
5704 if ( aSign != bSign ) {
5705 return
5706 aSign
5707 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5708 != 0 );
5709 }
5710 return
5711 aSign ? lt128( b.high, b.low, a.high, a.low )
5712 : lt128( a.high, a.low, b.high, b.low );
5713
5714}
5715
5716
5717
5718
5719
5720
5721
5722int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
5723{
5724 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5725 float_raise(float_flag_invalid, status);
5726 return 1;
5727 }
5728 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5729 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5730 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5731 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5732 ) {
5733 if (floatx80_is_signaling_nan(a, status)
5734 || floatx80_is_signaling_nan(b, status)) {
5735 float_raise(float_flag_invalid, status);
5736 }
5737 return 1;
5738 }
5739 return 0;
5740}
5741
5742
5743
5744
5745
5746
5747
5748
5749
5750
5751
5752int32_t float128_to_int32(float128 a, float_status *status)
5753{
5754 flag aSign;
5755 int32_t aExp, shiftCount;
5756 uint64_t aSig0, aSig1;
5757
5758 aSig1 = extractFloat128Frac1( a );
5759 aSig0 = extractFloat128Frac0( a );
5760 aExp = extractFloat128Exp( a );
5761 aSign = extractFloat128Sign( a );
5762 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5763 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5764 aSig0 |= ( aSig1 != 0 );
5765 shiftCount = 0x4028 - aExp;
5766 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
5767 return roundAndPackInt32(aSign, aSig0, status);
5768
5769}
5770
5771
5772
5773
5774
5775
5776
5777
5778
5779
5780
5781int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
5782{
5783 flag aSign;
5784 int32_t aExp, shiftCount;
5785 uint64_t aSig0, aSig1, savedASig;
5786 int32_t z;
5787
5788 aSig1 = extractFloat128Frac1( a );
5789 aSig0 = extractFloat128Frac0( a );
5790 aExp = extractFloat128Exp( a );
5791 aSign = extractFloat128Sign( a );
5792 aSig0 |= ( aSig1 != 0 );
5793 if ( 0x401E < aExp ) {
5794 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5795 goto invalid;
5796 }
5797 else if ( aExp < 0x3FFF ) {
5798 if (aExp || aSig0) {
5799 status->float_exception_flags |= float_flag_inexact;
5800 }
5801 return 0;
5802 }
5803 aSig0 |= LIT64( 0x0001000000000000 );
5804 shiftCount = 0x402F - aExp;
5805 savedASig = aSig0;
5806 aSig0 >>= shiftCount;
5807 z = aSig0;
5808 if ( aSign ) z = - z;
5809 if ( ( z < 0 ) ^ aSign ) {
5810 invalid:
5811 float_raise(float_flag_invalid, status);
5812 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5813 }
5814 if ( ( aSig0<<shiftCount ) != savedASig ) {
5815 status->float_exception_flags |= float_flag_inexact;
5816 }
5817 return z;
5818
5819}
5820
5821
5822
5823
5824
5825
5826
5827
5828
5829
5830
5831int64_t float128_to_int64(float128 a, float_status *status)
5832{
5833 flag aSign;
5834 int32_t aExp, shiftCount;
5835 uint64_t aSig0, aSig1;
5836
5837 aSig1 = extractFloat128Frac1( a );
5838 aSig0 = extractFloat128Frac0( a );
5839 aExp = extractFloat128Exp( a );
5840 aSign = extractFloat128Sign( a );
5841 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5842 shiftCount = 0x402F - aExp;
5843 if ( shiftCount <= 0 ) {
5844 if ( 0x403E < aExp ) {
5845 float_raise(float_flag_invalid, status);
5846 if ( ! aSign
5847 || ( ( aExp == 0x7FFF )
5848 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5849 )
5850 ) {
5851 return LIT64( 0x7FFFFFFFFFFFFFFF );
5852 }
5853 return (int64_t) LIT64( 0x8000000000000000 );
5854 }
5855 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5856 }
5857 else {
5858 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5859 }
5860 return roundAndPackInt64(aSign, aSig0, aSig1, status);
5861
5862}
5863
5864
5865
5866
5867
5868
5869
5870
5871
5872
5873
5874int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
5875{
5876 flag aSign;
5877 int32_t aExp, shiftCount;
5878 uint64_t aSig0, aSig1;
5879 int64_t z;
5880
5881 aSig1 = extractFloat128Frac1( a );
5882 aSig0 = extractFloat128Frac0( a );
5883 aExp = extractFloat128Exp( a );
5884 aSign = extractFloat128Sign( a );
5885 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5886 shiftCount = aExp - 0x402F;
5887 if ( 0 < shiftCount ) {
5888 if ( 0x403E <= aExp ) {
5889 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5890 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
5891 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
5892 if (aSig1) {
5893 status->float_exception_flags |= float_flag_inexact;
5894 }
5895 }
5896 else {
5897 float_raise(float_flag_invalid, status);
5898 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
5899 return LIT64( 0x7FFFFFFFFFFFFFFF );
5900 }
5901 }
5902 return (int64_t) LIT64( 0x8000000000000000 );
5903 }
5904 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
5905 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
5906 status->float_exception_flags |= float_flag_inexact;
5907 }
5908 }
5909 else {
5910 if ( aExp < 0x3FFF ) {
5911 if ( aExp | aSig0 | aSig1 ) {
5912 status->float_exception_flags |= float_flag_inexact;
5913 }
5914 return 0;
5915 }
5916 z = aSig0>>( - shiftCount );
5917 if ( aSig1
5918 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
5919 status->float_exception_flags |= float_flag_inexact;
5920 }
5921 }
5922 if ( aSign ) z = - z;
5923 return z;
5924
5925}
5926
5927
5928
5929
5930
5931
5932
5933
5934
5935
5936
5937
5938
5939uint64_t float128_to_uint64(float128 a, float_status *status)
5940{
5941 flag aSign;
5942 int aExp;
5943 int shiftCount;
5944 uint64_t aSig0, aSig1;
5945
5946 aSig0 = extractFloat128Frac0(a);
5947 aSig1 = extractFloat128Frac1(a);
5948 aExp = extractFloat128Exp(a);
5949 aSign = extractFloat128Sign(a);
5950 if (aSign && (aExp > 0x3FFE)) {
5951 float_raise(float_flag_invalid, status);
5952 if (float128_is_any_nan(a)) {
5953 return LIT64(0xFFFFFFFFFFFFFFFF);
5954 } else {
5955 return 0;
5956 }
5957 }
5958 if (aExp) {
5959 aSig0 |= LIT64(0x0001000000000000);
5960 }
5961 shiftCount = 0x402F - aExp;
5962 if (shiftCount <= 0) {
5963 if (0x403E < aExp) {
5964 float_raise(float_flag_invalid, status);
5965 return LIT64(0xFFFFFFFFFFFFFFFF);
5966 }
5967 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
5968 } else {
5969 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
5970 }
5971 return roundAndPackUint64(aSign, aSig0, aSig1, status);
5972}
5973
5974uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
5975{
5976 uint64_t v;
5977 signed char current_rounding_mode = status->float_rounding_mode;
5978
5979 set_float_rounding_mode(float_round_to_zero, status);
5980 v = float128_to_uint64(a, status);
5981 set_float_rounding_mode(current_rounding_mode, status);
5982
5983 return v;
5984}
5985
5986
5987
5988
5989
5990
5991
5992
5993
5994
5995
5996
5997uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
5998{
5999 uint64_t v;
6000 uint32_t res;
6001 int old_exc_flags = get_float_exception_flags(status);
6002
6003 v = float128_to_uint64_round_to_zero(a, status);
6004 if (v > 0xffffffff) {
6005 res = 0xffffffff;
6006 } else {
6007 return v;
6008 }
6009 set_float_exception_flags(old_exc_flags, status);
6010 float_raise(float_flag_invalid, status);
6011 return res;
6012}
6013
6014
6015
6016
6017
6018
6019
6020
6021float32 float128_to_float32(float128 a, float_status *status)
6022{
6023 flag aSign;
6024 int32_t aExp;
6025 uint64_t aSig0, aSig1;
6026 uint32_t zSig;
6027
6028 aSig1 = extractFloat128Frac1( a );
6029 aSig0 = extractFloat128Frac0( a );
6030 aExp = extractFloat128Exp( a );
6031 aSign = extractFloat128Sign( a );
6032 if ( aExp == 0x7FFF ) {
6033 if ( aSig0 | aSig1 ) {
6034 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
6035 }
6036 return packFloat32( aSign, 0xFF, 0 );
6037 }
6038 aSig0 |= ( aSig1 != 0 );
6039 shift64RightJamming( aSig0, 18, &aSig0 );
6040 zSig = aSig0;
6041 if ( aExp || zSig ) {
6042 zSig |= 0x40000000;
6043 aExp -= 0x3F81;
6044 }
6045 return roundAndPackFloat32(aSign, aExp, zSig, status);
6046
6047}
6048
6049
6050
6051
6052
6053
6054
6055
6056float64 float128_to_float64(float128 a, float_status *status)
6057{
6058 flag aSign;
6059 int32_t aExp;
6060 uint64_t aSig0, aSig1;
6061
6062 aSig1 = extractFloat128Frac1( a );
6063 aSig0 = extractFloat128Frac0( a );
6064 aExp = extractFloat128Exp( a );
6065 aSign = extractFloat128Sign( a );
6066 if ( aExp == 0x7FFF ) {
6067 if ( aSig0 | aSig1 ) {
6068 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
6069 }
6070 return packFloat64( aSign, 0x7FF, 0 );
6071 }
6072 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6073 aSig0 |= ( aSig1 != 0 );
6074 if ( aExp || aSig0 ) {
6075 aSig0 |= LIT64( 0x4000000000000000 );
6076 aExp -= 0x3C01;
6077 }
6078 return roundAndPackFloat64(aSign, aExp, aSig0, status);
6079
6080}
6081
6082
6083
6084
6085
6086
6087
6088
6089floatx80 float128_to_floatx80(float128 a, float_status *status)
6090{
6091 flag aSign;
6092 int32_t aExp;
6093 uint64_t aSig0, aSig1;
6094
6095 aSig1 = extractFloat128Frac1( a );
6096 aSig0 = extractFloat128Frac0( a );
6097 aExp = extractFloat128Exp( a );
6098 aSign = extractFloat128Sign( a );
6099 if ( aExp == 0x7FFF ) {
6100 if ( aSig0 | aSig1 ) {
6101 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
6102 }
6103 return packFloatx80(aSign, floatx80_infinity_high,
6104 floatx80_infinity_low);
6105 }
6106 if ( aExp == 0 ) {
6107 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6108 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6109 }
6110 else {
6111 aSig0 |= LIT64( 0x0001000000000000 );
6112 }
6113 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6114 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6115
6116}
6117
6118
6119
6120
6121
6122
6123
6124
6125float128 float128_round_to_int(float128 a, float_status *status)
6126{
6127 flag aSign;
6128 int32_t aExp;
6129 uint64_t lastBitMask, roundBitsMask;
6130 float128 z;
6131
6132 aExp = extractFloat128Exp( a );
6133 if ( 0x402F <= aExp ) {
6134 if ( 0x406F <= aExp ) {
6135 if ( ( aExp == 0x7FFF )
6136 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6137 ) {
6138 return propagateFloat128NaN(a, a, status);
6139 }
6140 return a;
6141 }
6142 lastBitMask = 1;
6143 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6144 roundBitsMask = lastBitMask - 1;
6145 z = a;
6146 switch (status->float_rounding_mode) {
6147 case float_round_nearest_even:
6148 if ( lastBitMask ) {
6149 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6150 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6151 }
6152 else {
6153 if ( (int64_t) z.low < 0 ) {
6154 ++z.high;
6155 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
6156 }
6157 }
6158 break;
6159 case float_round_ties_away:
6160 if (lastBitMask) {
6161 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6162 } else {
6163 if ((int64_t) z.low < 0) {
6164 ++z.high;
6165 }
6166 }
6167 break;
6168 case float_round_to_zero:
6169 break;
6170 case float_round_up:
6171 if (!extractFloat128Sign(z)) {
6172 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6173 }
6174 break;
6175 case float_round_down:
6176 if (extractFloat128Sign(z)) {
6177 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6178 }
6179 break;
6180 default:
6181 abort();
6182 }
6183 z.low &= ~ roundBitsMask;
6184 }
6185 else {
6186 if ( aExp < 0x3FFF ) {
6187 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
6188 status->float_exception_flags |= float_flag_inexact;
6189 aSign = extractFloat128Sign( a );
6190 switch (status->float_rounding_mode) {
6191 case float_round_nearest_even:
6192 if ( ( aExp == 0x3FFE )
6193 && ( extractFloat128Frac0( a )
6194 | extractFloat128Frac1( a ) )
6195 ) {
6196 return packFloat128( aSign, 0x3FFF, 0, 0 );
6197 }
6198 break;
6199 case float_round_ties_away:
6200 if (aExp == 0x3FFE) {
6201 return packFloat128(aSign, 0x3FFF, 0, 0);
6202 }
6203 break;
6204 case float_round_down:
6205 return
6206 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6207 : packFloat128( 0, 0, 0, 0 );
6208 case float_round_up:
6209 return
6210 aSign ? packFloat128( 1, 0, 0, 0 )
6211 : packFloat128( 0, 0x3FFF, 0, 0 );
6212 }
6213 return packFloat128( aSign, 0, 0, 0 );
6214 }
6215 lastBitMask = 1;
6216 lastBitMask <<= 0x402F - aExp;
6217 roundBitsMask = lastBitMask - 1;
6218 z.low = 0;
6219 z.high = a.high;
6220 switch (status->float_rounding_mode) {
6221 case float_round_nearest_even:
6222 z.high += lastBitMask>>1;
6223 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6224 z.high &= ~ lastBitMask;
6225 }
6226 break;
6227 case float_round_ties_away:
6228 z.high += lastBitMask>>1;
6229 break;
6230 case float_round_to_zero:
6231 break;
6232 case float_round_up:
6233 if (!extractFloat128Sign(z)) {
6234 z.high |= ( a.low != 0 );
6235 z.high += roundBitsMask;
6236 }
6237 break;
6238 case float_round_down:
6239 if (extractFloat128Sign(z)) {
6240 z.high |= (a.low != 0);
6241 z.high += roundBitsMask;
6242 }
6243 break;
6244 default:
6245 abort();
6246 }
6247 z.high &= ~ roundBitsMask;
6248 }
6249 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
6250 status->float_exception_flags |= float_flag_inexact;
6251 }
6252 return z;
6253
6254}
6255
6256
6257
6258
6259
6260
6261
6262
6263
6264static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
6265 float_status *status)
6266{
6267 int32_t aExp, bExp, zExp;
6268 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6269 int32_t expDiff;
6270
6271 aSig1 = extractFloat128Frac1( a );
6272 aSig0 = extractFloat128Frac0( a );
6273 aExp = extractFloat128Exp( a );
6274 bSig1 = extractFloat128Frac1( b );
6275 bSig0 = extractFloat128Frac0( b );
6276 bExp = extractFloat128Exp( b );
6277 expDiff = aExp - bExp;
6278 if ( 0 < expDiff ) {
6279 if ( aExp == 0x7FFF ) {
6280 if (aSig0 | aSig1) {
6281 return propagateFloat128NaN(a, b, status);
6282 }
6283 return a;
6284 }
6285 if ( bExp == 0 ) {
6286 --expDiff;
6287 }
6288 else {
6289 bSig0 |= LIT64( 0x0001000000000000 );
6290 }
6291 shift128ExtraRightJamming(
6292 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6293 zExp = aExp;
6294 }
6295 else if ( expDiff < 0 ) {
6296 if ( bExp == 0x7FFF ) {
6297 if (bSig0 | bSig1) {
6298 return propagateFloat128NaN(a, b, status);
6299 }
6300 return packFloat128( zSign, 0x7FFF, 0, 0 );
6301 }
6302 if ( aExp == 0 ) {
6303 ++expDiff;
6304 }
6305 else {
6306 aSig0 |= LIT64( 0x0001000000000000 );
6307 }
6308 shift128ExtraRightJamming(
6309 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6310 zExp = bExp;
6311 }
6312 else {
6313 if ( aExp == 0x7FFF ) {
6314 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
6315 return propagateFloat128NaN(a, b, status);
6316 }
6317 return a;
6318 }
6319 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6320 if ( aExp == 0 ) {
6321 if (status->flush_to_zero) {
6322 if (zSig0 | zSig1) {
6323 float_raise(float_flag_output_denormal, status);
6324 }
6325 return packFloat128(zSign, 0, 0, 0);
6326 }
6327 return packFloat128( zSign, 0, zSig0, zSig1 );
6328 }
6329 zSig2 = 0;
6330 zSig0 |= LIT64( 0x0002000000000000 );
6331 zExp = aExp;
6332 goto shiftRight1;
6333 }
6334 aSig0 |= LIT64( 0x0001000000000000 );
6335 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6336 --zExp;
6337 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
6338 ++zExp;
6339 shiftRight1:
6340 shift128ExtraRightJamming(
6341 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6342 roundAndPack:
6343 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6344
6345}
6346
6347
6348
6349
6350
6351
6352
6353
6354
6355static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
6356 float_status *status)
6357{
6358 int32_t aExp, bExp, zExp;
6359 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
6360 int32_t expDiff;
6361
6362 aSig1 = extractFloat128Frac1( a );
6363 aSig0 = extractFloat128Frac0( a );
6364 aExp = extractFloat128Exp( a );
6365 bSig1 = extractFloat128Frac1( b );
6366 bSig0 = extractFloat128Frac0( b );
6367 bExp = extractFloat128Exp( b );
6368 expDiff = aExp - bExp;
6369 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6370 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
6371 if ( 0 < expDiff ) goto aExpBigger;
6372 if ( expDiff < 0 ) goto bExpBigger;
6373 if ( aExp == 0x7FFF ) {
6374 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
6375 return propagateFloat128NaN(a, b, status);
6376 }
6377 float_raise(float_flag_invalid, status);
6378 return float128_default_nan(status);
6379 }
6380 if ( aExp == 0 ) {
6381 aExp = 1;
6382 bExp = 1;
6383 }
6384 if ( bSig0 < aSig0 ) goto aBigger;
6385 if ( aSig0 < bSig0 ) goto bBigger;
6386 if ( bSig1 < aSig1 ) goto aBigger;
6387 if ( aSig1 < bSig1 ) goto bBigger;
6388 return packFloat128(status->float_rounding_mode == float_round_down,
6389 0, 0, 0);
6390 bExpBigger:
6391 if ( bExp == 0x7FFF ) {
6392 if (bSig0 | bSig1) {
6393 return propagateFloat128NaN(a, b, status);
6394 }
6395 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
6396 }
6397 if ( aExp == 0 ) {
6398 ++expDiff;
6399 }
6400 else {
6401 aSig0 |= LIT64( 0x4000000000000000 );
6402 }
6403 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6404 bSig0 |= LIT64( 0x4000000000000000 );
6405 bBigger:
6406 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
6407 zExp = bExp;
6408 zSign ^= 1;
6409 goto normalizeRoundAndPack;
6410 aExpBigger:
6411 if ( aExp == 0x7FFF ) {
6412 if (aSig0 | aSig1) {
6413 return propagateFloat128NaN(a, b, status);
6414 }
6415 return a;
6416 }
6417 if ( bExp == 0 ) {
6418 --expDiff;
6419 }
6420 else {
6421 bSig0 |= LIT64( 0x4000000000000000 );
6422 }
6423 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
6424 aSig0 |= LIT64( 0x4000000000000000 );
6425 aBigger:
6426 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6427 zExp = aExp;
6428 normalizeRoundAndPack:
6429 --zExp;
6430 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
6431 status);
6432
6433}
6434
6435
6436
6437
6438
6439
6440
6441float128 float128_add(float128 a, float128 b, float_status *status)
6442{
6443 flag aSign, bSign;
6444
6445 aSign = extractFloat128Sign( a );
6446 bSign = extractFloat128Sign( b );
6447 if ( aSign == bSign ) {
6448 return addFloat128Sigs(a, b, aSign, status);
6449 }
6450 else {
6451 return subFloat128Sigs(a, b, aSign, status);
6452 }
6453
6454}
6455
6456
6457
6458
6459
6460
6461
6462float128 float128_sub(float128 a, float128 b, float_status *status)
6463{
6464 flag aSign, bSign;
6465
6466 aSign = extractFloat128Sign( a );
6467 bSign = extractFloat128Sign( b );
6468 if ( aSign == bSign ) {
6469 return subFloat128Sigs(a, b, aSign, status);
6470 }
6471 else {
6472 return addFloat128Sigs(a, b, aSign, status);
6473 }
6474
6475}
6476
6477
6478
6479
6480
6481
6482
6483float128 float128_mul(float128 a, float128 b, float_status *status)
6484{
6485 flag aSign, bSign, zSign;
6486 int32_t aExp, bExp, zExp;
6487 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
6488
6489 aSig1 = extractFloat128Frac1( a );
6490 aSig0 = extractFloat128Frac0( a );
6491 aExp = extractFloat128Exp( a );
6492 aSign = extractFloat128Sign( a );
6493 bSig1 = extractFloat128Frac1( b );
6494 bSig0 = extractFloat128Frac0( b );
6495 bExp = extractFloat128Exp( b );
6496 bSign = extractFloat128Sign( b );
6497 zSign = aSign ^ bSign;
6498 if ( aExp == 0x7FFF ) {
6499 if ( ( aSig0 | aSig1 )
6500 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6501 return propagateFloat128NaN(a, b, status);
6502 }
6503 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6504 return packFloat128( zSign, 0x7FFF, 0, 0 );
6505 }
6506 if ( bExp == 0x7FFF ) {
6507 if (bSig0 | bSig1) {
6508 return propagateFloat128NaN(a, b, status);
6509 }
6510 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6511 invalid:
6512 float_raise(float_flag_invalid, status);
6513 return float128_default_nan(status);
6514 }
6515 return packFloat128( zSign, 0x7FFF, 0, 0 );
6516 }
6517 if ( aExp == 0 ) {
6518 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6519 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6520 }
6521 if ( bExp == 0 ) {
6522 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6523 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6524 }
6525 zExp = aExp + bExp - 0x4000;
6526 aSig0 |= LIT64( 0x0001000000000000 );
6527 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6528 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6529 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6530 zSig2 |= ( zSig3 != 0 );
6531 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6532 shift128ExtraRightJamming(
6533 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6534 ++zExp;
6535 }
6536 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6537
6538}
6539
6540
6541
6542
6543
6544
6545
6546float128 float128_div(float128 a, float128 b, float_status *status)
6547{
6548 flag aSign, bSign, zSign;
6549 int32_t aExp, bExp, zExp;
6550 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6551 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6552
6553 aSig1 = extractFloat128Frac1( a );
6554 aSig0 = extractFloat128Frac0( a );
6555 aExp = extractFloat128Exp( a );
6556 aSign = extractFloat128Sign( a );
6557 bSig1 = extractFloat128Frac1( b );
6558 bSig0 = extractFloat128Frac0( b );
6559 bExp = extractFloat128Exp( b );
6560 bSign = extractFloat128Sign( b );
6561 zSign = aSign ^ bSign;
6562 if ( aExp == 0x7FFF ) {
6563 if (aSig0 | aSig1) {
6564 return propagateFloat128NaN(a, b, status);
6565 }
6566 if ( bExp == 0x7FFF ) {
6567 if (bSig0 | bSig1) {
6568 return propagateFloat128NaN(a, b, status);
6569 }
6570 goto invalid;
6571 }
6572 return packFloat128( zSign, 0x7FFF, 0, 0 );
6573 }
6574 if ( bExp == 0x7FFF ) {
6575 if (bSig0 | bSig1) {
6576 return propagateFloat128NaN(a, b, status);
6577 }
6578 return packFloat128( zSign, 0, 0, 0 );
6579 }
6580 if ( bExp == 0 ) {
6581 if ( ( bSig0 | bSig1 ) == 0 ) {
6582 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6583 invalid:
6584 float_raise(float_flag_invalid, status);
6585 return float128_default_nan(status);
6586 }
6587 float_raise(float_flag_divbyzero, status);
6588 return packFloat128( zSign, 0x7FFF, 0, 0 );
6589 }
6590 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6591 }
6592 if ( aExp == 0 ) {
6593 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6594 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6595 }
6596 zExp = aExp - bExp + 0x3FFD;
6597 shortShift128Left(
6598 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6599 shortShift128Left(
6600 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6601 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6602 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6603 ++zExp;
6604 }
6605 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6606 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6607 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
6608 while ( (int64_t) rem0 < 0 ) {
6609 --zSig0;
6610 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6611 }
6612 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6613 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6614 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6615 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
6616 while ( (int64_t) rem1 < 0 ) {
6617 --zSig1;
6618 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6619 }
6620 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6621 }
6622 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
6623 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6624
6625}
6626
6627
6628
6629
6630
6631
6632
6633float128 float128_rem(float128 a, float128 b, float_status *status)
6634{
6635 flag aSign, zSign;
6636 int32_t aExp, bExp, expDiff;
6637 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6638 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6639 int64_t sigMean0;
6640
6641 aSig1 = extractFloat128Frac1( a );
6642 aSig0 = extractFloat128Frac0( a );
6643 aExp = extractFloat128Exp( a );
6644 aSign = extractFloat128Sign( a );
6645 bSig1 = extractFloat128Frac1( b );
6646 bSig0 = extractFloat128Frac0( b );
6647 bExp = extractFloat128Exp( b );
6648 if ( aExp == 0x7FFF ) {
6649 if ( ( aSig0 | aSig1 )
6650 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6651 return propagateFloat128NaN(a, b, status);
6652 }
6653 goto invalid;
6654 }
6655 if ( bExp == 0x7FFF ) {
6656 if (bSig0 | bSig1) {
6657 return propagateFloat128NaN(a, b, status);
6658 }
6659 return a;
6660 }
6661 if ( bExp == 0 ) {
6662 if ( ( bSig0 | bSig1 ) == 0 ) {
6663 invalid:
6664 float_raise(float_flag_invalid, status);
6665 return float128_default_nan(status);
6666 }
6667 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6668 }
6669 if ( aExp == 0 ) {
6670 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6671 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6672 }
6673 expDiff = aExp - bExp;
6674 if ( expDiff < -1 ) return a;
6675 shortShift128Left(
6676 aSig0 | LIT64( 0x0001000000000000 ),
6677 aSig1,
6678 15 - ( expDiff < 0 ),
6679 &aSig0,
6680 &aSig1
6681 );
6682 shortShift128Left(
6683 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6684 q = le128( bSig0, bSig1, aSig0, aSig1 );
6685 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6686 expDiff -= 64;
6687 while ( 0 < expDiff ) {
6688 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6689 q = ( 4 < q ) ? q - 4 : 0;
6690 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6691 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6692 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6693 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6694 expDiff -= 61;
6695 }
6696 if ( -64 < expDiff ) {
6697 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6698 q = ( 4 < q ) ? q - 4 : 0;
6699 q >>= - expDiff;
6700 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6701 expDiff += 52;
6702 if ( expDiff < 0 ) {
6703 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6704 }
6705 else {
6706 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6707 }
6708 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6709 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6710 }
6711 else {
6712 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6713 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6714 }
6715 do {
6716 alternateASig0 = aSig0;
6717 alternateASig1 = aSig1;
6718 ++q;
6719 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6720 } while ( 0 <= (int64_t) aSig0 );
6721 add128(
6722 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
6723 if ( ( sigMean0 < 0 )
6724 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6725 aSig0 = alternateASig0;
6726 aSig1 = alternateASig1;
6727 }
6728 zSign = ( (int64_t) aSig0 < 0 );
6729 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
6730 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6731 status);
6732}
6733
6734
6735
6736
6737
6738
6739
6740float128 float128_sqrt(float128 a, float_status *status)
6741{
6742 flag aSign;
6743 int32_t aExp, zExp;
6744 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6745 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6746
6747 aSig1 = extractFloat128Frac1( a );
6748 aSig0 = extractFloat128Frac0( a );
6749 aExp = extractFloat128Exp( a );
6750 aSign = extractFloat128Sign( a );
6751 if ( aExp == 0x7FFF ) {
6752 if (aSig0 | aSig1) {
6753 return propagateFloat128NaN(a, a, status);
6754 }
6755 if ( ! aSign ) return a;
6756 goto invalid;
6757 }
6758 if ( aSign ) {
6759 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6760 invalid:
6761 float_raise(float_flag_invalid, status);
6762 return float128_default_nan(status);
6763 }
6764 if ( aExp == 0 ) {
6765 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6766 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6767 }
6768 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6769 aSig0 |= LIT64( 0x0001000000000000 );
6770 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6771 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6772 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6773 doubleZSig0 = zSig0<<1;
6774 mul64To128( zSig0, zSig0, &term0, &term1 );
6775 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6776 while ( (int64_t) rem0 < 0 ) {
6777 --zSig0;
6778 doubleZSig0 -= 2;
6779 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6780 }
6781 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6782 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6783 if ( zSig1 == 0 ) zSig1 = 1;
6784 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6785 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6786 mul64To128( zSig1, zSig1, &term2, &term3 );
6787 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
6788 while ( (int64_t) rem1 < 0 ) {
6789 --zSig1;
6790 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6791 term3 |= 1;
6792 term2 |= doubleZSig0;
6793 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6794 }
6795 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6796 }
6797 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
6798 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
6799
6800}
6801
6802
6803
6804
6805
6806
6807
6808
6809int float128_eq(float128 a, float128 b, float_status *status)
6810{
6811
6812 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6813 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6814 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6815 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6816 ) {
6817 float_raise(float_flag_invalid, status);
6818 return 0;
6819 }
6820 return
6821 ( a.low == b.low )
6822 && ( ( a.high == b.high )
6823 || ( ( a.low == 0 )
6824 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
6825 );
6826
6827}
6828
6829
6830
6831
6832
6833
6834
6835
6836int float128_le(float128 a, float128 b, float_status *status)
6837{
6838 flag aSign, bSign;
6839
6840 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6841 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6842 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6843 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6844 ) {
6845 float_raise(float_flag_invalid, status);
6846 return 0;
6847 }
6848 aSign = extractFloat128Sign( a );
6849 bSign = extractFloat128Sign( b );
6850 if ( aSign != bSign ) {
6851 return
6852 aSign
6853 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6854 == 0 );
6855 }
6856 return
6857 aSign ? le128( b.high, b.low, a.high, a.low )
6858 : le128( a.high, a.low, b.high, b.low );
6859
6860}
6861
6862
6863
6864
6865
6866
6867
6868
6869int float128_lt(float128 a, float128 b, float_status *status)
6870{
6871 flag aSign, bSign;
6872
6873 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6874 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6875 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6876 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6877 ) {
6878 float_raise(float_flag_invalid, status);
6879 return 0;
6880 }
6881 aSign = extractFloat128Sign( a );
6882 bSign = extractFloat128Sign( b );
6883 if ( aSign != bSign ) {
6884 return
6885 aSign
6886 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6887 != 0 );
6888 }
6889 return
6890 aSign ? lt128( b.high, b.low, a.high, a.low )
6891 : lt128( a.high, a.low, b.high, b.low );
6892
6893}
6894
6895
6896
6897
6898
6899
6900
6901
6902int float128_unordered(float128 a, float128 b, float_status *status)
6903{
6904 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6905 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6906 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6907 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6908 ) {
6909 float_raise(float_flag_invalid, status);
6910 return 1;
6911 }
6912 return 0;
6913}
6914
6915
6916
6917
6918
6919
6920
6921
6922int float128_eq_quiet(float128 a, float128 b, float_status *status)
6923{
6924
6925 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6926 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6927 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6928 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6929 ) {
6930 if (float128_is_signaling_nan(a, status)
6931 || float128_is_signaling_nan(b, status)) {
6932 float_raise(float_flag_invalid, status);
6933 }
6934 return 0;
6935 }
6936 return
6937 ( a.low == b.low )
6938 && ( ( a.high == b.high )
6939 || ( ( a.low == 0 )
6940 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
6941 );
6942
6943}
6944
6945
6946
6947
6948
6949
6950
6951
6952int float128_le_quiet(float128 a, float128 b, float_status *status)
6953{
6954 flag aSign, bSign;
6955
6956 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6957 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6958 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6959 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6960 ) {
6961 if (float128_is_signaling_nan(a, status)
6962 || float128_is_signaling_nan(b, status)) {
6963 float_raise(float_flag_invalid, status);
6964 }
6965 return 0;
6966 }
6967 aSign = extractFloat128Sign( a );
6968 bSign = extractFloat128Sign( b );
6969 if ( aSign != bSign ) {
6970 return
6971 aSign
6972 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6973 == 0 );
6974 }
6975 return
6976 aSign ? le128( b.high, b.low, a.high, a.low )
6977 : le128( a.high, a.low, b.high, b.low );
6978
6979}
6980
6981
6982
6983
6984
6985
6986
6987
6988int float128_lt_quiet(float128 a, float128 b, float_status *status)
6989{
6990 flag aSign, bSign;
6991
6992 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6993 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6994 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6995 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6996 ) {
6997 if (float128_is_signaling_nan(a, status)
6998 || float128_is_signaling_nan(b, status)) {
6999 float_raise(float_flag_invalid, status);
7000 }
7001 return 0;
7002 }
7003 aSign = extractFloat128Sign( a );
7004 bSign = extractFloat128Sign( b );
7005 if ( aSign != bSign ) {
7006 return
7007 aSign
7008 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
7009 != 0 );
7010 }
7011 return
7012 aSign ? lt128( b.high, b.low, a.high, a.low )
7013 : lt128( a.high, a.low, b.high, b.low );
7014
7015}
7016
7017
7018
7019
7020
7021
7022
7023
7024int float128_unordered_quiet(float128 a, float128 b, float_status *status)
7025{
7026 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7027 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7028 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7029 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7030 ) {
7031 if (float128_is_signaling_nan(a, status)
7032 || float128_is_signaling_nan(b, status)) {
7033 float_raise(float_flag_invalid, status);
7034 }
7035 return 1;
7036 }
7037 return 0;
7038}
7039
7040static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
7041 int is_quiet, float_status *status)
7042{
7043 flag aSign, bSign;
7044
7045 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7046 float_raise(float_flag_invalid, status);
7047 return float_relation_unordered;
7048 }
7049 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7050 ( extractFloatx80Frac( a )<<1 ) ) ||
7051 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7052 ( extractFloatx80Frac( b )<<1 ) )) {
7053 if (!is_quiet ||
7054 floatx80_is_signaling_nan(a, status) ||
7055 floatx80_is_signaling_nan(b, status)) {
7056 float_raise(float_flag_invalid, status);
7057 }
7058 return float_relation_unordered;
7059 }
7060 aSign = extractFloatx80Sign( a );
7061 bSign = extractFloatx80Sign( b );
7062 if ( aSign != bSign ) {
7063
7064 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7065 ( ( a.low | b.low ) == 0 ) ) {
7066
7067 return float_relation_equal;
7068 } else {
7069 return 1 - (2 * aSign);
7070 }
7071 } else {
7072 if (a.low == b.low && a.high == b.high) {
7073 return float_relation_equal;
7074 } else {
7075 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7076 }
7077 }
7078}
7079
7080int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
7081{
7082 return floatx80_compare_internal(a, b, 0, status);
7083}
7084
7085int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
7086{
7087 return floatx80_compare_internal(a, b, 1, status);
7088}
7089
7090static inline int float128_compare_internal(float128 a, float128 b,
7091 int is_quiet, float_status *status)
7092{
7093 flag aSign, bSign;
7094
7095 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7096 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7097 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7098 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7099 if (!is_quiet ||
7100 float128_is_signaling_nan(a, status) ||
7101 float128_is_signaling_nan(b, status)) {
7102 float_raise(float_flag_invalid, status);
7103 }
7104 return float_relation_unordered;
7105 }
7106 aSign = extractFloat128Sign( a );
7107 bSign = extractFloat128Sign( b );
7108 if ( aSign != bSign ) {
7109 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7110
7111 return float_relation_equal;
7112 } else {
7113 return 1 - (2 * aSign);
7114 }
7115 } else {
7116 if (a.low == b.low && a.high == b.high) {
7117 return float_relation_equal;
7118 } else {
7119 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7120 }
7121 }
7122}
7123
7124int float128_compare(float128 a, float128 b, float_status *status)
7125{
7126 return float128_compare_internal(a, b, 0, status);
7127}
7128
7129int float128_compare_quiet(float128 a, float128 b, float_status *status)
7130{
7131 return float128_compare_internal(a, b, 1, status);
7132}
7133
7134floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
7135{
7136 flag aSign;
7137 int32_t aExp;
7138 uint64_t aSig;
7139
7140 if (floatx80_invalid_encoding(a)) {
7141 float_raise(float_flag_invalid, status);
7142 return floatx80_default_nan(status);
7143 }
7144 aSig = extractFloatx80Frac( a );
7145 aExp = extractFloatx80Exp( a );
7146 aSign = extractFloatx80Sign( a );
7147
7148 if ( aExp == 0x7FFF ) {
7149 if ( aSig<<1 ) {
7150 return propagateFloatx80NaN(a, a, status);
7151 }
7152 return a;
7153 }
7154
7155 if (aExp == 0) {
7156 if (aSig == 0) {
7157 return a;
7158 }
7159 aExp++;
7160 }
7161
7162 if (n > 0x10000) {
7163 n = 0x10000;
7164 } else if (n < -0x10000) {
7165 n = -0x10000;
7166 }
7167
7168 aExp += n;
7169 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7170 aSign, aExp, aSig, 0, status);
7171}
7172
7173float128 float128_scalbn(float128 a, int n, float_status *status)
7174{
7175 flag aSign;
7176 int32_t aExp;
7177 uint64_t aSig0, aSig1;
7178
7179 aSig1 = extractFloat128Frac1( a );
7180 aSig0 = extractFloat128Frac0( a );
7181 aExp = extractFloat128Exp( a );
7182 aSign = extractFloat128Sign( a );
7183 if ( aExp == 0x7FFF ) {
7184 if ( aSig0 | aSig1 ) {
7185 return propagateFloat128NaN(a, a, status);
7186 }
7187 return a;
7188 }
7189 if (aExp != 0) {
7190 aSig0 |= LIT64( 0x0001000000000000 );
7191 } else if (aSig0 == 0 && aSig1 == 0) {
7192 return a;
7193 } else {
7194 aExp++;
7195 }
7196
7197 if (n > 0x10000) {
7198 n = 0x10000;
7199 } else if (n < -0x10000) {
7200 n = -0x10000;
7201 }
7202
7203 aExp += n - 1;
7204 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
7205 , status);
7206
7207}
7208