1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85#include "qemu/osdep.h"
86#include "qemu/bitops.h"
87#include "fpu/softfloat.h"
88
89
90
91
92
93
94
95
96#include "fpu/softfloat-macros.h"
97
98
99
100
101
102
103
104
105
106#include "softfloat-specialize.h"
107
108
109
110
111
112static inline uint32_t extractFloat16Frac(float16 a)
113{
114 return float16_val(a) & 0x3ff;
115}
116
117
118
119
120
121static inline int extractFloat16Exp(float16 a)
122{
123 return (float16_val(a) >> 10) & 0x1f;
124}
125
126
127
128
129
130static inline flag extractFloat16Sign(float16 a)
131{
132 return float16_val(a)>>15;
133}
134
135
136
137
138
139static inline uint32_t extractFloat32Frac(float32 a)
140{
141 return float32_val(a) & 0x007FFFFF;
142}
143
144
145
146
147
148static inline int extractFloat32Exp(float32 a)
149{
150 return (float32_val(a) >> 23) & 0xFF;
151}
152
153
154
155
156
157static inline flag extractFloat32Sign(float32 a)
158{
159 return float32_val(a) >> 31;
160}
161
162
163
164
165
166static inline uint64_t extractFloat64Frac(float64 a)
167{
168 return float64_val(a) & LIT64(0x000FFFFFFFFFFFFF);
169}
170
171
172
173
174
175static inline int extractFloat64Exp(float64 a)
176{
177 return (float64_val(a) >> 52) & 0x7FF;
178}
179
180
181
182
183
184static inline flag extractFloat64Sign(float64 a)
185{
186 return float64_val(a) >> 63;
187}
188
189
190
191
192
193
194typedef enum __attribute__ ((__packed__)) {
195 float_class_unclassified,
196 float_class_zero,
197 float_class_normal,
198 float_class_inf,
199 float_class_qnan,
200 float_class_snan,
201 float_class_dnan,
202 float_class_msnan,
203} FloatClass;
204
205
206
207
208
209
210
211
212
213
214
215
216typedef struct {
217 uint64_t frac;
218 int32_t exp;
219 FloatClass cls;
220 bool sign;
221} FloatParts;
222
223#define DECOMPOSED_BINARY_POINT (64 - 2)
224#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
225#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1)
226
227
228
229
230
231
232
233
234
235
236
237
238typedef struct {
239 int exp_size;
240 int exp_bias;
241 int exp_max;
242 int frac_size;
243 int frac_shift;
244 uint64_t frac_lsb;
245 uint64_t frac_lsbm1;
246 uint64_t round_mask;
247 uint64_t roundeven_mask;
248} FloatFmt;
249
250
251#define FLOAT_PARAMS(E, F) \
252 .exp_size = E, \
253 .exp_bias = ((1 << E) - 1) >> 1, \
254 .exp_max = (1 << E) - 1, \
255 .frac_size = F, \
256 .frac_shift = DECOMPOSED_BINARY_POINT - F, \
257 .frac_lsb = 1ull << (DECOMPOSED_BINARY_POINT - F), \
258 .frac_lsbm1 = 1ull << ((DECOMPOSED_BINARY_POINT - F) - 1), \
259 .round_mask = (1ull << (DECOMPOSED_BINARY_POINT - F)) - 1, \
260 .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - F)) - 1
261
262static const FloatFmt float16_params = {
263 FLOAT_PARAMS(5, 10)
264};
265
266static const FloatFmt float32_params = {
267 FLOAT_PARAMS(8, 23)
268};
269
270static const FloatFmt float64_params = {
271 FLOAT_PARAMS(11, 52)
272};
273
274
275static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
276{
277 const int sign_pos = fmt.frac_size + fmt.exp_size;
278
279 return (FloatParts) {
280 .cls = float_class_unclassified,
281 .sign = extract64(raw, sign_pos, 1),
282 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
283 .frac = extract64(raw, 0, fmt.frac_size),
284 };
285}
286
287static inline FloatParts float16_unpack_raw(float16 f)
288{
289 return unpack_raw(float16_params, f);
290}
291
292static inline FloatParts float32_unpack_raw(float32 f)
293{
294 return unpack_raw(float32_params, f);
295}
296
297static inline FloatParts float64_unpack_raw(float64 f)
298{
299 return unpack_raw(float64_params, f);
300}
301
302
303static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
304{
305 const int sign_pos = fmt.frac_size + fmt.exp_size;
306 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
307 return deposit64(ret, sign_pos, 1, p.sign);
308}
309
310static inline float16 float16_pack_raw(FloatParts p)
311{
312 return make_float16(pack_raw(float16_params, p));
313}
314
315static inline float32 float32_pack_raw(FloatParts p)
316{
317 return make_float32(pack_raw(float32_params, p));
318}
319
320static inline float64 float64_pack_raw(FloatParts p)
321{
322 return make_float64(pack_raw(float64_params, p));
323}
324
325
326static FloatParts canonicalize(FloatParts part, const FloatFmt *parm,
327 float_status *status)
328{
329 if (part.exp == parm->exp_max) {
330 if (part.frac == 0) {
331 part.cls = float_class_inf;
332 } else {
333#ifdef NO_SIGNALING_NANS
334 part.cls = float_class_qnan;
335#else
336 int64_t msb = part.frac << (parm->frac_shift + 2);
337 if ((msb < 0) == status->snan_bit_is_one) {
338 part.cls = float_class_snan;
339 } else {
340 part.cls = float_class_qnan;
341 }
342#endif
343 }
344 } else if (part.exp == 0) {
345 if (likely(part.frac == 0)) {
346 part.cls = float_class_zero;
347 } else if (status->flush_inputs_to_zero) {
348 float_raise(float_flag_input_denormal, status);
349 part.cls = float_class_zero;
350 part.frac = 0;
351 } else {
352 int shift = clz64(part.frac) - 1;
353 part.cls = float_class_normal;
354 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
355 part.frac <<= shift;
356 }
357 } else {
358 part.cls = float_class_normal;
359 part.exp -= parm->exp_bias;
360 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
361 }
362 return part;
363}
364
365
366
367
368
369
370
371static FloatParts round_canonical(FloatParts p, float_status *s,
372 const FloatFmt *parm)
373{
374 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
375 const uint64_t round_mask = parm->round_mask;
376 const uint64_t roundeven_mask = parm->roundeven_mask;
377 const int exp_max = parm->exp_max;
378 const int frac_shift = parm->frac_shift;
379 uint64_t frac, inc;
380 int exp, flags = 0;
381 bool overflow_norm;
382
383 frac = p.frac;
384 exp = p.exp;
385
386 switch (p.cls) {
387 case float_class_normal:
388 switch (s->float_rounding_mode) {
389 case float_round_nearest_even:
390 overflow_norm = false;
391 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
392 break;
393 case float_round_ties_away:
394 overflow_norm = false;
395 inc = frac_lsbm1;
396 break;
397 case float_round_to_zero:
398 overflow_norm = true;
399 inc = 0;
400 break;
401 case float_round_up:
402 inc = p.sign ? 0 : round_mask;
403 overflow_norm = p.sign;
404 break;
405 case float_round_down:
406 inc = p.sign ? round_mask : 0;
407 overflow_norm = !p.sign;
408 break;
409 default:
410 g_assert_not_reached();
411 }
412
413 exp += parm->exp_bias;
414 if (likely(exp > 0)) {
415 if (frac & round_mask) {
416 flags |= float_flag_inexact;
417 frac += inc;
418 if (frac & DECOMPOSED_OVERFLOW_BIT) {
419 frac >>= 1;
420 exp++;
421 }
422 }
423 frac >>= frac_shift;
424
425 if (unlikely(exp >= exp_max)) {
426 flags |= float_flag_overflow | float_flag_inexact;
427 if (overflow_norm) {
428 exp = exp_max - 1;
429 frac = -1;
430 } else {
431 p.cls = float_class_inf;
432 goto do_inf;
433 }
434 }
435 } else if (s->flush_to_zero) {
436 flags |= float_flag_output_denormal;
437 p.cls = float_class_zero;
438 goto do_zero;
439 } else {
440 bool is_tiny = (s->float_detect_tininess
441 == float_tininess_before_rounding)
442 || (exp < 0)
443 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
444
445 shift64RightJamming(frac, 1 - exp, &frac);
446 if (frac & round_mask) {
447
448 if (s->float_rounding_mode == float_round_nearest_even) {
449 inc = ((frac & roundeven_mask) != frac_lsbm1
450 ? frac_lsbm1 : 0);
451 }
452 flags |= float_flag_inexact;
453 frac += inc;
454 }
455
456 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
457 frac >>= frac_shift;
458
459 if (is_tiny && (flags & float_flag_inexact)) {
460 flags |= float_flag_underflow;
461 }
462 if (exp == 0 && frac == 0) {
463 p.cls = float_class_zero;
464 }
465 }
466 break;
467
468 case float_class_zero:
469 do_zero:
470 exp = 0;
471 frac = 0;
472 break;
473
474 case float_class_inf:
475 do_inf:
476 exp = exp_max;
477 frac = 0;
478 break;
479
480 case float_class_qnan:
481 case float_class_snan:
482 exp = exp_max;
483 break;
484
485 default:
486 g_assert_not_reached();
487 }
488
489 float_raise(flags, s);
490 p.exp = exp;
491 p.frac = frac;
492 return p;
493}
494
495static FloatParts float16_unpack_canonical(float16 f, float_status *s)
496{
497 return canonicalize(float16_unpack_raw(f), &float16_params, s);
498}
499
500static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
501{
502 switch (p.cls) {
503 case float_class_dnan:
504 return float16_default_nan(s);
505 case float_class_msnan:
506 return float16_maybe_silence_nan(float16_pack_raw(p), s);
507 default:
508 p = round_canonical(p, s, &float16_params);
509 return float16_pack_raw(p);
510 }
511}
512
513static FloatParts float32_unpack_canonical(float32 f, float_status *s)
514{
515 return canonicalize(float32_unpack_raw(f), &float32_params, s);
516}
517
518static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
519{
520 switch (p.cls) {
521 case float_class_dnan:
522 return float32_default_nan(s);
523 case float_class_msnan:
524 return float32_maybe_silence_nan(float32_pack_raw(p), s);
525 default:
526 p = round_canonical(p, s, &float32_params);
527 return float32_pack_raw(p);
528 }
529}
530
531static FloatParts float64_unpack_canonical(float64 f, float_status *s)
532{
533 return canonicalize(float64_unpack_raw(f), &float64_params, s);
534}
535
536static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
537{
538 switch (p.cls) {
539 case float_class_dnan:
540 return float64_default_nan(s);
541 case float_class_msnan:
542 return float64_maybe_silence_nan(float64_pack_raw(p), s);
543 default:
544 p = round_canonical(p, s, &float64_params);
545 return float64_pack_raw(p);
546 }
547}
548
549
550static bool is_nan(FloatClass c)
551{
552 return unlikely(c >= float_class_qnan);
553}
554static bool is_snan(FloatClass c)
555{
556 return c == float_class_snan;
557}
558static bool is_qnan(FloatClass c)
559{
560 return c == float_class_qnan;
561}
562
563static FloatParts return_nan(FloatParts a, float_status *s)
564{
565 switch (a.cls) {
566 case float_class_snan:
567 s->float_exception_flags |= float_flag_invalid;
568 a.cls = float_class_msnan;
569
570 case float_class_qnan:
571 if (s->default_nan_mode) {
572 a.cls = float_class_dnan;
573 }
574 break;
575
576 default:
577 g_assert_not_reached();
578 }
579 return a;
580}
581
582static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
583{
584 if (is_snan(a.cls) || is_snan(b.cls)) {
585 s->float_exception_flags |= float_flag_invalid;
586 }
587
588 if (s->default_nan_mode) {
589 a.cls = float_class_dnan;
590 } else {
591 if (pickNaN(is_qnan(a.cls), is_snan(a.cls),
592 is_qnan(b.cls), is_snan(b.cls),
593 a.frac > b.frac ||
594 (a.frac == b.frac && a.sign < b.sign))) {
595 a = b;
596 }
597 a.cls = float_class_msnan;
598 }
599 return a;
600}
601
602static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
603 bool inf_zero, float_status *s)
604{
605 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
606 s->float_exception_flags |= float_flag_invalid;
607 }
608
609 if (s->default_nan_mode) {
610 a.cls = float_class_dnan;
611 } else {
612 switch (pickNaNMulAdd(is_qnan(a.cls), is_snan(a.cls),
613 is_qnan(b.cls), is_snan(b.cls),
614 is_qnan(c.cls), is_snan(c.cls),
615 inf_zero, s)) {
616 case 0:
617 break;
618 case 1:
619 a = b;
620 break;
621 case 2:
622 a = c;
623 break;
624 case 3:
625 a.cls = float_class_dnan;
626 return a;
627 default:
628 g_assert_not_reached();
629 }
630
631 a.cls = float_class_msnan;
632 }
633 return a;
634}
635
636
637
638
639
640
641
642
643static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
644 float_status *s)
645{
646 bool a_sign = a.sign;
647 bool b_sign = b.sign ^ subtract;
648
649 if (a_sign != b_sign) {
650
651
652 if (a.cls == float_class_normal && b.cls == float_class_normal) {
653 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
654 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
655 a.frac = a.frac - b.frac;
656 } else {
657 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
658 a.frac = b.frac - a.frac;
659 a.exp = b.exp;
660 a_sign ^= 1;
661 }
662
663 if (a.frac == 0) {
664 a.cls = float_class_zero;
665 a.sign = s->float_rounding_mode == float_round_down;
666 } else {
667 int shift = clz64(a.frac) - 1;
668 a.frac = a.frac << shift;
669 a.exp = a.exp - shift;
670 a.sign = a_sign;
671 }
672 return a;
673 }
674 if (is_nan(a.cls) || is_nan(b.cls)) {
675 return pick_nan(a, b, s);
676 }
677 if (a.cls == float_class_inf) {
678 if (b.cls == float_class_inf) {
679 float_raise(float_flag_invalid, s);
680 a.cls = float_class_dnan;
681 }
682 return a;
683 }
684 if (a.cls == float_class_zero && b.cls == float_class_zero) {
685 a.sign = s->float_rounding_mode == float_round_down;
686 return a;
687 }
688 if (a.cls == float_class_zero || b.cls == float_class_inf) {
689 b.sign = a_sign ^ 1;
690 return b;
691 }
692 if (b.cls == float_class_zero) {
693 return a;
694 }
695 } else {
696
697 if (a.cls == float_class_normal && b.cls == float_class_normal) {
698 if (a.exp > b.exp) {
699 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
700 } else if (a.exp < b.exp) {
701 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
702 a.exp = b.exp;
703 }
704 a.frac += b.frac;
705 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
706 a.frac >>= 1;
707 a.exp += 1;
708 }
709 return a;
710 }
711 if (is_nan(a.cls) || is_nan(b.cls)) {
712 return pick_nan(a, b, s);
713 }
714 if (a.cls == float_class_inf || b.cls == float_class_zero) {
715 return a;
716 }
717 if (b.cls == float_class_inf || a.cls == float_class_zero) {
718 b.sign = b_sign;
719 return b;
720 }
721 }
722 g_assert_not_reached();
723}
724
725
726
727
728
729
730
731float16 __attribute__((flatten)) float16_add(float16 a, float16 b,
732 float_status *status)
733{
734 FloatParts pa = float16_unpack_canonical(a, status);
735 FloatParts pb = float16_unpack_canonical(b, status);
736 FloatParts pr = addsub_floats(pa, pb, false, status);
737
738 return float16_round_pack_canonical(pr, status);
739}
740
741float32 __attribute__((flatten)) float32_add(float32 a, float32 b,
742 float_status *status)
743{
744 FloatParts pa = float32_unpack_canonical(a, status);
745 FloatParts pb = float32_unpack_canonical(b, status);
746 FloatParts pr = addsub_floats(pa, pb, false, status);
747
748 return float32_round_pack_canonical(pr, status);
749}
750
751float64 __attribute__((flatten)) float64_add(float64 a, float64 b,
752 float_status *status)
753{
754 FloatParts pa = float64_unpack_canonical(a, status);
755 FloatParts pb = float64_unpack_canonical(b, status);
756 FloatParts pr = addsub_floats(pa, pb, false, status);
757
758 return float64_round_pack_canonical(pr, status);
759}
760
761float16 __attribute__((flatten)) float16_sub(float16 a, float16 b,
762 float_status *status)
763{
764 FloatParts pa = float16_unpack_canonical(a, status);
765 FloatParts pb = float16_unpack_canonical(b, status);
766 FloatParts pr = addsub_floats(pa, pb, true, status);
767
768 return float16_round_pack_canonical(pr, status);
769}
770
771float32 __attribute__((flatten)) float32_sub(float32 a, float32 b,
772 float_status *status)
773{
774 FloatParts pa = float32_unpack_canonical(a, status);
775 FloatParts pb = float32_unpack_canonical(b, status);
776 FloatParts pr = addsub_floats(pa, pb, true, status);
777
778 return float32_round_pack_canonical(pr, status);
779}
780
781float64 __attribute__((flatten)) float64_sub(float64 a, float64 b,
782 float_status *status)
783{
784 FloatParts pa = float64_unpack_canonical(a, status);
785 FloatParts pb = float64_unpack_canonical(b, status);
786 FloatParts pr = addsub_floats(pa, pb, true, status);
787
788 return float64_round_pack_canonical(pr, status);
789}
790
791
792
793
794
795
796
797static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
798{
799 bool sign = a.sign ^ b.sign;
800
801 if (a.cls == float_class_normal && b.cls == float_class_normal) {
802 uint64_t hi, lo;
803 int exp = a.exp + b.exp;
804
805 mul64To128(a.frac, b.frac, &hi, &lo);
806 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
807 if (lo & DECOMPOSED_OVERFLOW_BIT) {
808 shift64RightJamming(lo, 1, &lo);
809 exp += 1;
810 }
811
812
813 a.exp = exp;
814 a.sign = sign;
815 a.frac = lo;
816 return a;
817 }
818
819 if (is_nan(a.cls) || is_nan(b.cls)) {
820 return pick_nan(a, b, s);
821 }
822
823 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
824 (a.cls == float_class_zero && b.cls == float_class_inf)) {
825 s->float_exception_flags |= float_flag_invalid;
826 a.cls = float_class_dnan;
827 a.sign = sign;
828 return a;
829 }
830
831 if (a.cls == float_class_inf || a.cls == float_class_zero) {
832 a.sign = sign;
833 return a;
834 }
835 if (b.cls == float_class_inf || b.cls == float_class_zero) {
836 b.sign = sign;
837 return b;
838 }
839 g_assert_not_reached();
840}
841
842float16 __attribute__((flatten)) float16_mul(float16 a, float16 b,
843 float_status *status)
844{
845 FloatParts pa = float16_unpack_canonical(a, status);
846 FloatParts pb = float16_unpack_canonical(b, status);
847 FloatParts pr = mul_floats(pa, pb, status);
848
849 return float16_round_pack_canonical(pr, status);
850}
851
852float32 __attribute__((flatten)) float32_mul(float32 a, float32 b,
853 float_status *status)
854{
855 FloatParts pa = float32_unpack_canonical(a, status);
856 FloatParts pb = float32_unpack_canonical(b, status);
857 FloatParts pr = mul_floats(pa, pb, status);
858
859 return float32_round_pack_canonical(pr, status);
860}
861
862float64 __attribute__((flatten)) float64_mul(float64 a, float64 b,
863 float_status *status)
864{
865 FloatParts pa = float64_unpack_canonical(a, status);
866 FloatParts pb = float64_unpack_canonical(b, status);
867 FloatParts pr = mul_floats(pa, pb, status);
868
869 return float64_round_pack_canonical(pr, status);
870}
871
872
873
874
875
876
877
878
879
880
881
882
883
884static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
885 int flags, float_status *s)
886{
887 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
888 ((1 << float_class_inf) | (1 << float_class_zero));
889 bool p_sign;
890 bool sign_flip = flags & float_muladd_negate_result;
891 FloatClass p_class;
892 uint64_t hi, lo;
893 int p_exp;
894
895
896
897
898
899
900 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
901 return pick_nan_muladd(a, b, c, inf_zero, s);
902 }
903
904 if (inf_zero) {
905 s->float_exception_flags |= float_flag_invalid;
906 a.cls = float_class_dnan;
907 return a;
908 }
909
910 if (flags & float_muladd_negate_c) {
911 c.sign ^= 1;
912 }
913
914 p_sign = a.sign ^ b.sign;
915
916 if (flags & float_muladd_negate_product) {
917 p_sign ^= 1;
918 }
919
920 if (a.cls == float_class_inf || b.cls == float_class_inf) {
921 p_class = float_class_inf;
922 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
923 p_class = float_class_zero;
924 } else {
925 p_class = float_class_normal;
926 }
927
928 if (c.cls == float_class_inf) {
929 if (p_class == float_class_inf && p_sign != c.sign) {
930 s->float_exception_flags |= float_flag_invalid;
931 a.cls = float_class_dnan;
932 } else {
933 a.cls = float_class_inf;
934 a.sign = c.sign ^ sign_flip;
935 }
936 return a;
937 }
938
939 if (p_class == float_class_inf) {
940 a.cls = float_class_inf;
941 a.sign = p_sign ^ sign_flip;
942 return a;
943 }
944
945 if (p_class == float_class_zero) {
946 if (c.cls == float_class_zero) {
947 if (p_sign != c.sign) {
948 p_sign = s->float_rounding_mode == float_round_down;
949 }
950 c.sign = p_sign;
951 } else if (flags & float_muladd_halve_result) {
952 c.exp -= 1;
953 }
954 c.sign ^= sign_flip;
955 return c;
956 }
957
958
959 assert(a.cls == float_class_normal &&
960 b.cls == float_class_normal);
961
962 p_exp = a.exp + b.exp;
963
964
965
966
967 mul64To128(a.frac, b.frac, &hi, &lo);
968
969
970
971 if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
972 shift128RightJamming(hi, lo, 1, &hi, &lo);
973 p_exp += 1;
974 }
975
976
977 if (c.cls == float_class_zero) {
978
979 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
980 } else {
981 int exp_diff = p_exp - c.exp;
982 if (p_sign == c.sign) {
983
984 if (exp_diff <= 0) {
985 shift128RightJamming(hi, lo,
986 DECOMPOSED_BINARY_POINT - exp_diff,
987 &hi, &lo);
988 lo += c.frac;
989 p_exp = c.exp;
990 } else {
991 uint64_t c_hi, c_lo;
992
993 c_hi = c.frac >> 2;
994 c_lo = 0;
995 shift128RightJamming(c_hi, c_lo,
996 exp_diff,
997 &c_hi, &c_lo);
998 add128(hi, lo, c_hi, c_lo, &hi, &lo);
999
1000 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1001 }
1002
1003 if (lo & DECOMPOSED_OVERFLOW_BIT) {
1004 shift64RightJamming(lo, 1, &lo);
1005 p_exp += 1;
1006 }
1007
1008 } else {
1009
1010 uint64_t c_hi, c_lo;
1011
1012 c_hi = c.frac >> 2;
1013 c_lo = 0;
1014
1015 if (exp_diff <= 0) {
1016 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1017 if (exp_diff == 0
1018 &&
1019 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1020 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1021 } else {
1022 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1023 p_sign ^= 1;
1024 p_exp = c.exp;
1025 }
1026 } else {
1027 shift128RightJamming(c_hi, c_lo,
1028 exp_diff,
1029 &c_hi, &c_lo);
1030 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1031 }
1032
1033 if (hi == 0 && lo == 0) {
1034 a.cls = float_class_zero;
1035 a.sign = s->float_rounding_mode == float_round_down;
1036 a.sign ^= sign_flip;
1037 return a;
1038 } else {
1039 int shift;
1040 if (hi != 0) {
1041 shift = clz64(hi);
1042 } else {
1043 shift = clz64(lo) + 64;
1044 }
1045
1046
1047
1048
1049
1050
1051 shift -= 1;
1052 if (shift >= 64) {
1053 lo = lo << (shift - 64);
1054 } else {
1055 hi = (hi << shift) | (lo >> (64 - shift));
1056 lo = hi | ((lo << shift) != 0);
1057 }
1058 p_exp -= shift - 2;
1059 }
1060 }
1061 }
1062
1063 if (flags & float_muladd_halve_result) {
1064 p_exp -= 1;
1065 }
1066
1067
1068 a.cls = float_class_normal;
1069 a.sign = p_sign ^ sign_flip;
1070 a.exp = p_exp;
1071 a.frac = lo;
1072
1073 return a;
1074}
1075
1076float16 __attribute__((flatten)) float16_muladd(float16 a, float16 b, float16 c,
1077 int flags, float_status *status)
1078{
1079 FloatParts pa = float16_unpack_canonical(a, status);
1080 FloatParts pb = float16_unpack_canonical(b, status);
1081 FloatParts pc = float16_unpack_canonical(c, status);
1082 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1083
1084 return float16_round_pack_canonical(pr, status);
1085}
1086
1087float32 __attribute__((flatten)) float32_muladd(float32 a, float32 b, float32 c,
1088 int flags, float_status *status)
1089{
1090 FloatParts pa = float32_unpack_canonical(a, status);
1091 FloatParts pb = float32_unpack_canonical(b, status);
1092 FloatParts pc = float32_unpack_canonical(c, status);
1093 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1094
1095 return float32_round_pack_canonical(pr, status);
1096}
1097
1098float64 __attribute__((flatten)) float64_muladd(float64 a, float64 b, float64 c,
1099 int flags, float_status *status)
1100{
1101 FloatParts pa = float64_unpack_canonical(a, status);
1102 FloatParts pb = float64_unpack_canonical(b, status);
1103 FloatParts pc = float64_unpack_canonical(c, status);
1104 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1105
1106 return float64_round_pack_canonical(pr, status);
1107}
1108
1109
1110
1111
1112
1113
1114
1115static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1116{
1117 bool sign = a.sign ^ b.sign;
1118
1119 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1120 uint64_t temp_lo, temp_hi;
1121 int exp = a.exp - b.exp;
1122 if (a.frac < b.frac) {
1123 exp -= 1;
1124 shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1,
1125 &temp_hi, &temp_lo);
1126 } else {
1127 shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT,
1128 &temp_hi, &temp_lo);
1129 }
1130
1131
1132 a.frac = div128To64(temp_lo, temp_hi, b.frac);
1133 a.sign = sign;
1134 a.exp = exp;
1135 return a;
1136 }
1137
1138 if (is_nan(a.cls) || is_nan(b.cls)) {
1139 return pick_nan(a, b, s);
1140 }
1141
1142 if (a.cls == b.cls
1143 &&
1144 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1145 s->float_exception_flags |= float_flag_invalid;
1146 a.cls = float_class_dnan;
1147 return a;
1148 }
1149
1150 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1151 a.sign = sign;
1152 return a;
1153 }
1154
1155 if (b.cls == float_class_zero) {
1156 s->float_exception_flags |= float_flag_divbyzero;
1157 a.cls = float_class_inf;
1158 a.sign = sign;
1159 return a;
1160 }
1161
1162 if (b.cls == float_class_inf) {
1163 a.cls = float_class_zero;
1164 a.sign = sign;
1165 return a;
1166 }
1167 g_assert_not_reached();
1168}
1169
1170float16 float16_div(float16 a, float16 b, float_status *status)
1171{
1172 FloatParts pa = float16_unpack_canonical(a, status);
1173 FloatParts pb = float16_unpack_canonical(b, status);
1174 FloatParts pr = div_floats(pa, pb, status);
1175
1176 return float16_round_pack_canonical(pr, status);
1177}
1178
1179float32 float32_div(float32 a, float32 b, float_status *status)
1180{
1181 FloatParts pa = float32_unpack_canonical(a, status);
1182 FloatParts pb = float32_unpack_canonical(b, status);
1183 FloatParts pr = div_floats(pa, pb, status);
1184
1185 return float32_round_pack_canonical(pr, status);
1186}
1187
1188float64 float64_div(float64 a, float64 b, float_status *status)
1189{
1190 FloatParts pa = float64_unpack_canonical(a, status);
1191 FloatParts pb = float64_unpack_canonical(b, status);
1192 FloatParts pr = div_floats(pa, pb, status);
1193
1194 return float64_round_pack_canonical(pr, status);
1195}
1196
1197
1198
1199
1200
1201
1202
1203
1204static FloatParts round_to_int(FloatParts a, int rounding_mode, float_status *s)
1205{
1206 if (is_nan(a.cls)) {
1207 return return_nan(a, s);
1208 }
1209
1210 switch (a.cls) {
1211 case float_class_zero:
1212 case float_class_inf:
1213 case float_class_qnan:
1214
1215 break;
1216 case float_class_normal:
1217 if (a.exp >= DECOMPOSED_BINARY_POINT) {
1218
1219 break;
1220 }
1221 if (a.exp < 0) {
1222 bool one;
1223
1224 s->float_exception_flags |= float_flag_inexact;
1225 switch (rounding_mode) {
1226 case float_round_nearest_even:
1227 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
1228 break;
1229 case float_round_ties_away:
1230 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
1231 break;
1232 case float_round_to_zero:
1233 one = false;
1234 break;
1235 case float_round_up:
1236 one = !a.sign;
1237 break;
1238 case float_round_down:
1239 one = a.sign;
1240 break;
1241 default:
1242 g_assert_not_reached();
1243 }
1244
1245 if (one) {
1246 a.frac = DECOMPOSED_IMPLICIT_BIT;
1247 a.exp = 0;
1248 } else {
1249 a.cls = float_class_zero;
1250 }
1251 } else {
1252 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
1253 uint64_t frac_lsbm1 = frac_lsb >> 1;
1254 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
1255 uint64_t rnd_mask = rnd_even_mask >> 1;
1256 uint64_t inc;
1257
1258 switch (rounding_mode) {
1259 case float_round_nearest_even:
1260 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
1261 break;
1262 case float_round_ties_away:
1263 inc = frac_lsbm1;
1264 break;
1265 case float_round_to_zero:
1266 inc = 0;
1267 break;
1268 case float_round_up:
1269 inc = a.sign ? 0 : rnd_mask;
1270 break;
1271 case float_round_down:
1272 inc = a.sign ? rnd_mask : 0;
1273 break;
1274 default:
1275 g_assert_not_reached();
1276 }
1277
1278 if (a.frac & rnd_mask) {
1279 s->float_exception_flags |= float_flag_inexact;
1280 a.frac += inc;
1281 a.frac &= ~rnd_mask;
1282 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
1283 a.frac >>= 1;
1284 a.exp++;
1285 }
1286 }
1287 }
1288 break;
1289 default:
1290 g_assert_not_reached();
1291 }
1292 return a;
1293}
1294
1295float16 float16_round_to_int(float16 a, float_status *s)
1296{
1297 FloatParts pa = float16_unpack_canonical(a, s);
1298 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1299 return float16_round_pack_canonical(pr, s);
1300}
1301
1302float32 float32_round_to_int(float32 a, float_status *s)
1303{
1304 FloatParts pa = float32_unpack_canonical(a, s);
1305 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1306 return float32_round_pack_canonical(pr, s);
1307}
1308
1309float64 float64_round_to_int(float64 a, float_status *s)
1310{
1311 FloatParts pa = float64_unpack_canonical(a, s);
1312 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1313 return float64_round_pack_canonical(pr, s);
1314}
1315
1316float64 float64_trunc_to_int(float64 a, float_status *s)
1317{
1318 FloatParts pa = float64_unpack_canonical(a, s);
1319 FloatParts pr = round_to_int(pa, float_round_to_zero, s);
1320 return float64_round_pack_canonical(pr, s);
1321}
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334static int64_t round_to_int_and_pack(FloatParts in, int rmode,
1335 int64_t min, int64_t max,
1336 float_status *s)
1337{
1338 uint64_t r;
1339 int orig_flags = get_float_exception_flags(s);
1340 FloatParts p = round_to_int(in, rmode, s);
1341
1342 switch (p.cls) {
1343 case float_class_snan:
1344 case float_class_qnan:
1345 case float_class_dnan:
1346 case float_class_msnan:
1347 s->float_exception_flags = orig_flags | float_flag_invalid;
1348 return max;
1349 case float_class_inf:
1350 s->float_exception_flags = orig_flags | float_flag_invalid;
1351 return p.sign ? min : max;
1352 case float_class_zero:
1353 return 0;
1354 case float_class_normal:
1355 if (p.exp < DECOMPOSED_BINARY_POINT) {
1356 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1357 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1358 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1359 } else {
1360 r = UINT64_MAX;
1361 }
1362 if (p.sign) {
1363 if (r < -(uint64_t) min) {
1364 return -r;
1365 } else {
1366 s->float_exception_flags = orig_flags | float_flag_invalid;
1367 return min;
1368 }
1369 } else {
1370 if (r < max) {
1371 return r;
1372 } else {
1373 s->float_exception_flags = orig_flags | float_flag_invalid;
1374 return max;
1375 }
1376 }
1377 default:
1378 g_assert_not_reached();
1379 }
1380}
1381
1382#define FLOAT_TO_INT(fsz, isz) \
1383int ## isz ## _t float ## fsz ## _to_int ## isz(float ## fsz a, \
1384 float_status *s) \
1385{ \
1386 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1387 return round_to_int_and_pack(p, s->float_rounding_mode, \
1388 INT ## isz ## _MIN, INT ## isz ## _MAX,\
1389 s); \
1390} \
1391 \
1392int ## isz ## _t float ## fsz ## _to_int ## isz ## _round_to_zero \
1393 (float ## fsz a, float_status *s) \
1394{ \
1395 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1396 return round_to_int_and_pack(p, float_round_to_zero, \
1397 INT ## isz ## _MIN, INT ## isz ## _MAX,\
1398 s); \
1399}
1400
1401FLOAT_TO_INT(16, 16)
1402FLOAT_TO_INT(16, 32)
1403FLOAT_TO_INT(16, 64)
1404
1405FLOAT_TO_INT(32, 16)
1406FLOAT_TO_INT(32, 32)
1407FLOAT_TO_INT(32, 64)
1408
1409FLOAT_TO_INT(64, 16)
1410FLOAT_TO_INT(64, 32)
1411FLOAT_TO_INT(64, 64)
1412
1413#undef FLOAT_TO_INT
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, uint64_t max,
1429 float_status *s)
1430{
1431 int orig_flags = get_float_exception_flags(s);
1432 FloatParts p = round_to_int(in, rmode, s);
1433
1434 switch (p.cls) {
1435 case float_class_snan:
1436 case float_class_qnan:
1437 case float_class_dnan:
1438 case float_class_msnan:
1439 s->float_exception_flags = orig_flags | float_flag_invalid;
1440 return max;
1441 case float_class_inf:
1442 s->float_exception_flags = orig_flags | float_flag_invalid;
1443 return p.sign ? 0 : max;
1444 case float_class_zero:
1445 return 0;
1446 case float_class_normal:
1447 {
1448 uint64_t r;
1449 if (p.sign) {
1450 s->float_exception_flags = orig_flags | float_flag_invalid;
1451 return 0;
1452 }
1453
1454 if (p.exp < DECOMPOSED_BINARY_POINT) {
1455 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1456 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1457 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1458 } else {
1459 s->float_exception_flags = orig_flags | float_flag_invalid;
1460 return max;
1461 }
1462
1463
1464
1465
1466
1467 if (r > max) {
1468 s->float_exception_flags = orig_flags | float_flag_invalid;
1469 return max;
1470 } else {
1471 return r;
1472 }
1473 }
1474 default:
1475 g_assert_not_reached();
1476 }
1477}
1478
1479#define FLOAT_TO_UINT(fsz, isz) \
1480uint ## isz ## _t float ## fsz ## _to_uint ## isz(float ## fsz a, \
1481 float_status *s) \
1482{ \
1483 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1484 return round_to_uint_and_pack(p, s->float_rounding_mode, \
1485 UINT ## isz ## _MAX, s); \
1486} \
1487 \
1488uint ## isz ## _t float ## fsz ## _to_uint ## isz ## _round_to_zero \
1489 (float ## fsz a, float_status *s) \
1490{ \
1491 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1492 return round_to_uint_and_pack(p, float_round_to_zero, \
1493 UINT ## isz ## _MAX, s); \
1494}
1495
1496FLOAT_TO_UINT(16, 16)
1497FLOAT_TO_UINT(16, 32)
1498FLOAT_TO_UINT(16, 64)
1499
1500FLOAT_TO_UINT(32, 16)
1501FLOAT_TO_UINT(32, 32)
1502FLOAT_TO_UINT(32, 64)
1503
1504FLOAT_TO_UINT(64, 16)
1505FLOAT_TO_UINT(64, 32)
1506FLOAT_TO_UINT(64, 64)
1507
1508#undef FLOAT_TO_UINT
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518static FloatParts int_to_float(int64_t a, float_status *status)
1519{
1520 FloatParts r;
1521 if (a == 0) {
1522 r.cls = float_class_zero;
1523 r.sign = false;
1524 } else if (a == (1ULL << 63)) {
1525 r.cls = float_class_normal;
1526 r.sign = true;
1527 r.frac = DECOMPOSED_IMPLICIT_BIT;
1528 r.exp = 63;
1529 } else {
1530 uint64_t f;
1531 if (a < 0) {
1532 f = -a;
1533 r.sign = true;
1534 } else {
1535 f = a;
1536 r.sign = false;
1537 }
1538 int shift = clz64(f) - 1;
1539 r.cls = float_class_normal;
1540 r.exp = (DECOMPOSED_BINARY_POINT - shift);
1541 r.frac = f << shift;
1542 }
1543
1544 return r;
1545}
1546
1547float16 int64_to_float16(int64_t a, float_status *status)
1548{
1549 FloatParts pa = int_to_float(a, status);
1550 return float16_round_pack_canonical(pa, status);
1551}
1552
1553float16 int32_to_float16(int32_t a, float_status *status)
1554{
1555 return int64_to_float16(a, status);
1556}
1557
1558float16 int16_to_float16(int16_t a, float_status *status)
1559{
1560 return int64_to_float16(a, status);
1561}
1562
1563float32 int64_to_float32(int64_t a, float_status *status)
1564{
1565 FloatParts pa = int_to_float(a, status);
1566 return float32_round_pack_canonical(pa, status);
1567}
1568
1569float32 int32_to_float32(int32_t a, float_status *status)
1570{
1571 return int64_to_float32(a, status);
1572}
1573
1574float32 int16_to_float32(int16_t a, float_status *status)
1575{
1576 return int64_to_float32(a, status);
1577}
1578
1579float64 int64_to_float64(int64_t a, float_status *status)
1580{
1581 FloatParts pa = int_to_float(a, status);
1582 return float64_round_pack_canonical(pa, status);
1583}
1584
1585float64 int32_to_float64(int32_t a, float_status *status)
1586{
1587 return int64_to_float64(a, status);
1588}
1589
1590float64 int16_to_float64(int16_t a, float_status *status)
1591{
1592 return int64_to_float64(a, status);
1593}
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604static FloatParts uint_to_float(uint64_t a, float_status *status)
1605{
1606 FloatParts r = { .sign = false};
1607
1608 if (a == 0) {
1609 r.cls = float_class_zero;
1610 } else {
1611 int spare_bits = clz64(a) - 1;
1612 r.cls = float_class_normal;
1613 r.exp = DECOMPOSED_BINARY_POINT - spare_bits;
1614 if (spare_bits < 0) {
1615 shift64RightJamming(a, -spare_bits, &a);
1616 r.frac = a;
1617 } else {
1618 r.frac = a << spare_bits;
1619 }
1620 }
1621
1622 return r;
1623}
1624
1625float16 uint64_to_float16(uint64_t a, float_status *status)
1626{
1627 FloatParts pa = uint_to_float(a, status);
1628 return float16_round_pack_canonical(pa, status);
1629}
1630
1631float16 uint32_to_float16(uint32_t a, float_status *status)
1632{
1633 return uint64_to_float16(a, status);
1634}
1635
1636float16 uint16_to_float16(uint16_t a, float_status *status)
1637{
1638 return uint64_to_float16(a, status);
1639}
1640
1641float32 uint64_to_float32(uint64_t a, float_status *status)
1642{
1643 FloatParts pa = uint_to_float(a, status);
1644 return float32_round_pack_canonical(pa, status);
1645}
1646
1647float32 uint32_to_float32(uint32_t a, float_status *status)
1648{
1649 return uint64_to_float32(a, status);
1650}
1651
1652float32 uint16_to_float32(uint16_t a, float_status *status)
1653{
1654 return uint64_to_float32(a, status);
1655}
1656
1657float64 uint64_to_float64(uint64_t a, float_status *status)
1658{
1659 FloatParts pa = uint_to_float(a, status);
1660 return float64_round_pack_canonical(pa, status);
1661}
1662
1663float64 uint32_to_float64(uint32_t a, float_status *status)
1664{
1665 return uint64_to_float64(a, status);
1666}
1667
1668float64 uint16_to_float64(uint16_t a, float_status *status)
1669{
1670 return uint64_to_float64(a, status);
1671}
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
1690 bool ieee, bool ismag, float_status *s)
1691{
1692 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
1693 if (ieee) {
1694
1695
1696
1697
1698
1699 if (is_snan(a.cls) || is_snan(b.cls)) {
1700 return pick_nan(a, b, s);
1701 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
1702 return b;
1703 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
1704 return a;
1705 }
1706 }
1707 return pick_nan(a, b, s);
1708 } else {
1709 int a_exp, b_exp;
1710
1711 switch (a.cls) {
1712 case float_class_normal:
1713 a_exp = a.exp;
1714 break;
1715 case float_class_inf:
1716 a_exp = INT_MAX;
1717 break;
1718 case float_class_zero:
1719 a_exp = INT_MIN;
1720 break;
1721 default:
1722 g_assert_not_reached();
1723 break;
1724 }
1725 switch (b.cls) {
1726 case float_class_normal:
1727 b_exp = b.exp;
1728 break;
1729 case float_class_inf:
1730 b_exp = INT_MAX;
1731 break;
1732 case float_class_zero:
1733 b_exp = INT_MIN;
1734 break;
1735 default:
1736 g_assert_not_reached();
1737 break;
1738 }
1739
1740 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
1741 bool a_less = a_exp < b_exp;
1742 if (a_exp == b_exp) {
1743 a_less = a.frac < b.frac;
1744 }
1745 return a_less ^ ismin ? b : a;
1746 }
1747
1748 if (a.sign == b.sign) {
1749 bool a_less = a_exp < b_exp;
1750 if (a_exp == b_exp) {
1751 a_less = a.frac < b.frac;
1752 }
1753 return a.sign ^ a_less ^ ismin ? b : a;
1754 } else {
1755 return a.sign ^ ismin ? b : a;
1756 }
1757 }
1758}
1759
1760#define MINMAX(sz, name, ismin, isiee, ismag) \
1761float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
1762 float_status *s) \
1763{ \
1764 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1765 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1766 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
1767 \
1768 return float ## sz ## _round_pack_canonical(pr, s); \
1769}
1770
1771MINMAX(16, min, true, false, false)
1772MINMAX(16, minnum, true, true, false)
1773MINMAX(16, minnummag, true, true, true)
1774MINMAX(16, max, false, false, false)
1775MINMAX(16, maxnum, false, true, false)
1776MINMAX(16, maxnummag, false, true, true)
1777
1778MINMAX(32, min, true, false, false)
1779MINMAX(32, minnum, true, true, false)
1780MINMAX(32, minnummag, true, true, true)
1781MINMAX(32, max, false, false, false)
1782MINMAX(32, maxnum, false, true, false)
1783MINMAX(32, maxnummag, false, true, true)
1784
1785MINMAX(64, min, true, false, false)
1786MINMAX(64, minnum, true, true, false)
1787MINMAX(64, minnummag, true, true, true)
1788MINMAX(64, max, false, false, false)
1789MINMAX(64, maxnum, false, true, false)
1790MINMAX(64, maxnummag, false, true, true)
1791
1792#undef MINMAX
1793
1794
1795static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
1796 float_status *s)
1797{
1798 if (is_nan(a.cls) || is_nan(b.cls)) {
1799 if (!is_quiet ||
1800 a.cls == float_class_snan ||
1801 b.cls == float_class_snan) {
1802 s->float_exception_flags |= float_flag_invalid;
1803 }
1804 return float_relation_unordered;
1805 }
1806
1807 if (a.cls == float_class_zero) {
1808 if (b.cls == float_class_zero) {
1809 return float_relation_equal;
1810 }
1811 return b.sign ? float_relation_greater : float_relation_less;
1812 } else if (b.cls == float_class_zero) {
1813 return a.sign ? float_relation_less : float_relation_greater;
1814 }
1815
1816
1817
1818
1819 if (a.cls == float_class_inf) {
1820 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
1821 return float_relation_equal;
1822 }
1823 return a.sign ? float_relation_less : float_relation_greater;
1824 } else if (b.cls == float_class_inf) {
1825 return b.sign ? float_relation_greater : float_relation_less;
1826 }
1827
1828 if (a.sign != b.sign) {
1829 return a.sign ? float_relation_less : float_relation_greater;
1830 }
1831
1832 if (a.exp == b.exp) {
1833 if (a.frac == b.frac) {
1834 return float_relation_equal;
1835 }
1836 if (a.sign) {
1837 return a.frac > b.frac ?
1838 float_relation_less : float_relation_greater;
1839 } else {
1840 return a.frac > b.frac ?
1841 float_relation_greater : float_relation_less;
1842 }
1843 } else {
1844 if (a.sign) {
1845 return a.exp > b.exp ? float_relation_less : float_relation_greater;
1846 } else {
1847 return a.exp > b.exp ? float_relation_greater : float_relation_less;
1848 }
1849 }
1850}
1851
1852#define COMPARE(sz) \
1853int float ## sz ## _compare(float ## sz a, float ## sz b, \
1854 float_status *s) \
1855{ \
1856 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1857 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1858 return compare_floats(pa, pb, false, s); \
1859} \
1860int float ## sz ## _compare_quiet(float ## sz a, float ## sz b, \
1861 float_status *s) \
1862{ \
1863 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1864 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1865 return compare_floats(pa, pb, true, s); \
1866}
1867
1868COMPARE(16)
1869COMPARE(32)
1870COMPARE(64)
1871
1872#undef COMPARE
1873
1874
1875static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
1876{
1877 if (unlikely(is_nan(a.cls))) {
1878 return return_nan(a, s);
1879 }
1880 if (a.cls == float_class_normal) {
1881
1882
1883
1884
1885
1886 n = MIN(MAX(n, -0x10000), 0x10000);
1887 a.exp += n;
1888 }
1889 return a;
1890}
1891
1892float16 float16_scalbn(float16 a, int n, float_status *status)
1893{
1894 FloatParts pa = float16_unpack_canonical(a, status);
1895 FloatParts pr = scalbn_decomposed(pa, n, status);
1896 return float16_round_pack_canonical(pr, status);
1897}
1898
1899float32 float32_scalbn(float32 a, int n, float_status *status)
1900{
1901 FloatParts pa = float32_unpack_canonical(a, status);
1902 FloatParts pr = scalbn_decomposed(pa, n, status);
1903 return float32_round_pack_canonical(pr, status);
1904}
1905
1906float64 float64_scalbn(float64 a, int n, float_status *status)
1907{
1908 FloatParts pa = float64_unpack_canonical(a, status);
1909 FloatParts pr = scalbn_decomposed(pa, n, status);
1910 return float64_round_pack_canonical(pr, status);
1911}
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
1926{
1927 uint64_t a_frac, r_frac, s_frac;
1928 int bit, last_bit;
1929
1930 if (is_nan(a.cls)) {
1931 return return_nan(a, s);
1932 }
1933 if (a.cls == float_class_zero) {
1934 return a;
1935 }
1936 if (a.sign) {
1937 s->float_exception_flags |= float_flag_invalid;
1938 a.cls = float_class_dnan;
1939 return a;
1940 }
1941 if (a.cls == float_class_inf) {
1942 return a;
1943 }
1944
1945 assert(a.cls == float_class_normal);
1946
1947
1948
1949
1950
1951
1952 a_frac = a.frac;
1953 if (!(a.exp & 1)) {
1954 a_frac >>= 1;
1955 }
1956 a.exp >>= 1;
1957
1958
1959 r_frac = 0;
1960 s_frac = 0;
1961
1962
1963
1964
1965
1966 bit = DECOMPOSED_BINARY_POINT - 1;
1967 last_bit = MAX(p->frac_shift - 4, 0);
1968 do {
1969 uint64_t q = 1ULL << bit;
1970 uint64_t t_frac = s_frac + q;
1971 if (t_frac <= a_frac) {
1972 s_frac = t_frac + q;
1973 a_frac -= t_frac;
1974 r_frac += q;
1975 }
1976 a_frac <<= 1;
1977 } while (--bit >= last_bit);
1978
1979
1980
1981
1982 a.frac = (r_frac << 1) + (a_frac != 0);
1983
1984 return a;
1985}
1986
1987float16 __attribute__((flatten)) float16_sqrt(float16 a, float_status *status)
1988{
1989 FloatParts pa = float16_unpack_canonical(a, status);
1990 FloatParts pr = sqrt_float(pa, status, &float16_params);
1991 return float16_round_pack_canonical(pr, status);
1992}
1993
1994float32 __attribute__((flatten)) float32_sqrt(float32 a, float_status *status)
1995{
1996 FloatParts pa = float32_unpack_canonical(a, status);
1997 FloatParts pr = sqrt_float(pa, status, &float32_params);
1998 return float32_round_pack_canonical(pr, status);
1999}
2000
2001float64 __attribute__((flatten)) float64_sqrt(float64 a, float_status *status)
2002{
2003 FloatParts pa = float64_unpack_canonical(a, status);
2004 FloatParts pr = sqrt_float(pa, status, &float64_params);
2005 return float64_round_pack_canonical(pr, status);
2006}
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
2021{
2022 int8_t roundingMode;
2023 flag roundNearestEven;
2024 int8_t roundIncrement, roundBits;
2025 int32_t z;
2026
2027 roundingMode = status->float_rounding_mode;
2028 roundNearestEven = ( roundingMode == float_round_nearest_even );
2029 switch (roundingMode) {
2030 case float_round_nearest_even:
2031 case float_round_ties_away:
2032 roundIncrement = 0x40;
2033 break;
2034 case float_round_to_zero:
2035 roundIncrement = 0;
2036 break;
2037 case float_round_up:
2038 roundIncrement = zSign ? 0 : 0x7f;
2039 break;
2040 case float_round_down:
2041 roundIncrement = zSign ? 0x7f : 0;
2042 break;
2043 default:
2044 abort();
2045 }
2046 roundBits = absZ & 0x7F;
2047 absZ = ( absZ + roundIncrement )>>7;
2048 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2049 z = absZ;
2050 if ( zSign ) z = - z;
2051 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
2052 float_raise(float_flag_invalid, status);
2053 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
2054 }
2055 if (roundBits) {
2056 status->float_exception_flags |= float_flag_inexact;
2057 }
2058 return z;
2059
2060}
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
2075 float_status *status)
2076{
2077 int8_t roundingMode;
2078 flag roundNearestEven, increment;
2079 int64_t z;
2080
2081 roundingMode = status->float_rounding_mode;
2082 roundNearestEven = ( roundingMode == float_round_nearest_even );
2083 switch (roundingMode) {
2084 case float_round_nearest_even:
2085 case float_round_ties_away:
2086 increment = ((int64_t) absZ1 < 0);
2087 break;
2088 case float_round_to_zero:
2089 increment = 0;
2090 break;
2091 case float_round_up:
2092 increment = !zSign && absZ1;
2093 break;
2094 case float_round_down:
2095 increment = zSign && absZ1;
2096 break;
2097 default:
2098 abort();
2099 }
2100 if ( increment ) {
2101 ++absZ0;
2102 if ( absZ0 == 0 ) goto overflow;
2103 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
2104 }
2105 z = absZ0;
2106 if ( zSign ) z = - z;
2107 if ( z && ( ( z < 0 ) ^ zSign ) ) {
2108 overflow:
2109 float_raise(float_flag_invalid, status);
2110 return
2111 zSign ? (int64_t) LIT64( 0x8000000000000000 )
2112 : LIT64( 0x7FFFFFFFFFFFFFFF );
2113 }
2114 if (absZ1) {
2115 status->float_exception_flags |= float_flag_inexact;
2116 }
2117 return z;
2118
2119}
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
2132 uint64_t absZ1, float_status *status)
2133{
2134 int8_t roundingMode;
2135 flag roundNearestEven, increment;
2136
2137 roundingMode = status->float_rounding_mode;
2138 roundNearestEven = (roundingMode == float_round_nearest_even);
2139 switch (roundingMode) {
2140 case float_round_nearest_even:
2141 case float_round_ties_away:
2142 increment = ((int64_t)absZ1 < 0);
2143 break;
2144 case float_round_to_zero:
2145 increment = 0;
2146 break;
2147 case float_round_up:
2148 increment = !zSign && absZ1;
2149 break;
2150 case float_round_down:
2151 increment = zSign && absZ1;
2152 break;
2153 default:
2154 abort();
2155 }
2156 if (increment) {
2157 ++absZ0;
2158 if (absZ0 == 0) {
2159 float_raise(float_flag_invalid, status);
2160 return LIT64(0xFFFFFFFFFFFFFFFF);
2161 }
2162 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
2163 }
2164
2165 if (zSign && absZ0) {
2166 float_raise(float_flag_invalid, status);
2167 return 0;
2168 }
2169
2170 if (absZ1) {
2171 status->float_exception_flags |= float_flag_inexact;
2172 }
2173 return absZ0;
2174}
2175
2176
2177
2178
2179
2180float32 float32_squash_input_denormal(float32 a, float_status *status)
2181{
2182 if (status->flush_inputs_to_zero) {
2183 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
2184 float_raise(float_flag_input_denormal, status);
2185 return make_float32(float32_val(a) & 0x80000000);
2186 }
2187 }
2188 return a;
2189}
2190
2191
2192
2193
2194
2195
2196
2197
2198static void
2199 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
2200{
2201 int8_t shiftCount;
2202
2203 shiftCount = countLeadingZeros32( aSig ) - 8;
2204 *zSigPtr = aSig<<shiftCount;
2205 *zExpPtr = 1 - shiftCount;
2206
2207}
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
2232 float_status *status)
2233{
2234 int8_t roundingMode;
2235 flag roundNearestEven;
2236 int8_t roundIncrement, roundBits;
2237 flag isTiny;
2238
2239 roundingMode = status->float_rounding_mode;
2240 roundNearestEven = ( roundingMode == float_round_nearest_even );
2241 switch (roundingMode) {
2242 case float_round_nearest_even:
2243 case float_round_ties_away:
2244 roundIncrement = 0x40;
2245 break;
2246 case float_round_to_zero:
2247 roundIncrement = 0;
2248 break;
2249 case float_round_up:
2250 roundIncrement = zSign ? 0 : 0x7f;
2251 break;
2252 case float_round_down:
2253 roundIncrement = zSign ? 0x7f : 0;
2254 break;
2255 default:
2256 abort();
2257 break;
2258 }
2259 roundBits = zSig & 0x7F;
2260 if ( 0xFD <= (uint16_t) zExp ) {
2261 if ( ( 0xFD < zExp )
2262 || ( ( zExp == 0xFD )
2263 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
2264 ) {
2265 float_raise(float_flag_overflow | float_flag_inexact, status);
2266 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
2267 }
2268 if ( zExp < 0 ) {
2269 if (status->flush_to_zero) {
2270 float_raise(float_flag_output_denormal, status);
2271 return packFloat32(zSign, 0, 0);
2272 }
2273 isTiny =
2274 (status->float_detect_tininess
2275 == float_tininess_before_rounding)
2276 || ( zExp < -1 )
2277 || ( zSig + roundIncrement < 0x80000000 );
2278 shift32RightJamming( zSig, - zExp, &zSig );
2279 zExp = 0;
2280 roundBits = zSig & 0x7F;
2281 if (isTiny && roundBits) {
2282 float_raise(float_flag_underflow, status);
2283 }
2284 }
2285 }
2286 if (roundBits) {
2287 status->float_exception_flags |= float_flag_inexact;
2288 }
2289 zSig = ( zSig + roundIncrement )>>7;
2290 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2291 if ( zSig == 0 ) zExp = 0;
2292 return packFloat32( zSign, zExp, zSig );
2293
2294}
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305static float32
2306 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
2307 float_status *status)
2308{
2309 int8_t shiftCount;
2310
2311 shiftCount = countLeadingZeros32( zSig ) - 1;
2312 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
2313 status);
2314
2315}
2316
2317
2318
2319
2320
2321float64 float64_squash_input_denormal(float64 a, float_status *status)
2322{
2323 if (status->flush_inputs_to_zero) {
2324 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
2325 float_raise(float_flag_input_denormal, status);
2326 return make_float64(float64_val(a) & (1ULL << 63));
2327 }
2328 }
2329 return a;
2330}
2331
2332
2333
2334
2335
2336
2337
2338
2339static void
2340 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
2341{
2342 int8_t shiftCount;
2343
2344 shiftCount = countLeadingZeros64( aSig ) - 11;
2345 *zSigPtr = aSig<<shiftCount;
2346 *zExpPtr = 1 - shiftCount;
2347
2348}
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
2362{
2363
2364 return make_float64(
2365 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
2366
2367}
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
2392 float_status *status)
2393{
2394 int8_t roundingMode;
2395 flag roundNearestEven;
2396 int roundIncrement, roundBits;
2397 flag isTiny;
2398
2399 roundingMode = status->float_rounding_mode;
2400 roundNearestEven = ( roundingMode == float_round_nearest_even );
2401 switch (roundingMode) {
2402 case float_round_nearest_even:
2403 case float_round_ties_away:
2404 roundIncrement = 0x200;
2405 break;
2406 case float_round_to_zero:
2407 roundIncrement = 0;
2408 break;
2409 case float_round_up:
2410 roundIncrement = zSign ? 0 : 0x3ff;
2411 break;
2412 case float_round_down:
2413 roundIncrement = zSign ? 0x3ff : 0;
2414 break;
2415 case float_round_to_odd:
2416 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2417 break;
2418 default:
2419 abort();
2420 }
2421 roundBits = zSig & 0x3FF;
2422 if ( 0x7FD <= (uint16_t) zExp ) {
2423 if ( ( 0x7FD < zExp )
2424 || ( ( zExp == 0x7FD )
2425 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
2426 ) {
2427 bool overflow_to_inf = roundingMode != float_round_to_odd &&
2428 roundIncrement != 0;
2429 float_raise(float_flag_overflow | float_flag_inexact, status);
2430 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
2431 }
2432 if ( zExp < 0 ) {
2433 if (status->flush_to_zero) {
2434 float_raise(float_flag_output_denormal, status);
2435 return packFloat64(zSign, 0, 0);
2436 }
2437 isTiny =
2438 (status->float_detect_tininess
2439 == float_tininess_before_rounding)
2440 || ( zExp < -1 )
2441 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
2442 shift64RightJamming( zSig, - zExp, &zSig );
2443 zExp = 0;
2444 roundBits = zSig & 0x3FF;
2445 if (isTiny && roundBits) {
2446 float_raise(float_flag_underflow, status);
2447 }
2448 if (roundingMode == float_round_to_odd) {
2449
2450
2451
2452
2453 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2454 }
2455 }
2456 }
2457 if (roundBits) {
2458 status->float_exception_flags |= float_flag_inexact;
2459 }
2460 zSig = ( zSig + roundIncrement )>>10;
2461 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
2462 if ( zSig == 0 ) zExp = 0;
2463 return packFloat64( zSign, zExp, zSig );
2464
2465}
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476static float64
2477 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
2478 float_status *status)
2479{
2480 int8_t shiftCount;
2481
2482 shiftCount = countLeadingZeros64( zSig ) - 1;
2483 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
2484 status);
2485
2486}
2487
2488
2489
2490
2491
2492
2493
2494
2495void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
2496 uint64_t *zSigPtr)
2497{
2498 int8_t shiftCount;
2499
2500 shiftCount = countLeadingZeros64( aSig );
2501 *zSigPtr = aSig<<shiftCount;
2502 *zExpPtr = 1 - shiftCount;
2503}
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
2530 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
2531 float_status *status)
2532{
2533 int8_t roundingMode;
2534 flag roundNearestEven, increment, isTiny;
2535 int64_t roundIncrement, roundMask, roundBits;
2536
2537 roundingMode = status->float_rounding_mode;
2538 roundNearestEven = ( roundingMode == float_round_nearest_even );
2539 if ( roundingPrecision == 80 ) goto precision80;
2540 if ( roundingPrecision == 64 ) {
2541 roundIncrement = LIT64( 0x0000000000000400 );
2542 roundMask = LIT64( 0x00000000000007FF );
2543 }
2544 else if ( roundingPrecision == 32 ) {
2545 roundIncrement = LIT64( 0x0000008000000000 );
2546 roundMask = LIT64( 0x000000FFFFFFFFFF );
2547 }
2548 else {
2549 goto precision80;
2550 }
2551 zSig0 |= ( zSig1 != 0 );
2552 switch (roundingMode) {
2553 case float_round_nearest_even:
2554 case float_round_ties_away:
2555 break;
2556 case float_round_to_zero:
2557 roundIncrement = 0;
2558 break;
2559 case float_round_up:
2560 roundIncrement = zSign ? 0 : roundMask;
2561 break;
2562 case float_round_down:
2563 roundIncrement = zSign ? roundMask : 0;
2564 break;
2565 default:
2566 abort();
2567 }
2568 roundBits = zSig0 & roundMask;
2569 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
2570 if ( ( 0x7FFE < zExp )
2571 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
2572 ) {
2573 goto overflow;
2574 }
2575 if ( zExp <= 0 ) {
2576 if (status->flush_to_zero) {
2577 float_raise(float_flag_output_denormal, status);
2578 return packFloatx80(zSign, 0, 0);
2579 }
2580 isTiny =
2581 (status->float_detect_tininess
2582 == float_tininess_before_rounding)
2583 || ( zExp < 0 )
2584 || ( zSig0 <= zSig0 + roundIncrement );
2585 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
2586 zExp = 0;
2587 roundBits = zSig0 & roundMask;
2588 if (isTiny && roundBits) {
2589 float_raise(float_flag_underflow, status);
2590 }
2591 if (roundBits) {
2592 status->float_exception_flags |= float_flag_inexact;
2593 }
2594 zSig0 += roundIncrement;
2595 if ( (int64_t) zSig0 < 0 ) zExp = 1;
2596 roundIncrement = roundMask + 1;
2597 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
2598 roundMask |= roundIncrement;
2599 }
2600 zSig0 &= ~ roundMask;
2601 return packFloatx80( zSign, zExp, zSig0 );
2602 }
2603 }
2604 if (roundBits) {
2605 status->float_exception_flags |= float_flag_inexact;
2606 }
2607 zSig0 += roundIncrement;
2608 if ( zSig0 < roundIncrement ) {
2609 ++zExp;
2610 zSig0 = LIT64( 0x8000000000000000 );
2611 }
2612 roundIncrement = roundMask + 1;
2613 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
2614 roundMask |= roundIncrement;
2615 }
2616 zSig0 &= ~ roundMask;
2617 if ( zSig0 == 0 ) zExp = 0;
2618 return packFloatx80( zSign, zExp, zSig0 );
2619 precision80:
2620 switch (roundingMode) {
2621 case float_round_nearest_even:
2622 case float_round_ties_away:
2623 increment = ((int64_t)zSig1 < 0);
2624 break;
2625 case float_round_to_zero:
2626 increment = 0;
2627 break;
2628 case float_round_up:
2629 increment = !zSign && zSig1;
2630 break;
2631 case float_round_down:
2632 increment = zSign && zSig1;
2633 break;
2634 default:
2635 abort();
2636 }
2637 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
2638 if ( ( 0x7FFE < zExp )
2639 || ( ( zExp == 0x7FFE )
2640 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
2641 && increment
2642 )
2643 ) {
2644 roundMask = 0;
2645 overflow:
2646 float_raise(float_flag_overflow | float_flag_inexact, status);
2647 if ( ( roundingMode == float_round_to_zero )
2648 || ( zSign && ( roundingMode == float_round_up ) )
2649 || ( ! zSign && ( roundingMode == float_round_down ) )
2650 ) {
2651 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
2652 }
2653 return packFloatx80(zSign,
2654 floatx80_infinity_high,
2655 floatx80_infinity_low);
2656 }
2657 if ( zExp <= 0 ) {
2658 isTiny =
2659 (status->float_detect_tininess
2660 == float_tininess_before_rounding)
2661 || ( zExp < 0 )
2662 || ! increment
2663 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
2664 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
2665 zExp = 0;
2666 if (isTiny && zSig1) {
2667 float_raise(float_flag_underflow, status);
2668 }
2669 if (zSig1) {
2670 status->float_exception_flags |= float_flag_inexact;
2671 }
2672 switch (roundingMode) {
2673 case float_round_nearest_even:
2674 case float_round_ties_away:
2675 increment = ((int64_t)zSig1 < 0);
2676 break;
2677 case float_round_to_zero:
2678 increment = 0;
2679 break;
2680 case float_round_up:
2681 increment = !zSign && zSig1;
2682 break;
2683 case float_round_down:
2684 increment = zSign && zSig1;
2685 break;
2686 default:
2687 abort();
2688 }
2689 if ( increment ) {
2690 ++zSig0;
2691 zSig0 &=
2692 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
2693 if ( (int64_t) zSig0 < 0 ) zExp = 1;
2694 }
2695 return packFloatx80( zSign, zExp, zSig0 );
2696 }
2697 }
2698 if (zSig1) {
2699 status->float_exception_flags |= float_flag_inexact;
2700 }
2701 if ( increment ) {
2702 ++zSig0;
2703 if ( zSig0 == 0 ) {
2704 ++zExp;
2705 zSig0 = LIT64( 0x8000000000000000 );
2706 }
2707 else {
2708 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
2709 }
2710 }
2711 else {
2712 if ( zSig0 == 0 ) zExp = 0;
2713 }
2714 return packFloatx80( zSign, zExp, zSig0 );
2715
2716}
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
2728 flag zSign, int32_t zExp,
2729 uint64_t zSig0, uint64_t zSig1,
2730 float_status *status)
2731{
2732 int8_t shiftCount;
2733
2734 if ( zSig0 == 0 ) {
2735 zSig0 = zSig1;
2736 zSig1 = 0;
2737 zExp -= 64;
2738 }
2739 shiftCount = countLeadingZeros64( zSig0 );
2740 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
2741 zExp -= shiftCount;
2742 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
2743 zSig0, zSig1, status);
2744
2745}
2746
2747
2748
2749
2750
2751
2752static inline uint64_t extractFloat128Frac1( float128 a )
2753{
2754
2755 return a.low;
2756
2757}
2758
2759
2760
2761
2762
2763
2764static inline uint64_t extractFloat128Frac0( float128 a )
2765{
2766
2767 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
2768
2769}
2770
2771
2772
2773
2774
2775
2776static inline int32_t extractFloat128Exp( float128 a )
2777{
2778
2779 return ( a.high>>48 ) & 0x7FFF;
2780
2781}
2782
2783
2784
2785
2786
2787static inline flag extractFloat128Sign( float128 a )
2788{
2789
2790 return a.high>>63;
2791
2792}
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804static void
2805 normalizeFloat128Subnormal(
2806 uint64_t aSig0,
2807 uint64_t aSig1,
2808 int32_t *zExpPtr,
2809 uint64_t *zSig0Ptr,
2810 uint64_t *zSig1Ptr
2811 )
2812{
2813 int8_t shiftCount;
2814
2815 if ( aSig0 == 0 ) {
2816 shiftCount = countLeadingZeros64( aSig1 ) - 15;
2817 if ( shiftCount < 0 ) {
2818 *zSig0Ptr = aSig1>>( - shiftCount );
2819 *zSig1Ptr = aSig1<<( shiftCount & 63 );
2820 }
2821 else {
2822 *zSig0Ptr = aSig1<<shiftCount;
2823 *zSig1Ptr = 0;
2824 }
2825 *zExpPtr = - shiftCount - 63;
2826 }
2827 else {
2828 shiftCount = countLeadingZeros64( aSig0 ) - 15;
2829 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
2830 *zExpPtr = 1 - shiftCount;
2831 }
2832
2833}
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848static inline float128
2849 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
2850{
2851 float128 z;
2852
2853 z.low = zSig1;
2854 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
2855 return z;
2856
2857}
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
2881 uint64_t zSig0, uint64_t zSig1,
2882 uint64_t zSig2, float_status *status)
2883{
2884 int8_t roundingMode;
2885 flag roundNearestEven, increment, isTiny;
2886
2887 roundingMode = status->float_rounding_mode;
2888 roundNearestEven = ( roundingMode == float_round_nearest_even );
2889 switch (roundingMode) {
2890 case float_round_nearest_even:
2891 case float_round_ties_away:
2892 increment = ((int64_t)zSig2 < 0);
2893 break;
2894 case float_round_to_zero:
2895 increment = 0;
2896 break;
2897 case float_round_up:
2898 increment = !zSign && zSig2;
2899 break;
2900 case float_round_down:
2901 increment = zSign && zSig2;
2902 break;
2903 case float_round_to_odd:
2904 increment = !(zSig1 & 0x1) && zSig2;
2905 break;
2906 default:
2907 abort();
2908 }
2909 if ( 0x7FFD <= (uint32_t) zExp ) {
2910 if ( ( 0x7FFD < zExp )
2911 || ( ( zExp == 0x7FFD )
2912 && eq128(
2913 LIT64( 0x0001FFFFFFFFFFFF ),
2914 LIT64( 0xFFFFFFFFFFFFFFFF ),
2915 zSig0,
2916 zSig1
2917 )
2918 && increment
2919 )
2920 ) {
2921 float_raise(float_flag_overflow | float_flag_inexact, status);
2922 if ( ( roundingMode == float_round_to_zero )
2923 || ( zSign && ( roundingMode == float_round_up ) )
2924 || ( ! zSign && ( roundingMode == float_round_down ) )
2925 || (roundingMode == float_round_to_odd)
2926 ) {
2927 return
2928 packFloat128(
2929 zSign,
2930 0x7FFE,
2931 LIT64( 0x0000FFFFFFFFFFFF ),
2932 LIT64( 0xFFFFFFFFFFFFFFFF )
2933 );
2934 }
2935 return packFloat128( zSign, 0x7FFF, 0, 0 );
2936 }
2937 if ( zExp < 0 ) {
2938 if (status->flush_to_zero) {
2939 float_raise(float_flag_output_denormal, status);
2940 return packFloat128(zSign, 0, 0, 0);
2941 }
2942 isTiny =
2943 (status->float_detect_tininess
2944 == float_tininess_before_rounding)
2945 || ( zExp < -1 )
2946 || ! increment
2947 || lt128(
2948 zSig0,
2949 zSig1,
2950 LIT64( 0x0001FFFFFFFFFFFF ),
2951 LIT64( 0xFFFFFFFFFFFFFFFF )
2952 );
2953 shift128ExtraRightJamming(
2954 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
2955 zExp = 0;
2956 if (isTiny && zSig2) {
2957 float_raise(float_flag_underflow, status);
2958 }
2959 switch (roundingMode) {
2960 case float_round_nearest_even:
2961 case float_round_ties_away:
2962 increment = ((int64_t)zSig2 < 0);
2963 break;
2964 case float_round_to_zero:
2965 increment = 0;
2966 break;
2967 case float_round_up:
2968 increment = !zSign && zSig2;
2969 break;
2970 case float_round_down:
2971 increment = zSign && zSig2;
2972 break;
2973 case float_round_to_odd:
2974 increment = !(zSig1 & 0x1) && zSig2;
2975 break;
2976 default:
2977 abort();
2978 }
2979 }
2980 }
2981 if (zSig2) {
2982 status->float_exception_flags |= float_flag_inexact;
2983 }
2984 if ( increment ) {
2985 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
2986 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
2987 }
2988 else {
2989 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
2990 }
2991 return packFloat128( zSign, zExp, zSig0, zSig1 );
2992
2993}
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
3006 uint64_t zSig0, uint64_t zSig1,
3007 float_status *status)
3008{
3009 int8_t shiftCount;
3010 uint64_t zSig2;
3011
3012 if ( zSig0 == 0 ) {
3013 zSig0 = zSig1;
3014 zSig1 = 0;
3015 zExp -= 64;
3016 }
3017 shiftCount = countLeadingZeros64( zSig0 ) - 15;
3018 if ( 0 <= shiftCount ) {
3019 zSig2 = 0;
3020 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3021 }
3022 else {
3023 shift128ExtraRightJamming(
3024 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
3025 }
3026 zExp -= shiftCount;
3027 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
3028
3029}
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039floatx80 int32_to_floatx80(int32_t a, float_status *status)
3040{
3041 flag zSign;
3042 uint32_t absA;
3043 int8_t shiftCount;
3044 uint64_t zSig;
3045
3046 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3047 zSign = ( a < 0 );
3048 absA = zSign ? - a : a;
3049 shiftCount = countLeadingZeros32( absA ) + 32;
3050 zSig = absA;
3051 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
3052
3053}
3054
3055
3056
3057
3058
3059
3060
3061float128 int32_to_float128(int32_t a, float_status *status)
3062{
3063 flag zSign;
3064 uint32_t absA;
3065 int8_t shiftCount;
3066 uint64_t zSig0;
3067
3068 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3069 zSign = ( a < 0 );
3070 absA = zSign ? - a : a;
3071 shiftCount = countLeadingZeros32( absA ) + 17;
3072 zSig0 = absA;
3073 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
3074
3075}
3076
3077
3078
3079
3080
3081
3082
3083
3084floatx80 int64_to_floatx80(int64_t a, float_status *status)
3085{
3086 flag zSign;
3087 uint64_t absA;
3088 int8_t shiftCount;
3089
3090 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3091 zSign = ( a < 0 );
3092 absA = zSign ? - a : a;
3093 shiftCount = countLeadingZeros64( absA );
3094 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
3095
3096}
3097
3098
3099
3100
3101
3102
3103
3104float128 int64_to_float128(int64_t a, float_status *status)
3105{
3106 flag zSign;
3107 uint64_t absA;
3108 int8_t shiftCount;
3109 int32_t zExp;
3110 uint64_t zSig0, zSig1;
3111
3112 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3113 zSign = ( a < 0 );
3114 absA = zSign ? - a : a;
3115 shiftCount = countLeadingZeros64( absA ) + 49;
3116 zExp = 0x406E - shiftCount;
3117 if ( 64 <= shiftCount ) {
3118 zSig1 = 0;
3119 zSig0 = absA;
3120 shiftCount -= 64;
3121 }
3122 else {
3123 zSig1 = absA;
3124 zSig0 = 0;
3125 }
3126 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3127 return packFloat128( zSign, zExp, zSig0, zSig1 );
3128
3129}
3130
3131
3132
3133
3134
3135
3136
3137float128 uint64_to_float128(uint64_t a, float_status *status)
3138{
3139 if (a == 0) {
3140 return float128_zero;
3141 }
3142 return normalizeRoundAndPackFloat128(0, 0x406E, a, 0, status);
3143}
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155float64 float32_to_float64(float32 a, float_status *status)
3156{
3157 flag aSign;
3158 int aExp;
3159 uint32_t aSig;
3160 a = float32_squash_input_denormal(a, status);
3161
3162 aSig = extractFloat32Frac( a );
3163 aExp = extractFloat32Exp( a );
3164 aSign = extractFloat32Sign( a );
3165 if ( aExp == 0xFF ) {
3166 if (aSig) {
3167 return commonNaNToFloat64(float32ToCommonNaN(a, status), status);
3168 }
3169 return packFloat64( aSign, 0x7FF, 0 );
3170 }
3171 if ( aExp == 0 ) {
3172 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
3173 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3174 --aExp;
3175 }
3176 return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
3177
3178}
3179
3180
3181
3182
3183
3184
3185
3186
3187floatx80 float32_to_floatx80(float32 a, float_status *status)
3188{
3189 flag aSign;
3190 int aExp;
3191 uint32_t aSig;
3192
3193 a = float32_squash_input_denormal(a, status);
3194 aSig = extractFloat32Frac( a );
3195 aExp = extractFloat32Exp( a );
3196 aSign = extractFloat32Sign( a );
3197 if ( aExp == 0xFF ) {
3198 if (aSig) {
3199 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
3200 }
3201 return packFloatx80(aSign,
3202 floatx80_infinity_high,
3203 floatx80_infinity_low);
3204 }
3205 if ( aExp == 0 ) {
3206 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3207 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3208 }
3209 aSig |= 0x00800000;
3210 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
3211
3212}
3213
3214
3215
3216
3217
3218
3219
3220
3221float128 float32_to_float128(float32 a, float_status *status)
3222{
3223 flag aSign;
3224 int aExp;
3225 uint32_t aSig;
3226
3227 a = float32_squash_input_denormal(a, status);
3228 aSig = extractFloat32Frac( a );
3229 aExp = extractFloat32Exp( a );
3230 aSign = extractFloat32Sign( a );
3231 if ( aExp == 0xFF ) {
3232 if (aSig) {
3233 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
3234 }
3235 return packFloat128( aSign, 0x7FFF, 0, 0 );
3236 }
3237 if ( aExp == 0 ) {
3238 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3239 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3240 --aExp;
3241 }
3242 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
3243
3244}
3245
3246
3247
3248
3249
3250
3251
3252float32 float32_rem(float32 a, float32 b, float_status *status)
3253{
3254 flag aSign, zSign;
3255 int aExp, bExp, expDiff;
3256 uint32_t aSig, bSig;
3257 uint32_t q;
3258 uint64_t aSig64, bSig64, q64;
3259 uint32_t alternateASig;
3260 int32_t sigMean;
3261 a = float32_squash_input_denormal(a, status);
3262 b = float32_squash_input_denormal(b, status);
3263
3264 aSig = extractFloat32Frac( a );
3265 aExp = extractFloat32Exp( a );
3266 aSign = extractFloat32Sign( a );
3267 bSig = extractFloat32Frac( b );
3268 bExp = extractFloat32Exp( b );
3269 if ( aExp == 0xFF ) {
3270 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
3271 return propagateFloat32NaN(a, b, status);
3272 }
3273 float_raise(float_flag_invalid, status);
3274 return float32_default_nan(status);
3275 }
3276 if ( bExp == 0xFF ) {
3277 if (bSig) {
3278 return propagateFloat32NaN(a, b, status);
3279 }
3280 return a;
3281 }
3282 if ( bExp == 0 ) {
3283 if ( bSig == 0 ) {
3284 float_raise(float_flag_invalid, status);
3285 return float32_default_nan(status);
3286 }
3287 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
3288 }
3289 if ( aExp == 0 ) {
3290 if ( aSig == 0 ) return a;
3291 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3292 }
3293 expDiff = aExp - bExp;
3294 aSig |= 0x00800000;
3295 bSig |= 0x00800000;
3296 if ( expDiff < 32 ) {
3297 aSig <<= 8;
3298 bSig <<= 8;
3299 if ( expDiff < 0 ) {
3300 if ( expDiff < -1 ) return a;
3301 aSig >>= 1;
3302 }
3303 q = ( bSig <= aSig );
3304 if ( q ) aSig -= bSig;
3305 if ( 0 < expDiff ) {
3306 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
3307 q >>= 32 - expDiff;
3308 bSig >>= 2;
3309 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3310 }
3311 else {
3312 aSig >>= 2;
3313 bSig >>= 2;
3314 }
3315 }
3316 else {
3317 if ( bSig <= aSig ) aSig -= bSig;
3318 aSig64 = ( (uint64_t) aSig )<<40;
3319 bSig64 = ( (uint64_t) bSig )<<40;
3320 expDiff -= 64;
3321 while ( 0 < expDiff ) {
3322 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3323 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3324 aSig64 = - ( ( bSig * q64 )<<38 );
3325 expDiff -= 62;
3326 }
3327 expDiff += 64;
3328 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3329 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3330 q = q64>>( 64 - expDiff );
3331 bSig <<= 6;
3332 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
3333 }
3334 do {
3335 alternateASig = aSig;
3336 ++q;
3337 aSig -= bSig;
3338 } while ( 0 <= (int32_t) aSig );
3339 sigMean = aSig + alternateASig;
3340 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3341 aSig = alternateASig;
3342 }
3343 zSign = ( (int32_t) aSig < 0 );
3344 if ( zSign ) aSig = - aSig;
3345 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
3346}
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368static const float64 float32_exp2_coefficients[15] =
3369{
3370 const_float64( 0x3ff0000000000000ll ),
3371 const_float64( 0x3fe0000000000000ll ),
3372 const_float64( 0x3fc5555555555555ll ),
3373 const_float64( 0x3fa5555555555555ll ),
3374 const_float64( 0x3f81111111111111ll ),
3375 const_float64( 0x3f56c16c16c16c17ll ),
3376 const_float64( 0x3f2a01a01a01a01all ),
3377 const_float64( 0x3efa01a01a01a01all ),
3378 const_float64( 0x3ec71de3a556c734ll ),
3379 const_float64( 0x3e927e4fb7789f5cll ),
3380 const_float64( 0x3e5ae64567f544e4ll ),
3381 const_float64( 0x3e21eed8eff8d898ll ),
3382 const_float64( 0x3de6124613a86d09ll ),
3383 const_float64( 0x3da93974a8c07c9dll ),
3384 const_float64( 0x3d6ae7f3e733b81fll ),
3385};
3386
3387float32 float32_exp2(float32 a, float_status *status)
3388{
3389 flag aSign;
3390 int aExp;
3391 uint32_t aSig;
3392 float64 r, x, xn;
3393 int i;
3394 a = float32_squash_input_denormal(a, status);
3395
3396 aSig = extractFloat32Frac( a );
3397 aExp = extractFloat32Exp( a );
3398 aSign = extractFloat32Sign( a );
3399
3400 if ( aExp == 0xFF) {
3401 if (aSig) {
3402 return propagateFloat32NaN(a, float32_zero, status);
3403 }
3404 return (aSign) ? float32_zero : a;
3405 }
3406 if (aExp == 0) {
3407 if (aSig == 0) return float32_one;
3408 }
3409
3410 float_raise(float_flag_inexact, status);
3411
3412
3413
3414
3415 x = float32_to_float64(a, status);
3416 x = float64_mul(x, float64_ln2, status);
3417
3418 xn = x;
3419 r = float64_one;
3420 for (i = 0 ; i < 15 ; i++) {
3421 float64 f;
3422
3423 f = float64_mul(xn, float32_exp2_coefficients[i], status);
3424 r = float64_add(r, f, status);
3425
3426 xn = float64_mul(xn, x, status);
3427 }
3428
3429 return float64_to_float32(r, status);
3430}
3431
3432
3433
3434
3435
3436
3437float32 float32_log2(float32 a, float_status *status)
3438{
3439 flag aSign, zSign;
3440 int aExp;
3441 uint32_t aSig, zSig, i;
3442
3443 a = float32_squash_input_denormal(a, status);
3444 aSig = extractFloat32Frac( a );
3445 aExp = extractFloat32Exp( a );
3446 aSign = extractFloat32Sign( a );
3447
3448 if ( aExp == 0 ) {
3449 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
3450 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3451 }
3452 if ( aSign ) {
3453 float_raise(float_flag_invalid, status);
3454 return float32_default_nan(status);
3455 }
3456 if ( aExp == 0xFF ) {
3457 if (aSig) {
3458 return propagateFloat32NaN(a, float32_zero, status);
3459 }
3460 return a;
3461 }
3462
3463 aExp -= 0x7F;
3464 aSig |= 0x00800000;
3465 zSign = aExp < 0;
3466 zSig = aExp << 23;
3467
3468 for (i = 1 << 22; i > 0; i >>= 1) {
3469 aSig = ( (uint64_t)aSig * aSig ) >> 23;
3470 if ( aSig & 0x01000000 ) {
3471 aSig >>= 1;
3472 zSig |= i;
3473 }
3474 }
3475
3476 if ( zSign )
3477 zSig = -zSig;
3478
3479 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
3480}
3481
3482
3483
3484
3485
3486
3487
3488
3489int float32_eq(float32 a, float32 b, float_status *status)
3490{
3491 uint32_t av, bv;
3492 a = float32_squash_input_denormal(a, status);
3493 b = float32_squash_input_denormal(b, status);
3494
3495 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3496 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3497 ) {
3498 float_raise(float_flag_invalid, status);
3499 return 0;
3500 }
3501 av = float32_val(a);
3502 bv = float32_val(b);
3503 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
3504}
3505
3506
3507
3508
3509
3510
3511
3512
3513int float32_le(float32 a, float32 b, float_status *status)
3514{
3515 flag aSign, bSign;
3516 uint32_t av, bv;
3517 a = float32_squash_input_denormal(a, status);
3518 b = float32_squash_input_denormal(b, status);
3519
3520 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3521 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3522 ) {
3523 float_raise(float_flag_invalid, status);
3524 return 0;
3525 }
3526 aSign = extractFloat32Sign( a );
3527 bSign = extractFloat32Sign( b );
3528 av = float32_val(a);
3529 bv = float32_val(b);
3530 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
3531 return ( av == bv ) || ( aSign ^ ( av < bv ) );
3532
3533}
3534
3535
3536
3537
3538
3539
3540
3541
3542int float32_lt(float32 a, float32 b, float_status *status)
3543{
3544 flag aSign, bSign;
3545 uint32_t av, bv;
3546 a = float32_squash_input_denormal(a, status);
3547 b = float32_squash_input_denormal(b, status);
3548
3549 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3550 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3551 ) {
3552 float_raise(float_flag_invalid, status);
3553 return 0;
3554 }
3555 aSign = extractFloat32Sign( a );
3556 bSign = extractFloat32Sign( b );
3557 av = float32_val(a);
3558 bv = float32_val(b);
3559 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
3560 return ( av != bv ) && ( aSign ^ ( av < bv ) );
3561
3562}
3563
3564
3565
3566
3567
3568
3569
3570
3571int float32_unordered(float32 a, float32 b, float_status *status)
3572{
3573 a = float32_squash_input_denormal(a, status);
3574 b = float32_squash_input_denormal(b, status);
3575
3576 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3577 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3578 ) {
3579 float_raise(float_flag_invalid, status);
3580 return 1;
3581 }
3582 return 0;
3583}
3584
3585
3586
3587
3588
3589
3590
3591
3592int float32_eq_quiet(float32 a, float32 b, float_status *status)
3593{
3594 a = float32_squash_input_denormal(a, status);
3595 b = float32_squash_input_denormal(b, status);
3596
3597 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3598 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3599 ) {
3600 if (float32_is_signaling_nan(a, status)
3601 || float32_is_signaling_nan(b, status)) {
3602 float_raise(float_flag_invalid, status);
3603 }
3604 return 0;
3605 }
3606 return ( float32_val(a) == float32_val(b) ) ||
3607 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
3608}
3609
3610
3611
3612
3613
3614
3615
3616
3617int float32_le_quiet(float32 a, float32 b, float_status *status)
3618{
3619 flag aSign, bSign;
3620 uint32_t av, bv;
3621 a = float32_squash_input_denormal(a, status);
3622 b = float32_squash_input_denormal(b, status);
3623
3624 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3625 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3626 ) {
3627 if (float32_is_signaling_nan(a, status)
3628 || float32_is_signaling_nan(b, status)) {
3629 float_raise(float_flag_invalid, status);
3630 }
3631 return 0;
3632 }
3633 aSign = extractFloat32Sign( a );
3634 bSign = extractFloat32Sign( b );
3635 av = float32_val(a);
3636 bv = float32_val(b);
3637 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
3638 return ( av == bv ) || ( aSign ^ ( av < bv ) );
3639
3640}
3641
3642
3643
3644
3645
3646
3647
3648
3649int float32_lt_quiet(float32 a, float32 b, float_status *status)
3650{
3651 flag aSign, bSign;
3652 uint32_t av, bv;
3653 a = float32_squash_input_denormal(a, status);
3654 b = float32_squash_input_denormal(b, status);
3655
3656 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3657 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3658 ) {
3659 if (float32_is_signaling_nan(a, status)
3660 || float32_is_signaling_nan(b, status)) {
3661 float_raise(float_flag_invalid, status);
3662 }
3663 return 0;
3664 }
3665 aSign = extractFloat32Sign( a );
3666 bSign = extractFloat32Sign( b );
3667 av = float32_val(a);
3668 bv = float32_val(b);
3669 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
3670 return ( av != bv ) && ( aSign ^ ( av < bv ) );
3671
3672}
3673
3674
3675
3676
3677
3678
3679
3680
3681int float32_unordered_quiet(float32 a, float32 b, float_status *status)
3682{
3683 a = float32_squash_input_denormal(a, status);
3684 b = float32_squash_input_denormal(b, status);
3685
3686 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3687 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3688 ) {
3689 if (float32_is_signaling_nan(a, status)
3690 || float32_is_signaling_nan(b, status)) {
3691 float_raise(float_flag_invalid, status);
3692 }
3693 return 1;
3694 }
3695 return 0;
3696}
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706float32 float64_to_float32(float64 a, float_status *status)
3707{
3708 flag aSign;
3709 int aExp;
3710 uint64_t aSig;
3711 uint32_t zSig;
3712 a = float64_squash_input_denormal(a, status);
3713
3714 aSig = extractFloat64Frac( a );
3715 aExp = extractFloat64Exp( a );
3716 aSign = extractFloat64Sign( a );
3717 if ( aExp == 0x7FF ) {
3718 if (aSig) {
3719 return commonNaNToFloat32(float64ToCommonNaN(a, status), status);
3720 }
3721 return packFloat32( aSign, 0xFF, 0 );
3722 }
3723 shift64RightJamming( aSig, 22, &aSig );
3724 zSig = aSig;
3725 if ( aExp || zSig ) {
3726 zSig |= 0x40000000;
3727 aExp -= 0x381;
3728 }
3729 return roundAndPackFloat32(aSign, aExp, zSig, status);
3730
3731}
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744static float16 packFloat16(flag zSign, int zExp, uint16_t zSig)
3745{
3746 return make_float16(
3747 (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
3748}
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778static float16 roundAndPackFloat16(flag zSign, int zExp,
3779 uint32_t zSig, flag ieee,
3780 float_status *status)
3781{
3782 int maxexp = ieee ? 29 : 30;
3783 uint32_t mask;
3784 uint32_t increment;
3785 bool rounding_bumps_exp;
3786 bool is_tiny = false;
3787
3788
3789
3790
3791 if (zExp < 1) {
3792
3793 mask = 0x00ffffff;
3794 if (zExp >= -11) {
3795 mask >>= 11 + zExp;
3796 }
3797 } else {
3798
3799 mask = 0x00001fff;
3800 }
3801
3802 switch (status->float_rounding_mode) {
3803 case float_round_nearest_even:
3804 increment = (mask + 1) >> 1;
3805 if ((zSig & mask) == increment) {
3806 increment = zSig & (increment << 1);
3807 }
3808 break;
3809 case float_round_ties_away:
3810 increment = (mask + 1) >> 1;
3811 break;
3812 case float_round_up:
3813 increment = zSign ? 0 : mask;
3814 break;
3815 case float_round_down:
3816 increment = zSign ? mask : 0;
3817 break;
3818 default:
3819 increment = 0;
3820 break;
3821 }
3822
3823 rounding_bumps_exp = (zSig + increment >= 0x01000000);
3824
3825 if (zExp > maxexp || (zExp == maxexp && rounding_bumps_exp)) {
3826 if (ieee) {
3827 float_raise(float_flag_overflow | float_flag_inexact, status);
3828 return packFloat16(zSign, 0x1f, 0);
3829 } else {
3830 float_raise(float_flag_invalid, status);
3831 return packFloat16(zSign, 0x1f, 0x3ff);
3832 }
3833 }
3834
3835 if (zExp < 0) {
3836
3837 is_tiny =
3838 (status->float_detect_tininess == float_tininess_before_rounding)
3839 || (zExp < -1)
3840 || (!rounding_bumps_exp);
3841 }
3842 if (zSig & mask) {
3843 float_raise(float_flag_inexact, status);
3844 if (is_tiny) {
3845 float_raise(float_flag_underflow, status);
3846 }
3847 }
3848
3849 zSig += increment;
3850 if (rounding_bumps_exp) {
3851 zSig >>= 1;
3852 zExp++;
3853 }
3854
3855 if (zExp < -10) {
3856 return packFloat16(zSign, 0, 0);
3857 }
3858 if (zExp < 0) {
3859 zSig >>= -zExp;
3860 zExp = 0;
3861 }
3862 return packFloat16(zSign, zExp, zSig >> 13);
3863}
3864
3865
3866
3867
3868
3869float16 float16_squash_input_denormal(float16 a, float_status *status)
3870{
3871 if (status->flush_inputs_to_zero) {
3872 if (extractFloat16Exp(a) == 0 && extractFloat16Frac(a) != 0) {
3873 float_raise(float_flag_input_denormal, status);
3874 return make_float16(float16_val(a) & 0x8000);
3875 }
3876 }
3877 return a;
3878}
3879
3880static void normalizeFloat16Subnormal(uint32_t aSig, int *zExpPtr,
3881 uint32_t *zSigPtr)
3882{
3883 int8_t shiftCount = countLeadingZeros32(aSig) - 21;
3884 *zSigPtr = aSig << shiftCount;
3885 *zExpPtr = 1 - shiftCount;
3886}
3887
3888
3889
3890
3891float32 float16_to_float32(float16 a, flag ieee, float_status *status)
3892{
3893 flag aSign;
3894 int aExp;
3895 uint32_t aSig;
3896
3897 aSign = extractFloat16Sign(a);
3898 aExp = extractFloat16Exp(a);
3899 aSig = extractFloat16Frac(a);
3900
3901 if (aExp == 0x1f && ieee) {
3902 if (aSig) {
3903 return commonNaNToFloat32(float16ToCommonNaN(a, status), status);
3904 }
3905 return packFloat32(aSign, 0xff, 0);
3906 }
3907 if (aExp == 0) {
3908 if (aSig == 0) {
3909 return packFloat32(aSign, 0, 0);
3910 }
3911
3912 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3913 aExp--;
3914 }
3915 return packFloat32( aSign, aExp + 0x70, aSig << 13);
3916}
3917
3918float16 float32_to_float16(float32 a, flag ieee, float_status *status)
3919{
3920 flag aSign;
3921 int aExp;
3922 uint32_t aSig;
3923
3924 a = float32_squash_input_denormal(a, status);
3925
3926 aSig = extractFloat32Frac( a );
3927 aExp = extractFloat32Exp( a );
3928 aSign = extractFloat32Sign( a );
3929 if ( aExp == 0xFF ) {
3930 if (aSig) {
3931
3932 if (!ieee) {
3933 float_raise(float_flag_invalid, status);
3934 return packFloat16(aSign, 0, 0);
3935 }
3936 return commonNaNToFloat16(
3937 float32ToCommonNaN(a, status), status);
3938 }
3939
3940 if (!ieee) {
3941 float_raise(float_flag_invalid, status);
3942 return packFloat16(aSign, 0x1f, 0x3ff);
3943 }
3944 return packFloat16(aSign, 0x1f, 0);
3945 }
3946 if (aExp == 0 && aSig == 0) {
3947 return packFloat16(aSign, 0, 0);
3948 }
3949
3950
3951
3952
3953
3954
3955
3956 aSig |= 0x00800000;
3957 aExp -= 0x71;
3958
3959 return roundAndPackFloat16(aSign, aExp, aSig, ieee, status);
3960}
3961
3962float64 float16_to_float64(float16 a, flag ieee, float_status *status)
3963{
3964 flag aSign;
3965 int aExp;
3966 uint32_t aSig;
3967
3968 aSign = extractFloat16Sign(a);
3969 aExp = extractFloat16Exp(a);
3970 aSig = extractFloat16Frac(a);
3971
3972 if (aExp == 0x1f && ieee) {
3973 if (aSig) {
3974 return commonNaNToFloat64(
3975 float16ToCommonNaN(a, status), status);
3976 }
3977 return packFloat64(aSign, 0x7ff, 0);
3978 }
3979 if (aExp == 0) {
3980 if (aSig == 0) {
3981 return packFloat64(aSign, 0, 0);
3982 }
3983
3984 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3985 aExp--;
3986 }
3987 return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42);
3988}
3989
3990float16 float64_to_float16(float64 a, flag ieee, float_status *status)
3991{
3992 flag aSign;
3993 int aExp;
3994 uint64_t aSig;
3995 uint32_t zSig;
3996
3997 a = float64_squash_input_denormal(a, status);
3998
3999 aSig = extractFloat64Frac(a);
4000 aExp = extractFloat64Exp(a);
4001 aSign = extractFloat64Sign(a);
4002 if (aExp == 0x7FF) {
4003 if (aSig) {
4004
4005 if (!ieee) {
4006 float_raise(float_flag_invalid, status);
4007 return packFloat16(aSign, 0, 0);
4008 }
4009 return commonNaNToFloat16(
4010 float64ToCommonNaN(a, status), status);
4011 }
4012
4013 if (!ieee) {
4014 float_raise(float_flag_invalid, status);
4015 return packFloat16(aSign, 0x1f, 0x3ff);
4016 }
4017 return packFloat16(aSign, 0x1f, 0);
4018 }
4019 shift64RightJamming(aSig, 29, &aSig);
4020 zSig = aSig;
4021 if (aExp == 0 && zSig == 0) {
4022 return packFloat16(aSign, 0, 0);
4023 }
4024
4025
4026
4027
4028
4029
4030
4031 zSig |= 0x00800000;
4032 aExp -= 0x3F1;
4033
4034 return roundAndPackFloat16(aSign, aExp, zSig, ieee, status);
4035}
4036
4037
4038
4039
4040
4041
4042
4043
4044floatx80 float64_to_floatx80(float64 a, float_status *status)
4045{
4046 flag aSign;
4047 int aExp;
4048 uint64_t aSig;
4049
4050 a = float64_squash_input_denormal(a, status);
4051 aSig = extractFloat64Frac( a );
4052 aExp = extractFloat64Exp( a );
4053 aSign = extractFloat64Sign( a );
4054 if ( aExp == 0x7FF ) {
4055 if (aSig) {
4056 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
4057 }
4058 return packFloatx80(aSign,
4059 floatx80_infinity_high,
4060 floatx80_infinity_low);
4061 }
4062 if ( aExp == 0 ) {
4063 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4064 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4065 }
4066 return
4067 packFloatx80(
4068 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
4069
4070}
4071
4072
4073
4074
4075
4076
4077
4078
4079float128 float64_to_float128(float64 a, float_status *status)
4080{
4081 flag aSign;
4082 int aExp;
4083 uint64_t aSig, zSig0, zSig1;
4084
4085 a = float64_squash_input_denormal(a, status);
4086 aSig = extractFloat64Frac( a );
4087 aExp = extractFloat64Exp( a );
4088 aSign = extractFloat64Sign( a );
4089 if ( aExp == 0x7FF ) {
4090 if (aSig) {
4091 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
4092 }
4093 return packFloat128( aSign, 0x7FFF, 0, 0 );
4094 }
4095 if ( aExp == 0 ) {
4096 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4097 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4098 --aExp;
4099 }
4100 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
4101 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
4102
4103}
4104
4105
4106
4107
4108
4109
4110
4111
4112float64 float64_rem(float64 a, float64 b, float_status *status)
4113{
4114 flag aSign, zSign;
4115 int aExp, bExp, expDiff;
4116 uint64_t aSig, bSig;
4117 uint64_t q, alternateASig;
4118 int64_t sigMean;
4119
4120 a = float64_squash_input_denormal(a, status);
4121 b = float64_squash_input_denormal(b, status);
4122 aSig = extractFloat64Frac( a );
4123 aExp = extractFloat64Exp( a );
4124 aSign = extractFloat64Sign( a );
4125 bSig = extractFloat64Frac( b );
4126 bExp = extractFloat64Exp( b );
4127 if ( aExp == 0x7FF ) {
4128 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
4129 return propagateFloat64NaN(a, b, status);
4130 }
4131 float_raise(float_flag_invalid, status);
4132 return float64_default_nan(status);
4133 }
4134 if ( bExp == 0x7FF ) {
4135 if (bSig) {
4136 return propagateFloat64NaN(a, b, status);
4137 }
4138 return a;
4139 }
4140 if ( bExp == 0 ) {
4141 if ( bSig == 0 ) {
4142 float_raise(float_flag_invalid, status);
4143 return float64_default_nan(status);
4144 }
4145 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4146 }
4147 if ( aExp == 0 ) {
4148 if ( aSig == 0 ) return a;
4149 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4150 }
4151 expDiff = aExp - bExp;
4152 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
4153 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4154 if ( expDiff < 0 ) {
4155 if ( expDiff < -1 ) return a;
4156 aSig >>= 1;
4157 }
4158 q = ( bSig <= aSig );
4159 if ( q ) aSig -= bSig;
4160 expDiff -= 64;
4161 while ( 0 < expDiff ) {
4162 q = estimateDiv128To64( aSig, 0, bSig );
4163 q = ( 2 < q ) ? q - 2 : 0;
4164 aSig = - ( ( bSig>>2 ) * q );
4165 expDiff -= 62;
4166 }
4167 expDiff += 64;
4168 if ( 0 < expDiff ) {
4169 q = estimateDiv128To64( aSig, 0, bSig );
4170 q = ( 2 < q ) ? q - 2 : 0;
4171 q >>= 64 - expDiff;
4172 bSig >>= 2;
4173 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4174 }
4175 else {
4176 aSig >>= 2;
4177 bSig >>= 2;
4178 }
4179 do {
4180 alternateASig = aSig;
4181 ++q;
4182 aSig -= bSig;
4183 } while ( 0 <= (int64_t) aSig );
4184 sigMean = aSig + alternateASig;
4185 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4186 aSig = alternateASig;
4187 }
4188 zSign = ( (int64_t) aSig < 0 );
4189 if ( zSign ) aSig = - aSig;
4190 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
4191
4192}
4193
4194
4195
4196
4197
4198
4199float64 float64_log2(float64 a, float_status *status)
4200{
4201 flag aSign, zSign;
4202 int aExp;
4203 uint64_t aSig, aSig0, aSig1, zSig, i;
4204 a = float64_squash_input_denormal(a, status);
4205
4206 aSig = extractFloat64Frac( a );
4207 aExp = extractFloat64Exp( a );
4208 aSign = extractFloat64Sign( a );
4209
4210 if ( aExp == 0 ) {
4211 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
4212 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4213 }
4214 if ( aSign ) {
4215 float_raise(float_flag_invalid, status);
4216 return float64_default_nan(status);
4217 }
4218 if ( aExp == 0x7FF ) {
4219 if (aSig) {
4220 return propagateFloat64NaN(a, float64_zero, status);
4221 }
4222 return a;
4223 }
4224
4225 aExp -= 0x3FF;
4226 aSig |= LIT64( 0x0010000000000000 );
4227 zSign = aExp < 0;
4228 zSig = (uint64_t)aExp << 52;
4229 for (i = 1LL << 51; i > 0; i >>= 1) {
4230 mul64To128( aSig, aSig, &aSig0, &aSig1 );
4231 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
4232 if ( aSig & LIT64( 0x0020000000000000 ) ) {
4233 aSig >>= 1;
4234 zSig |= i;
4235 }
4236 }
4237
4238 if ( zSign )
4239 zSig = -zSig;
4240 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
4241}
4242
4243
4244
4245
4246
4247
4248
4249
4250int float64_eq(float64 a, float64 b, float_status *status)
4251{
4252 uint64_t av, bv;
4253 a = float64_squash_input_denormal(a, status);
4254 b = float64_squash_input_denormal(b, status);
4255
4256 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4257 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4258 ) {
4259 float_raise(float_flag_invalid, status);
4260 return 0;
4261 }
4262 av = float64_val(a);
4263 bv = float64_val(b);
4264 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4265
4266}
4267
4268
4269
4270
4271
4272
4273
4274
4275int float64_le(float64 a, float64 b, float_status *status)
4276{
4277 flag aSign, bSign;
4278 uint64_t av, bv;
4279 a = float64_squash_input_denormal(a, status);
4280 b = float64_squash_input_denormal(b, status);
4281
4282 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4283 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4284 ) {
4285 float_raise(float_flag_invalid, status);
4286 return 0;
4287 }
4288 aSign = extractFloat64Sign( a );
4289 bSign = extractFloat64Sign( b );
4290 av = float64_val(a);
4291 bv = float64_val(b);
4292 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4293 return ( av == bv ) || ( aSign ^ ( av < bv ) );
4294
4295}
4296
4297
4298
4299
4300
4301
4302
4303
4304int float64_lt(float64 a, float64 b, float_status *status)
4305{
4306 flag aSign, bSign;
4307 uint64_t av, bv;
4308
4309 a = float64_squash_input_denormal(a, status);
4310 b = float64_squash_input_denormal(b, status);
4311 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4312 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4313 ) {
4314 float_raise(float_flag_invalid, status);
4315 return 0;
4316 }
4317 aSign = extractFloat64Sign( a );
4318 bSign = extractFloat64Sign( b );
4319 av = float64_val(a);
4320 bv = float64_val(b);
4321 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
4322 return ( av != bv ) && ( aSign ^ ( av < bv ) );
4323
4324}
4325
4326
4327
4328
4329
4330
4331
4332
4333int float64_unordered(float64 a, float64 b, float_status *status)
4334{
4335 a = float64_squash_input_denormal(a, status);
4336 b = float64_squash_input_denormal(b, status);
4337
4338 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4339 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4340 ) {
4341 float_raise(float_flag_invalid, status);
4342 return 1;
4343 }
4344 return 0;
4345}
4346
4347
4348
4349
4350
4351
4352
4353
4354int float64_eq_quiet(float64 a, float64 b, float_status *status)
4355{
4356 uint64_t av, bv;
4357 a = float64_squash_input_denormal(a, status);
4358 b = float64_squash_input_denormal(b, status);
4359
4360 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4361 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4362 ) {
4363 if (float64_is_signaling_nan(a, status)
4364 || float64_is_signaling_nan(b, status)) {
4365 float_raise(float_flag_invalid, status);
4366 }
4367 return 0;
4368 }
4369 av = float64_val(a);
4370 bv = float64_val(b);
4371 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4372
4373}
4374
4375
4376
4377
4378
4379
4380
4381
4382int float64_le_quiet(float64 a, float64 b, float_status *status)
4383{
4384 flag aSign, bSign;
4385 uint64_t av, bv;
4386 a = float64_squash_input_denormal(a, status);
4387 b = float64_squash_input_denormal(b, status);
4388
4389 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4390 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4391 ) {
4392 if (float64_is_signaling_nan(a, status)
4393 || float64_is_signaling_nan(b, status)) {
4394 float_raise(float_flag_invalid, status);
4395 }
4396 return 0;
4397 }
4398 aSign = extractFloat64Sign( a );
4399 bSign = extractFloat64Sign( b );
4400 av = float64_val(a);
4401 bv = float64_val(b);
4402 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4403 return ( av == bv ) || ( aSign ^ ( av < bv ) );
4404
4405}
4406
4407
4408
4409
4410
4411
4412
4413
4414int float64_lt_quiet(float64 a, float64 b, float_status *status)
4415{
4416 flag aSign, bSign;
4417 uint64_t av, bv;
4418 a = float64_squash_input_denormal(a, status);
4419 b = float64_squash_input_denormal(b, status);
4420
4421 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4422 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4423 ) {
4424 if (float64_is_signaling_nan(a, status)
4425 || float64_is_signaling_nan(b, status)) {
4426 float_raise(float_flag_invalid, status);
4427 }
4428 return 0;
4429 }
4430 aSign = extractFloat64Sign( a );
4431 bSign = extractFloat64Sign( b );
4432 av = float64_val(a);
4433 bv = float64_val(b);
4434 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
4435 return ( av != bv ) && ( aSign ^ ( av < bv ) );
4436
4437}
4438
4439
4440
4441
4442
4443
4444
4445
4446int float64_unordered_quiet(float64 a, float64 b, float_status *status)
4447{
4448 a = float64_squash_input_denormal(a, status);
4449 b = float64_squash_input_denormal(b, status);
4450
4451 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4452 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4453 ) {
4454 if (float64_is_signaling_nan(a, status)
4455 || float64_is_signaling_nan(b, status)) {
4456 float_raise(float_flag_invalid, status);
4457 }
4458 return 1;
4459 }
4460 return 0;
4461}
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473int32_t floatx80_to_int32(floatx80 a, float_status *status)
4474{
4475 flag aSign;
4476 int32_t aExp, shiftCount;
4477 uint64_t aSig;
4478
4479 if (floatx80_invalid_encoding(a)) {
4480 float_raise(float_flag_invalid, status);
4481 return 1 << 31;
4482 }
4483 aSig = extractFloatx80Frac( a );
4484 aExp = extractFloatx80Exp( a );
4485 aSign = extractFloatx80Sign( a );
4486 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
4487 shiftCount = 0x4037 - aExp;
4488 if ( shiftCount <= 0 ) shiftCount = 1;
4489 shift64RightJamming( aSig, shiftCount, &aSig );
4490 return roundAndPackInt32(aSign, aSig, status);
4491
4492}
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
4505{
4506 flag aSign;
4507 int32_t aExp, shiftCount;
4508 uint64_t aSig, savedASig;
4509 int32_t z;
4510
4511 if (floatx80_invalid_encoding(a)) {
4512 float_raise(float_flag_invalid, status);
4513 return 1 << 31;
4514 }
4515 aSig = extractFloatx80Frac( a );
4516 aExp = extractFloatx80Exp( a );
4517 aSign = extractFloatx80Sign( a );
4518 if ( 0x401E < aExp ) {
4519 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
4520 goto invalid;
4521 }
4522 else if ( aExp < 0x3FFF ) {
4523 if (aExp || aSig) {
4524 status->float_exception_flags |= float_flag_inexact;
4525 }
4526 return 0;
4527 }
4528 shiftCount = 0x403E - aExp;
4529 savedASig = aSig;
4530 aSig >>= shiftCount;
4531 z = aSig;
4532 if ( aSign ) z = - z;
4533 if ( ( z < 0 ) ^ aSign ) {
4534 invalid:
4535 float_raise(float_flag_invalid, status);
4536 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
4537 }
4538 if ( ( aSig<<shiftCount ) != savedASig ) {
4539 status->float_exception_flags |= float_flag_inexact;
4540 }
4541 return z;
4542
4543}
4544
4545
4546
4547
4548
4549
4550
4551
4552
4553
4554
4555int64_t floatx80_to_int64(floatx80 a, float_status *status)
4556{
4557 flag aSign;
4558 int32_t aExp, shiftCount;
4559 uint64_t aSig, aSigExtra;
4560
4561 if (floatx80_invalid_encoding(a)) {
4562 float_raise(float_flag_invalid, status);
4563 return 1ULL << 63;
4564 }
4565 aSig = extractFloatx80Frac( a );
4566 aExp = extractFloatx80Exp( a );
4567 aSign = extractFloatx80Sign( a );
4568 shiftCount = 0x403E - aExp;
4569 if ( shiftCount <= 0 ) {
4570 if ( shiftCount ) {
4571 float_raise(float_flag_invalid, status);
4572 if (!aSign || floatx80_is_any_nan(a)) {
4573 return LIT64( 0x7FFFFFFFFFFFFFFF );
4574 }
4575 return (int64_t) LIT64( 0x8000000000000000 );
4576 }
4577 aSigExtra = 0;
4578 }
4579 else {
4580 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4581 }
4582 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
4583
4584}
4585
4586
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
4597{
4598 flag aSign;
4599 int32_t aExp, shiftCount;
4600 uint64_t aSig;
4601 int64_t z;
4602
4603 if (floatx80_invalid_encoding(a)) {
4604 float_raise(float_flag_invalid, status);
4605 return 1ULL << 63;
4606 }
4607 aSig = extractFloatx80Frac( a );
4608 aExp = extractFloatx80Exp( a );
4609 aSign = extractFloatx80Sign( a );
4610 shiftCount = aExp - 0x403E;
4611 if ( 0 <= shiftCount ) {
4612 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4613 if ( ( a.high != 0xC03E ) || aSig ) {
4614 float_raise(float_flag_invalid, status);
4615 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4616 return LIT64( 0x7FFFFFFFFFFFFFFF );
4617 }
4618 }
4619 return (int64_t) LIT64( 0x8000000000000000 );
4620 }
4621 else if ( aExp < 0x3FFF ) {
4622 if (aExp | aSig) {
4623 status->float_exception_flags |= float_flag_inexact;
4624 }
4625 return 0;
4626 }
4627 z = aSig>>( - shiftCount );
4628 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
4629 status->float_exception_flags |= float_flag_inexact;
4630 }
4631 if ( aSign ) z = - z;
4632 return z;
4633
4634}
4635
4636
4637
4638
4639
4640
4641
4642
4643float32 floatx80_to_float32(floatx80 a, float_status *status)
4644{
4645 flag aSign;
4646 int32_t aExp;
4647 uint64_t aSig;
4648
4649 if (floatx80_invalid_encoding(a)) {
4650 float_raise(float_flag_invalid, status);
4651 return float32_default_nan(status);
4652 }
4653 aSig = extractFloatx80Frac( a );
4654 aExp = extractFloatx80Exp( a );
4655 aSign = extractFloatx80Sign( a );
4656 if ( aExp == 0x7FFF ) {
4657 if ( (uint64_t) ( aSig<<1 ) ) {
4658 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
4659 }
4660 return packFloat32( aSign, 0xFF, 0 );
4661 }
4662 shift64RightJamming( aSig, 33, &aSig );
4663 if ( aExp || aSig ) aExp -= 0x3F81;
4664 return roundAndPackFloat32(aSign, aExp, aSig, status);
4665
4666}
4667
4668
4669
4670
4671
4672
4673
4674
4675float64 floatx80_to_float64(floatx80 a, float_status *status)
4676{
4677 flag aSign;
4678 int32_t aExp;
4679 uint64_t aSig, zSig;
4680
4681 if (floatx80_invalid_encoding(a)) {
4682 float_raise(float_flag_invalid, status);
4683 return float64_default_nan(status);
4684 }
4685 aSig = extractFloatx80Frac( a );
4686 aExp = extractFloatx80Exp( a );
4687 aSign = extractFloatx80Sign( a );
4688 if ( aExp == 0x7FFF ) {
4689 if ( (uint64_t) ( aSig<<1 ) ) {
4690 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
4691 }
4692 return packFloat64( aSign, 0x7FF, 0 );
4693 }
4694 shift64RightJamming( aSig, 1, &zSig );
4695 if ( aExp || aSig ) aExp -= 0x3C01;
4696 return roundAndPackFloat64(aSign, aExp, zSig, status);
4697
4698}
4699
4700
4701
4702
4703
4704
4705
4706
4707float128 floatx80_to_float128(floatx80 a, float_status *status)
4708{
4709 flag aSign;
4710 int aExp;
4711 uint64_t aSig, zSig0, zSig1;
4712
4713 if (floatx80_invalid_encoding(a)) {
4714 float_raise(float_flag_invalid, status);
4715 return float128_default_nan(status);
4716 }
4717 aSig = extractFloatx80Frac( a );
4718 aExp = extractFloatx80Exp( a );
4719 aSign = extractFloatx80Sign( a );
4720 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
4721 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
4722 }
4723 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
4724 return packFloat128( aSign, aExp, zSig0, zSig1 );
4725
4726}
4727
4728
4729
4730
4731
4732
4733
4734
4735
4736floatx80 floatx80_round(floatx80 a, float_status *status)
4737{
4738 return roundAndPackFloatx80(status->floatx80_rounding_precision,
4739 extractFloatx80Sign(a),
4740 extractFloatx80Exp(a),
4741 extractFloatx80Frac(a), 0, status);
4742}
4743
4744
4745
4746
4747
4748
4749
4750
4751floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
4752{
4753 flag aSign;
4754 int32_t aExp;
4755 uint64_t lastBitMask, roundBitsMask;
4756 floatx80 z;
4757
4758 if (floatx80_invalid_encoding(a)) {
4759 float_raise(float_flag_invalid, status);
4760 return floatx80_default_nan(status);
4761 }
4762 aExp = extractFloatx80Exp( a );
4763 if ( 0x403E <= aExp ) {
4764 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
4765 return propagateFloatx80NaN(a, a, status);
4766 }
4767 return a;
4768 }
4769 if ( aExp < 0x3FFF ) {
4770 if ( ( aExp == 0 )
4771 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
4772 return a;
4773 }
4774 status->float_exception_flags |= float_flag_inexact;
4775 aSign = extractFloatx80Sign( a );
4776 switch (status->float_rounding_mode) {
4777 case float_round_nearest_even:
4778 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
4779 ) {
4780 return
4781 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
4782 }
4783 break;
4784 case float_round_ties_away:
4785 if (aExp == 0x3FFE) {
4786 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
4787 }
4788 break;
4789 case float_round_down:
4790 return
4791 aSign ?
4792 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
4793 : packFloatx80( 0, 0, 0 );
4794 case float_round_up:
4795 return
4796 aSign ? packFloatx80( 1, 0, 0 )
4797 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
4798 }
4799 return packFloatx80( aSign, 0, 0 );
4800 }
4801 lastBitMask = 1;
4802 lastBitMask <<= 0x403E - aExp;
4803 roundBitsMask = lastBitMask - 1;
4804 z = a;
4805 switch (status->float_rounding_mode) {
4806 case float_round_nearest_even:
4807 z.low += lastBitMask>>1;
4808 if ((z.low & roundBitsMask) == 0) {
4809 z.low &= ~lastBitMask;
4810 }
4811 break;
4812 case float_round_ties_away:
4813 z.low += lastBitMask >> 1;
4814 break;
4815 case float_round_to_zero:
4816 break;
4817 case float_round_up:
4818 if (!extractFloatx80Sign(z)) {
4819 z.low += roundBitsMask;
4820 }
4821 break;
4822 case float_round_down:
4823 if (extractFloatx80Sign(z)) {
4824 z.low += roundBitsMask;
4825 }
4826 break;
4827 default:
4828 abort();
4829 }
4830 z.low &= ~ roundBitsMask;
4831 if ( z.low == 0 ) {
4832 ++z.high;
4833 z.low = LIT64( 0x8000000000000000 );
4834 }
4835 if (z.low != a.low) {
4836 status->float_exception_flags |= float_flag_inexact;
4837 }
4838 return z;
4839
4840}
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
4851 float_status *status)
4852{
4853 int32_t aExp, bExp, zExp;
4854 uint64_t aSig, bSig, zSig0, zSig1;
4855 int32_t expDiff;
4856
4857 aSig = extractFloatx80Frac( a );
4858 aExp = extractFloatx80Exp( a );
4859 bSig = extractFloatx80Frac( b );
4860 bExp = extractFloatx80Exp( b );
4861 expDiff = aExp - bExp;
4862 if ( 0 < expDiff ) {
4863 if ( aExp == 0x7FFF ) {
4864 if ((uint64_t)(aSig << 1)) {
4865 return propagateFloatx80NaN(a, b, status);
4866 }
4867 return a;
4868 }
4869 if ( bExp == 0 ) --expDiff;
4870 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4871 zExp = aExp;
4872 }
4873 else if ( expDiff < 0 ) {
4874 if ( bExp == 0x7FFF ) {
4875 if ((uint64_t)(bSig << 1)) {
4876 return propagateFloatx80NaN(a, b, status);
4877 }
4878 return packFloatx80(zSign,
4879 floatx80_infinity_high,
4880 floatx80_infinity_low);
4881 }
4882 if ( aExp == 0 ) ++expDiff;
4883 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4884 zExp = bExp;
4885 }
4886 else {
4887 if ( aExp == 0x7FFF ) {
4888 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
4889 return propagateFloatx80NaN(a, b, status);
4890 }
4891 return a;
4892 }
4893 zSig1 = 0;
4894 zSig0 = aSig + bSig;
4895 if ( aExp == 0 ) {
4896 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
4897 goto roundAndPack;
4898 }
4899 zExp = aExp;
4900 goto shiftRight1;
4901 }
4902 zSig0 = aSig + bSig;
4903 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
4904 shiftRight1:
4905 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
4906 zSig0 |= LIT64( 0x8000000000000000 );
4907 ++zExp;
4908 roundAndPack:
4909 return roundAndPackFloatx80(status->floatx80_rounding_precision,
4910 zSign, zExp, zSig0, zSig1, status);
4911}
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
4922 float_status *status)
4923{
4924 int32_t aExp, bExp, zExp;
4925 uint64_t aSig, bSig, zSig0, zSig1;
4926 int32_t expDiff;
4927
4928 aSig = extractFloatx80Frac( a );
4929 aExp = extractFloatx80Exp( a );
4930 bSig = extractFloatx80Frac( b );
4931 bExp = extractFloatx80Exp( b );
4932 expDiff = aExp - bExp;
4933 if ( 0 < expDiff ) goto aExpBigger;
4934 if ( expDiff < 0 ) goto bExpBigger;
4935 if ( aExp == 0x7FFF ) {
4936 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
4937 return propagateFloatx80NaN(a, b, status);
4938 }
4939 float_raise(float_flag_invalid, status);
4940 return floatx80_default_nan(status);
4941 }
4942 if ( aExp == 0 ) {
4943 aExp = 1;
4944 bExp = 1;
4945 }
4946 zSig1 = 0;
4947 if ( bSig < aSig ) goto aBigger;
4948 if ( aSig < bSig ) goto bBigger;
4949 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
4950 bExpBigger:
4951 if ( bExp == 0x7FFF ) {
4952 if ((uint64_t)(bSig << 1)) {
4953 return propagateFloatx80NaN(a, b, status);
4954 }
4955 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
4956 floatx80_infinity_low);
4957 }
4958 if ( aExp == 0 ) ++expDiff;
4959 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4960 bBigger:
4961 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
4962 zExp = bExp;
4963 zSign ^= 1;
4964 goto normalizeRoundAndPack;
4965 aExpBigger:
4966 if ( aExp == 0x7FFF ) {
4967 if ((uint64_t)(aSig << 1)) {
4968 return propagateFloatx80NaN(a, b, status);
4969 }
4970 return a;
4971 }
4972 if ( bExp == 0 ) --expDiff;
4973 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4974 aBigger:
4975 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
4976 zExp = aExp;
4977 normalizeRoundAndPack:
4978 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
4979 zSign, zExp, zSig0, zSig1, status);
4980}
4981
4982
4983
4984
4985
4986
4987
4988floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
4989{
4990 flag aSign, bSign;
4991
4992 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4993 float_raise(float_flag_invalid, status);
4994 return floatx80_default_nan(status);
4995 }
4996 aSign = extractFloatx80Sign( a );
4997 bSign = extractFloatx80Sign( b );
4998 if ( aSign == bSign ) {
4999 return addFloatx80Sigs(a, b, aSign, status);
5000 }
5001 else {
5002 return subFloatx80Sigs(a, b, aSign, status);
5003 }
5004
5005}
5006
5007
5008
5009
5010
5011
5012
5013floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
5014{
5015 flag aSign, bSign;
5016
5017 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5018 float_raise(float_flag_invalid, status);
5019 return floatx80_default_nan(status);
5020 }
5021 aSign = extractFloatx80Sign( a );
5022 bSign = extractFloatx80Sign( b );
5023 if ( aSign == bSign ) {
5024 return subFloatx80Sigs(a, b, aSign, status);
5025 }
5026 else {
5027 return addFloatx80Sigs(a, b, aSign, status);
5028 }
5029
5030}
5031
5032
5033
5034
5035
5036
5037
5038floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
5039{
5040 flag aSign, bSign, zSign;
5041 int32_t aExp, bExp, zExp;
5042 uint64_t aSig, bSig, zSig0, zSig1;
5043
5044 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5045 float_raise(float_flag_invalid, status);
5046 return floatx80_default_nan(status);
5047 }
5048 aSig = extractFloatx80Frac( a );
5049 aExp = extractFloatx80Exp( a );
5050 aSign = extractFloatx80Sign( a );
5051 bSig = extractFloatx80Frac( b );
5052 bExp = extractFloatx80Exp( b );
5053 bSign = extractFloatx80Sign( b );
5054 zSign = aSign ^ bSign;
5055 if ( aExp == 0x7FFF ) {
5056 if ( (uint64_t) ( aSig<<1 )
5057 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
5058 return propagateFloatx80NaN(a, b, status);
5059 }
5060 if ( ( bExp | bSig ) == 0 ) goto invalid;
5061 return packFloatx80(zSign, floatx80_infinity_high,
5062 floatx80_infinity_low);
5063 }
5064 if ( bExp == 0x7FFF ) {
5065 if ((uint64_t)(bSig << 1)) {
5066 return propagateFloatx80NaN(a, b, status);
5067 }
5068 if ( ( aExp | aSig ) == 0 ) {
5069 invalid:
5070 float_raise(float_flag_invalid, status);
5071 return floatx80_default_nan(status);
5072 }
5073 return packFloatx80(zSign, floatx80_infinity_high,
5074 floatx80_infinity_low);
5075 }
5076 if ( aExp == 0 ) {
5077 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5078 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5079 }
5080 if ( bExp == 0 ) {
5081 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5082 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5083 }
5084 zExp = aExp + bExp - 0x3FFE;
5085 mul64To128( aSig, bSig, &zSig0, &zSig1 );
5086 if ( 0 < (int64_t) zSig0 ) {
5087 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5088 --zExp;
5089 }
5090 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5091 zSign, zExp, zSig0, zSig1, status);
5092}
5093
5094
5095
5096
5097
5098
5099
5100floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
5101{
5102 flag aSign, bSign, zSign;
5103 int32_t aExp, bExp, zExp;
5104 uint64_t aSig, bSig, zSig0, zSig1;
5105 uint64_t rem0, rem1, rem2, term0, term1, term2;
5106
5107 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5108 float_raise(float_flag_invalid, status);
5109 return floatx80_default_nan(status);
5110 }
5111 aSig = extractFloatx80Frac( a );
5112 aExp = extractFloatx80Exp( a );
5113 aSign = extractFloatx80Sign( a );
5114 bSig = extractFloatx80Frac( b );
5115 bExp = extractFloatx80Exp( b );
5116 bSign = extractFloatx80Sign( b );
5117 zSign = aSign ^ bSign;
5118 if ( aExp == 0x7FFF ) {
5119 if ((uint64_t)(aSig << 1)) {
5120 return propagateFloatx80NaN(a, b, status);
5121 }
5122 if ( bExp == 0x7FFF ) {
5123 if ((uint64_t)(bSig << 1)) {
5124 return propagateFloatx80NaN(a, b, status);
5125 }
5126 goto invalid;
5127 }
5128 return packFloatx80(zSign, floatx80_infinity_high,
5129 floatx80_infinity_low);
5130 }
5131 if ( bExp == 0x7FFF ) {
5132 if ((uint64_t)(bSig << 1)) {
5133 return propagateFloatx80NaN(a, b, status);
5134 }
5135 return packFloatx80( zSign, 0, 0 );
5136 }
5137 if ( bExp == 0 ) {
5138 if ( bSig == 0 ) {
5139 if ( ( aExp | aSig ) == 0 ) {
5140 invalid:
5141 float_raise(float_flag_invalid, status);
5142 return floatx80_default_nan(status);
5143 }
5144 float_raise(float_flag_divbyzero, status);
5145 return packFloatx80(zSign, floatx80_infinity_high,
5146 floatx80_infinity_low);
5147 }
5148 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5149 }
5150 if ( aExp == 0 ) {
5151 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5152 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5153 }
5154 zExp = aExp - bExp + 0x3FFE;
5155 rem1 = 0;
5156 if ( bSig <= aSig ) {
5157 shift128Right( aSig, 0, 1, &aSig, &rem1 );
5158 ++zExp;
5159 }
5160 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
5161 mul64To128( bSig, zSig0, &term0, &term1 );
5162 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
5163 while ( (int64_t) rem0 < 0 ) {
5164 --zSig0;
5165 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
5166 }
5167 zSig1 = estimateDiv128To64( rem1, 0, bSig );
5168 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
5169 mul64To128( bSig, zSig1, &term1, &term2 );
5170 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5171 while ( (int64_t) rem1 < 0 ) {
5172 --zSig1;
5173 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
5174 }
5175 zSig1 |= ( ( rem1 | rem2 ) != 0 );
5176 }
5177 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5178 zSign, zExp, zSig0, zSig1, status);
5179}
5180
5181
5182
5183
5184
5185
5186
5187floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
5188{
5189 flag aSign, zSign;
5190 int32_t aExp, bExp, expDiff;
5191 uint64_t aSig0, aSig1, bSig;
5192 uint64_t q, term0, term1, alternateASig0, alternateASig1;
5193
5194 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5195 float_raise(float_flag_invalid, status);
5196 return floatx80_default_nan(status);
5197 }
5198 aSig0 = extractFloatx80Frac( a );
5199 aExp = extractFloatx80Exp( a );
5200 aSign = extractFloatx80Sign( a );
5201 bSig = extractFloatx80Frac( b );
5202 bExp = extractFloatx80Exp( b );
5203 if ( aExp == 0x7FFF ) {
5204 if ( (uint64_t) ( aSig0<<1 )
5205 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
5206 return propagateFloatx80NaN(a, b, status);
5207 }
5208 goto invalid;
5209 }
5210 if ( bExp == 0x7FFF ) {
5211 if ((uint64_t)(bSig << 1)) {
5212 return propagateFloatx80NaN(a, b, status);
5213 }
5214 return a;
5215 }
5216 if ( bExp == 0 ) {
5217 if ( bSig == 0 ) {
5218 invalid:
5219 float_raise(float_flag_invalid, status);
5220 return floatx80_default_nan(status);
5221 }
5222 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5223 }
5224 if ( aExp == 0 ) {
5225 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
5226 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5227 }
5228 bSig |= LIT64( 0x8000000000000000 );
5229 zSign = aSign;
5230 expDiff = aExp - bExp;
5231 aSig1 = 0;
5232 if ( expDiff < 0 ) {
5233 if ( expDiff < -1 ) return a;
5234 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5235 expDiff = 0;
5236 }
5237 q = ( bSig <= aSig0 );
5238 if ( q ) aSig0 -= bSig;
5239 expDiff -= 64;
5240 while ( 0 < expDiff ) {
5241 q = estimateDiv128To64( aSig0, aSig1, bSig );
5242 q = ( 2 < q ) ? q - 2 : 0;
5243 mul64To128( bSig, q, &term0, &term1 );
5244 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5245 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5246 expDiff -= 62;
5247 }
5248 expDiff += 64;
5249 if ( 0 < expDiff ) {
5250 q = estimateDiv128To64( aSig0, aSig1, bSig );
5251 q = ( 2 < q ) ? q - 2 : 0;
5252 q >>= 64 - expDiff;
5253 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5254 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5255 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5256 while ( le128( term0, term1, aSig0, aSig1 ) ) {
5257 ++q;
5258 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5259 }
5260 }
5261 else {
5262 term1 = 0;
5263 term0 = bSig;
5264 }
5265 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
5266 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
5267 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
5268 && ( q & 1 ) )
5269 ) {
5270 aSig0 = alternateASig0;
5271 aSig1 = alternateASig1;
5272 zSign = ! zSign;
5273 }
5274 return
5275 normalizeRoundAndPackFloatx80(
5276 80, zSign, bExp + expDiff, aSig0, aSig1, status);
5277
5278}
5279
5280
5281
5282
5283
5284
5285
5286floatx80 floatx80_sqrt(floatx80 a, float_status *status)
5287{
5288 flag aSign;
5289 int32_t aExp, zExp;
5290 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5291 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5292
5293 if (floatx80_invalid_encoding(a)) {
5294 float_raise(float_flag_invalid, status);
5295 return floatx80_default_nan(status);
5296 }
5297 aSig0 = extractFloatx80Frac( a );
5298 aExp = extractFloatx80Exp( a );
5299 aSign = extractFloatx80Sign( a );
5300 if ( aExp == 0x7FFF ) {
5301 if ((uint64_t)(aSig0 << 1)) {
5302 return propagateFloatx80NaN(a, a, status);
5303 }
5304 if ( ! aSign ) return a;
5305 goto invalid;
5306 }
5307 if ( aSign ) {
5308 if ( ( aExp | aSig0 ) == 0 ) return a;
5309 invalid:
5310 float_raise(float_flag_invalid, status);
5311 return floatx80_default_nan(status);
5312 }
5313 if ( aExp == 0 ) {
5314 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5315 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5316 }
5317 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5318 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5319 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5320 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5321 doubleZSig0 = zSig0<<1;
5322 mul64To128( zSig0, zSig0, &term0, &term1 );
5323 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5324 while ( (int64_t) rem0 < 0 ) {
5325 --zSig0;
5326 doubleZSig0 -= 2;
5327 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5328 }
5329 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5330 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5331 if ( zSig1 == 0 ) zSig1 = 1;
5332 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5333 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5334 mul64To128( zSig1, zSig1, &term2, &term3 );
5335 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5336 while ( (int64_t) rem1 < 0 ) {
5337 --zSig1;
5338 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5339 term3 |= 1;
5340 term2 |= doubleZSig0;
5341 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5342 }
5343 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5344 }
5345 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5346 zSig0 |= doubleZSig0;
5347 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5348 0, zExp, zSig0, zSig1, status);
5349}
5350
5351
5352
5353
5354
5355
5356
5357
5358int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
5359{
5360
5361 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5362 || (extractFloatx80Exp(a) == 0x7FFF
5363 && (uint64_t) (extractFloatx80Frac(a) << 1))
5364 || (extractFloatx80Exp(b) == 0x7FFF
5365 && (uint64_t) (extractFloatx80Frac(b) << 1))
5366 ) {
5367 float_raise(float_flag_invalid, status);
5368 return 0;
5369 }
5370 return
5371 ( a.low == b.low )
5372 && ( ( a.high == b.high )
5373 || ( ( a.low == 0 )
5374 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5375 );
5376
5377}
5378
5379
5380
5381
5382
5383
5384
5385
5386
5387int floatx80_le(floatx80 a, floatx80 b, float_status *status)
5388{
5389 flag aSign, bSign;
5390
5391 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5392 || (extractFloatx80Exp(a) == 0x7FFF
5393 && (uint64_t) (extractFloatx80Frac(a) << 1))
5394 || (extractFloatx80Exp(b) == 0x7FFF
5395 && (uint64_t) (extractFloatx80Frac(b) << 1))
5396 ) {
5397 float_raise(float_flag_invalid, status);
5398 return 0;
5399 }
5400 aSign = extractFloatx80Sign( a );
5401 bSign = extractFloatx80Sign( b );
5402 if ( aSign != bSign ) {
5403 return
5404 aSign
5405 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5406 == 0 );
5407 }
5408 return
5409 aSign ? le128( b.high, b.low, a.high, a.low )
5410 : le128( a.high, a.low, b.high, b.low );
5411
5412}
5413
5414
5415
5416
5417
5418
5419
5420
5421int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
5422{
5423 flag aSign, bSign;
5424
5425 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5426 || (extractFloatx80Exp(a) == 0x7FFF
5427 && (uint64_t) (extractFloatx80Frac(a) << 1))
5428 || (extractFloatx80Exp(b) == 0x7FFF
5429 && (uint64_t) (extractFloatx80Frac(b) << 1))
5430 ) {
5431 float_raise(float_flag_invalid, status);
5432 return 0;
5433 }
5434 aSign = extractFloatx80Sign( a );
5435 bSign = extractFloatx80Sign( b );
5436 if ( aSign != bSign ) {
5437 return
5438 aSign
5439 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5440 != 0 );
5441 }
5442 return
5443 aSign ? lt128( b.high, b.low, a.high, a.low )
5444 : lt128( a.high, a.low, b.high, b.low );
5445
5446}
5447
5448
5449
5450
5451
5452
5453
5454int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
5455{
5456 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5457 || (extractFloatx80Exp(a) == 0x7FFF
5458 && (uint64_t) (extractFloatx80Frac(a) << 1))
5459 || (extractFloatx80Exp(b) == 0x7FFF
5460 && (uint64_t) (extractFloatx80Frac(b) << 1))
5461 ) {
5462 float_raise(float_flag_invalid, status);
5463 return 1;
5464 }
5465 return 0;
5466}
5467
5468
5469
5470
5471
5472
5473
5474
5475int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
5476{
5477
5478 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5479 float_raise(float_flag_invalid, status);
5480 return 0;
5481 }
5482 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5483 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5484 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5485 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5486 ) {
5487 if (floatx80_is_signaling_nan(a, status)
5488 || floatx80_is_signaling_nan(b, status)) {
5489 float_raise(float_flag_invalid, status);
5490 }
5491 return 0;
5492 }
5493 return
5494 ( a.low == b.low )
5495 && ( ( a.high == b.high )
5496 || ( ( a.low == 0 )
5497 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5498 );
5499
5500}
5501
5502
5503
5504
5505
5506
5507
5508
5509int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
5510{
5511 flag aSign, bSign;
5512
5513 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5514 float_raise(float_flag_invalid, status);
5515 return 0;
5516 }
5517 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5518 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5519 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5520 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5521 ) {
5522 if (floatx80_is_signaling_nan(a, status)
5523 || floatx80_is_signaling_nan(b, status)) {
5524 float_raise(float_flag_invalid, status);
5525 }
5526 return 0;
5527 }
5528 aSign = extractFloatx80Sign( a );
5529 bSign = extractFloatx80Sign( b );
5530 if ( aSign != bSign ) {
5531 return
5532 aSign
5533 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5534 == 0 );
5535 }
5536 return
5537 aSign ? le128( b.high, b.low, a.high, a.low )
5538 : le128( a.high, a.low, b.high, b.low );
5539
5540}
5541
5542
5543
5544
5545
5546
5547
5548
5549int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
5550{
5551 flag aSign, bSign;
5552
5553 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5554 float_raise(float_flag_invalid, status);
5555 return 0;
5556 }
5557 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5558 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5559 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5560 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5561 ) {
5562 if (floatx80_is_signaling_nan(a, status)
5563 || floatx80_is_signaling_nan(b, status)) {
5564 float_raise(float_flag_invalid, status);
5565 }
5566 return 0;
5567 }
5568 aSign = extractFloatx80Sign( a );
5569 bSign = extractFloatx80Sign( b );
5570 if ( aSign != bSign ) {
5571 return
5572 aSign
5573 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5574 != 0 );
5575 }
5576 return
5577 aSign ? lt128( b.high, b.low, a.high, a.low )
5578 : lt128( a.high, a.low, b.high, b.low );
5579
5580}
5581
5582
5583
5584
5585
5586
5587
5588int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
5589{
5590 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5591 float_raise(float_flag_invalid, status);
5592 return 1;
5593 }
5594 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5595 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5596 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5597 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5598 ) {
5599 if (floatx80_is_signaling_nan(a, status)
5600 || floatx80_is_signaling_nan(b, status)) {
5601 float_raise(float_flag_invalid, status);
5602 }
5603 return 1;
5604 }
5605 return 0;
5606}
5607
5608
5609
5610
5611
5612
5613
5614
5615
5616
5617
5618int32_t float128_to_int32(float128 a, float_status *status)
5619{
5620 flag aSign;
5621 int32_t aExp, shiftCount;
5622 uint64_t aSig0, aSig1;
5623
5624 aSig1 = extractFloat128Frac1( a );
5625 aSig0 = extractFloat128Frac0( a );
5626 aExp = extractFloat128Exp( a );
5627 aSign = extractFloat128Sign( a );
5628 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5629 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5630 aSig0 |= ( aSig1 != 0 );
5631 shiftCount = 0x4028 - aExp;
5632 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
5633 return roundAndPackInt32(aSign, aSig0, status);
5634
5635}
5636
5637
5638
5639
5640
5641
5642
5643
5644
5645
5646
5647int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
5648{
5649 flag aSign;
5650 int32_t aExp, shiftCount;
5651 uint64_t aSig0, aSig1, savedASig;
5652 int32_t z;
5653
5654 aSig1 = extractFloat128Frac1( a );
5655 aSig0 = extractFloat128Frac0( a );
5656 aExp = extractFloat128Exp( a );
5657 aSign = extractFloat128Sign( a );
5658 aSig0 |= ( aSig1 != 0 );
5659 if ( 0x401E < aExp ) {
5660 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5661 goto invalid;
5662 }
5663 else if ( aExp < 0x3FFF ) {
5664 if (aExp || aSig0) {
5665 status->float_exception_flags |= float_flag_inexact;
5666 }
5667 return 0;
5668 }
5669 aSig0 |= LIT64( 0x0001000000000000 );
5670 shiftCount = 0x402F - aExp;
5671 savedASig = aSig0;
5672 aSig0 >>= shiftCount;
5673 z = aSig0;
5674 if ( aSign ) z = - z;
5675 if ( ( z < 0 ) ^ aSign ) {
5676 invalid:
5677 float_raise(float_flag_invalid, status);
5678 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5679 }
5680 if ( ( aSig0<<shiftCount ) != savedASig ) {
5681 status->float_exception_flags |= float_flag_inexact;
5682 }
5683 return z;
5684
5685}
5686
5687
5688
5689
5690
5691
5692
5693
5694
5695
5696
5697int64_t float128_to_int64(float128 a, float_status *status)
5698{
5699 flag aSign;
5700 int32_t aExp, shiftCount;
5701 uint64_t aSig0, aSig1;
5702
5703 aSig1 = extractFloat128Frac1( a );
5704 aSig0 = extractFloat128Frac0( a );
5705 aExp = extractFloat128Exp( a );
5706 aSign = extractFloat128Sign( a );
5707 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5708 shiftCount = 0x402F - aExp;
5709 if ( shiftCount <= 0 ) {
5710 if ( 0x403E < aExp ) {
5711 float_raise(float_flag_invalid, status);
5712 if ( ! aSign
5713 || ( ( aExp == 0x7FFF )
5714 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5715 )
5716 ) {
5717 return LIT64( 0x7FFFFFFFFFFFFFFF );
5718 }
5719 return (int64_t) LIT64( 0x8000000000000000 );
5720 }
5721 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5722 }
5723 else {
5724 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5725 }
5726 return roundAndPackInt64(aSign, aSig0, aSig1, status);
5727
5728}
5729
5730
5731
5732
5733
5734
5735
5736
5737
5738
5739
5740int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
5741{
5742 flag aSign;
5743 int32_t aExp, shiftCount;
5744 uint64_t aSig0, aSig1;
5745 int64_t z;
5746
5747 aSig1 = extractFloat128Frac1( a );
5748 aSig0 = extractFloat128Frac0( a );
5749 aExp = extractFloat128Exp( a );
5750 aSign = extractFloat128Sign( a );
5751 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5752 shiftCount = aExp - 0x402F;
5753 if ( 0 < shiftCount ) {
5754 if ( 0x403E <= aExp ) {
5755 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5756 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
5757 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
5758 if (aSig1) {
5759 status->float_exception_flags |= float_flag_inexact;
5760 }
5761 }
5762 else {
5763 float_raise(float_flag_invalid, status);
5764 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
5765 return LIT64( 0x7FFFFFFFFFFFFFFF );
5766 }
5767 }
5768 return (int64_t) LIT64( 0x8000000000000000 );
5769 }
5770 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
5771 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
5772 status->float_exception_flags |= float_flag_inexact;
5773 }
5774 }
5775 else {
5776 if ( aExp < 0x3FFF ) {
5777 if ( aExp | aSig0 | aSig1 ) {
5778 status->float_exception_flags |= float_flag_inexact;
5779 }
5780 return 0;
5781 }
5782 z = aSig0>>( - shiftCount );
5783 if ( aSig1
5784 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
5785 status->float_exception_flags |= float_flag_inexact;
5786 }
5787 }
5788 if ( aSign ) z = - z;
5789 return z;
5790
5791}
5792
5793
5794
5795
5796
5797
5798
5799
5800
5801
5802
5803
5804
5805uint64_t float128_to_uint64(float128 a, float_status *status)
5806{
5807 flag aSign;
5808 int aExp;
5809 int shiftCount;
5810 uint64_t aSig0, aSig1;
5811
5812 aSig0 = extractFloat128Frac0(a);
5813 aSig1 = extractFloat128Frac1(a);
5814 aExp = extractFloat128Exp(a);
5815 aSign = extractFloat128Sign(a);
5816 if (aSign && (aExp > 0x3FFE)) {
5817 float_raise(float_flag_invalid, status);
5818 if (float128_is_any_nan(a)) {
5819 return LIT64(0xFFFFFFFFFFFFFFFF);
5820 } else {
5821 return 0;
5822 }
5823 }
5824 if (aExp) {
5825 aSig0 |= LIT64(0x0001000000000000);
5826 }
5827 shiftCount = 0x402F - aExp;
5828 if (shiftCount <= 0) {
5829 if (0x403E < aExp) {
5830 float_raise(float_flag_invalid, status);
5831 return LIT64(0xFFFFFFFFFFFFFFFF);
5832 }
5833 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
5834 } else {
5835 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
5836 }
5837 return roundAndPackUint64(aSign, aSig0, aSig1, status);
5838}
5839
5840uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
5841{
5842 uint64_t v;
5843 signed char current_rounding_mode = status->float_rounding_mode;
5844
5845 set_float_rounding_mode(float_round_to_zero, status);
5846 v = float128_to_uint64(a, status);
5847 set_float_rounding_mode(current_rounding_mode, status);
5848
5849 return v;
5850}
5851
5852
5853
5854
5855
5856
5857
5858
5859
5860
5861
5862
5863uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
5864{
5865 uint64_t v;
5866 uint32_t res;
5867 int old_exc_flags = get_float_exception_flags(status);
5868
5869 v = float128_to_uint64_round_to_zero(a, status);
5870 if (v > 0xffffffff) {
5871 res = 0xffffffff;
5872 } else {
5873 return v;
5874 }
5875 set_float_exception_flags(old_exc_flags, status);
5876 float_raise(float_flag_invalid, status);
5877 return res;
5878}
5879
5880
5881
5882
5883
5884
5885
5886
5887float32 float128_to_float32(float128 a, float_status *status)
5888{
5889 flag aSign;
5890 int32_t aExp;
5891 uint64_t aSig0, aSig1;
5892 uint32_t zSig;
5893
5894 aSig1 = extractFloat128Frac1( a );
5895 aSig0 = extractFloat128Frac0( a );
5896 aExp = extractFloat128Exp( a );
5897 aSign = extractFloat128Sign( a );
5898 if ( aExp == 0x7FFF ) {
5899 if ( aSig0 | aSig1 ) {
5900 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
5901 }
5902 return packFloat32( aSign, 0xFF, 0 );
5903 }
5904 aSig0 |= ( aSig1 != 0 );
5905 shift64RightJamming( aSig0, 18, &aSig0 );
5906 zSig = aSig0;
5907 if ( aExp || zSig ) {
5908 zSig |= 0x40000000;
5909 aExp -= 0x3F81;
5910 }
5911 return roundAndPackFloat32(aSign, aExp, zSig, status);
5912
5913}
5914
5915
5916
5917
5918
5919
5920
5921
5922float64 float128_to_float64(float128 a, float_status *status)
5923{
5924 flag aSign;
5925 int32_t aExp;
5926 uint64_t aSig0, aSig1;
5927
5928 aSig1 = extractFloat128Frac1( a );
5929 aSig0 = extractFloat128Frac0( a );
5930 aExp = extractFloat128Exp( a );
5931 aSign = extractFloat128Sign( a );
5932 if ( aExp == 0x7FFF ) {
5933 if ( aSig0 | aSig1 ) {
5934 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
5935 }
5936 return packFloat64( aSign, 0x7FF, 0 );
5937 }
5938 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5939 aSig0 |= ( aSig1 != 0 );
5940 if ( aExp || aSig0 ) {
5941 aSig0 |= LIT64( 0x4000000000000000 );
5942 aExp -= 0x3C01;
5943 }
5944 return roundAndPackFloat64(aSign, aExp, aSig0, status);
5945
5946}
5947
5948
5949
5950
5951
5952
5953
5954
5955floatx80 float128_to_floatx80(float128 a, float_status *status)
5956{
5957 flag aSign;
5958 int32_t aExp;
5959 uint64_t aSig0, aSig1;
5960
5961 aSig1 = extractFloat128Frac1( a );
5962 aSig0 = extractFloat128Frac0( a );
5963 aExp = extractFloat128Exp( a );
5964 aSign = extractFloat128Sign( a );
5965 if ( aExp == 0x7FFF ) {
5966 if ( aSig0 | aSig1 ) {
5967 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
5968 }
5969 return packFloatx80(aSign, floatx80_infinity_high,
5970 floatx80_infinity_low);
5971 }
5972 if ( aExp == 0 ) {
5973 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
5974 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5975 }
5976 else {
5977 aSig0 |= LIT64( 0x0001000000000000 );
5978 }
5979 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
5980 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
5981
5982}
5983
5984
5985
5986
5987
5988
5989
5990
5991float128 float128_round_to_int(float128 a, float_status *status)
5992{
5993 flag aSign;
5994 int32_t aExp;
5995 uint64_t lastBitMask, roundBitsMask;
5996 float128 z;
5997
5998 aExp = extractFloat128Exp( a );
5999 if ( 0x402F <= aExp ) {
6000 if ( 0x406F <= aExp ) {
6001 if ( ( aExp == 0x7FFF )
6002 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6003 ) {
6004 return propagateFloat128NaN(a, a, status);
6005 }
6006 return a;
6007 }
6008 lastBitMask = 1;
6009 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6010 roundBitsMask = lastBitMask - 1;
6011 z = a;
6012 switch (status->float_rounding_mode) {
6013 case float_round_nearest_even:
6014 if ( lastBitMask ) {
6015 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6016 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6017 }
6018 else {
6019 if ( (int64_t) z.low < 0 ) {
6020 ++z.high;
6021 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
6022 }
6023 }
6024 break;
6025 case float_round_ties_away:
6026 if (lastBitMask) {
6027 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6028 } else {
6029 if ((int64_t) z.low < 0) {
6030 ++z.high;
6031 }
6032 }
6033 break;
6034 case float_round_to_zero:
6035 break;
6036 case float_round_up:
6037 if (!extractFloat128Sign(z)) {
6038 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6039 }
6040 break;
6041 case float_round_down:
6042 if (extractFloat128Sign(z)) {
6043 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6044 }
6045 break;
6046 default:
6047 abort();
6048 }
6049 z.low &= ~ roundBitsMask;
6050 }
6051 else {
6052 if ( aExp < 0x3FFF ) {
6053 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
6054 status->float_exception_flags |= float_flag_inexact;
6055 aSign = extractFloat128Sign( a );
6056 switch (status->float_rounding_mode) {
6057 case float_round_nearest_even:
6058 if ( ( aExp == 0x3FFE )
6059 && ( extractFloat128Frac0( a )
6060 | extractFloat128Frac1( a ) )
6061 ) {
6062 return packFloat128( aSign, 0x3FFF, 0, 0 );
6063 }
6064 break;
6065 case float_round_ties_away:
6066 if (aExp == 0x3FFE) {
6067 return packFloat128(aSign, 0x3FFF, 0, 0);
6068 }
6069 break;
6070 case float_round_down:
6071 return
6072 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6073 : packFloat128( 0, 0, 0, 0 );
6074 case float_round_up:
6075 return
6076 aSign ? packFloat128( 1, 0, 0, 0 )
6077 : packFloat128( 0, 0x3FFF, 0, 0 );
6078 }
6079 return packFloat128( aSign, 0, 0, 0 );
6080 }
6081 lastBitMask = 1;
6082 lastBitMask <<= 0x402F - aExp;
6083 roundBitsMask = lastBitMask - 1;
6084 z.low = 0;
6085 z.high = a.high;
6086 switch (status->float_rounding_mode) {
6087 case float_round_nearest_even:
6088 z.high += lastBitMask>>1;
6089 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6090 z.high &= ~ lastBitMask;
6091 }
6092 break;
6093 case float_round_ties_away:
6094 z.high += lastBitMask>>1;
6095 break;
6096 case float_round_to_zero:
6097 break;
6098 case float_round_up:
6099 if (!extractFloat128Sign(z)) {
6100 z.high |= ( a.low != 0 );
6101 z.high += roundBitsMask;
6102 }
6103 break;
6104 case float_round_down:
6105 if (extractFloat128Sign(z)) {
6106 z.high |= (a.low != 0);
6107 z.high += roundBitsMask;
6108 }
6109 break;
6110 default:
6111 abort();
6112 }
6113 z.high &= ~ roundBitsMask;
6114 }
6115 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
6116 status->float_exception_flags |= float_flag_inexact;
6117 }
6118 return z;
6119
6120}
6121
6122
6123
6124
6125
6126
6127
6128
6129
6130static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
6131 float_status *status)
6132{
6133 int32_t aExp, bExp, zExp;
6134 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6135 int32_t expDiff;
6136
6137 aSig1 = extractFloat128Frac1( a );
6138 aSig0 = extractFloat128Frac0( a );
6139 aExp = extractFloat128Exp( a );
6140 bSig1 = extractFloat128Frac1( b );
6141 bSig0 = extractFloat128Frac0( b );
6142 bExp = extractFloat128Exp( b );
6143 expDiff = aExp - bExp;
6144 if ( 0 < expDiff ) {
6145 if ( aExp == 0x7FFF ) {
6146 if (aSig0 | aSig1) {
6147 return propagateFloat128NaN(a, b, status);
6148 }
6149 return a;
6150 }
6151 if ( bExp == 0 ) {
6152 --expDiff;
6153 }
6154 else {
6155 bSig0 |= LIT64( 0x0001000000000000 );
6156 }
6157 shift128ExtraRightJamming(
6158 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6159 zExp = aExp;
6160 }
6161 else if ( expDiff < 0 ) {
6162 if ( bExp == 0x7FFF ) {
6163 if (bSig0 | bSig1) {
6164 return propagateFloat128NaN(a, b, status);
6165 }
6166 return packFloat128( zSign, 0x7FFF, 0, 0 );
6167 }
6168 if ( aExp == 0 ) {
6169 ++expDiff;
6170 }
6171 else {
6172 aSig0 |= LIT64( 0x0001000000000000 );
6173 }
6174 shift128ExtraRightJamming(
6175 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6176 zExp = bExp;
6177 }
6178 else {
6179 if ( aExp == 0x7FFF ) {
6180 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
6181 return propagateFloat128NaN(a, b, status);
6182 }
6183 return a;
6184 }
6185 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6186 if ( aExp == 0 ) {
6187 if (status->flush_to_zero) {
6188 if (zSig0 | zSig1) {
6189 float_raise(float_flag_output_denormal, status);
6190 }
6191 return packFloat128(zSign, 0, 0, 0);
6192 }
6193 return packFloat128( zSign, 0, zSig0, zSig1 );
6194 }
6195 zSig2 = 0;
6196 zSig0 |= LIT64( 0x0002000000000000 );
6197 zExp = aExp;
6198 goto shiftRight1;
6199 }
6200 aSig0 |= LIT64( 0x0001000000000000 );
6201 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6202 --zExp;
6203 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
6204 ++zExp;
6205 shiftRight1:
6206 shift128ExtraRightJamming(
6207 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6208 roundAndPack:
6209 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6210
6211}
6212
6213
6214
6215
6216
6217
6218
6219
6220
6221static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
6222 float_status *status)
6223{
6224 int32_t aExp, bExp, zExp;
6225 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
6226 int32_t expDiff;
6227
6228 aSig1 = extractFloat128Frac1( a );
6229 aSig0 = extractFloat128Frac0( a );
6230 aExp = extractFloat128Exp( a );
6231 bSig1 = extractFloat128Frac1( b );
6232 bSig0 = extractFloat128Frac0( b );
6233 bExp = extractFloat128Exp( b );
6234 expDiff = aExp - bExp;
6235 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6236 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
6237 if ( 0 < expDiff ) goto aExpBigger;
6238 if ( expDiff < 0 ) goto bExpBigger;
6239 if ( aExp == 0x7FFF ) {
6240 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
6241 return propagateFloat128NaN(a, b, status);
6242 }
6243 float_raise(float_flag_invalid, status);
6244 return float128_default_nan(status);
6245 }
6246 if ( aExp == 0 ) {
6247 aExp = 1;
6248 bExp = 1;
6249 }
6250 if ( bSig0 < aSig0 ) goto aBigger;
6251 if ( aSig0 < bSig0 ) goto bBigger;
6252 if ( bSig1 < aSig1 ) goto aBigger;
6253 if ( aSig1 < bSig1 ) goto bBigger;
6254 return packFloat128(status->float_rounding_mode == float_round_down,
6255 0, 0, 0);
6256 bExpBigger:
6257 if ( bExp == 0x7FFF ) {
6258 if (bSig0 | bSig1) {
6259 return propagateFloat128NaN(a, b, status);
6260 }
6261 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
6262 }
6263 if ( aExp == 0 ) {
6264 ++expDiff;
6265 }
6266 else {
6267 aSig0 |= LIT64( 0x4000000000000000 );
6268 }
6269 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6270 bSig0 |= LIT64( 0x4000000000000000 );
6271 bBigger:
6272 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
6273 zExp = bExp;
6274 zSign ^= 1;
6275 goto normalizeRoundAndPack;
6276 aExpBigger:
6277 if ( aExp == 0x7FFF ) {
6278 if (aSig0 | aSig1) {
6279 return propagateFloat128NaN(a, b, status);
6280 }
6281 return a;
6282 }
6283 if ( bExp == 0 ) {
6284 --expDiff;
6285 }
6286 else {
6287 bSig0 |= LIT64( 0x4000000000000000 );
6288 }
6289 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
6290 aSig0 |= LIT64( 0x4000000000000000 );
6291 aBigger:
6292 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6293 zExp = aExp;
6294 normalizeRoundAndPack:
6295 --zExp;
6296 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
6297 status);
6298
6299}
6300
6301
6302
6303
6304
6305
6306
6307float128 float128_add(float128 a, float128 b, float_status *status)
6308{
6309 flag aSign, bSign;
6310
6311 aSign = extractFloat128Sign( a );
6312 bSign = extractFloat128Sign( b );
6313 if ( aSign == bSign ) {
6314 return addFloat128Sigs(a, b, aSign, status);
6315 }
6316 else {
6317 return subFloat128Sigs(a, b, aSign, status);
6318 }
6319
6320}
6321
6322
6323
6324
6325
6326
6327
6328float128 float128_sub(float128 a, float128 b, float_status *status)
6329{
6330 flag aSign, bSign;
6331
6332 aSign = extractFloat128Sign( a );
6333 bSign = extractFloat128Sign( b );
6334 if ( aSign == bSign ) {
6335 return subFloat128Sigs(a, b, aSign, status);
6336 }
6337 else {
6338 return addFloat128Sigs(a, b, aSign, status);
6339 }
6340
6341}
6342
6343
6344
6345
6346
6347
6348
6349float128 float128_mul(float128 a, float128 b, float_status *status)
6350{
6351 flag aSign, bSign, zSign;
6352 int32_t aExp, bExp, zExp;
6353 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
6354
6355 aSig1 = extractFloat128Frac1( a );
6356 aSig0 = extractFloat128Frac0( a );
6357 aExp = extractFloat128Exp( a );
6358 aSign = extractFloat128Sign( a );
6359 bSig1 = extractFloat128Frac1( b );
6360 bSig0 = extractFloat128Frac0( b );
6361 bExp = extractFloat128Exp( b );
6362 bSign = extractFloat128Sign( b );
6363 zSign = aSign ^ bSign;
6364 if ( aExp == 0x7FFF ) {
6365 if ( ( aSig0 | aSig1 )
6366 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6367 return propagateFloat128NaN(a, b, status);
6368 }
6369 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6370 return packFloat128( zSign, 0x7FFF, 0, 0 );
6371 }
6372 if ( bExp == 0x7FFF ) {
6373 if (bSig0 | bSig1) {
6374 return propagateFloat128NaN(a, b, status);
6375 }
6376 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6377 invalid:
6378 float_raise(float_flag_invalid, status);
6379 return float128_default_nan(status);
6380 }
6381 return packFloat128( zSign, 0x7FFF, 0, 0 );
6382 }
6383 if ( aExp == 0 ) {
6384 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6385 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6386 }
6387 if ( bExp == 0 ) {
6388 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6389 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6390 }
6391 zExp = aExp + bExp - 0x4000;
6392 aSig0 |= LIT64( 0x0001000000000000 );
6393 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6394 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6395 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6396 zSig2 |= ( zSig3 != 0 );
6397 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6398 shift128ExtraRightJamming(
6399 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6400 ++zExp;
6401 }
6402 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6403
6404}
6405
6406
6407
6408
6409
6410
6411
6412float128 float128_div(float128 a, float128 b, float_status *status)
6413{
6414 flag aSign, bSign, zSign;
6415 int32_t aExp, bExp, zExp;
6416 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6417 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6418
6419 aSig1 = extractFloat128Frac1( a );
6420 aSig0 = extractFloat128Frac0( a );
6421 aExp = extractFloat128Exp( a );
6422 aSign = extractFloat128Sign( a );
6423 bSig1 = extractFloat128Frac1( b );
6424 bSig0 = extractFloat128Frac0( b );
6425 bExp = extractFloat128Exp( b );
6426 bSign = extractFloat128Sign( b );
6427 zSign = aSign ^ bSign;
6428 if ( aExp == 0x7FFF ) {
6429 if (aSig0 | aSig1) {
6430 return propagateFloat128NaN(a, b, status);
6431 }
6432 if ( bExp == 0x7FFF ) {
6433 if (bSig0 | bSig1) {
6434 return propagateFloat128NaN(a, b, status);
6435 }
6436 goto invalid;
6437 }
6438 return packFloat128( zSign, 0x7FFF, 0, 0 );
6439 }
6440 if ( bExp == 0x7FFF ) {
6441 if (bSig0 | bSig1) {
6442 return propagateFloat128NaN(a, b, status);
6443 }
6444 return packFloat128( zSign, 0, 0, 0 );
6445 }
6446 if ( bExp == 0 ) {
6447 if ( ( bSig0 | bSig1 ) == 0 ) {
6448 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6449 invalid:
6450 float_raise(float_flag_invalid, status);
6451 return float128_default_nan(status);
6452 }
6453 float_raise(float_flag_divbyzero, status);
6454 return packFloat128( zSign, 0x7FFF, 0, 0 );
6455 }
6456 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6457 }
6458 if ( aExp == 0 ) {
6459 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6460 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6461 }
6462 zExp = aExp - bExp + 0x3FFD;
6463 shortShift128Left(
6464 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6465 shortShift128Left(
6466 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6467 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6468 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6469 ++zExp;
6470 }
6471 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6472 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6473 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
6474 while ( (int64_t) rem0 < 0 ) {
6475 --zSig0;
6476 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6477 }
6478 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6479 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6480 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6481 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
6482 while ( (int64_t) rem1 < 0 ) {
6483 --zSig1;
6484 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6485 }
6486 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6487 }
6488 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
6489 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6490
6491}
6492
6493
6494
6495
6496
6497
6498
6499float128 float128_rem(float128 a, float128 b, float_status *status)
6500{
6501 flag aSign, zSign;
6502 int32_t aExp, bExp, expDiff;
6503 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6504 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6505 int64_t sigMean0;
6506
6507 aSig1 = extractFloat128Frac1( a );
6508 aSig0 = extractFloat128Frac0( a );
6509 aExp = extractFloat128Exp( a );
6510 aSign = extractFloat128Sign( a );
6511 bSig1 = extractFloat128Frac1( b );
6512 bSig0 = extractFloat128Frac0( b );
6513 bExp = extractFloat128Exp( b );
6514 if ( aExp == 0x7FFF ) {
6515 if ( ( aSig0 | aSig1 )
6516 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6517 return propagateFloat128NaN(a, b, status);
6518 }
6519 goto invalid;
6520 }
6521 if ( bExp == 0x7FFF ) {
6522 if (bSig0 | bSig1) {
6523 return propagateFloat128NaN(a, b, status);
6524 }
6525 return a;
6526 }
6527 if ( bExp == 0 ) {
6528 if ( ( bSig0 | bSig1 ) == 0 ) {
6529 invalid:
6530 float_raise(float_flag_invalid, status);
6531 return float128_default_nan(status);
6532 }
6533 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6534 }
6535 if ( aExp == 0 ) {
6536 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6537 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6538 }
6539 expDiff = aExp - bExp;
6540 if ( expDiff < -1 ) return a;
6541 shortShift128Left(
6542 aSig0 | LIT64( 0x0001000000000000 ),
6543 aSig1,
6544 15 - ( expDiff < 0 ),
6545 &aSig0,
6546 &aSig1
6547 );
6548 shortShift128Left(
6549 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6550 q = le128( bSig0, bSig1, aSig0, aSig1 );
6551 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6552 expDiff -= 64;
6553 while ( 0 < expDiff ) {
6554 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6555 q = ( 4 < q ) ? q - 4 : 0;
6556 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6557 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6558 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6559 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6560 expDiff -= 61;
6561 }
6562 if ( -64 < expDiff ) {
6563 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6564 q = ( 4 < q ) ? q - 4 : 0;
6565 q >>= - expDiff;
6566 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6567 expDiff += 52;
6568 if ( expDiff < 0 ) {
6569 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6570 }
6571 else {
6572 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6573 }
6574 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6575 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6576 }
6577 else {
6578 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6579 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6580 }
6581 do {
6582 alternateASig0 = aSig0;
6583 alternateASig1 = aSig1;
6584 ++q;
6585 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6586 } while ( 0 <= (int64_t) aSig0 );
6587 add128(
6588 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
6589 if ( ( sigMean0 < 0 )
6590 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6591 aSig0 = alternateASig0;
6592 aSig1 = alternateASig1;
6593 }
6594 zSign = ( (int64_t) aSig0 < 0 );
6595 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
6596 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6597 status);
6598}
6599
6600
6601
6602
6603
6604
6605
6606float128 float128_sqrt(float128 a, float_status *status)
6607{
6608 flag aSign;
6609 int32_t aExp, zExp;
6610 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6611 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6612
6613 aSig1 = extractFloat128Frac1( a );
6614 aSig0 = extractFloat128Frac0( a );
6615 aExp = extractFloat128Exp( a );
6616 aSign = extractFloat128Sign( a );
6617 if ( aExp == 0x7FFF ) {
6618 if (aSig0 | aSig1) {
6619 return propagateFloat128NaN(a, a, status);
6620 }
6621 if ( ! aSign ) return a;
6622 goto invalid;
6623 }
6624 if ( aSign ) {
6625 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6626 invalid:
6627 float_raise(float_flag_invalid, status);
6628 return float128_default_nan(status);
6629 }
6630 if ( aExp == 0 ) {
6631 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6632 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6633 }
6634 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6635 aSig0 |= LIT64( 0x0001000000000000 );
6636 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6637 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6638 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6639 doubleZSig0 = zSig0<<1;
6640 mul64To128( zSig0, zSig0, &term0, &term1 );
6641 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6642 while ( (int64_t) rem0 < 0 ) {
6643 --zSig0;
6644 doubleZSig0 -= 2;
6645 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6646 }
6647 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6648 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6649 if ( zSig1 == 0 ) zSig1 = 1;
6650 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6651 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6652 mul64To128( zSig1, zSig1, &term2, &term3 );
6653 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
6654 while ( (int64_t) rem1 < 0 ) {
6655 --zSig1;
6656 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6657 term3 |= 1;
6658 term2 |= doubleZSig0;
6659 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6660 }
6661 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6662 }
6663 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
6664 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
6665
6666}
6667
6668
6669
6670
6671
6672
6673
6674
6675int float128_eq(float128 a, float128 b, float_status *status)
6676{
6677
6678 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6679 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6680 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6681 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6682 ) {
6683 float_raise(float_flag_invalid, status);
6684 return 0;
6685 }
6686 return
6687 ( a.low == b.low )
6688 && ( ( a.high == b.high )
6689 || ( ( a.low == 0 )
6690 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
6691 );
6692
6693}
6694
6695
6696
6697
6698
6699
6700
6701
6702int float128_le(float128 a, float128 b, float_status *status)
6703{
6704 flag aSign, bSign;
6705
6706 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6707 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6708 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6709 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6710 ) {
6711 float_raise(float_flag_invalid, status);
6712 return 0;
6713 }
6714 aSign = extractFloat128Sign( a );
6715 bSign = extractFloat128Sign( b );
6716 if ( aSign != bSign ) {
6717 return
6718 aSign
6719 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6720 == 0 );
6721 }
6722 return
6723 aSign ? le128( b.high, b.low, a.high, a.low )
6724 : le128( a.high, a.low, b.high, b.low );
6725
6726}
6727
6728
6729
6730
6731
6732
6733
6734
6735int float128_lt(float128 a, float128 b, float_status *status)
6736{
6737 flag aSign, bSign;
6738
6739 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6740 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6741 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6742 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6743 ) {
6744 float_raise(float_flag_invalid, status);
6745 return 0;
6746 }
6747 aSign = extractFloat128Sign( a );
6748 bSign = extractFloat128Sign( b );
6749 if ( aSign != bSign ) {
6750 return
6751 aSign
6752 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6753 != 0 );
6754 }
6755 return
6756 aSign ? lt128( b.high, b.low, a.high, a.low )
6757 : lt128( a.high, a.low, b.high, b.low );
6758
6759}
6760
6761
6762
6763
6764
6765
6766
6767
6768int float128_unordered(float128 a, float128 b, float_status *status)
6769{
6770 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6771 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6772 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6773 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6774 ) {
6775 float_raise(float_flag_invalid, status);
6776 return 1;
6777 }
6778 return 0;
6779}
6780
6781
6782
6783
6784
6785
6786
6787
6788int float128_eq_quiet(float128 a, float128 b, float_status *status)
6789{
6790
6791 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6792 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6793 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6794 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6795 ) {
6796 if (float128_is_signaling_nan(a, status)
6797 || float128_is_signaling_nan(b, status)) {
6798 float_raise(float_flag_invalid, status);
6799 }
6800 return 0;
6801 }
6802 return
6803 ( a.low == b.low )
6804 && ( ( a.high == b.high )
6805 || ( ( a.low == 0 )
6806 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
6807 );
6808
6809}
6810
6811
6812
6813
6814
6815
6816
6817
6818int float128_le_quiet(float128 a, float128 b, float_status *status)
6819{
6820 flag aSign, bSign;
6821
6822 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6823 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6824 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6825 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6826 ) {
6827 if (float128_is_signaling_nan(a, status)
6828 || float128_is_signaling_nan(b, status)) {
6829 float_raise(float_flag_invalid, status);
6830 }
6831 return 0;
6832 }
6833 aSign = extractFloat128Sign( a );
6834 bSign = extractFloat128Sign( b );
6835 if ( aSign != bSign ) {
6836 return
6837 aSign
6838 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6839 == 0 );
6840 }
6841 return
6842 aSign ? le128( b.high, b.low, a.high, a.low )
6843 : le128( a.high, a.low, b.high, b.low );
6844
6845}
6846
6847
6848
6849
6850
6851
6852
6853
6854int float128_lt_quiet(float128 a, float128 b, float_status *status)
6855{
6856 flag aSign, bSign;
6857
6858 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6859 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6860 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6861 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6862 ) {
6863 if (float128_is_signaling_nan(a, status)
6864 || float128_is_signaling_nan(b, status)) {
6865 float_raise(float_flag_invalid, status);
6866 }
6867 return 0;
6868 }
6869 aSign = extractFloat128Sign( a );
6870 bSign = extractFloat128Sign( b );
6871 if ( aSign != bSign ) {
6872 return
6873 aSign
6874 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6875 != 0 );
6876 }
6877 return
6878 aSign ? lt128( b.high, b.low, a.high, a.low )
6879 : lt128( a.high, a.low, b.high, b.low );
6880
6881}
6882
6883
6884
6885
6886
6887
6888
6889
6890int float128_unordered_quiet(float128 a, float128 b, float_status *status)
6891{
6892 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6893 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6894 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6895 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6896 ) {
6897 if (float128_is_signaling_nan(a, status)
6898 || float128_is_signaling_nan(b, status)) {
6899 float_raise(float_flag_invalid, status);
6900 }
6901 return 1;
6902 }
6903 return 0;
6904}
6905
6906static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
6907 int is_quiet, float_status *status)
6908{
6909 flag aSign, bSign;
6910
6911 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6912 float_raise(float_flag_invalid, status);
6913 return float_relation_unordered;
6914 }
6915 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6916 ( extractFloatx80Frac( a )<<1 ) ) ||
6917 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6918 ( extractFloatx80Frac( b )<<1 ) )) {
6919 if (!is_quiet ||
6920 floatx80_is_signaling_nan(a, status) ||
6921 floatx80_is_signaling_nan(b, status)) {
6922 float_raise(float_flag_invalid, status);
6923 }
6924 return float_relation_unordered;
6925 }
6926 aSign = extractFloatx80Sign( a );
6927 bSign = extractFloatx80Sign( b );
6928 if ( aSign != bSign ) {
6929
6930 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6931 ( ( a.low | b.low ) == 0 ) ) {
6932
6933 return float_relation_equal;
6934 } else {
6935 return 1 - (2 * aSign);
6936 }
6937 } else {
6938 if (a.low == b.low && a.high == b.high) {
6939 return float_relation_equal;
6940 } else {
6941 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6942 }
6943 }
6944}
6945
6946int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
6947{
6948 return floatx80_compare_internal(a, b, 0, status);
6949}
6950
6951int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
6952{
6953 return floatx80_compare_internal(a, b, 1, status);
6954}
6955
6956static inline int float128_compare_internal(float128 a, float128 b,
6957 int is_quiet, float_status *status)
6958{
6959 flag aSign, bSign;
6960
6961 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6962 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
6963 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
6964 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
6965 if (!is_quiet ||
6966 float128_is_signaling_nan(a, status) ||
6967 float128_is_signaling_nan(b, status)) {
6968 float_raise(float_flag_invalid, status);
6969 }
6970 return float_relation_unordered;
6971 }
6972 aSign = extractFloat128Sign( a );
6973 bSign = extractFloat128Sign( b );
6974 if ( aSign != bSign ) {
6975 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
6976
6977 return float_relation_equal;
6978 } else {
6979 return 1 - (2 * aSign);
6980 }
6981 } else {
6982 if (a.low == b.low && a.high == b.high) {
6983 return float_relation_equal;
6984 } else {
6985 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6986 }
6987 }
6988}
6989
6990int float128_compare(float128 a, float128 b, float_status *status)
6991{
6992 return float128_compare_internal(a, b, 0, status);
6993}
6994
6995int float128_compare_quiet(float128 a, float128 b, float_status *status)
6996{
6997 return float128_compare_internal(a, b, 1, status);
6998}
6999
7000floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
7001{
7002 flag aSign;
7003 int32_t aExp;
7004 uint64_t aSig;
7005
7006 if (floatx80_invalid_encoding(a)) {
7007 float_raise(float_flag_invalid, status);
7008 return floatx80_default_nan(status);
7009 }
7010 aSig = extractFloatx80Frac( a );
7011 aExp = extractFloatx80Exp( a );
7012 aSign = extractFloatx80Sign( a );
7013
7014 if ( aExp == 0x7FFF ) {
7015 if ( aSig<<1 ) {
7016 return propagateFloatx80NaN(a, a, status);
7017 }
7018 return a;
7019 }
7020
7021 if (aExp == 0) {
7022 if (aSig == 0) {
7023 return a;
7024 }
7025 aExp++;
7026 }
7027
7028 if (n > 0x10000) {
7029 n = 0x10000;
7030 } else if (n < -0x10000) {
7031 n = -0x10000;
7032 }
7033
7034 aExp += n;
7035 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7036 aSign, aExp, aSig, 0, status);
7037}
7038
7039float128 float128_scalbn(float128 a, int n, float_status *status)
7040{
7041 flag aSign;
7042 int32_t aExp;
7043 uint64_t aSig0, aSig1;
7044
7045 aSig1 = extractFloat128Frac1( a );
7046 aSig0 = extractFloat128Frac0( a );
7047 aExp = extractFloat128Exp( a );
7048 aSign = extractFloat128Sign( a );
7049 if ( aExp == 0x7FFF ) {
7050 if ( aSig0 | aSig1 ) {
7051 return propagateFloat128NaN(a, a, status);
7052 }
7053 return a;
7054 }
7055 if (aExp != 0) {
7056 aSig0 |= LIT64( 0x0001000000000000 );
7057 } else if (aSig0 == 0 && aSig1 == 0) {
7058 return a;
7059 } else {
7060 aExp++;
7061 }
7062
7063 if (n > 0x10000) {
7064 n = 0x10000;
7065 } else if (n < -0x10000) {
7066 n = -0x10000;
7067 }
7068
7069 aExp += n - 1;
7070 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
7071 , status);
7072
7073}
7074