1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85#include "config.h"
86
87#include "fpu/softfloat.h"
88
89
90#include <stdlib.h>
91
92
93
94
95
96
97#include "softfloat-macros.h"
98
99
100
101
102
103
104
105
106
107#include "softfloat-specialize.h"
108
109
110
111
112
113static inline uint32_t extractFloat16Frac(float16 a)
114{
115 return float16_val(a) & 0x3ff;
116}
117
118
119
120
121
122static inline int_fast16_t extractFloat16Exp(float16 a)
123{
124 return (float16_val(a) >> 10) & 0x1f;
125}
126
127
128
129
130
131static inline flag extractFloat16Sign(float16 a)
132{
133 return float16_val(a)>>15;
134}
135
136
137
138
139
140
141
142
143
144
145
146
147static int32 roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
148{
149 int8 roundingMode;
150 flag roundNearestEven;
151 int8 roundIncrement, roundBits;
152 int32_t z;
153
154 roundingMode = status->float_rounding_mode;
155 roundNearestEven = ( roundingMode == float_round_nearest_even );
156 switch (roundingMode) {
157 case float_round_nearest_even:
158 case float_round_ties_away:
159 roundIncrement = 0x40;
160 break;
161 case float_round_to_zero:
162 roundIncrement = 0;
163 break;
164 case float_round_up:
165 roundIncrement = zSign ? 0 : 0x7f;
166 break;
167 case float_round_down:
168 roundIncrement = zSign ? 0x7f : 0;
169 break;
170 default:
171 abort();
172 }
173 roundBits = absZ & 0x7F;
174 absZ = ( absZ + roundIncrement )>>7;
175 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
176 z = absZ;
177 if ( zSign ) z = - z;
178 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
179 float_raise(float_flag_invalid, status);
180 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
181 }
182 if (roundBits) {
183 status->float_exception_flags |= float_flag_inexact;
184 }
185 return z;
186
187}
188
189
190
191
192
193
194
195
196
197
198
199
200
201static int64 roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
202 float_status *status)
203{
204 int8 roundingMode;
205 flag roundNearestEven, increment;
206 int64_t z;
207
208 roundingMode = status->float_rounding_mode;
209 roundNearestEven = ( roundingMode == float_round_nearest_even );
210 switch (roundingMode) {
211 case float_round_nearest_even:
212 case float_round_ties_away:
213 increment = ((int64_t) absZ1 < 0);
214 break;
215 case float_round_to_zero:
216 increment = 0;
217 break;
218 case float_round_up:
219 increment = !zSign && absZ1;
220 break;
221 case float_round_down:
222 increment = zSign && absZ1;
223 break;
224 default:
225 abort();
226 }
227 if ( increment ) {
228 ++absZ0;
229 if ( absZ0 == 0 ) goto overflow;
230 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
231 }
232 z = absZ0;
233 if ( zSign ) z = - z;
234 if ( z && ( ( z < 0 ) ^ zSign ) ) {
235 overflow:
236 float_raise(float_flag_invalid, status);
237 return
238 zSign ? (int64_t) LIT64( 0x8000000000000000 )
239 : LIT64( 0x7FFFFFFFFFFFFFFF );
240 }
241 if (absZ1) {
242 status->float_exception_flags |= float_flag_inexact;
243 }
244 return z;
245
246}
247
248
249
250
251
252
253
254
255
256
257
258static int64 roundAndPackUint64(flag zSign, uint64_t absZ0,
259 uint64_t absZ1, float_status *status)
260{
261 int8 roundingMode;
262 flag roundNearestEven, increment;
263
264 roundingMode = status->float_rounding_mode;
265 roundNearestEven = (roundingMode == float_round_nearest_even);
266 switch (roundingMode) {
267 case float_round_nearest_even:
268 case float_round_ties_away:
269 increment = ((int64_t)absZ1 < 0);
270 break;
271 case float_round_to_zero:
272 increment = 0;
273 break;
274 case float_round_up:
275 increment = !zSign && absZ1;
276 break;
277 case float_round_down:
278 increment = zSign && absZ1;
279 break;
280 default:
281 abort();
282 }
283 if (increment) {
284 ++absZ0;
285 if (absZ0 == 0) {
286 float_raise(float_flag_invalid, status);
287 return LIT64(0xFFFFFFFFFFFFFFFF);
288 }
289 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
290 }
291
292 if (zSign && absZ0) {
293 float_raise(float_flag_invalid, status);
294 return 0;
295 }
296
297 if (absZ1) {
298 status->float_exception_flags |= float_flag_inexact;
299 }
300 return absZ0;
301}
302
303
304
305
306
307static inline uint32_t extractFloat32Frac( float32 a )
308{
309
310 return float32_val(a) & 0x007FFFFF;
311
312}
313
314
315
316
317
318static inline int_fast16_t extractFloat32Exp(float32 a)
319{
320
321 return ( float32_val(a)>>23 ) & 0xFF;
322
323}
324
325
326
327
328
329static inline flag extractFloat32Sign( float32 a )
330{
331
332 return float32_val(a)>>31;
333
334}
335
336
337
338
339
340float32 float32_squash_input_denormal(float32 a, float_status *status)
341{
342 if (status->flush_inputs_to_zero) {
343 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
344 float_raise(float_flag_input_denormal, status);
345 return make_float32(float32_val(a) & 0x80000000);
346 }
347 }
348 return a;
349}
350
351
352
353
354
355
356
357
358static void
359 normalizeFloat32Subnormal(uint32_t aSig, int_fast16_t *zExpPtr, uint32_t *zSigPtr)
360{
361 int8 shiftCount;
362
363 shiftCount = countLeadingZeros32( aSig ) - 8;
364 *zSigPtr = aSig<<shiftCount;
365 *zExpPtr = 1 - shiftCount;
366
367}
368
369
370
371
372
373
374
375
376
377
378
379
380static inline float32 packFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig)
381{
382
383 return make_float32(
384 ( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
385
386}
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410static float32 roundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig,
411 float_status *status)
412{
413 int8 roundingMode;
414 flag roundNearestEven;
415 int8 roundIncrement, roundBits;
416 flag isTiny;
417
418 roundingMode = status->float_rounding_mode;
419 roundNearestEven = ( roundingMode == float_round_nearest_even );
420 switch (roundingMode) {
421 case float_round_nearest_even:
422 case float_round_ties_away:
423 roundIncrement = 0x40;
424 break;
425 case float_round_to_zero:
426 roundIncrement = 0;
427 break;
428 case float_round_up:
429 roundIncrement = zSign ? 0 : 0x7f;
430 break;
431 case float_round_down:
432 roundIncrement = zSign ? 0x7f : 0;
433 break;
434 default:
435 abort();
436 break;
437 }
438 roundBits = zSig & 0x7F;
439 if ( 0xFD <= (uint16_t) zExp ) {
440 if ( ( 0xFD < zExp )
441 || ( ( zExp == 0xFD )
442 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
443 ) {
444 float_raise(float_flag_overflow | float_flag_inexact, status);
445 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
446 }
447 if ( zExp < 0 ) {
448 if (status->flush_to_zero) {
449 float_raise(float_flag_output_denormal, status);
450 return packFloat32(zSign, 0, 0);
451 }
452 isTiny =
453 (status->float_detect_tininess
454 == float_tininess_before_rounding)
455 || ( zExp < -1 )
456 || ( zSig + roundIncrement < 0x80000000 );
457 shift32RightJamming( zSig, - zExp, &zSig );
458 zExp = 0;
459 roundBits = zSig & 0x7F;
460 if (isTiny && roundBits) {
461 float_raise(float_flag_underflow, status);
462 }
463 }
464 }
465 if (roundBits) {
466 status->float_exception_flags |= float_flag_inexact;
467 }
468 zSig = ( zSig + roundIncrement )>>7;
469 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
470 if ( zSig == 0 ) zExp = 0;
471 return packFloat32( zSign, zExp, zSig );
472
473}
474
475
476
477
478
479
480
481
482
483
484static float32
485 normalizeRoundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig,
486 float_status *status)
487{
488 int8 shiftCount;
489
490 shiftCount = countLeadingZeros32( zSig ) - 1;
491 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
492 status);
493
494}
495
496
497
498
499
500static inline uint64_t extractFloat64Frac( float64 a )
501{
502
503 return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
504
505}
506
507
508
509
510
511static inline int_fast16_t extractFloat64Exp(float64 a)
512{
513
514 return ( float64_val(a)>>52 ) & 0x7FF;
515
516}
517
518
519
520
521
522static inline flag extractFloat64Sign( float64 a )
523{
524
525 return float64_val(a)>>63;
526
527}
528
529
530
531
532
533float64 float64_squash_input_denormal(float64 a, float_status *status)
534{
535 if (status->flush_inputs_to_zero) {
536 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
537 float_raise(float_flag_input_denormal, status);
538 return make_float64(float64_val(a) & (1ULL << 63));
539 }
540 }
541 return a;
542}
543
544
545
546
547
548
549
550
551static void
552 normalizeFloat64Subnormal(uint64_t aSig, int_fast16_t *zExpPtr, uint64_t *zSigPtr)
553{
554 int8 shiftCount;
555
556 shiftCount = countLeadingZeros64( aSig ) - 11;
557 *zSigPtr = aSig<<shiftCount;
558 *zExpPtr = 1 - shiftCount;
559
560}
561
562
563
564
565
566
567
568
569
570
571
572
573static inline float64 packFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig)
574{
575
576 return make_float64(
577 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
578
579}
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603static float64 roundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig,
604 float_status *status)
605{
606 int8 roundingMode;
607 flag roundNearestEven;
608 int_fast16_t roundIncrement, roundBits;
609 flag isTiny;
610
611 roundingMode = status->float_rounding_mode;
612 roundNearestEven = ( roundingMode == float_round_nearest_even );
613 switch (roundingMode) {
614 case float_round_nearest_even:
615 case float_round_ties_away:
616 roundIncrement = 0x200;
617 break;
618 case float_round_to_zero:
619 roundIncrement = 0;
620 break;
621 case float_round_up:
622 roundIncrement = zSign ? 0 : 0x3ff;
623 break;
624 case float_round_down:
625 roundIncrement = zSign ? 0x3ff : 0;
626 break;
627 default:
628 abort();
629 }
630 roundBits = zSig & 0x3FF;
631 if ( 0x7FD <= (uint16_t) zExp ) {
632 if ( ( 0x7FD < zExp )
633 || ( ( zExp == 0x7FD )
634 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
635 ) {
636 float_raise(float_flag_overflow | float_flag_inexact, status);
637 return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
638 }
639 if ( zExp < 0 ) {
640 if (status->flush_to_zero) {
641 float_raise(float_flag_output_denormal, status);
642 return packFloat64(zSign, 0, 0);
643 }
644 isTiny =
645 (status->float_detect_tininess
646 == float_tininess_before_rounding)
647 || ( zExp < -1 )
648 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
649 shift64RightJamming( zSig, - zExp, &zSig );
650 zExp = 0;
651 roundBits = zSig & 0x3FF;
652 if (isTiny && roundBits) {
653 float_raise(float_flag_underflow, status);
654 }
655 }
656 }
657 if (roundBits) {
658 status->float_exception_flags |= float_flag_inexact;
659 }
660 zSig = ( zSig + roundIncrement )>>10;
661 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
662 if ( zSig == 0 ) zExp = 0;
663 return packFloat64( zSign, zExp, zSig );
664
665}
666
667
668
669
670
671
672
673
674
675
676static float64
677 normalizeRoundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig,
678 float_status *status)
679{
680 int8 shiftCount;
681
682 shiftCount = countLeadingZeros64( zSig ) - 1;
683 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
684 status);
685
686}
687
688
689
690
691
692
693static inline uint64_t extractFloatx80Frac( floatx80 a )
694{
695
696 return a.low;
697
698}
699
700
701
702
703
704
705static inline int32 extractFloatx80Exp( floatx80 a )
706{
707
708 return a.high & 0x7FFF;
709
710}
711
712
713
714
715
716
717static inline flag extractFloatx80Sign( floatx80 a )
718{
719
720 return a.high>>15;
721
722}
723
724
725
726
727
728
729
730
731static void
732 normalizeFloatx80Subnormal( uint64_t aSig, int32 *zExpPtr, uint64_t *zSigPtr )
733{
734 int8 shiftCount;
735
736 shiftCount = countLeadingZeros64( aSig );
737 *zSigPtr = aSig<<shiftCount;
738 *zExpPtr = 1 - shiftCount;
739
740}
741
742
743
744
745
746
747static inline floatx80 packFloatx80( flag zSign, int32 zExp, uint64_t zSig )
748{
749 floatx80 z;
750
751 z.low = zSig;
752 z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
753 return z;
754
755}
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781static floatx80 roundAndPackFloatx80(int8 roundingPrecision, flag zSign,
782 int32 zExp, uint64_t zSig0, uint64_t zSig1,
783 float_status *status)
784{
785 int8 roundingMode;
786 flag roundNearestEven, increment, isTiny;
787 int64 roundIncrement, roundMask, roundBits;
788
789 roundingMode = status->float_rounding_mode;
790 roundNearestEven = ( roundingMode == float_round_nearest_even );
791 if ( roundingPrecision == 80 ) goto precision80;
792 if ( roundingPrecision == 64 ) {
793 roundIncrement = LIT64( 0x0000000000000400 );
794 roundMask = LIT64( 0x00000000000007FF );
795 }
796 else if ( roundingPrecision == 32 ) {
797 roundIncrement = LIT64( 0x0000008000000000 );
798 roundMask = LIT64( 0x000000FFFFFFFFFF );
799 }
800 else {
801 goto precision80;
802 }
803 zSig0 |= ( zSig1 != 0 );
804 switch (roundingMode) {
805 case float_round_nearest_even:
806 case float_round_ties_away:
807 break;
808 case float_round_to_zero:
809 roundIncrement = 0;
810 break;
811 case float_round_up:
812 roundIncrement = zSign ? 0 : roundMask;
813 break;
814 case float_round_down:
815 roundIncrement = zSign ? roundMask : 0;
816 break;
817 default:
818 abort();
819 }
820 roundBits = zSig0 & roundMask;
821 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
822 if ( ( 0x7FFE < zExp )
823 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
824 ) {
825 goto overflow;
826 }
827 if ( zExp <= 0 ) {
828 if (status->flush_to_zero) {
829 float_raise(float_flag_output_denormal, status);
830 return packFloatx80(zSign, 0, 0);
831 }
832 isTiny =
833 (status->float_detect_tininess
834 == float_tininess_before_rounding)
835 || ( zExp < 0 )
836 || ( zSig0 <= zSig0 + roundIncrement );
837 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
838 zExp = 0;
839 roundBits = zSig0 & roundMask;
840 if (isTiny && roundBits) {
841 float_raise(float_flag_underflow, status);
842 }
843 if (roundBits) {
844 status->float_exception_flags |= float_flag_inexact;
845 }
846 zSig0 += roundIncrement;
847 if ( (int64_t) zSig0 < 0 ) zExp = 1;
848 roundIncrement = roundMask + 1;
849 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
850 roundMask |= roundIncrement;
851 }
852 zSig0 &= ~ roundMask;
853 return packFloatx80( zSign, zExp, zSig0 );
854 }
855 }
856 if (roundBits) {
857 status->float_exception_flags |= float_flag_inexact;
858 }
859 zSig0 += roundIncrement;
860 if ( zSig0 < roundIncrement ) {
861 ++zExp;
862 zSig0 = LIT64( 0x8000000000000000 );
863 }
864 roundIncrement = roundMask + 1;
865 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
866 roundMask |= roundIncrement;
867 }
868 zSig0 &= ~ roundMask;
869 if ( zSig0 == 0 ) zExp = 0;
870 return packFloatx80( zSign, zExp, zSig0 );
871 precision80:
872 switch (roundingMode) {
873 case float_round_nearest_even:
874 case float_round_ties_away:
875 increment = ((int64_t)zSig1 < 0);
876 break;
877 case float_round_to_zero:
878 increment = 0;
879 break;
880 case float_round_up:
881 increment = !zSign && zSig1;
882 break;
883 case float_round_down:
884 increment = zSign && zSig1;
885 break;
886 default:
887 abort();
888 }
889 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
890 if ( ( 0x7FFE < zExp )
891 || ( ( zExp == 0x7FFE )
892 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
893 && increment
894 )
895 ) {
896 roundMask = 0;
897 overflow:
898 float_raise(float_flag_overflow | float_flag_inexact, status);
899 if ( ( roundingMode == float_round_to_zero )
900 || ( zSign && ( roundingMode == float_round_up ) )
901 || ( ! zSign && ( roundingMode == float_round_down ) )
902 ) {
903 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
904 }
905 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
906 }
907 if ( zExp <= 0 ) {
908 isTiny =
909 (status->float_detect_tininess
910 == float_tininess_before_rounding)
911 || ( zExp < 0 )
912 || ! increment
913 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
914 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
915 zExp = 0;
916 if (isTiny && zSig1) {
917 float_raise(float_flag_underflow, status);
918 }
919 if (zSig1) {
920 status->float_exception_flags |= float_flag_inexact;
921 }
922 switch (roundingMode) {
923 case float_round_nearest_even:
924 case float_round_ties_away:
925 increment = ((int64_t)zSig1 < 0);
926 break;
927 case float_round_to_zero:
928 increment = 0;
929 break;
930 case float_round_up:
931 increment = !zSign && zSig1;
932 break;
933 case float_round_down:
934 increment = zSign && zSig1;
935 break;
936 default:
937 abort();
938 }
939 if ( increment ) {
940 ++zSig0;
941 zSig0 &=
942 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
943 if ( (int64_t) zSig0 < 0 ) zExp = 1;
944 }
945 return packFloatx80( zSign, zExp, zSig0 );
946 }
947 }
948 if (zSig1) {
949 status->float_exception_flags |= float_flag_inexact;
950 }
951 if ( increment ) {
952 ++zSig0;
953 if ( zSig0 == 0 ) {
954 ++zExp;
955 zSig0 = LIT64( 0x8000000000000000 );
956 }
957 else {
958 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
959 }
960 }
961 else {
962 if ( zSig0 == 0 ) zExp = 0;
963 }
964 return packFloatx80( zSign, zExp, zSig0 );
965
966}
967
968
969
970
971
972
973
974
975
976
977static floatx80 normalizeRoundAndPackFloatx80(int8 roundingPrecision,
978 flag zSign, int32 zExp,
979 uint64_t zSig0, uint64_t zSig1,
980 float_status *status)
981{
982 int8 shiftCount;
983
984 if ( zSig0 == 0 ) {
985 zSig0 = zSig1;
986 zSig1 = 0;
987 zExp -= 64;
988 }
989 shiftCount = countLeadingZeros64( zSig0 );
990 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
991 zExp -= shiftCount;
992 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
993 zSig0, zSig1, status);
994
995}
996
997
998
999
1000
1001
1002static inline uint64_t extractFloat128Frac1( float128 a )
1003{
1004
1005 return a.low;
1006
1007}
1008
1009
1010
1011
1012
1013
1014static inline uint64_t extractFloat128Frac0( float128 a )
1015{
1016
1017 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
1018
1019}
1020
1021
1022
1023
1024
1025
1026static inline int32 extractFloat128Exp( float128 a )
1027{
1028
1029 return ( a.high>>48 ) & 0x7FFF;
1030
1031}
1032
1033
1034
1035
1036
1037static inline flag extractFloat128Sign( float128 a )
1038{
1039
1040 return a.high>>63;
1041
1042}
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054static void
1055 normalizeFloat128Subnormal(
1056 uint64_t aSig0,
1057 uint64_t aSig1,
1058 int32 *zExpPtr,
1059 uint64_t *zSig0Ptr,
1060 uint64_t *zSig1Ptr
1061 )
1062{
1063 int8 shiftCount;
1064
1065 if ( aSig0 == 0 ) {
1066 shiftCount = countLeadingZeros64( aSig1 ) - 15;
1067 if ( shiftCount < 0 ) {
1068 *zSig0Ptr = aSig1>>( - shiftCount );
1069 *zSig1Ptr = aSig1<<( shiftCount & 63 );
1070 }
1071 else {
1072 *zSig0Ptr = aSig1<<shiftCount;
1073 *zSig1Ptr = 0;
1074 }
1075 *zExpPtr = - shiftCount - 63;
1076 }
1077 else {
1078 shiftCount = countLeadingZeros64( aSig0 ) - 15;
1079 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
1080 *zExpPtr = 1 - shiftCount;
1081 }
1082
1083}
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098static inline float128
1099 packFloat128( flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 )
1100{
1101 float128 z;
1102
1103 z.low = zSig1;
1104 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
1105 return z;
1106
1107}
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130static float128 roundAndPackFloat128(flag zSign, int32 zExp,
1131 uint64_t zSig0, uint64_t zSig1,
1132 uint64_t zSig2, float_status *status)
1133{
1134 int8 roundingMode;
1135 flag roundNearestEven, increment, isTiny;
1136
1137 roundingMode = status->float_rounding_mode;
1138 roundNearestEven = ( roundingMode == float_round_nearest_even );
1139 switch (roundingMode) {
1140 case float_round_nearest_even:
1141 case float_round_ties_away:
1142 increment = ((int64_t)zSig2 < 0);
1143 break;
1144 case float_round_to_zero:
1145 increment = 0;
1146 break;
1147 case float_round_up:
1148 increment = !zSign && zSig2;
1149 break;
1150 case float_round_down:
1151 increment = zSign && zSig2;
1152 break;
1153 default:
1154 abort();
1155 }
1156 if ( 0x7FFD <= (uint32_t) zExp ) {
1157 if ( ( 0x7FFD < zExp )
1158 || ( ( zExp == 0x7FFD )
1159 && eq128(
1160 LIT64( 0x0001FFFFFFFFFFFF ),
1161 LIT64( 0xFFFFFFFFFFFFFFFF ),
1162 zSig0,
1163 zSig1
1164 )
1165 && increment
1166 )
1167 ) {
1168 float_raise(float_flag_overflow | float_flag_inexact, status);
1169 if ( ( roundingMode == float_round_to_zero )
1170 || ( zSign && ( roundingMode == float_round_up ) )
1171 || ( ! zSign && ( roundingMode == float_round_down ) )
1172 ) {
1173 return
1174 packFloat128(
1175 zSign,
1176 0x7FFE,
1177 LIT64( 0x0000FFFFFFFFFFFF ),
1178 LIT64( 0xFFFFFFFFFFFFFFFF )
1179 );
1180 }
1181 return packFloat128( zSign, 0x7FFF, 0, 0 );
1182 }
1183 if ( zExp < 0 ) {
1184 if (status->flush_to_zero) {
1185 float_raise(float_flag_output_denormal, status);
1186 return packFloat128(zSign, 0, 0, 0);
1187 }
1188 isTiny =
1189 (status->float_detect_tininess
1190 == float_tininess_before_rounding)
1191 || ( zExp < -1 )
1192 || ! increment
1193 || lt128(
1194 zSig0,
1195 zSig1,
1196 LIT64( 0x0001FFFFFFFFFFFF ),
1197 LIT64( 0xFFFFFFFFFFFFFFFF )
1198 );
1199 shift128ExtraRightJamming(
1200 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1201 zExp = 0;
1202 if (isTiny && zSig2) {
1203 float_raise(float_flag_underflow, status);
1204 }
1205 switch (roundingMode) {
1206 case float_round_nearest_even:
1207 case float_round_ties_away:
1208 increment = ((int64_t)zSig2 < 0);
1209 break;
1210 case float_round_to_zero:
1211 increment = 0;
1212 break;
1213 case float_round_up:
1214 increment = !zSign && zSig2;
1215 break;
1216 case float_round_down:
1217 increment = zSign && zSig2;
1218 break;
1219 default:
1220 abort();
1221 }
1222 }
1223 }
1224 if (zSig2) {
1225 status->float_exception_flags |= float_flag_inexact;
1226 }
1227 if ( increment ) {
1228 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1229 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1230 }
1231 else {
1232 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1233 }
1234 return packFloat128( zSign, zExp, zSig0, zSig1 );
1235
1236}
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248static float128 normalizeRoundAndPackFloat128(flag zSign, int32 zExp,
1249 uint64_t zSig0, uint64_t zSig1,
1250 float_status *status)
1251{
1252 int8 shiftCount;
1253 uint64_t zSig2;
1254
1255 if ( zSig0 == 0 ) {
1256 zSig0 = zSig1;
1257 zSig1 = 0;
1258 zExp -= 64;
1259 }
1260 shiftCount = countLeadingZeros64( zSig0 ) - 15;
1261 if ( 0 <= shiftCount ) {
1262 zSig2 = 0;
1263 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1264 }
1265 else {
1266 shift128ExtraRightJamming(
1267 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1268 }
1269 zExp -= shiftCount;
1270 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
1271
1272}
1273
1274
1275
1276
1277
1278
1279
1280float32 int32_to_float32(int32_t a, float_status *status)
1281{
1282 flag zSign;
1283
1284 if ( a == 0 ) return float32_zero;
1285 if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1286 zSign = ( a < 0 );
1287 return normalizeRoundAndPackFloat32(zSign, 0x9C, zSign ? -a : a, status);
1288}
1289
1290
1291
1292
1293
1294
1295
1296float64 int32_to_float64(int32_t a, float_status *status)
1297{
1298 flag zSign;
1299 uint32 absA;
1300 int8 shiftCount;
1301 uint64_t zSig;
1302
1303 if ( a == 0 ) return float64_zero;
1304 zSign = ( a < 0 );
1305 absA = zSign ? - a : a;
1306 shiftCount = countLeadingZeros32( absA ) + 21;
1307 zSig = absA;
1308 return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1309
1310}
1311
1312
1313
1314
1315
1316
1317
1318
1319floatx80 int32_to_floatx80(int32_t a, float_status *status)
1320{
1321 flag zSign;
1322 uint32 absA;
1323 int8 shiftCount;
1324 uint64_t zSig;
1325
1326 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1327 zSign = ( a < 0 );
1328 absA = zSign ? - a : a;
1329 shiftCount = countLeadingZeros32( absA ) + 32;
1330 zSig = absA;
1331 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1332
1333}
1334
1335
1336
1337
1338
1339
1340
1341float128 int32_to_float128(int32_t a, float_status *status)
1342{
1343 flag zSign;
1344 uint32 absA;
1345 int8 shiftCount;
1346 uint64_t zSig0;
1347
1348 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1349 zSign = ( a < 0 );
1350 absA = zSign ? - a : a;
1351 shiftCount = countLeadingZeros32( absA ) + 17;
1352 zSig0 = absA;
1353 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1354
1355}
1356
1357
1358
1359
1360
1361
1362
1363float32 int64_to_float32(int64_t a, float_status *status)
1364{
1365 flag zSign;
1366 uint64 absA;
1367 int8 shiftCount;
1368
1369 if ( a == 0 ) return float32_zero;
1370 zSign = ( a < 0 );
1371 absA = zSign ? - a : a;
1372 shiftCount = countLeadingZeros64( absA ) - 40;
1373 if ( 0 <= shiftCount ) {
1374 return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1375 }
1376 else {
1377 shiftCount += 7;
1378 if ( shiftCount < 0 ) {
1379 shift64RightJamming( absA, - shiftCount, &absA );
1380 }
1381 else {
1382 absA <<= shiftCount;
1383 }
1384 return roundAndPackFloat32(zSign, 0x9C - shiftCount, absA, status);
1385 }
1386
1387}
1388
1389
1390
1391
1392
1393
1394
1395float64 int64_to_float64(int64_t a, float_status *status)
1396{
1397 flag zSign;
1398
1399 if ( a == 0 ) return float64_zero;
1400 if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
1401 return packFloat64( 1, 0x43E, 0 );
1402 }
1403 zSign = ( a < 0 );
1404 return normalizeRoundAndPackFloat64(zSign, 0x43C, zSign ? -a : a, status);
1405}
1406
1407
1408
1409
1410
1411
1412
1413
1414floatx80 int64_to_floatx80(int64_t a, float_status *status)
1415{
1416 flag zSign;
1417 uint64 absA;
1418 int8 shiftCount;
1419
1420 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1421 zSign = ( a < 0 );
1422 absA = zSign ? - a : a;
1423 shiftCount = countLeadingZeros64( absA );
1424 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1425
1426}
1427
1428
1429
1430
1431
1432
1433
1434float128 int64_to_float128(int64_t a, float_status *status)
1435{
1436 flag zSign;
1437 uint64 absA;
1438 int8 shiftCount;
1439 int32 zExp;
1440 uint64_t zSig0, zSig1;
1441
1442 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1443 zSign = ( a < 0 );
1444 absA = zSign ? - a : a;
1445 shiftCount = countLeadingZeros64( absA ) + 49;
1446 zExp = 0x406E - shiftCount;
1447 if ( 64 <= shiftCount ) {
1448 zSig1 = 0;
1449 zSig0 = absA;
1450 shiftCount -= 64;
1451 }
1452 else {
1453 zSig1 = absA;
1454 zSig0 = 0;
1455 }
1456 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1457 return packFloat128( zSign, zExp, zSig0, zSig1 );
1458
1459}
1460
1461
1462
1463
1464
1465
1466
1467float32 uint64_to_float32(uint64_t a, float_status *status)
1468{
1469 int shiftcount;
1470
1471 if (a == 0) {
1472 return float32_zero;
1473 }
1474
1475
1476
1477
1478
1479 shiftcount = countLeadingZeros64(a) - 40;
1480 if (shiftcount >= 0) {
1481 return packFloat32(0, 0x95 - shiftcount, a << shiftcount);
1482 }
1483
1484
1485
1486 shiftcount += 7;
1487 if (shiftcount < 0) {
1488 shift64RightJamming(a, -shiftcount, &a);
1489 } else {
1490 a <<= shiftcount;
1491 }
1492
1493 return roundAndPackFloat32(0, 0x9c - shiftcount, a, status);
1494}
1495
1496
1497
1498
1499
1500
1501
1502float64 uint64_to_float64(uint64_t a, float_status *status)
1503{
1504 int exp = 0x43C;
1505 int shiftcount;
1506
1507 if (a == 0) {
1508 return float64_zero;
1509 }
1510
1511 shiftcount = countLeadingZeros64(a) - 1;
1512 if (shiftcount < 0) {
1513 shift64RightJamming(a, -shiftcount, &a);
1514 } else {
1515 a <<= shiftcount;
1516 }
1517 return roundAndPackFloat64(0, exp - shiftcount, a, status);
1518}
1519
1520
1521
1522
1523
1524
1525
1526float128 uint64_to_float128(uint64_t a, float_status *status)
1527{
1528 if (a == 0) {
1529 return float128_zero;
1530 }
1531 return normalizeRoundAndPackFloat128(0, 0x406E, a, 0, status);
1532}
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544int32 float32_to_int32(float32 a, float_status *status)
1545{
1546 flag aSign;
1547 int_fast16_t aExp, shiftCount;
1548 uint32_t aSig;
1549 uint64_t aSig64;
1550
1551 a = float32_squash_input_denormal(a, status);
1552 aSig = extractFloat32Frac( a );
1553 aExp = extractFloat32Exp( a );
1554 aSign = extractFloat32Sign( a );
1555 if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1556 if ( aExp ) aSig |= 0x00800000;
1557 shiftCount = 0xAF - aExp;
1558 aSig64 = aSig;
1559 aSig64 <<= 32;
1560 if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1561 return roundAndPackInt32(aSign, aSig64, status);
1562
1563}
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575int32 float32_to_int32_round_to_zero(float32 a, float_status *status)
1576{
1577 flag aSign;
1578 int_fast16_t aExp, shiftCount;
1579 uint32_t aSig;
1580 int32_t z;
1581 a = float32_squash_input_denormal(a, status);
1582
1583 aSig = extractFloat32Frac( a );
1584 aExp = extractFloat32Exp( a );
1585 aSign = extractFloat32Sign( a );
1586 shiftCount = aExp - 0x9E;
1587 if ( 0 <= shiftCount ) {
1588 if ( float32_val(a) != 0xCF000000 ) {
1589 float_raise(float_flag_invalid, status);
1590 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1591 }
1592 return (int32_t) 0x80000000;
1593 }
1594 else if ( aExp <= 0x7E ) {
1595 if (aExp | aSig) {
1596 status->float_exception_flags |= float_flag_inexact;
1597 }
1598 return 0;
1599 }
1600 aSig = ( aSig | 0x00800000 )<<8;
1601 z = aSig>>( - shiftCount );
1602 if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
1603 status->float_exception_flags |= float_flag_inexact;
1604 }
1605 if ( aSign ) z = - z;
1606 return z;
1607
1608}
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620int_fast16_t float32_to_int16_round_to_zero(float32 a, float_status *status)
1621{
1622 flag aSign;
1623 int_fast16_t aExp, shiftCount;
1624 uint32_t aSig;
1625 int32 z;
1626
1627 aSig = extractFloat32Frac( a );
1628 aExp = extractFloat32Exp( a );
1629 aSign = extractFloat32Sign( a );
1630 shiftCount = aExp - 0x8E;
1631 if ( 0 <= shiftCount ) {
1632 if ( float32_val(a) != 0xC7000000 ) {
1633 float_raise(float_flag_invalid, status);
1634 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1635 return 0x7FFF;
1636 }
1637 }
1638 return (int32_t) 0xffff8000;
1639 }
1640 else if ( aExp <= 0x7E ) {
1641 if ( aExp | aSig ) {
1642 status->float_exception_flags |= float_flag_inexact;
1643 }
1644 return 0;
1645 }
1646 shiftCount -= 0x10;
1647 aSig = ( aSig | 0x00800000 )<<8;
1648 z = aSig>>( - shiftCount );
1649 if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
1650 status->float_exception_flags |= float_flag_inexact;
1651 }
1652 if ( aSign ) {
1653 z = - z;
1654 }
1655 return z;
1656
1657}
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669int64 float32_to_int64(float32 a, float_status *status)
1670{
1671 flag aSign;
1672 int_fast16_t aExp, shiftCount;
1673 uint32_t aSig;
1674 uint64_t aSig64, aSigExtra;
1675 a = float32_squash_input_denormal(a, status);
1676
1677 aSig = extractFloat32Frac( a );
1678 aExp = extractFloat32Exp( a );
1679 aSign = extractFloat32Sign( a );
1680 shiftCount = 0xBE - aExp;
1681 if ( shiftCount < 0 ) {
1682 float_raise(float_flag_invalid, status);
1683 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1684 return LIT64( 0x7FFFFFFFFFFFFFFF );
1685 }
1686 return (int64_t) LIT64( 0x8000000000000000 );
1687 }
1688 if ( aExp ) aSig |= 0x00800000;
1689 aSig64 = aSig;
1690 aSig64 <<= 40;
1691 shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1692 return roundAndPackInt64(aSign, aSig64, aSigExtra, status);
1693
1694}
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708uint64 float32_to_uint64(float32 a, float_status *status)
1709{
1710 flag aSign;
1711 int_fast16_t aExp, shiftCount;
1712 uint32_t aSig;
1713 uint64_t aSig64, aSigExtra;
1714 a = float32_squash_input_denormal(a, status);
1715
1716 aSig = extractFloat32Frac(a);
1717 aExp = extractFloat32Exp(a);
1718 aSign = extractFloat32Sign(a);
1719 if ((aSign) && (aExp > 126)) {
1720 float_raise(float_flag_invalid, status);
1721 if (float32_is_any_nan(a)) {
1722 return LIT64(0xFFFFFFFFFFFFFFFF);
1723 } else {
1724 return 0;
1725 }
1726 }
1727 shiftCount = 0xBE - aExp;
1728 if (aExp) {
1729 aSig |= 0x00800000;
1730 }
1731 if (shiftCount < 0) {
1732 float_raise(float_flag_invalid, status);
1733 return LIT64(0xFFFFFFFFFFFFFFFF);
1734 }
1735
1736 aSig64 = aSig;
1737 aSig64 <<= 40;
1738 shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra);
1739 return roundAndPackUint64(aSign, aSig64, aSigExtra, status);
1740}
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753uint64 float32_to_uint64_round_to_zero(float32 a, float_status *status)
1754{
1755 signed char current_rounding_mode = status->float_rounding_mode;
1756 set_float_rounding_mode(float_round_to_zero, status);
1757 int64_t v = float32_to_uint64(a, status);
1758 set_float_rounding_mode(current_rounding_mode, status);
1759 return v;
1760}
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772int64 float32_to_int64_round_to_zero(float32 a, float_status *status)
1773{
1774 flag aSign;
1775 int_fast16_t aExp, shiftCount;
1776 uint32_t aSig;
1777 uint64_t aSig64;
1778 int64 z;
1779 a = float32_squash_input_denormal(a, status);
1780
1781 aSig = extractFloat32Frac( a );
1782 aExp = extractFloat32Exp( a );
1783 aSign = extractFloat32Sign( a );
1784 shiftCount = aExp - 0xBE;
1785 if ( 0 <= shiftCount ) {
1786 if ( float32_val(a) != 0xDF000000 ) {
1787 float_raise(float_flag_invalid, status);
1788 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1789 return LIT64( 0x7FFFFFFFFFFFFFFF );
1790 }
1791 }
1792 return (int64_t) LIT64( 0x8000000000000000 );
1793 }
1794 else if ( aExp <= 0x7E ) {
1795 if (aExp | aSig) {
1796 status->float_exception_flags |= float_flag_inexact;
1797 }
1798 return 0;
1799 }
1800 aSig64 = aSig | 0x00800000;
1801 aSig64 <<= 40;
1802 z = aSig64>>( - shiftCount );
1803 if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) {
1804 status->float_exception_flags |= float_flag_inexact;
1805 }
1806 if ( aSign ) z = - z;
1807 return z;
1808
1809}
1810
1811
1812
1813
1814
1815
1816
1817
1818float64 float32_to_float64(float32 a, float_status *status)
1819{
1820 flag aSign;
1821 int_fast16_t aExp;
1822 uint32_t aSig;
1823 a = float32_squash_input_denormal(a, status);
1824
1825 aSig = extractFloat32Frac( a );
1826 aExp = extractFloat32Exp( a );
1827 aSign = extractFloat32Sign( a );
1828 if ( aExp == 0xFF ) {
1829 if (aSig) {
1830 return commonNaNToFloat64(float32ToCommonNaN(a, status), status);
1831 }
1832 return packFloat64( aSign, 0x7FF, 0 );
1833 }
1834 if ( aExp == 0 ) {
1835 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1836 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1837 --aExp;
1838 }
1839 return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
1840
1841}
1842
1843
1844
1845
1846
1847
1848
1849
1850floatx80 float32_to_floatx80(float32 a, float_status *status)
1851{
1852 flag aSign;
1853 int_fast16_t aExp;
1854 uint32_t aSig;
1855
1856 a = float32_squash_input_denormal(a, status);
1857 aSig = extractFloat32Frac( a );
1858 aExp = extractFloat32Exp( a );
1859 aSign = extractFloat32Sign( a );
1860 if ( aExp == 0xFF ) {
1861 if (aSig) {
1862 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
1863 }
1864 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1865 }
1866 if ( aExp == 0 ) {
1867 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1868 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1869 }
1870 aSig |= 0x00800000;
1871 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
1872
1873}
1874
1875
1876
1877
1878
1879
1880
1881
1882float128 float32_to_float128(float32 a, float_status *status)
1883{
1884 flag aSign;
1885 int_fast16_t aExp;
1886 uint32_t aSig;
1887
1888 a = float32_squash_input_denormal(a, status);
1889 aSig = extractFloat32Frac( a );
1890 aExp = extractFloat32Exp( a );
1891 aSign = extractFloat32Sign( a );
1892 if ( aExp == 0xFF ) {
1893 if (aSig) {
1894 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
1895 }
1896 return packFloat128( aSign, 0x7FFF, 0, 0 );
1897 }
1898 if ( aExp == 0 ) {
1899 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1900 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1901 --aExp;
1902 }
1903 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
1904
1905}
1906
1907
1908
1909
1910
1911
1912
1913
1914float32 float32_round_to_int(float32 a, float_status *status)
1915{
1916 flag aSign;
1917 int_fast16_t aExp;
1918 uint32_t lastBitMask, roundBitsMask;
1919 uint32_t z;
1920 a = float32_squash_input_denormal(a, status);
1921
1922 aExp = extractFloat32Exp( a );
1923 if ( 0x96 <= aExp ) {
1924 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1925 return propagateFloat32NaN(a, a, status);
1926 }
1927 return a;
1928 }
1929 if ( aExp <= 0x7E ) {
1930 if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a;
1931 status->float_exception_flags |= float_flag_inexact;
1932 aSign = extractFloat32Sign( a );
1933 switch (status->float_rounding_mode) {
1934 case float_round_nearest_even:
1935 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1936 return packFloat32( aSign, 0x7F, 0 );
1937 }
1938 break;
1939 case float_round_ties_away:
1940 if (aExp == 0x7E) {
1941 return packFloat32(aSign, 0x7F, 0);
1942 }
1943 break;
1944 case float_round_down:
1945 return make_float32(aSign ? 0xBF800000 : 0);
1946 case float_round_up:
1947 return make_float32(aSign ? 0x80000000 : 0x3F800000);
1948 }
1949 return packFloat32( aSign, 0, 0 );
1950 }
1951 lastBitMask = 1;
1952 lastBitMask <<= 0x96 - aExp;
1953 roundBitsMask = lastBitMask - 1;
1954 z = float32_val(a);
1955 switch (status->float_rounding_mode) {
1956 case float_round_nearest_even:
1957 z += lastBitMask>>1;
1958 if ((z & roundBitsMask) == 0) {
1959 z &= ~lastBitMask;
1960 }
1961 break;
1962 case float_round_ties_away:
1963 z += lastBitMask >> 1;
1964 break;
1965 case float_round_to_zero:
1966 break;
1967 case float_round_up:
1968 if (!extractFloat32Sign(make_float32(z))) {
1969 z += roundBitsMask;
1970 }
1971 break;
1972 case float_round_down:
1973 if (extractFloat32Sign(make_float32(z))) {
1974 z += roundBitsMask;
1975 }
1976 break;
1977 default:
1978 abort();
1979 }
1980 z &= ~ roundBitsMask;
1981 if (z != float32_val(a)) {
1982 status->float_exception_flags |= float_flag_inexact;
1983 }
1984 return make_float32(z);
1985
1986}
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996static float32 addFloat32Sigs(float32 a, float32 b, flag zSign,
1997 float_status *status)
1998{
1999 int_fast16_t aExp, bExp, zExp;
2000 uint32_t aSig, bSig, zSig;
2001 int_fast16_t expDiff;
2002
2003 aSig = extractFloat32Frac( a );
2004 aExp = extractFloat32Exp( a );
2005 bSig = extractFloat32Frac( b );
2006 bExp = extractFloat32Exp( b );
2007 expDiff = aExp - bExp;
2008 aSig <<= 6;
2009 bSig <<= 6;
2010 if ( 0 < expDiff ) {
2011 if ( aExp == 0xFF ) {
2012 if (aSig) {
2013 return propagateFloat32NaN(a, b, status);
2014 }
2015 return a;
2016 }
2017 if ( bExp == 0 ) {
2018 --expDiff;
2019 }
2020 else {
2021 bSig |= 0x20000000;
2022 }
2023 shift32RightJamming( bSig, expDiff, &bSig );
2024 zExp = aExp;
2025 }
2026 else if ( expDiff < 0 ) {
2027 if ( bExp == 0xFF ) {
2028 if (bSig) {
2029 return propagateFloat32NaN(a, b, status);
2030 }
2031 return packFloat32( zSign, 0xFF, 0 );
2032 }
2033 if ( aExp == 0 ) {
2034 ++expDiff;
2035 }
2036 else {
2037 aSig |= 0x20000000;
2038 }
2039 shift32RightJamming( aSig, - expDiff, &aSig );
2040 zExp = bExp;
2041 }
2042 else {
2043 if ( aExp == 0xFF ) {
2044 if (aSig | bSig) {
2045 return propagateFloat32NaN(a, b, status);
2046 }
2047 return a;
2048 }
2049 if ( aExp == 0 ) {
2050 if (status->flush_to_zero) {
2051 if (aSig | bSig) {
2052 float_raise(float_flag_output_denormal, status);
2053 }
2054 return packFloat32(zSign, 0, 0);
2055 }
2056 return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
2057 }
2058 zSig = 0x40000000 + aSig + bSig;
2059 zExp = aExp;
2060 goto roundAndPack;
2061 }
2062 aSig |= 0x20000000;
2063 zSig = ( aSig + bSig )<<1;
2064 --zExp;
2065 if ( (int32_t) zSig < 0 ) {
2066 zSig = aSig + bSig;
2067 ++zExp;
2068 }
2069 roundAndPack:
2070 return roundAndPackFloat32(zSign, zExp, zSig, status);
2071
2072}
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082static float32 subFloat32Sigs(float32 a, float32 b, flag zSign,
2083 float_status *status)
2084{
2085 int_fast16_t aExp, bExp, zExp;
2086 uint32_t aSig, bSig, zSig;
2087 int_fast16_t expDiff;
2088
2089 aSig = extractFloat32Frac( a );
2090 aExp = extractFloat32Exp( a );
2091 bSig = extractFloat32Frac( b );
2092 bExp = extractFloat32Exp( b );
2093 expDiff = aExp - bExp;
2094 aSig <<= 7;
2095 bSig <<= 7;
2096 if ( 0 < expDiff ) goto aExpBigger;
2097 if ( expDiff < 0 ) goto bExpBigger;
2098 if ( aExp == 0xFF ) {
2099 if (aSig | bSig) {
2100 return propagateFloat32NaN(a, b, status);
2101 }
2102 float_raise(float_flag_invalid, status);
2103 return float32_default_nan;
2104 }
2105 if ( aExp == 0 ) {
2106 aExp = 1;
2107 bExp = 1;
2108 }
2109 if ( bSig < aSig ) goto aBigger;
2110 if ( aSig < bSig ) goto bBigger;
2111 return packFloat32(status->float_rounding_mode == float_round_down, 0, 0);
2112 bExpBigger:
2113 if ( bExp == 0xFF ) {
2114 if (bSig) {
2115 return propagateFloat32NaN(a, b, status);
2116 }
2117 return packFloat32( zSign ^ 1, 0xFF, 0 );
2118 }
2119 if ( aExp == 0 ) {
2120 ++expDiff;
2121 }
2122 else {
2123 aSig |= 0x40000000;
2124 }
2125 shift32RightJamming( aSig, - expDiff, &aSig );
2126 bSig |= 0x40000000;
2127 bBigger:
2128 zSig = bSig - aSig;
2129 zExp = bExp;
2130 zSign ^= 1;
2131 goto normalizeRoundAndPack;
2132 aExpBigger:
2133 if ( aExp == 0xFF ) {
2134 if (aSig) {
2135 return propagateFloat32NaN(a, b, status);
2136 }
2137 return a;
2138 }
2139 if ( bExp == 0 ) {
2140 --expDiff;
2141 }
2142 else {
2143 bSig |= 0x40000000;
2144 }
2145 shift32RightJamming( bSig, expDiff, &bSig );
2146 aSig |= 0x40000000;
2147 aBigger:
2148 zSig = aSig - bSig;
2149 zExp = aExp;
2150 normalizeRoundAndPack:
2151 --zExp;
2152 return normalizeRoundAndPackFloat32(zSign, zExp, zSig, status);
2153
2154}
2155
2156
2157
2158
2159
2160
2161
2162float32 float32_add(float32 a, float32 b, float_status *status)
2163{
2164 flag aSign, bSign;
2165 a = float32_squash_input_denormal(a, status);
2166 b = float32_squash_input_denormal(b, status);
2167
2168 aSign = extractFloat32Sign( a );
2169 bSign = extractFloat32Sign( b );
2170 if ( aSign == bSign ) {
2171 return addFloat32Sigs(a, b, aSign, status);
2172 }
2173 else {
2174 return subFloat32Sigs(a, b, aSign, status);
2175 }
2176
2177}
2178
2179
2180
2181
2182
2183
2184
2185float32 float32_sub(float32 a, float32 b, float_status *status)
2186{
2187 flag aSign, bSign;
2188 a = float32_squash_input_denormal(a, status);
2189 b = float32_squash_input_denormal(b, status);
2190
2191 aSign = extractFloat32Sign( a );
2192 bSign = extractFloat32Sign( b );
2193 if ( aSign == bSign ) {
2194 return subFloat32Sigs(a, b, aSign, status);
2195 }
2196 else {
2197 return addFloat32Sigs(a, b, aSign, status);
2198 }
2199
2200}
2201
2202
2203
2204
2205
2206
2207
2208float32 float32_mul(float32 a, float32 b, float_status *status)
2209{
2210 flag aSign, bSign, zSign;
2211 int_fast16_t aExp, bExp, zExp;
2212 uint32_t aSig, bSig;
2213 uint64_t zSig64;
2214 uint32_t zSig;
2215
2216 a = float32_squash_input_denormal(a, status);
2217 b = float32_squash_input_denormal(b, status);
2218
2219 aSig = extractFloat32Frac( a );
2220 aExp = extractFloat32Exp( a );
2221 aSign = extractFloat32Sign( a );
2222 bSig = extractFloat32Frac( b );
2223 bExp = extractFloat32Exp( b );
2224 bSign = extractFloat32Sign( b );
2225 zSign = aSign ^ bSign;
2226 if ( aExp == 0xFF ) {
2227 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2228 return propagateFloat32NaN(a, b, status);
2229 }
2230 if ( ( bExp | bSig ) == 0 ) {
2231 float_raise(float_flag_invalid, status);
2232 return float32_default_nan;
2233 }
2234 return packFloat32( zSign, 0xFF, 0 );
2235 }
2236 if ( bExp == 0xFF ) {
2237 if (bSig) {
2238 return propagateFloat32NaN(a, b, status);
2239 }
2240 if ( ( aExp | aSig ) == 0 ) {
2241 float_raise(float_flag_invalid, status);
2242 return float32_default_nan;
2243 }
2244 return packFloat32( zSign, 0xFF, 0 );
2245 }
2246 if ( aExp == 0 ) {
2247 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2248 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2249 }
2250 if ( bExp == 0 ) {
2251 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
2252 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2253 }
2254 zExp = aExp + bExp - 0x7F;
2255 aSig = ( aSig | 0x00800000 )<<7;
2256 bSig = ( bSig | 0x00800000 )<<8;
2257 shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 );
2258 zSig = zSig64;
2259 if ( 0 <= (int32_t) ( zSig<<1 ) ) {
2260 zSig <<= 1;
2261 --zExp;
2262 }
2263 return roundAndPackFloat32(zSign, zExp, zSig, status);
2264
2265}
2266
2267
2268
2269
2270
2271
2272
2273float32 float32_div(float32 a, float32 b, float_status *status)
2274{
2275 flag aSign, bSign, zSign;
2276 int_fast16_t aExp, bExp, zExp;
2277 uint32_t aSig, bSig, zSig;
2278 a = float32_squash_input_denormal(a, status);
2279 b = float32_squash_input_denormal(b, status);
2280
2281 aSig = extractFloat32Frac( a );
2282 aExp = extractFloat32Exp( a );
2283 aSign = extractFloat32Sign( a );
2284 bSig = extractFloat32Frac( b );
2285 bExp = extractFloat32Exp( b );
2286 bSign = extractFloat32Sign( b );
2287 zSign = aSign ^ bSign;
2288 if ( aExp == 0xFF ) {
2289 if (aSig) {
2290 return propagateFloat32NaN(a, b, status);
2291 }
2292 if ( bExp == 0xFF ) {
2293 if (bSig) {
2294 return propagateFloat32NaN(a, b, status);
2295 }
2296 float_raise(float_flag_invalid, status);
2297 return float32_default_nan;
2298 }
2299 return packFloat32( zSign, 0xFF, 0 );
2300 }
2301 if ( bExp == 0xFF ) {
2302 if (bSig) {
2303 return propagateFloat32NaN(a, b, status);
2304 }
2305 return packFloat32( zSign, 0, 0 );
2306 }
2307 if ( bExp == 0 ) {
2308 if ( bSig == 0 ) {
2309 if ( ( aExp | aSig ) == 0 ) {
2310 float_raise(float_flag_invalid, status);
2311 return float32_default_nan;
2312 }
2313 float_raise(float_flag_divbyzero, status);
2314 return packFloat32( zSign, 0xFF, 0 );
2315 }
2316 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2317 }
2318 if ( aExp == 0 ) {
2319 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2320 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2321 }
2322 zExp = aExp - bExp + 0x7D;
2323 aSig = ( aSig | 0x00800000 )<<7;
2324 bSig = ( bSig | 0x00800000 )<<8;
2325 if ( bSig <= ( aSig + aSig ) ) {
2326 aSig >>= 1;
2327 ++zExp;
2328 }
2329 zSig = ( ( (uint64_t) aSig )<<32 ) / bSig;
2330 if ( ( zSig & 0x3F ) == 0 ) {
2331 zSig |= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 );
2332 }
2333 return roundAndPackFloat32(zSign, zExp, zSig, status);
2334
2335}
2336
2337
2338
2339
2340
2341
2342
2343float32 float32_rem(float32 a, float32 b, float_status *status)
2344{
2345 flag aSign, zSign;
2346 int_fast16_t aExp, bExp, expDiff;
2347 uint32_t aSig, bSig;
2348 uint32_t q;
2349 uint64_t aSig64, bSig64, q64;
2350 uint32_t alternateASig;
2351 int32_t sigMean;
2352 a = float32_squash_input_denormal(a, status);
2353 b = float32_squash_input_denormal(b, status);
2354
2355 aSig = extractFloat32Frac( a );
2356 aExp = extractFloat32Exp( a );
2357 aSign = extractFloat32Sign( a );
2358 bSig = extractFloat32Frac( b );
2359 bExp = extractFloat32Exp( b );
2360 if ( aExp == 0xFF ) {
2361 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2362 return propagateFloat32NaN(a, b, status);
2363 }
2364 float_raise(float_flag_invalid, status);
2365 return float32_default_nan;
2366 }
2367 if ( bExp == 0xFF ) {
2368 if (bSig) {
2369 return propagateFloat32NaN(a, b, status);
2370 }
2371 return a;
2372 }
2373 if ( bExp == 0 ) {
2374 if ( bSig == 0 ) {
2375 float_raise(float_flag_invalid, status);
2376 return float32_default_nan;
2377 }
2378 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2379 }
2380 if ( aExp == 0 ) {
2381 if ( aSig == 0 ) return a;
2382 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2383 }
2384 expDiff = aExp - bExp;
2385 aSig |= 0x00800000;
2386 bSig |= 0x00800000;
2387 if ( expDiff < 32 ) {
2388 aSig <<= 8;
2389 bSig <<= 8;
2390 if ( expDiff < 0 ) {
2391 if ( expDiff < -1 ) return a;
2392 aSig >>= 1;
2393 }
2394 q = ( bSig <= aSig );
2395 if ( q ) aSig -= bSig;
2396 if ( 0 < expDiff ) {
2397 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
2398 q >>= 32 - expDiff;
2399 bSig >>= 2;
2400 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2401 }
2402 else {
2403 aSig >>= 2;
2404 bSig >>= 2;
2405 }
2406 }
2407 else {
2408 if ( bSig <= aSig ) aSig -= bSig;
2409 aSig64 = ( (uint64_t) aSig )<<40;
2410 bSig64 = ( (uint64_t) bSig )<<40;
2411 expDiff -= 64;
2412 while ( 0 < expDiff ) {
2413 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2414 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2415 aSig64 = - ( ( bSig * q64 )<<38 );
2416 expDiff -= 62;
2417 }
2418 expDiff += 64;
2419 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2420 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2421 q = q64>>( 64 - expDiff );
2422 bSig <<= 6;
2423 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2424 }
2425 do {
2426 alternateASig = aSig;
2427 ++q;
2428 aSig -= bSig;
2429 } while ( 0 <= (int32_t) aSig );
2430 sigMean = aSig + alternateASig;
2431 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2432 aSig = alternateASig;
2433 }
2434 zSign = ( (int32_t) aSig < 0 );
2435 if ( zSign ) aSig = - aSig;
2436 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
2437}
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450float32 float32_muladd(float32 a, float32 b, float32 c, int flags,
2451 float_status *status)
2452{
2453 flag aSign, bSign, cSign, zSign;
2454 int_fast16_t aExp, bExp, cExp, pExp, zExp, expDiff;
2455 uint32_t aSig, bSig, cSig;
2456 flag pInf, pZero, pSign;
2457 uint64_t pSig64, cSig64, zSig64;
2458 uint32_t pSig;
2459 int shiftcount;
2460 flag signflip, infzero;
2461
2462 a = float32_squash_input_denormal(a, status);
2463 b = float32_squash_input_denormal(b, status);
2464 c = float32_squash_input_denormal(c, status);
2465 aSig = extractFloat32Frac(a);
2466 aExp = extractFloat32Exp(a);
2467 aSign = extractFloat32Sign(a);
2468 bSig = extractFloat32Frac(b);
2469 bExp = extractFloat32Exp(b);
2470 bSign = extractFloat32Sign(b);
2471 cSig = extractFloat32Frac(c);
2472 cExp = extractFloat32Exp(c);
2473 cSign = extractFloat32Sign(c);
2474
2475 infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) ||
2476 (aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));
2477
2478
2479
2480
2481
2482
2483 if (((aExp == 0xff) && aSig) ||
2484 ((bExp == 0xff) && bSig) ||
2485 ((cExp == 0xff) && cSig)) {
2486 return propagateFloat32MulAddNaN(a, b, c, infzero, status);
2487 }
2488
2489 if (infzero) {
2490 float_raise(float_flag_invalid, status);
2491 return float32_default_nan;
2492 }
2493
2494 if (flags & float_muladd_negate_c) {
2495 cSign ^= 1;
2496 }
2497
2498 signflip = (flags & float_muladd_negate_result) ? 1 : 0;
2499
2500
2501 pSign = aSign ^ bSign;
2502 if (flags & float_muladd_negate_product) {
2503 pSign ^= 1;
2504 }
2505 pInf = (aExp == 0xff) || (bExp == 0xff);
2506 pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
2507
2508 if (cExp == 0xff) {
2509 if (pInf && (pSign ^ cSign)) {
2510
2511 float_raise(float_flag_invalid, status);
2512 return float32_default_nan;
2513 }
2514
2515 return packFloat32(cSign ^ signflip, 0xff, 0);
2516 }
2517
2518 if (pInf) {
2519 return packFloat32(pSign ^ signflip, 0xff, 0);
2520 }
2521
2522 if (pZero) {
2523 if (cExp == 0) {
2524 if (cSig == 0) {
2525
2526 if (pSign == cSign) {
2527 zSign = pSign;
2528 } else if (status->float_rounding_mode == float_round_down) {
2529 zSign = 1;
2530 } else {
2531 zSign = 0;
2532 }
2533 return packFloat32(zSign ^ signflip, 0, 0);
2534 }
2535
2536 if (status->flush_to_zero) {
2537 float_raise(float_flag_output_denormal, status);
2538 return packFloat32(cSign ^ signflip, 0, 0);
2539 }
2540 }
2541
2542 if (flags & float_muladd_halve_result) {
2543 if (cExp == 0) {
2544 normalizeFloat32Subnormal(cSig, &cExp, &cSig);
2545 }
2546
2547
2548
2549 cExp -= 2;
2550 cSig = (cSig | 0x00800000) << 7;
2551 return roundAndPackFloat32(cSign ^ signflip, cExp, cSig, status);
2552 }
2553 return packFloat32(cSign ^ signflip, cExp, cSig);
2554 }
2555
2556 if (aExp == 0) {
2557 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
2558 }
2559 if (bExp == 0) {
2560 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
2561 }
2562
2563
2564
2565
2566
2567
2568
2569
2570 pExp = aExp + bExp - 0x7e;
2571 aSig = (aSig | 0x00800000) << 7;
2572 bSig = (bSig | 0x00800000) << 8;
2573 pSig64 = (uint64_t)aSig * bSig;
2574 if ((int64_t)(pSig64 << 1) >= 0) {
2575 pSig64 <<= 1;
2576 pExp--;
2577 }
2578
2579 zSign = pSign ^ signflip;
2580
2581
2582
2583
2584 if (cExp == 0) {
2585 if (!cSig) {
2586
2587 shift64RightJamming(pSig64, 32, &pSig64);
2588 pSig = pSig64;
2589 if (flags & float_muladd_halve_result) {
2590 pExp--;
2591 }
2592 return roundAndPackFloat32(zSign, pExp - 1,
2593 pSig, status);
2594 }
2595 normalizeFloat32Subnormal(cSig, &cExp, &cSig);
2596 }
2597
2598 cSig64 = (uint64_t)cSig << (62 - 23);
2599 cSig64 |= LIT64(0x4000000000000000);
2600 expDiff = pExp - cExp;
2601
2602 if (pSign == cSign) {
2603
2604 if (expDiff > 0) {
2605
2606 shift64RightJamming(cSig64, expDiff, &cSig64);
2607 zExp = pExp;
2608 } else if (expDiff < 0) {
2609
2610 shift64RightJamming(pSig64, -expDiff, &pSig64);
2611 zExp = cExp;
2612 } else {
2613
2614 zExp = cExp;
2615 }
2616
2617 zSig64 = pSig64 + cSig64;
2618 if ((int64_t)zSig64 < 0) {
2619 shift64RightJamming(zSig64, 1, &zSig64);
2620 } else {
2621 zExp--;
2622 }
2623 } else {
2624
2625 if (expDiff > 0) {
2626 shift64RightJamming(cSig64, expDiff, &cSig64);
2627 zSig64 = pSig64 - cSig64;
2628 zExp = pExp;
2629 } else if (expDiff < 0) {
2630 shift64RightJamming(pSig64, -expDiff, &pSig64);
2631 zSig64 = cSig64 - pSig64;
2632 zExp = cExp;
2633 zSign ^= 1;
2634 } else {
2635 zExp = pExp;
2636 if (cSig64 < pSig64) {
2637 zSig64 = pSig64 - cSig64;
2638 } else if (pSig64 < cSig64) {
2639 zSig64 = cSig64 - pSig64;
2640 zSign ^= 1;
2641 } else {
2642
2643 zSign = signflip;
2644 if (status->float_rounding_mode == float_round_down) {
2645 zSign ^= 1;
2646 }
2647 return packFloat32(zSign, 0, 0);
2648 }
2649 }
2650 --zExp;
2651
2652 shiftcount = countLeadingZeros64(zSig64) - 1;
2653 zSig64 <<= shiftcount;
2654 zExp -= shiftcount;
2655 }
2656 if (flags & float_muladd_halve_result) {
2657 zExp--;
2658 }
2659
2660 shift64RightJamming(zSig64, 32, &zSig64);
2661 return roundAndPackFloat32(zSign, zExp, zSig64, status);
2662}
2663
2664
2665
2666
2667
2668
2669
2670
2671float32 float32_sqrt(float32 a, float_status *status)
2672{
2673 flag aSign;
2674 int_fast16_t aExp, zExp;
2675 uint32_t aSig, zSig;
2676 uint64_t rem, term;
2677 a = float32_squash_input_denormal(a, status);
2678
2679 aSig = extractFloat32Frac( a );
2680 aExp = extractFloat32Exp( a );
2681 aSign = extractFloat32Sign( a );
2682 if ( aExp == 0xFF ) {
2683 if (aSig) {
2684 return propagateFloat32NaN(a, float32_zero, status);
2685 }
2686 if ( ! aSign ) return a;
2687 float_raise(float_flag_invalid, status);
2688 return float32_default_nan;
2689 }
2690 if ( aSign ) {
2691 if ( ( aExp | aSig ) == 0 ) return a;
2692 float_raise(float_flag_invalid, status);
2693 return float32_default_nan;
2694 }
2695 if ( aExp == 0 ) {
2696 if ( aSig == 0 ) return float32_zero;
2697 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2698 }
2699 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2700 aSig = ( aSig | 0x00800000 )<<8;
2701 zSig = estimateSqrt32( aExp, aSig ) + 2;
2702 if ( ( zSig & 0x7F ) <= 5 ) {
2703 if ( zSig < 2 ) {
2704 zSig = 0x7FFFFFFF;
2705 goto roundAndPack;
2706 }
2707 aSig >>= aExp & 1;
2708 term = ( (uint64_t) zSig ) * zSig;
2709 rem = ( ( (uint64_t) aSig )<<32 ) - term;
2710 while ( (int64_t) rem < 0 ) {
2711 --zSig;
2712 rem += ( ( (uint64_t) zSig )<<1 ) | 1;
2713 }
2714 zSig |= ( rem != 0 );
2715 }
2716 shift32RightJamming( zSig, 1, &zSig );
2717 roundAndPack:
2718 return roundAndPackFloat32(0, zExp, zSig, status);
2719
2720}
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740static const float64 float32_exp2_coefficients[15] =
2741{
2742 const_float64( 0x3ff0000000000000ll ),
2743 const_float64( 0x3fe0000000000000ll ),
2744 const_float64( 0x3fc5555555555555ll ),
2745 const_float64( 0x3fa5555555555555ll ),
2746 const_float64( 0x3f81111111111111ll ),
2747 const_float64( 0x3f56c16c16c16c17ll ),
2748 const_float64( 0x3f2a01a01a01a01all ),
2749 const_float64( 0x3efa01a01a01a01all ),
2750 const_float64( 0x3ec71de3a556c734ll ),
2751 const_float64( 0x3e927e4fb7789f5cll ),
2752 const_float64( 0x3e5ae64567f544e4ll ),
2753 const_float64( 0x3e21eed8eff8d898ll ),
2754 const_float64( 0x3de6124613a86d09ll ),
2755 const_float64( 0x3da93974a8c07c9dll ),
2756 const_float64( 0x3d6ae7f3e733b81fll ),
2757};
2758
2759float32 float32_exp2(float32 a, float_status *status)
2760{
2761 flag aSign;
2762 int_fast16_t aExp;
2763 uint32_t aSig;
2764 float64 r, x, xn;
2765 int i;
2766 a = float32_squash_input_denormal(a, status);
2767
2768 aSig = extractFloat32Frac( a );
2769 aExp = extractFloat32Exp( a );
2770 aSign = extractFloat32Sign( a );
2771
2772 if ( aExp == 0xFF) {
2773 if (aSig) {
2774 return propagateFloat32NaN(a, float32_zero, status);
2775 }
2776 return (aSign) ? float32_zero : a;
2777 }
2778 if (aExp == 0) {
2779 if (aSig == 0) return float32_one;
2780 }
2781
2782 float_raise(float_flag_inexact, status);
2783
2784
2785
2786
2787 x = float32_to_float64(a, status);
2788 x = float64_mul(x, float64_ln2, status);
2789
2790 xn = x;
2791 r = float64_one;
2792 for (i = 0 ; i < 15 ; i++) {
2793 float64 f;
2794
2795 f = float64_mul(xn, float32_exp2_coefficients[i], status);
2796 r = float64_add(r, f, status);
2797
2798 xn = float64_mul(xn, x, status);
2799 }
2800
2801 return float64_to_float32(r, status);
2802}
2803
2804
2805
2806
2807
2808
2809float32 float32_log2(float32 a, float_status *status)
2810{
2811 flag aSign, zSign;
2812 int_fast16_t aExp;
2813 uint32_t aSig, zSig, i;
2814
2815 a = float32_squash_input_denormal(a, status);
2816 aSig = extractFloat32Frac( a );
2817 aExp = extractFloat32Exp( a );
2818 aSign = extractFloat32Sign( a );
2819
2820 if ( aExp == 0 ) {
2821 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
2822 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2823 }
2824 if ( aSign ) {
2825 float_raise(float_flag_invalid, status);
2826 return float32_default_nan;
2827 }
2828 if ( aExp == 0xFF ) {
2829 if (aSig) {
2830 return propagateFloat32NaN(a, float32_zero, status);
2831 }
2832 return a;
2833 }
2834
2835 aExp -= 0x7F;
2836 aSig |= 0x00800000;
2837 zSign = aExp < 0;
2838 zSig = aExp << 23;
2839
2840 for (i = 1 << 22; i > 0; i >>= 1) {
2841 aSig = ( (uint64_t)aSig * aSig ) >> 23;
2842 if ( aSig & 0x01000000 ) {
2843 aSig >>= 1;
2844 zSig |= i;
2845 }
2846 }
2847
2848 if ( zSign )
2849 zSig = -zSig;
2850
2851 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
2852}
2853
2854
2855
2856
2857
2858
2859
2860
2861int float32_eq(float32 a, float32 b, float_status *status)
2862{
2863 uint32_t av, bv;
2864 a = float32_squash_input_denormal(a, status);
2865 b = float32_squash_input_denormal(b, status);
2866
2867 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2868 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2869 ) {
2870 float_raise(float_flag_invalid, status);
2871 return 0;
2872 }
2873 av = float32_val(a);
2874 bv = float32_val(b);
2875 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
2876}
2877
2878
2879
2880
2881
2882
2883
2884
2885int float32_le(float32 a, float32 b, float_status *status)
2886{
2887 flag aSign, bSign;
2888 uint32_t av, bv;
2889 a = float32_squash_input_denormal(a, status);
2890 b = float32_squash_input_denormal(b, status);
2891
2892 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2893 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2894 ) {
2895 float_raise(float_flag_invalid, status);
2896 return 0;
2897 }
2898 aSign = extractFloat32Sign( a );
2899 bSign = extractFloat32Sign( b );
2900 av = float32_val(a);
2901 bv = float32_val(b);
2902 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
2903 return ( av == bv ) || ( aSign ^ ( av < bv ) );
2904
2905}
2906
2907
2908
2909
2910
2911
2912
2913
2914int float32_lt(float32 a, float32 b, float_status *status)
2915{
2916 flag aSign, bSign;
2917 uint32_t av, bv;
2918 a = float32_squash_input_denormal(a, status);
2919 b = float32_squash_input_denormal(b, status);
2920
2921 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2922 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2923 ) {
2924 float_raise(float_flag_invalid, status);
2925 return 0;
2926 }
2927 aSign = extractFloat32Sign( a );
2928 bSign = extractFloat32Sign( b );
2929 av = float32_val(a);
2930 bv = float32_val(b);
2931 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
2932 return ( av != bv ) && ( aSign ^ ( av < bv ) );
2933
2934}
2935
2936
2937
2938
2939
2940
2941
2942
2943int float32_unordered(float32 a, float32 b, float_status *status)
2944{
2945 a = float32_squash_input_denormal(a, status);
2946 b = float32_squash_input_denormal(b, status);
2947
2948 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2949 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2950 ) {
2951 float_raise(float_flag_invalid, status);
2952 return 1;
2953 }
2954 return 0;
2955}
2956
2957
2958
2959
2960
2961
2962
2963
2964int float32_eq_quiet(float32 a, float32 b, float_status *status)
2965{
2966 a = float32_squash_input_denormal(a, status);
2967 b = float32_squash_input_denormal(b, status);
2968
2969 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2970 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2971 ) {
2972 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2973 float_raise(float_flag_invalid, status);
2974 }
2975 return 0;
2976 }
2977 return ( float32_val(a) == float32_val(b) ) ||
2978 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
2979}
2980
2981
2982
2983
2984
2985
2986
2987
2988int float32_le_quiet(float32 a, float32 b, float_status *status)
2989{
2990 flag aSign, bSign;
2991 uint32_t av, bv;
2992 a = float32_squash_input_denormal(a, status);
2993 b = float32_squash_input_denormal(b, status);
2994
2995 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2996 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2997 ) {
2998 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2999 float_raise(float_flag_invalid, status);
3000 }
3001 return 0;
3002 }
3003 aSign = extractFloat32Sign( a );
3004 bSign = extractFloat32Sign( b );
3005 av = float32_val(a);
3006 bv = float32_val(b);
3007 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
3008 return ( av == bv ) || ( aSign ^ ( av < bv ) );
3009
3010}
3011
3012
3013
3014
3015
3016
3017
3018
3019int float32_lt_quiet(float32 a, float32 b, float_status *status)
3020{
3021 flag aSign, bSign;
3022 uint32_t av, bv;
3023 a = float32_squash_input_denormal(a, status);
3024 b = float32_squash_input_denormal(b, status);
3025
3026 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3027 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3028 ) {
3029 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
3030 float_raise(float_flag_invalid, status);
3031 }
3032 return 0;
3033 }
3034 aSign = extractFloat32Sign( a );
3035 bSign = extractFloat32Sign( b );
3036 av = float32_val(a);
3037 bv = float32_val(b);
3038 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
3039 return ( av != bv ) && ( aSign ^ ( av < bv ) );
3040
3041}
3042
3043
3044
3045
3046
3047
3048
3049
3050int float32_unordered_quiet(float32 a, float32 b, float_status *status)
3051{
3052 a = float32_squash_input_denormal(a, status);
3053 b = float32_squash_input_denormal(b, status);
3054
3055 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3056 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3057 ) {
3058 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
3059 float_raise(float_flag_invalid, status);
3060 }
3061 return 1;
3062 }
3063 return 0;
3064}
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076int32 float64_to_int32(float64 a, float_status *status)
3077{
3078 flag aSign;
3079 int_fast16_t aExp, shiftCount;
3080 uint64_t aSig;
3081 a = float64_squash_input_denormal(a, status);
3082
3083 aSig = extractFloat64Frac( a );
3084 aExp = extractFloat64Exp( a );
3085 aSign = extractFloat64Sign( a );
3086 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
3087 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3088 shiftCount = 0x42C - aExp;
3089 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
3090 return roundAndPackInt32(aSign, aSig, status);
3091
3092}
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104int32 float64_to_int32_round_to_zero(float64 a, float_status *status)
3105{
3106 flag aSign;
3107 int_fast16_t aExp, shiftCount;
3108 uint64_t aSig, savedASig;
3109 int32_t z;
3110 a = float64_squash_input_denormal(a, status);
3111
3112 aSig = extractFloat64Frac( a );
3113 aExp = extractFloat64Exp( a );
3114 aSign = extractFloat64Sign( a );
3115 if ( 0x41E < aExp ) {
3116 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
3117 goto invalid;
3118 }
3119 else if ( aExp < 0x3FF ) {
3120 if (aExp || aSig) {
3121 status->float_exception_flags |= float_flag_inexact;
3122 }
3123 return 0;
3124 }
3125 aSig |= LIT64( 0x0010000000000000 );
3126 shiftCount = 0x433 - aExp;
3127 savedASig = aSig;
3128 aSig >>= shiftCount;
3129 z = aSig;
3130 if ( aSign ) z = - z;
3131 if ( ( z < 0 ) ^ aSign ) {
3132 invalid:
3133 float_raise(float_flag_invalid, status);
3134 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
3135 }
3136 if ( ( aSig<<shiftCount ) != savedASig ) {
3137 status->float_exception_flags |= float_flag_inexact;
3138 }
3139 return z;
3140
3141}
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153int_fast16_t float64_to_int16_round_to_zero(float64 a, float_status *status)
3154{
3155 flag aSign;
3156 int_fast16_t aExp, shiftCount;
3157 uint64_t aSig, savedASig;
3158 int32 z;
3159
3160 aSig = extractFloat64Frac( a );
3161 aExp = extractFloat64Exp( a );
3162 aSign = extractFloat64Sign( a );
3163 if ( 0x40E < aExp ) {
3164 if ( ( aExp == 0x7FF ) && aSig ) {
3165 aSign = 0;
3166 }
3167 goto invalid;
3168 }
3169 else if ( aExp < 0x3FF ) {
3170 if ( aExp || aSig ) {
3171 status->float_exception_flags |= float_flag_inexact;
3172 }
3173 return 0;
3174 }
3175 aSig |= LIT64( 0x0010000000000000 );
3176 shiftCount = 0x433 - aExp;
3177 savedASig = aSig;
3178 aSig >>= shiftCount;
3179 z = aSig;
3180 if ( aSign ) {
3181 z = - z;
3182 }
3183 if ( ( (int16_t)z < 0 ) ^ aSign ) {
3184 invalid:
3185 float_raise(float_flag_invalid, status);
3186 return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
3187 }
3188 if ( ( aSig<<shiftCount ) != savedASig ) {
3189 status->float_exception_flags |= float_flag_inexact;
3190 }
3191 return z;
3192}
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204int64 float64_to_int64(float64 a, float_status *status)
3205{
3206 flag aSign;
3207 int_fast16_t aExp, shiftCount;
3208 uint64_t aSig, aSigExtra;
3209 a = float64_squash_input_denormal(a, status);
3210
3211 aSig = extractFloat64Frac( a );
3212 aExp = extractFloat64Exp( a );
3213 aSign = extractFloat64Sign( a );
3214 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3215 shiftCount = 0x433 - aExp;
3216 if ( shiftCount <= 0 ) {
3217 if ( 0x43E < aExp ) {
3218 float_raise(float_flag_invalid, status);
3219 if ( ! aSign
3220 || ( ( aExp == 0x7FF )
3221 && ( aSig != LIT64( 0x0010000000000000 ) ) )
3222 ) {
3223 return LIT64( 0x7FFFFFFFFFFFFFFF );
3224 }
3225 return (int64_t) LIT64( 0x8000000000000000 );
3226 }
3227 aSigExtra = 0;
3228 aSig <<= - shiftCount;
3229 }
3230 else {
3231 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3232 }
3233 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
3234
3235}
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247int64 float64_to_int64_round_to_zero(float64 a, float_status *status)
3248{
3249 flag aSign;
3250 int_fast16_t aExp, shiftCount;
3251 uint64_t aSig;
3252 int64 z;
3253 a = float64_squash_input_denormal(a, status);
3254
3255 aSig = extractFloat64Frac( a );
3256 aExp = extractFloat64Exp( a );
3257 aSign = extractFloat64Sign( a );
3258 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3259 shiftCount = aExp - 0x433;
3260 if ( 0 <= shiftCount ) {
3261 if ( 0x43E <= aExp ) {
3262 if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
3263 float_raise(float_flag_invalid, status);
3264 if ( ! aSign
3265 || ( ( aExp == 0x7FF )
3266 && ( aSig != LIT64( 0x0010000000000000 ) ) )
3267 ) {
3268 return LIT64( 0x7FFFFFFFFFFFFFFF );
3269 }
3270 }
3271 return (int64_t) LIT64( 0x8000000000000000 );
3272 }
3273 z = aSig<<shiftCount;
3274 }
3275 else {
3276 if ( aExp < 0x3FE ) {
3277 if (aExp | aSig) {
3278 status->float_exception_flags |= float_flag_inexact;
3279 }
3280 return 0;
3281 }
3282 z = aSig>>( - shiftCount );
3283 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
3284 status->float_exception_flags |= float_flag_inexact;
3285 }
3286 }
3287 if ( aSign ) z = - z;
3288 return z;
3289
3290}
3291
3292
3293
3294
3295
3296
3297
3298
3299float32 float64_to_float32(float64 a, float_status *status)
3300{
3301 flag aSign;
3302 int_fast16_t aExp;
3303 uint64_t aSig;
3304 uint32_t zSig;
3305 a = float64_squash_input_denormal(a, status);
3306
3307 aSig = extractFloat64Frac( a );
3308 aExp = extractFloat64Exp( a );
3309 aSign = extractFloat64Sign( a );
3310 if ( aExp == 0x7FF ) {
3311 if (aSig) {
3312 return commonNaNToFloat32(float64ToCommonNaN(a, status), status);
3313 }
3314 return packFloat32( aSign, 0xFF, 0 );
3315 }
3316 shift64RightJamming( aSig, 22, &aSig );
3317 zSig = aSig;
3318 if ( aExp || zSig ) {
3319 zSig |= 0x40000000;
3320 aExp -= 0x381;
3321 }
3322 return roundAndPackFloat32(aSign, aExp, zSig, status);
3323
3324}
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337static float16 packFloat16(flag zSign, int_fast16_t zExp, uint16_t zSig)
3338{
3339 return make_float16(
3340 (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
3341}
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371static float32 roundAndPackFloat16(flag zSign, int_fast16_t zExp,
3372 uint32_t zSig, flag ieee,
3373 float_status *status)
3374{
3375 int maxexp = ieee ? 29 : 30;
3376 uint32_t mask;
3377 uint32_t increment;
3378 bool rounding_bumps_exp;
3379 bool is_tiny = false;
3380
3381
3382
3383
3384 if (zExp < 1) {
3385
3386 mask = 0x00ffffff;
3387 if (zExp >= -11) {
3388 mask >>= 11 + zExp;
3389 }
3390 } else {
3391
3392 mask = 0x00001fff;
3393 }
3394
3395 switch (status->float_rounding_mode) {
3396 case float_round_nearest_even:
3397 increment = (mask + 1) >> 1;
3398 if ((zSig & mask) == increment) {
3399 increment = zSig & (increment << 1);
3400 }
3401 break;
3402 case float_round_ties_away:
3403 increment = (mask + 1) >> 1;
3404 break;
3405 case float_round_up:
3406 increment = zSign ? 0 : mask;
3407 break;
3408 case float_round_down:
3409 increment = zSign ? mask : 0;
3410 break;
3411 default:
3412 increment = 0;
3413 break;
3414 }
3415
3416 rounding_bumps_exp = (zSig + increment >= 0x01000000);
3417
3418 if (zExp > maxexp || (zExp == maxexp && rounding_bumps_exp)) {
3419 if (ieee) {
3420 float_raise(float_flag_overflow | float_flag_inexact, status);
3421 return packFloat16(zSign, 0x1f, 0);
3422 } else {
3423 float_raise(float_flag_invalid, status);
3424 return packFloat16(zSign, 0x1f, 0x3ff);
3425 }
3426 }
3427
3428 if (zExp < 0) {
3429
3430 is_tiny =
3431 (status->float_detect_tininess == float_tininess_before_rounding)
3432 || (zExp < -1)
3433 || (!rounding_bumps_exp);
3434 }
3435 if (zSig & mask) {
3436 float_raise(float_flag_inexact, status);
3437 if (is_tiny) {
3438 float_raise(float_flag_underflow, status);
3439 }
3440 }
3441
3442 zSig += increment;
3443 if (rounding_bumps_exp) {
3444 zSig >>= 1;
3445 zExp++;
3446 }
3447
3448 if (zExp < -10) {
3449 return packFloat16(zSign, 0, 0);
3450 }
3451 if (zExp < 0) {
3452 zSig >>= -zExp;
3453 zExp = 0;
3454 }
3455 return packFloat16(zSign, zExp, zSig >> 13);
3456}
3457
3458static void normalizeFloat16Subnormal(uint32_t aSig, int_fast16_t *zExpPtr,
3459 uint32_t *zSigPtr)
3460{
3461 int8_t shiftCount = countLeadingZeros32(aSig) - 21;
3462 *zSigPtr = aSig << shiftCount;
3463 *zExpPtr = 1 - shiftCount;
3464}
3465
3466
3467
3468
3469float32 float16_to_float32(float16 a, flag ieee, float_status *status)
3470{
3471 flag aSign;
3472 int_fast16_t aExp;
3473 uint32_t aSig;
3474
3475 aSign = extractFloat16Sign(a);
3476 aExp = extractFloat16Exp(a);
3477 aSig = extractFloat16Frac(a);
3478
3479 if (aExp == 0x1f && ieee) {
3480 if (aSig) {
3481 return commonNaNToFloat32(float16ToCommonNaN(a, status), status);
3482 }
3483 return packFloat32(aSign, 0xff, 0);
3484 }
3485 if (aExp == 0) {
3486 if (aSig == 0) {
3487 return packFloat32(aSign, 0, 0);
3488 }
3489
3490 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3491 aExp--;
3492 }
3493 return packFloat32( aSign, aExp + 0x70, aSig << 13);
3494}
3495
3496float16 float32_to_float16(float32 a, flag ieee, float_status *status)
3497{
3498 flag aSign;
3499 int_fast16_t aExp;
3500 uint32_t aSig;
3501
3502 a = float32_squash_input_denormal(a, status);
3503
3504 aSig = extractFloat32Frac( a );
3505 aExp = extractFloat32Exp( a );
3506 aSign = extractFloat32Sign( a );
3507 if ( aExp == 0xFF ) {
3508 if (aSig) {
3509
3510 if (!ieee) {
3511 float_raise(float_flag_invalid, status);
3512 return packFloat16(aSign, 0, 0);
3513 }
3514 return commonNaNToFloat16(
3515 float32ToCommonNaN(a, status), status);
3516 }
3517
3518 if (!ieee) {
3519 float_raise(float_flag_invalid, status);
3520 return packFloat16(aSign, 0x1f, 0x3ff);
3521 }
3522 return packFloat16(aSign, 0x1f, 0);
3523 }
3524 if (aExp == 0 && aSig == 0) {
3525 return packFloat16(aSign, 0, 0);
3526 }
3527
3528
3529
3530
3531
3532
3533
3534 aSig |= 0x00800000;
3535 aExp -= 0x71;
3536
3537 return roundAndPackFloat16(aSign, aExp, aSig, ieee, status);
3538}
3539
3540float64 float16_to_float64(float16 a, flag ieee, float_status *status)
3541{
3542 flag aSign;
3543 int_fast16_t aExp;
3544 uint32_t aSig;
3545
3546 aSign = extractFloat16Sign(a);
3547 aExp = extractFloat16Exp(a);
3548 aSig = extractFloat16Frac(a);
3549
3550 if (aExp == 0x1f && ieee) {
3551 if (aSig) {
3552 return commonNaNToFloat64(
3553 float16ToCommonNaN(a, status), status);
3554 }
3555 return packFloat64(aSign, 0x7ff, 0);
3556 }
3557 if (aExp == 0) {
3558 if (aSig == 0) {
3559 return packFloat64(aSign, 0, 0);
3560 }
3561
3562 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3563 aExp--;
3564 }
3565 return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42);
3566}
3567
3568float16 float64_to_float16(float64 a, flag ieee, float_status *status)
3569{
3570 flag aSign;
3571 int_fast16_t aExp;
3572 uint64_t aSig;
3573 uint32_t zSig;
3574
3575 a = float64_squash_input_denormal(a, status);
3576
3577 aSig = extractFloat64Frac(a);
3578 aExp = extractFloat64Exp(a);
3579 aSign = extractFloat64Sign(a);
3580 if (aExp == 0x7FF) {
3581 if (aSig) {
3582
3583 if (!ieee) {
3584 float_raise(float_flag_invalid, status);
3585 return packFloat16(aSign, 0, 0);
3586 }
3587 return commonNaNToFloat16(
3588 float64ToCommonNaN(a, status), status);
3589 }
3590
3591 if (!ieee) {
3592 float_raise(float_flag_invalid, status);
3593 return packFloat16(aSign, 0x1f, 0x3ff);
3594 }
3595 return packFloat16(aSign, 0x1f, 0);
3596 }
3597 shift64RightJamming(aSig, 29, &aSig);
3598 zSig = aSig;
3599 if (aExp == 0 && zSig == 0) {
3600 return packFloat16(aSign, 0, 0);
3601 }
3602
3603
3604
3605
3606
3607
3608
3609 zSig |= 0x00800000;
3610 aExp -= 0x3F1;
3611
3612 return roundAndPackFloat16(aSign, aExp, zSig, ieee, status);
3613}
3614
3615
3616
3617
3618
3619
3620
3621
3622floatx80 float64_to_floatx80(float64 a, float_status *status)
3623{
3624 flag aSign;
3625 int_fast16_t aExp;
3626 uint64_t aSig;
3627
3628 a = float64_squash_input_denormal(a, status);
3629 aSig = extractFloat64Frac( a );
3630 aExp = extractFloat64Exp( a );
3631 aSign = extractFloat64Sign( a );
3632 if ( aExp == 0x7FF ) {
3633 if (aSig) {
3634 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
3635 }
3636 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3637 }
3638 if ( aExp == 0 ) {
3639 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3640 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3641 }
3642 return
3643 packFloatx80(
3644 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
3645
3646}
3647
3648
3649
3650
3651
3652
3653
3654
3655float128 float64_to_float128(float64 a, float_status *status)
3656{
3657 flag aSign;
3658 int_fast16_t aExp;
3659 uint64_t aSig, zSig0, zSig1;
3660
3661 a = float64_squash_input_denormal(a, status);
3662 aSig = extractFloat64Frac( a );
3663 aExp = extractFloat64Exp( a );
3664 aSign = extractFloat64Sign( a );
3665 if ( aExp == 0x7FF ) {
3666 if (aSig) {
3667 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
3668 }
3669 return packFloat128( aSign, 0x7FFF, 0, 0 );
3670 }
3671 if ( aExp == 0 ) {
3672 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3673 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3674 --aExp;
3675 }
3676 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
3677 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
3678
3679}
3680
3681
3682
3683
3684
3685
3686
3687
3688float64 float64_round_to_int(float64 a, float_status *status)
3689{
3690 flag aSign;
3691 int_fast16_t aExp;
3692 uint64_t lastBitMask, roundBitsMask;
3693 uint64_t z;
3694 a = float64_squash_input_denormal(a, status);
3695
3696 aExp = extractFloat64Exp( a );
3697 if ( 0x433 <= aExp ) {
3698 if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
3699 return propagateFloat64NaN(a, a, status);
3700 }
3701 return a;
3702 }
3703 if ( aExp < 0x3FF ) {
3704 if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a;
3705 status->float_exception_flags |= float_flag_inexact;
3706 aSign = extractFloat64Sign( a );
3707 switch (status->float_rounding_mode) {
3708 case float_round_nearest_even:
3709 if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
3710 return packFloat64( aSign, 0x3FF, 0 );
3711 }
3712 break;
3713 case float_round_ties_away:
3714 if (aExp == 0x3FE) {
3715 return packFloat64(aSign, 0x3ff, 0);
3716 }
3717 break;
3718 case float_round_down:
3719 return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
3720 case float_round_up:
3721 return make_float64(
3722 aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
3723 }
3724 return packFloat64( aSign, 0, 0 );
3725 }
3726 lastBitMask = 1;
3727 lastBitMask <<= 0x433 - aExp;
3728 roundBitsMask = lastBitMask - 1;
3729 z = float64_val(a);
3730 switch (status->float_rounding_mode) {
3731 case float_round_nearest_even:
3732 z += lastBitMask >> 1;
3733 if ((z & roundBitsMask) == 0) {
3734 z &= ~lastBitMask;
3735 }
3736 break;
3737 case float_round_ties_away:
3738 z += lastBitMask >> 1;
3739 break;
3740 case float_round_to_zero:
3741 break;
3742 case float_round_up:
3743 if (!extractFloat64Sign(make_float64(z))) {
3744 z += roundBitsMask;
3745 }
3746 break;
3747 case float_round_down:
3748 if (extractFloat64Sign(make_float64(z))) {
3749 z += roundBitsMask;
3750 }
3751 break;
3752 default:
3753 abort();
3754 }
3755 z &= ~ roundBitsMask;
3756 if (z != float64_val(a)) {
3757 status->float_exception_flags |= float_flag_inexact;
3758 }
3759 return make_float64(z);
3760
3761}
3762
3763float64 float64_trunc_to_int(float64 a, float_status *status)
3764{
3765 int oldmode;
3766 float64 res;
3767 oldmode = status->float_rounding_mode;
3768 status->float_rounding_mode = float_round_to_zero;
3769 res = float64_round_to_int(a, status);
3770 status->float_rounding_mode = oldmode;
3771 return res;
3772}
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782static float64 addFloat64Sigs(float64 a, float64 b, flag zSign,
3783 float_status *status)
3784{
3785 int_fast16_t aExp, bExp, zExp;
3786 uint64_t aSig, bSig, zSig;
3787 int_fast16_t expDiff;
3788
3789 aSig = extractFloat64Frac( a );
3790 aExp = extractFloat64Exp( a );
3791 bSig = extractFloat64Frac( b );
3792 bExp = extractFloat64Exp( b );
3793 expDiff = aExp - bExp;
3794 aSig <<= 9;
3795 bSig <<= 9;
3796 if ( 0 < expDiff ) {
3797 if ( aExp == 0x7FF ) {
3798 if (aSig) {
3799 return propagateFloat64NaN(a, b, status);
3800 }
3801 return a;
3802 }
3803 if ( bExp == 0 ) {
3804 --expDiff;
3805 }
3806 else {
3807 bSig |= LIT64( 0x2000000000000000 );
3808 }
3809 shift64RightJamming( bSig, expDiff, &bSig );
3810 zExp = aExp;
3811 }
3812 else if ( expDiff < 0 ) {
3813 if ( bExp == 0x7FF ) {
3814 if (bSig) {
3815 return propagateFloat64NaN(a, b, status);
3816 }
3817 return packFloat64( zSign, 0x7FF, 0 );
3818 }
3819 if ( aExp == 0 ) {
3820 ++expDiff;
3821 }
3822 else {
3823 aSig |= LIT64( 0x2000000000000000 );
3824 }
3825 shift64RightJamming( aSig, - expDiff, &aSig );
3826 zExp = bExp;
3827 }
3828 else {
3829 if ( aExp == 0x7FF ) {
3830 if (aSig | bSig) {
3831 return propagateFloat64NaN(a, b, status);
3832 }
3833 return a;
3834 }
3835 if ( aExp == 0 ) {
3836 if (status->flush_to_zero) {
3837 if (aSig | bSig) {
3838 float_raise(float_flag_output_denormal, status);
3839 }
3840 return packFloat64(zSign, 0, 0);
3841 }
3842 return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
3843 }
3844 zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
3845 zExp = aExp;
3846 goto roundAndPack;
3847 }
3848 aSig |= LIT64( 0x2000000000000000 );
3849 zSig = ( aSig + bSig )<<1;
3850 --zExp;
3851 if ( (int64_t) zSig < 0 ) {
3852 zSig = aSig + bSig;
3853 ++zExp;
3854 }
3855 roundAndPack:
3856 return roundAndPackFloat64(zSign, zExp, zSig, status);
3857
3858}
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868static float64 subFloat64Sigs(float64 a, float64 b, flag zSign,
3869 float_status *status)
3870{
3871 int_fast16_t aExp, bExp, zExp;
3872 uint64_t aSig, bSig, zSig;
3873 int_fast16_t expDiff;
3874
3875 aSig = extractFloat64Frac( a );
3876 aExp = extractFloat64Exp( a );
3877 bSig = extractFloat64Frac( b );
3878 bExp = extractFloat64Exp( b );
3879 expDiff = aExp - bExp;
3880 aSig <<= 10;
3881 bSig <<= 10;
3882 if ( 0 < expDiff ) goto aExpBigger;
3883 if ( expDiff < 0 ) goto bExpBigger;
3884 if ( aExp == 0x7FF ) {
3885 if (aSig | bSig) {
3886 return propagateFloat64NaN(a, b, status);
3887 }
3888 float_raise(float_flag_invalid, status);
3889 return float64_default_nan;
3890 }
3891 if ( aExp == 0 ) {
3892 aExp = 1;
3893 bExp = 1;
3894 }
3895 if ( bSig < aSig ) goto aBigger;
3896 if ( aSig < bSig ) goto bBigger;
3897 return packFloat64(status->float_rounding_mode == float_round_down, 0, 0);
3898 bExpBigger:
3899 if ( bExp == 0x7FF ) {
3900 if (bSig) {
3901 return propagateFloat64NaN(a, b, status);
3902 }
3903 return packFloat64( zSign ^ 1, 0x7FF, 0 );
3904 }
3905 if ( aExp == 0 ) {
3906 ++expDiff;
3907 }
3908 else {
3909 aSig |= LIT64( 0x4000000000000000 );
3910 }
3911 shift64RightJamming( aSig, - expDiff, &aSig );
3912 bSig |= LIT64( 0x4000000000000000 );
3913 bBigger:
3914 zSig = bSig - aSig;
3915 zExp = bExp;
3916 zSign ^= 1;
3917 goto normalizeRoundAndPack;
3918 aExpBigger:
3919 if ( aExp == 0x7FF ) {
3920 if (aSig) {
3921 return propagateFloat64NaN(a, b, status);
3922 }
3923 return a;
3924 }
3925 if ( bExp == 0 ) {
3926 --expDiff;
3927 }
3928 else {
3929 bSig |= LIT64( 0x4000000000000000 );
3930 }
3931 shift64RightJamming( bSig, expDiff, &bSig );
3932 aSig |= LIT64( 0x4000000000000000 );
3933 aBigger:
3934 zSig = aSig - bSig;
3935 zExp = aExp;
3936 normalizeRoundAndPack:
3937 --zExp;
3938 return normalizeRoundAndPackFloat64(zSign, zExp, zSig, status);
3939
3940}
3941
3942
3943
3944
3945
3946
3947
3948float64 float64_add(float64 a, float64 b, float_status *status)
3949{
3950 flag aSign, bSign;
3951 a = float64_squash_input_denormal(a, status);
3952 b = float64_squash_input_denormal(b, status);
3953
3954 aSign = extractFloat64Sign( a );
3955 bSign = extractFloat64Sign( b );
3956 if ( aSign == bSign ) {
3957 return addFloat64Sigs(a, b, aSign, status);
3958 }
3959 else {
3960 return subFloat64Sigs(a, b, aSign, status);
3961 }
3962
3963}
3964
3965
3966
3967
3968
3969
3970
3971float64 float64_sub(float64 a, float64 b, float_status *status)
3972{
3973 flag aSign, bSign;
3974 a = float64_squash_input_denormal(a, status);
3975 b = float64_squash_input_denormal(b, status);
3976
3977 aSign = extractFloat64Sign( a );
3978 bSign = extractFloat64Sign( b );
3979 if ( aSign == bSign ) {
3980 return subFloat64Sigs(a, b, aSign, status);
3981 }
3982 else {
3983 return addFloat64Sigs(a, b, aSign, status);
3984 }
3985
3986}
3987
3988
3989
3990
3991
3992
3993
3994float64 float64_mul(float64 a, float64 b, float_status *status)
3995{
3996 flag aSign, bSign, zSign;
3997 int_fast16_t aExp, bExp, zExp;
3998 uint64_t aSig, bSig, zSig0, zSig1;
3999
4000 a = float64_squash_input_denormal(a, status);
4001 b = float64_squash_input_denormal(b, status);
4002
4003 aSig = extractFloat64Frac( a );
4004 aExp = extractFloat64Exp( a );
4005 aSign = extractFloat64Sign( a );
4006 bSig = extractFloat64Frac( b );
4007 bExp = extractFloat64Exp( b );
4008 bSign = extractFloat64Sign( b );
4009 zSign = aSign ^ bSign;
4010 if ( aExp == 0x7FF ) {
4011 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
4012 return propagateFloat64NaN(a, b, status);
4013 }
4014 if ( ( bExp | bSig ) == 0 ) {
4015 float_raise(float_flag_invalid, status);
4016 return float64_default_nan;
4017 }
4018 return packFloat64( zSign, 0x7FF, 0 );
4019 }
4020 if ( bExp == 0x7FF ) {
4021 if (bSig) {
4022 return propagateFloat64NaN(a, b, status);
4023 }
4024 if ( ( aExp | aSig ) == 0 ) {
4025 float_raise(float_flag_invalid, status);
4026 return float64_default_nan;
4027 }
4028 return packFloat64( zSign, 0x7FF, 0 );
4029 }
4030 if ( aExp == 0 ) {
4031 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
4032 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4033 }
4034 if ( bExp == 0 ) {
4035 if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
4036 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4037 }
4038 zExp = aExp + bExp - 0x3FF;
4039 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
4040 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4041 mul64To128( aSig, bSig, &zSig0, &zSig1 );
4042 zSig0 |= ( zSig1 != 0 );
4043 if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
4044 zSig0 <<= 1;
4045 --zExp;
4046 }
4047 return roundAndPackFloat64(zSign, zExp, zSig0, status);
4048
4049}
4050
4051
4052
4053
4054
4055
4056
4057float64 float64_div(float64 a, float64 b, float_status *status)
4058{
4059 flag aSign, bSign, zSign;
4060 int_fast16_t aExp, bExp, zExp;
4061 uint64_t aSig, bSig, zSig;
4062 uint64_t rem0, rem1;
4063 uint64_t term0, term1;
4064 a = float64_squash_input_denormal(a, status);
4065 b = float64_squash_input_denormal(b, status);
4066
4067 aSig = extractFloat64Frac( a );
4068 aExp = extractFloat64Exp( a );
4069 aSign = extractFloat64Sign( a );
4070 bSig = extractFloat64Frac( b );
4071 bExp = extractFloat64Exp( b );
4072 bSign = extractFloat64Sign( b );
4073 zSign = aSign ^ bSign;
4074 if ( aExp == 0x7FF ) {
4075 if (aSig) {
4076 return propagateFloat64NaN(a, b, status);
4077 }
4078 if ( bExp == 0x7FF ) {
4079 if (bSig) {
4080 return propagateFloat64NaN(a, b, status);
4081 }
4082 float_raise(float_flag_invalid, status);
4083 return float64_default_nan;
4084 }
4085 return packFloat64( zSign, 0x7FF, 0 );
4086 }
4087 if ( bExp == 0x7FF ) {
4088 if (bSig) {
4089 return propagateFloat64NaN(a, b, status);
4090 }
4091 return packFloat64( zSign, 0, 0 );
4092 }
4093 if ( bExp == 0 ) {
4094 if ( bSig == 0 ) {
4095 if ( ( aExp | aSig ) == 0 ) {
4096 float_raise(float_flag_invalid, status);
4097 return float64_default_nan;
4098 }
4099 float_raise(float_flag_divbyzero, status);
4100 return packFloat64( zSign, 0x7FF, 0 );
4101 }
4102 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4103 }
4104 if ( aExp == 0 ) {
4105 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
4106 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4107 }
4108 zExp = aExp - bExp + 0x3FD;
4109 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
4110 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4111 if ( bSig <= ( aSig + aSig ) ) {
4112 aSig >>= 1;
4113 ++zExp;
4114 }
4115 zSig = estimateDiv128To64( aSig, 0, bSig );
4116 if ( ( zSig & 0x1FF ) <= 2 ) {
4117 mul64To128( bSig, zSig, &term0, &term1 );
4118 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
4119 while ( (int64_t) rem0 < 0 ) {
4120 --zSig;
4121 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4122 }
4123 zSig |= ( rem1 != 0 );
4124 }
4125 return roundAndPackFloat64(zSign, zExp, zSig, status);
4126
4127}
4128
4129
4130
4131
4132
4133
4134
4135float64 float64_rem(float64 a, float64 b, float_status *status)
4136{
4137 flag aSign, zSign;
4138 int_fast16_t aExp, bExp, expDiff;
4139 uint64_t aSig, bSig;
4140 uint64_t q, alternateASig;
4141 int64_t sigMean;
4142
4143 a = float64_squash_input_denormal(a, status);
4144 b = float64_squash_input_denormal(b, status);
4145 aSig = extractFloat64Frac( a );
4146 aExp = extractFloat64Exp( a );
4147 aSign = extractFloat64Sign( a );
4148 bSig = extractFloat64Frac( b );
4149 bExp = extractFloat64Exp( b );
4150 if ( aExp == 0x7FF ) {
4151 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
4152 return propagateFloat64NaN(a, b, status);
4153 }
4154 float_raise(float_flag_invalid, status);
4155 return float64_default_nan;
4156 }
4157 if ( bExp == 0x7FF ) {
4158 if (bSig) {
4159 return propagateFloat64NaN(a, b, status);
4160 }
4161 return a;
4162 }
4163 if ( bExp == 0 ) {
4164 if ( bSig == 0 ) {
4165 float_raise(float_flag_invalid, status);
4166 return float64_default_nan;
4167 }
4168 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4169 }
4170 if ( aExp == 0 ) {
4171 if ( aSig == 0 ) return a;
4172 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4173 }
4174 expDiff = aExp - bExp;
4175 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
4176 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4177 if ( expDiff < 0 ) {
4178 if ( expDiff < -1 ) return a;
4179 aSig >>= 1;
4180 }
4181 q = ( bSig <= aSig );
4182 if ( q ) aSig -= bSig;
4183 expDiff -= 64;
4184 while ( 0 < expDiff ) {
4185 q = estimateDiv128To64( aSig, 0, bSig );
4186 q = ( 2 < q ) ? q - 2 : 0;
4187 aSig = - ( ( bSig>>2 ) * q );
4188 expDiff -= 62;
4189 }
4190 expDiff += 64;
4191 if ( 0 < expDiff ) {
4192 q = estimateDiv128To64( aSig, 0, bSig );
4193 q = ( 2 < q ) ? q - 2 : 0;
4194 q >>= 64 - expDiff;
4195 bSig >>= 2;
4196 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4197 }
4198 else {
4199 aSig >>= 2;
4200 bSig >>= 2;
4201 }
4202 do {
4203 alternateASig = aSig;
4204 ++q;
4205 aSig -= bSig;
4206 } while ( 0 <= (int64_t) aSig );
4207 sigMean = aSig + alternateASig;
4208 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4209 aSig = alternateASig;
4210 }
4211 zSign = ( (int64_t) aSig < 0 );
4212 if ( zSign ) aSig = - aSig;
4213 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
4214
4215}
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228float64 float64_muladd(float64 a, float64 b, float64 c, int flags,
4229 float_status *status)
4230{
4231 flag aSign, bSign, cSign, zSign;
4232 int_fast16_t aExp, bExp, cExp, pExp, zExp, expDiff;
4233 uint64_t aSig, bSig, cSig;
4234 flag pInf, pZero, pSign;
4235 uint64_t pSig0, pSig1, cSig0, cSig1, zSig0, zSig1;
4236 int shiftcount;
4237 flag signflip, infzero;
4238
4239 a = float64_squash_input_denormal(a, status);
4240 b = float64_squash_input_denormal(b, status);
4241 c = float64_squash_input_denormal(c, status);
4242 aSig = extractFloat64Frac(a);
4243 aExp = extractFloat64Exp(a);
4244 aSign = extractFloat64Sign(a);
4245 bSig = extractFloat64Frac(b);
4246 bExp = extractFloat64Exp(b);
4247 bSign = extractFloat64Sign(b);
4248 cSig = extractFloat64Frac(c);
4249 cExp = extractFloat64Exp(c);
4250 cSign = extractFloat64Sign(c);
4251
4252 infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) ||
4253 (aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0));
4254
4255
4256
4257
4258
4259
4260 if (((aExp == 0x7ff) && aSig) ||
4261 ((bExp == 0x7ff) && bSig) ||
4262 ((cExp == 0x7ff) && cSig)) {
4263 return propagateFloat64MulAddNaN(a, b, c, infzero, status);
4264 }
4265
4266 if (infzero) {
4267 float_raise(float_flag_invalid, status);
4268 return float64_default_nan;
4269 }
4270
4271 if (flags & float_muladd_negate_c) {
4272 cSign ^= 1;
4273 }
4274
4275 signflip = (flags & float_muladd_negate_result) ? 1 : 0;
4276
4277
4278 pSign = aSign ^ bSign;
4279 if (flags & float_muladd_negate_product) {
4280 pSign ^= 1;
4281 }
4282 pInf = (aExp == 0x7ff) || (bExp == 0x7ff);
4283 pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
4284
4285 if (cExp == 0x7ff) {
4286 if (pInf && (pSign ^ cSign)) {
4287
4288 float_raise(float_flag_invalid, status);
4289 return float64_default_nan;
4290 }
4291
4292 return packFloat64(cSign ^ signflip, 0x7ff, 0);
4293 }
4294
4295 if (pInf) {
4296 return packFloat64(pSign ^ signflip, 0x7ff, 0);
4297 }
4298
4299 if (pZero) {
4300 if (cExp == 0) {
4301 if (cSig == 0) {
4302
4303 if (pSign == cSign) {
4304 zSign = pSign;
4305 } else if (status->float_rounding_mode == float_round_down) {
4306 zSign = 1;
4307 } else {
4308 zSign = 0;
4309 }
4310 return packFloat64(zSign ^ signflip, 0, 0);
4311 }
4312
4313 if (status->flush_to_zero) {
4314 float_raise(float_flag_output_denormal, status);
4315 return packFloat64(cSign ^ signflip, 0, 0);
4316 }
4317 }
4318
4319 if (flags & float_muladd_halve_result) {
4320 if (cExp == 0) {
4321 normalizeFloat64Subnormal(cSig, &cExp, &cSig);
4322 }
4323
4324
4325
4326 cExp -= 2;
4327 cSig = (cSig | 0x0010000000000000ULL) << 10;
4328 return roundAndPackFloat64(cSign ^ signflip, cExp, cSig, status);
4329 }
4330 return packFloat64(cSign ^ signflip, cExp, cSig);
4331 }
4332
4333 if (aExp == 0) {
4334 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
4335 }
4336 if (bExp == 0) {
4337 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
4338 }
4339
4340
4341
4342
4343
4344
4345
4346
4347 pExp = aExp + bExp - 0x3fe;
4348 aSig = (aSig | LIT64(0x0010000000000000))<<10;
4349 bSig = (bSig | LIT64(0x0010000000000000))<<11;
4350 mul64To128(aSig, bSig, &pSig0, &pSig1);
4351 if ((int64_t)(pSig0 << 1) >= 0) {
4352 shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
4353 pExp--;
4354 }
4355
4356 zSign = pSign ^ signflip;
4357
4358
4359
4360
4361 if (cExp == 0) {
4362 if (!cSig) {
4363
4364 shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
4365 if (flags & float_muladd_halve_result) {
4366 pExp--;
4367 }
4368 return roundAndPackFloat64(zSign, pExp - 1,
4369 pSig1, status);
4370 }
4371 normalizeFloat64Subnormal(cSig, &cExp, &cSig);
4372 }
4373
4374
4375
4376
4377 cSig0 = cSig << (126 - 64 - 52);
4378 cSig1 = 0;
4379 cSig0 |= LIT64(0x4000000000000000);
4380 expDiff = pExp - cExp;
4381
4382 if (pSign == cSign) {
4383
4384 if (expDiff > 0) {
4385
4386 shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
4387 zExp = pExp;
4388 } else if (expDiff < 0) {
4389
4390 shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
4391 zExp = cExp;
4392 } else {
4393
4394 zExp = cExp;
4395 }
4396
4397 add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4398 if ((int64_t)zSig0 < 0) {
4399 shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
4400 } else {
4401 zExp--;
4402 }
4403 shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
4404 if (flags & float_muladd_halve_result) {
4405 zExp--;
4406 }
4407 return roundAndPackFloat64(zSign, zExp, zSig1, status);
4408 } else {
4409
4410 if (expDiff > 0) {
4411 shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
4412 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4413 zExp = pExp;
4414 } else if (expDiff < 0) {
4415 shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
4416 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
4417 zExp = cExp;
4418 zSign ^= 1;
4419 } else {
4420 zExp = pExp;
4421 if (lt128(cSig0, cSig1, pSig0, pSig1)) {
4422 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4423 } else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
4424 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
4425 zSign ^= 1;
4426 } else {
4427
4428 zSign = signflip;
4429 if (status->float_rounding_mode == float_round_down) {
4430 zSign ^= 1;
4431 }
4432 return packFloat64(zSign, 0, 0);
4433 }
4434 }
4435 --zExp;
4436
4437
4438
4439 if (zSig0) {
4440 shiftcount = countLeadingZeros64(zSig0) - 1;
4441 shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1);
4442 if (zSig1) {
4443 zSig0 |= 1;
4444 }
4445 zExp -= shiftcount;
4446 } else {
4447 shiftcount = countLeadingZeros64(zSig1);
4448 if (shiftcount == 0) {
4449 zSig0 = (zSig1 >> 1) | (zSig1 & 1);
4450 zExp -= 63;
4451 } else {
4452 shiftcount--;
4453 zSig0 = zSig1 << shiftcount;
4454 zExp -= (shiftcount + 64);
4455 }
4456 }
4457 if (flags & float_muladd_halve_result) {
4458 zExp--;
4459 }
4460 return roundAndPackFloat64(zSign, zExp, zSig0, status);
4461 }
4462}
4463
4464
4465
4466
4467
4468
4469
4470float64 float64_sqrt(float64 a, float_status *status)
4471{
4472 flag aSign;
4473 int_fast16_t aExp, zExp;
4474 uint64_t aSig, zSig, doubleZSig;
4475 uint64_t rem0, rem1, term0, term1;
4476 a = float64_squash_input_denormal(a, status);
4477
4478 aSig = extractFloat64Frac( a );
4479 aExp = extractFloat64Exp( a );
4480 aSign = extractFloat64Sign( a );
4481 if ( aExp == 0x7FF ) {
4482 if (aSig) {
4483 return propagateFloat64NaN(a, a, status);
4484 }
4485 if ( ! aSign ) return a;
4486 float_raise(float_flag_invalid, status);
4487 return float64_default_nan;
4488 }
4489 if ( aSign ) {
4490 if ( ( aExp | aSig ) == 0 ) return a;
4491 float_raise(float_flag_invalid, status);
4492 return float64_default_nan;
4493 }
4494 if ( aExp == 0 ) {
4495 if ( aSig == 0 ) return float64_zero;
4496 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4497 }
4498 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
4499 aSig |= LIT64( 0x0010000000000000 );
4500 zSig = estimateSqrt32( aExp, aSig>>21 );
4501 aSig <<= 9 - ( aExp & 1 );
4502 zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
4503 if ( ( zSig & 0x1FF ) <= 5 ) {
4504 doubleZSig = zSig<<1;
4505 mul64To128( zSig, zSig, &term0, &term1 );
4506 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
4507 while ( (int64_t) rem0 < 0 ) {
4508 --zSig;
4509 doubleZSig -= 2;
4510 add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
4511 }
4512 zSig |= ( ( rem0 | rem1 ) != 0 );
4513 }
4514 return roundAndPackFloat64(0, zExp, zSig, status);
4515
4516}
4517
4518
4519
4520
4521
4522
4523float64 float64_log2(float64 a, float_status *status)
4524{
4525 flag aSign, zSign;
4526 int_fast16_t aExp;
4527 uint64_t aSig, aSig0, aSig1, zSig, i;
4528 a = float64_squash_input_denormal(a, status);
4529
4530 aSig = extractFloat64Frac( a );
4531 aExp = extractFloat64Exp( a );
4532 aSign = extractFloat64Sign( a );
4533
4534 if ( aExp == 0 ) {
4535 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
4536 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4537 }
4538 if ( aSign ) {
4539 float_raise(float_flag_invalid, status);
4540 return float64_default_nan;
4541 }
4542 if ( aExp == 0x7FF ) {
4543 if (aSig) {
4544 return propagateFloat64NaN(a, float64_zero, status);
4545 }
4546 return a;
4547 }
4548
4549 aExp -= 0x3FF;
4550 aSig |= LIT64( 0x0010000000000000 );
4551 zSign = aExp < 0;
4552 zSig = (uint64_t)aExp << 52;
4553 for (i = 1LL << 51; i > 0; i >>= 1) {
4554 mul64To128( aSig, aSig, &aSig0, &aSig1 );
4555 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
4556 if ( aSig & LIT64( 0x0020000000000000 ) ) {
4557 aSig >>= 1;
4558 zSig |= i;
4559 }
4560 }
4561
4562 if ( zSign )
4563 zSig = -zSig;
4564 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
4565}
4566
4567
4568
4569
4570
4571
4572
4573
4574int float64_eq(float64 a, float64 b, float_status *status)
4575{
4576 uint64_t av, bv;
4577 a = float64_squash_input_denormal(a, status);
4578 b = float64_squash_input_denormal(b, status);
4579
4580 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4581 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4582 ) {
4583 float_raise(float_flag_invalid, status);
4584 return 0;
4585 }
4586 av = float64_val(a);
4587 bv = float64_val(b);
4588 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4589
4590}
4591
4592
4593
4594
4595
4596
4597
4598
4599int float64_le(float64 a, float64 b, float_status *status)
4600{
4601 flag aSign, bSign;
4602 uint64_t av, bv;
4603 a = float64_squash_input_denormal(a, status);
4604 b = float64_squash_input_denormal(b, status);
4605
4606 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4607 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4608 ) {
4609 float_raise(float_flag_invalid, status);
4610 return 0;
4611 }
4612 aSign = extractFloat64Sign( a );
4613 bSign = extractFloat64Sign( b );
4614 av = float64_val(a);
4615 bv = float64_val(b);
4616 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4617 return ( av == bv ) || ( aSign ^ ( av < bv ) );
4618
4619}
4620
4621
4622
4623
4624
4625
4626
4627
4628int float64_lt(float64 a, float64 b, float_status *status)
4629{
4630 flag aSign, bSign;
4631 uint64_t av, bv;
4632
4633 a = float64_squash_input_denormal(a, status);
4634 b = float64_squash_input_denormal(b, status);
4635 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4636 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4637 ) {
4638 float_raise(float_flag_invalid, status);
4639 return 0;
4640 }
4641 aSign = extractFloat64Sign( a );
4642 bSign = extractFloat64Sign( b );
4643 av = float64_val(a);
4644 bv = float64_val(b);
4645 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
4646 return ( av != bv ) && ( aSign ^ ( av < bv ) );
4647
4648}
4649
4650
4651
4652
4653
4654
4655
4656
4657int float64_unordered(float64 a, float64 b, float_status *status)
4658{
4659 a = float64_squash_input_denormal(a, status);
4660 b = float64_squash_input_denormal(b, status);
4661
4662 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4663 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4664 ) {
4665 float_raise(float_flag_invalid, status);
4666 return 1;
4667 }
4668 return 0;
4669}
4670
4671
4672
4673
4674
4675
4676
4677
4678int float64_eq_quiet(float64 a, float64 b, float_status *status)
4679{
4680 uint64_t av, bv;
4681 a = float64_squash_input_denormal(a, status);
4682 b = float64_squash_input_denormal(b, status);
4683
4684 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4685 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4686 ) {
4687 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4688 float_raise(float_flag_invalid, status);
4689 }
4690 return 0;
4691 }
4692 av = float64_val(a);
4693 bv = float64_val(b);
4694 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4695
4696}
4697
4698
4699
4700
4701
4702
4703
4704
4705int float64_le_quiet(float64 a, float64 b, float_status *status)
4706{
4707 flag aSign, bSign;
4708 uint64_t av, bv;
4709 a = float64_squash_input_denormal(a, status);
4710 b = float64_squash_input_denormal(b, status);
4711
4712 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4713 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4714 ) {
4715 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4716 float_raise(float_flag_invalid, status);
4717 }
4718 return 0;
4719 }
4720 aSign = extractFloat64Sign( a );
4721 bSign = extractFloat64Sign( b );
4722 av = float64_val(a);
4723 bv = float64_val(b);
4724 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4725 return ( av == bv ) || ( aSign ^ ( av < bv ) );
4726
4727}
4728
4729
4730
4731
4732
4733
4734
4735
4736int float64_lt_quiet(float64 a, float64 b, float_status *status)
4737{
4738 flag aSign, bSign;
4739 uint64_t av, bv;
4740 a = float64_squash_input_denormal(a, status);
4741 b = float64_squash_input_denormal(b, status);
4742
4743 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4744 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4745 ) {
4746 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4747 float_raise(float_flag_invalid, status);
4748 }
4749 return 0;
4750 }
4751 aSign = extractFloat64Sign( a );
4752 bSign = extractFloat64Sign( b );
4753 av = float64_val(a);
4754 bv = float64_val(b);
4755 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
4756 return ( av != bv ) && ( aSign ^ ( av < bv ) );
4757
4758}
4759
4760
4761
4762
4763
4764
4765
4766
4767int float64_unordered_quiet(float64 a, float64 b, float_status *status)
4768{
4769 a = float64_squash_input_denormal(a, status);
4770 b = float64_squash_input_denormal(b, status);
4771
4772 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4773 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4774 ) {
4775 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4776 float_raise(float_flag_invalid, status);
4777 }
4778 return 1;
4779 }
4780 return 0;
4781}
4782
4783
4784
4785
4786
4787
4788
4789
4790
4791
4792
4793int32 floatx80_to_int32(floatx80 a, float_status *status)
4794{
4795 flag aSign;
4796 int32 aExp, shiftCount;
4797 uint64_t aSig;
4798
4799 aSig = extractFloatx80Frac( a );
4800 aExp = extractFloatx80Exp( a );
4801 aSign = extractFloatx80Sign( a );
4802 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
4803 shiftCount = 0x4037 - aExp;
4804 if ( shiftCount <= 0 ) shiftCount = 1;
4805 shift64RightJamming( aSig, shiftCount, &aSig );
4806 return roundAndPackInt32(aSign, aSig, status);
4807
4808}
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820int32 floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
4821{
4822 flag aSign;
4823 int32 aExp, shiftCount;
4824 uint64_t aSig, savedASig;
4825 int32_t z;
4826
4827 aSig = extractFloatx80Frac( a );
4828 aExp = extractFloatx80Exp( a );
4829 aSign = extractFloatx80Sign( a );
4830 if ( 0x401E < aExp ) {
4831 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
4832 goto invalid;
4833 }
4834 else if ( aExp < 0x3FFF ) {
4835 if (aExp || aSig) {
4836 status->float_exception_flags |= float_flag_inexact;
4837 }
4838 return 0;
4839 }
4840 shiftCount = 0x403E - aExp;
4841 savedASig = aSig;
4842 aSig >>= shiftCount;
4843 z = aSig;
4844 if ( aSign ) z = - z;
4845 if ( ( z < 0 ) ^ aSign ) {
4846 invalid:
4847 float_raise(float_flag_invalid, status);
4848 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
4849 }
4850 if ( ( aSig<<shiftCount ) != savedASig ) {
4851 status->float_exception_flags |= float_flag_inexact;
4852 }
4853 return z;
4854
4855}
4856
4857
4858
4859
4860
4861
4862
4863
4864
4865
4866
4867int64 floatx80_to_int64(floatx80 a, float_status *status)
4868{
4869 flag aSign;
4870 int32 aExp, shiftCount;
4871 uint64_t aSig, aSigExtra;
4872
4873 aSig = extractFloatx80Frac( a );
4874 aExp = extractFloatx80Exp( a );
4875 aSign = extractFloatx80Sign( a );
4876 shiftCount = 0x403E - aExp;
4877 if ( shiftCount <= 0 ) {
4878 if ( shiftCount ) {
4879 float_raise(float_flag_invalid, status);
4880 if ( ! aSign
4881 || ( ( aExp == 0x7FFF )
4882 && ( aSig != LIT64( 0x8000000000000000 ) ) )
4883 ) {
4884 return LIT64( 0x7FFFFFFFFFFFFFFF );
4885 }
4886 return (int64_t) LIT64( 0x8000000000000000 );
4887 }
4888 aSigExtra = 0;
4889 }
4890 else {
4891 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4892 }
4893 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
4894
4895}
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907int64 floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
4908{
4909 flag aSign;
4910 int32 aExp, shiftCount;
4911 uint64_t aSig;
4912 int64 z;
4913
4914 aSig = extractFloatx80Frac( a );
4915 aExp = extractFloatx80Exp( a );
4916 aSign = extractFloatx80Sign( a );
4917 shiftCount = aExp - 0x403E;
4918 if ( 0 <= shiftCount ) {
4919 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4920 if ( ( a.high != 0xC03E ) || aSig ) {
4921 float_raise(float_flag_invalid, status);
4922 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4923 return LIT64( 0x7FFFFFFFFFFFFFFF );
4924 }
4925 }
4926 return (int64_t) LIT64( 0x8000000000000000 );
4927 }
4928 else if ( aExp < 0x3FFF ) {
4929 if (aExp | aSig) {
4930 status->float_exception_flags |= float_flag_inexact;
4931 }
4932 return 0;
4933 }
4934 z = aSig>>( - shiftCount );
4935 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
4936 status->float_exception_flags |= float_flag_inexact;
4937 }
4938 if ( aSign ) z = - z;
4939 return z;
4940
4941}
4942
4943
4944
4945
4946
4947
4948
4949
4950float32 floatx80_to_float32(floatx80 a, float_status *status)
4951{
4952 flag aSign;
4953 int32 aExp;
4954 uint64_t aSig;
4955
4956 aSig = extractFloatx80Frac( a );
4957 aExp = extractFloatx80Exp( a );
4958 aSign = extractFloatx80Sign( a );
4959 if ( aExp == 0x7FFF ) {
4960 if ( (uint64_t) ( aSig<<1 ) ) {
4961 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
4962 }
4963 return packFloat32( aSign, 0xFF, 0 );
4964 }
4965 shift64RightJamming( aSig, 33, &aSig );
4966 if ( aExp || aSig ) aExp -= 0x3F81;
4967 return roundAndPackFloat32(aSign, aExp, aSig, status);
4968
4969}
4970
4971
4972
4973
4974
4975
4976
4977
4978float64 floatx80_to_float64(floatx80 a, float_status *status)
4979{
4980 flag aSign;
4981 int32 aExp;
4982 uint64_t aSig, zSig;
4983
4984 aSig = extractFloatx80Frac( a );
4985 aExp = extractFloatx80Exp( a );
4986 aSign = extractFloatx80Sign( a );
4987 if ( aExp == 0x7FFF ) {
4988 if ( (uint64_t) ( aSig<<1 ) ) {
4989 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
4990 }
4991 return packFloat64( aSign, 0x7FF, 0 );
4992 }
4993 shift64RightJamming( aSig, 1, &zSig );
4994 if ( aExp || aSig ) aExp -= 0x3C01;
4995 return roundAndPackFloat64(aSign, aExp, zSig, status);
4996
4997}
4998
4999
5000
5001
5002
5003
5004
5005
5006float128 floatx80_to_float128(floatx80 a, float_status *status)
5007{
5008 flag aSign;
5009 int_fast16_t aExp;
5010 uint64_t aSig, zSig0, zSig1;
5011
5012 aSig = extractFloatx80Frac( a );
5013 aExp = extractFloatx80Exp( a );
5014 aSign = extractFloatx80Sign( a );
5015 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5016 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
5017 }
5018 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5019 return packFloat128( aSign, aExp, zSig0, zSig1 );
5020
5021}
5022
5023
5024
5025
5026
5027
5028
5029
5030floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5031{
5032 flag aSign;
5033 int32 aExp;
5034 uint64_t lastBitMask, roundBitsMask;
5035 floatx80 z;
5036
5037 aExp = extractFloatx80Exp( a );
5038 if ( 0x403E <= aExp ) {
5039 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5040 return propagateFloatx80NaN(a, a, status);
5041 }
5042 return a;
5043 }
5044 if ( aExp < 0x3FFF ) {
5045 if ( ( aExp == 0 )
5046 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
5047 return a;
5048 }
5049 status->float_exception_flags |= float_flag_inexact;
5050 aSign = extractFloatx80Sign( a );
5051 switch (status->float_rounding_mode) {
5052 case float_round_nearest_even:
5053 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5054 ) {
5055 return
5056 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
5057 }
5058 break;
5059 case float_round_ties_away:
5060 if (aExp == 0x3FFE) {
5061 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
5062 }
5063 break;
5064 case float_round_down:
5065 return
5066 aSign ?
5067 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
5068 : packFloatx80( 0, 0, 0 );
5069 case float_round_up:
5070 return
5071 aSign ? packFloatx80( 1, 0, 0 )
5072 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
5073 }
5074 return packFloatx80( aSign, 0, 0 );
5075 }
5076 lastBitMask = 1;
5077 lastBitMask <<= 0x403E - aExp;
5078 roundBitsMask = lastBitMask - 1;
5079 z = a;
5080 switch (status->float_rounding_mode) {
5081 case float_round_nearest_even:
5082 z.low += lastBitMask>>1;
5083 if ((z.low & roundBitsMask) == 0) {
5084 z.low &= ~lastBitMask;
5085 }
5086 break;
5087 case float_round_ties_away:
5088 z.low += lastBitMask >> 1;
5089 break;
5090 case float_round_to_zero:
5091 break;
5092 case float_round_up:
5093 if (!extractFloatx80Sign(z)) {
5094 z.low += roundBitsMask;
5095 }
5096 break;
5097 case float_round_down:
5098 if (extractFloatx80Sign(z)) {
5099 z.low += roundBitsMask;
5100 }
5101 break;
5102 default:
5103 abort();
5104 }
5105 z.low &= ~ roundBitsMask;
5106 if ( z.low == 0 ) {
5107 ++z.high;
5108 z.low = LIT64( 0x8000000000000000 );
5109 }
5110 if (z.low != a.low) {
5111 status->float_exception_flags |= float_flag_inexact;
5112 }
5113 return z;
5114
5115}
5116
5117
5118
5119
5120
5121
5122
5123
5124
5125static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5126 float_status *status)
5127{
5128 int32 aExp, bExp, zExp;
5129 uint64_t aSig, bSig, zSig0, zSig1;
5130 int32 expDiff;
5131
5132 aSig = extractFloatx80Frac( a );
5133 aExp = extractFloatx80Exp( a );
5134 bSig = extractFloatx80Frac( b );
5135 bExp = extractFloatx80Exp( b );
5136 expDiff = aExp - bExp;
5137 if ( 0 < expDiff ) {
5138 if ( aExp == 0x7FFF ) {
5139 if ((uint64_t)(aSig << 1)) {
5140 return propagateFloatx80NaN(a, b, status);
5141 }
5142 return a;
5143 }
5144 if ( bExp == 0 ) --expDiff;
5145 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5146 zExp = aExp;
5147 }
5148 else if ( expDiff < 0 ) {
5149 if ( bExp == 0x7FFF ) {
5150 if ((uint64_t)(bSig << 1)) {
5151 return propagateFloatx80NaN(a, b, status);
5152 }
5153 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5154 }
5155 if ( aExp == 0 ) ++expDiff;
5156 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5157 zExp = bExp;
5158 }
5159 else {
5160 if ( aExp == 0x7FFF ) {
5161 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
5162 return propagateFloatx80NaN(a, b, status);
5163 }
5164 return a;
5165 }
5166 zSig1 = 0;
5167 zSig0 = aSig + bSig;
5168 if ( aExp == 0 ) {
5169 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5170 goto roundAndPack;
5171 }
5172 zExp = aExp;
5173 goto shiftRight1;
5174 }
5175 zSig0 = aSig + bSig;
5176 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
5177 shiftRight1:
5178 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
5179 zSig0 |= LIT64( 0x8000000000000000 );
5180 ++zExp;
5181 roundAndPack:
5182 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5183 zSign, zExp, zSig0, zSig1, status);
5184}
5185
5186
5187
5188
5189
5190
5191
5192
5193
5194static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5195 float_status *status)
5196{
5197 int32 aExp, bExp, zExp;
5198 uint64_t aSig, bSig, zSig0, zSig1;
5199 int32 expDiff;
5200 floatx80 z;
5201
5202 aSig = extractFloatx80Frac( a );
5203 aExp = extractFloatx80Exp( a );
5204 bSig = extractFloatx80Frac( b );
5205 bExp = extractFloatx80Exp( b );
5206 expDiff = aExp - bExp;
5207 if ( 0 < expDiff ) goto aExpBigger;
5208 if ( expDiff < 0 ) goto bExpBigger;
5209 if ( aExp == 0x7FFF ) {
5210 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
5211 return propagateFloatx80NaN(a, b, status);
5212 }
5213 float_raise(float_flag_invalid, status);
5214 z.low = floatx80_default_nan_low;
5215 z.high = floatx80_default_nan_high;
5216 return z;
5217 }
5218 if ( aExp == 0 ) {
5219 aExp = 1;
5220 bExp = 1;
5221 }
5222 zSig1 = 0;
5223 if ( bSig < aSig ) goto aBigger;
5224 if ( aSig < bSig ) goto bBigger;
5225 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
5226 bExpBigger:
5227 if ( bExp == 0x7FFF ) {
5228 if ((uint64_t)(bSig << 1)) {
5229 return propagateFloatx80NaN(a, b, status);
5230 }
5231 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
5232 }
5233 if ( aExp == 0 ) ++expDiff;
5234 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5235 bBigger:
5236 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5237 zExp = bExp;
5238 zSign ^= 1;
5239 goto normalizeRoundAndPack;
5240 aExpBigger:
5241 if ( aExp == 0x7FFF ) {
5242 if ((uint64_t)(aSig << 1)) {
5243 return propagateFloatx80NaN(a, b, status);
5244 }
5245 return a;
5246 }
5247 if ( bExp == 0 ) --expDiff;
5248 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5249 aBigger:
5250 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5251 zExp = aExp;
5252 normalizeRoundAndPack:
5253 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
5254 zSign, zExp, zSig0, zSig1, status);
5255}
5256
5257
5258
5259
5260
5261
5262
5263floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
5264{
5265 flag aSign, bSign;
5266
5267 aSign = extractFloatx80Sign( a );
5268 bSign = extractFloatx80Sign( b );
5269 if ( aSign == bSign ) {
5270 return addFloatx80Sigs(a, b, aSign, status);
5271 }
5272 else {
5273 return subFloatx80Sigs(a, b, aSign, status);
5274 }
5275
5276}
5277
5278
5279
5280
5281
5282
5283
5284floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
5285{
5286 flag aSign, bSign;
5287
5288 aSign = extractFloatx80Sign( a );
5289 bSign = extractFloatx80Sign( b );
5290 if ( aSign == bSign ) {
5291 return subFloatx80Sigs(a, b, aSign, status);
5292 }
5293 else {
5294 return addFloatx80Sigs(a, b, aSign, status);
5295 }
5296
5297}
5298
5299
5300
5301
5302
5303
5304
5305floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
5306{
5307 flag aSign, bSign, zSign;
5308 int32 aExp, bExp, zExp;
5309 uint64_t aSig, bSig, zSig0, zSig1;
5310 floatx80 z;
5311
5312 aSig = extractFloatx80Frac( a );
5313 aExp = extractFloatx80Exp( a );
5314 aSign = extractFloatx80Sign( a );
5315 bSig = extractFloatx80Frac( b );
5316 bExp = extractFloatx80Exp( b );
5317 bSign = extractFloatx80Sign( b );
5318 zSign = aSign ^ bSign;
5319 if ( aExp == 0x7FFF ) {
5320 if ( (uint64_t) ( aSig<<1 )
5321 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
5322 return propagateFloatx80NaN(a, b, status);
5323 }
5324 if ( ( bExp | bSig ) == 0 ) goto invalid;
5325 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5326 }
5327 if ( bExp == 0x7FFF ) {
5328 if ((uint64_t)(bSig << 1)) {
5329 return propagateFloatx80NaN(a, b, status);
5330 }
5331 if ( ( aExp | aSig ) == 0 ) {
5332 invalid:
5333 float_raise(float_flag_invalid, status);
5334 z.low = floatx80_default_nan_low;
5335 z.high = floatx80_default_nan_high;
5336 return z;
5337 }
5338 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5339 }
5340 if ( aExp == 0 ) {
5341 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5342 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5343 }
5344 if ( bExp == 0 ) {
5345 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5346 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5347 }
5348 zExp = aExp + bExp - 0x3FFE;
5349 mul64To128( aSig, bSig, &zSig0, &zSig1 );
5350 if ( 0 < (int64_t) zSig0 ) {
5351 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5352 --zExp;
5353 }
5354 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5355 zSign, zExp, zSig0, zSig1, status);
5356}
5357
5358
5359
5360
5361
5362
5363
5364floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
5365{
5366 flag aSign, bSign, zSign;
5367 int32 aExp, bExp, zExp;
5368 uint64_t aSig, bSig, zSig0, zSig1;
5369 uint64_t rem0, rem1, rem2, term0, term1, term2;
5370 floatx80 z;
5371
5372 aSig = extractFloatx80Frac( a );
5373 aExp = extractFloatx80Exp( a );
5374 aSign = extractFloatx80Sign( a );
5375 bSig = extractFloatx80Frac( b );
5376 bExp = extractFloatx80Exp( b );
5377 bSign = extractFloatx80Sign( b );
5378 zSign = aSign ^ bSign;
5379 if ( aExp == 0x7FFF ) {
5380 if ((uint64_t)(aSig << 1)) {
5381 return propagateFloatx80NaN(a, b, status);
5382 }
5383 if ( bExp == 0x7FFF ) {
5384 if ((uint64_t)(bSig << 1)) {
5385 return propagateFloatx80NaN(a, b, status);
5386 }
5387 goto invalid;
5388 }
5389 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5390 }
5391 if ( bExp == 0x7FFF ) {
5392 if ((uint64_t)(bSig << 1)) {
5393 return propagateFloatx80NaN(a, b, status);
5394 }
5395 return packFloatx80( zSign, 0, 0 );
5396 }
5397 if ( bExp == 0 ) {
5398 if ( bSig == 0 ) {
5399 if ( ( aExp | aSig ) == 0 ) {
5400 invalid:
5401 float_raise(float_flag_invalid, status);
5402 z.low = floatx80_default_nan_low;
5403 z.high = floatx80_default_nan_high;
5404 return z;
5405 }
5406 float_raise(float_flag_divbyzero, status);
5407 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5408 }
5409 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5410 }
5411 if ( aExp == 0 ) {
5412 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5413 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5414 }
5415 zExp = aExp - bExp + 0x3FFE;
5416 rem1 = 0;
5417 if ( bSig <= aSig ) {
5418 shift128Right( aSig, 0, 1, &aSig, &rem1 );
5419 ++zExp;
5420 }
5421 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
5422 mul64To128( bSig, zSig0, &term0, &term1 );
5423 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
5424 while ( (int64_t) rem0 < 0 ) {
5425 --zSig0;
5426 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
5427 }
5428 zSig1 = estimateDiv128To64( rem1, 0, bSig );
5429 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
5430 mul64To128( bSig, zSig1, &term1, &term2 );
5431 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5432 while ( (int64_t) rem1 < 0 ) {
5433 --zSig1;
5434 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
5435 }
5436 zSig1 |= ( ( rem1 | rem2 ) != 0 );
5437 }
5438 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5439 zSign, zExp, zSig0, zSig1, status);
5440}
5441
5442
5443
5444
5445
5446
5447
5448floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
5449{
5450 flag aSign, zSign;
5451 int32 aExp, bExp, expDiff;
5452 uint64_t aSig0, aSig1, bSig;
5453 uint64_t q, term0, term1, alternateASig0, alternateASig1;
5454 floatx80 z;
5455
5456 aSig0 = extractFloatx80Frac( a );
5457 aExp = extractFloatx80Exp( a );
5458 aSign = extractFloatx80Sign( a );
5459 bSig = extractFloatx80Frac( b );
5460 bExp = extractFloatx80Exp( b );
5461 if ( aExp == 0x7FFF ) {
5462 if ( (uint64_t) ( aSig0<<1 )
5463 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
5464 return propagateFloatx80NaN(a, b, status);
5465 }
5466 goto invalid;
5467 }
5468 if ( bExp == 0x7FFF ) {
5469 if ((uint64_t)(bSig << 1)) {
5470 return propagateFloatx80NaN(a, b, status);
5471 }
5472 return a;
5473 }
5474 if ( bExp == 0 ) {
5475 if ( bSig == 0 ) {
5476 invalid:
5477 float_raise(float_flag_invalid, status);
5478 z.low = floatx80_default_nan_low;
5479 z.high = floatx80_default_nan_high;
5480 return z;
5481 }
5482 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5483 }
5484 if ( aExp == 0 ) {
5485 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
5486 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5487 }
5488 bSig |= LIT64( 0x8000000000000000 );
5489 zSign = aSign;
5490 expDiff = aExp - bExp;
5491 aSig1 = 0;
5492 if ( expDiff < 0 ) {
5493 if ( expDiff < -1 ) return a;
5494 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5495 expDiff = 0;
5496 }
5497 q = ( bSig <= aSig0 );
5498 if ( q ) aSig0 -= bSig;
5499 expDiff -= 64;
5500 while ( 0 < expDiff ) {
5501 q = estimateDiv128To64( aSig0, aSig1, bSig );
5502 q = ( 2 < q ) ? q - 2 : 0;
5503 mul64To128( bSig, q, &term0, &term1 );
5504 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5505 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5506 expDiff -= 62;
5507 }
5508 expDiff += 64;
5509 if ( 0 < expDiff ) {
5510 q = estimateDiv128To64( aSig0, aSig1, bSig );
5511 q = ( 2 < q ) ? q - 2 : 0;
5512 q >>= 64 - expDiff;
5513 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5514 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5515 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5516 while ( le128( term0, term1, aSig0, aSig1 ) ) {
5517 ++q;
5518 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5519 }
5520 }
5521 else {
5522 term1 = 0;
5523 term0 = bSig;
5524 }
5525 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
5526 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
5527 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
5528 && ( q & 1 ) )
5529 ) {
5530 aSig0 = alternateASig0;
5531 aSig1 = alternateASig1;
5532 zSign = ! zSign;
5533 }
5534 return
5535 normalizeRoundAndPackFloatx80(
5536 80, zSign, bExp + expDiff, aSig0, aSig1, status);
5537
5538}
5539
5540
5541
5542
5543
5544
5545
5546floatx80 floatx80_sqrt(floatx80 a, float_status *status)
5547{
5548 flag aSign;
5549 int32 aExp, zExp;
5550 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5551 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5552 floatx80 z;
5553
5554 aSig0 = extractFloatx80Frac( a );
5555 aExp = extractFloatx80Exp( a );
5556 aSign = extractFloatx80Sign( a );
5557 if ( aExp == 0x7FFF ) {
5558 if ((uint64_t)(aSig0 << 1)) {
5559 return propagateFloatx80NaN(a, a, status);
5560 }
5561 if ( ! aSign ) return a;
5562 goto invalid;
5563 }
5564 if ( aSign ) {
5565 if ( ( aExp | aSig0 ) == 0 ) return a;
5566 invalid:
5567 float_raise(float_flag_invalid, status);
5568 z.low = floatx80_default_nan_low;
5569 z.high = floatx80_default_nan_high;
5570 return z;
5571 }
5572 if ( aExp == 0 ) {
5573 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5574 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5575 }
5576 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5577 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5578 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5579 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5580 doubleZSig0 = zSig0<<1;
5581 mul64To128( zSig0, zSig0, &term0, &term1 );
5582 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5583 while ( (int64_t) rem0 < 0 ) {
5584 --zSig0;
5585 doubleZSig0 -= 2;
5586 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5587 }
5588 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5589 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5590 if ( zSig1 == 0 ) zSig1 = 1;
5591 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5592 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5593 mul64To128( zSig1, zSig1, &term2, &term3 );
5594 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5595 while ( (int64_t) rem1 < 0 ) {
5596 --zSig1;
5597 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5598 term3 |= 1;
5599 term2 |= doubleZSig0;
5600 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5601 }
5602 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5603 }
5604 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5605 zSig0 |= doubleZSig0;
5606 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5607 0, zExp, zSig0, zSig1, status);
5608}
5609
5610
5611
5612
5613
5614
5615
5616
5617int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
5618{
5619
5620 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5621 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5622 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5623 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5624 ) {
5625 float_raise(float_flag_invalid, status);
5626 return 0;
5627 }
5628 return
5629 ( a.low == b.low )
5630 && ( ( a.high == b.high )
5631 || ( ( a.low == 0 )
5632 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5633 );
5634
5635}
5636
5637
5638
5639
5640
5641
5642
5643
5644
5645int floatx80_le(floatx80 a, floatx80 b, float_status *status)
5646{
5647 flag aSign, bSign;
5648
5649 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5650 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5651 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5652 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5653 ) {
5654 float_raise(float_flag_invalid, status);
5655 return 0;
5656 }
5657 aSign = extractFloatx80Sign( a );
5658 bSign = extractFloatx80Sign( b );
5659 if ( aSign != bSign ) {
5660 return
5661 aSign
5662 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5663 == 0 );
5664 }
5665 return
5666 aSign ? le128( b.high, b.low, a.high, a.low )
5667 : le128( a.high, a.low, b.high, b.low );
5668
5669}
5670
5671
5672
5673
5674
5675
5676
5677
5678int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
5679{
5680 flag aSign, bSign;
5681
5682 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5683 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5684 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5685 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5686 ) {
5687 float_raise(float_flag_invalid, status);
5688 return 0;
5689 }
5690 aSign = extractFloatx80Sign( a );
5691 bSign = extractFloatx80Sign( b );
5692 if ( aSign != bSign ) {
5693 return
5694 aSign
5695 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5696 != 0 );
5697 }
5698 return
5699 aSign ? lt128( b.high, b.low, a.high, a.low )
5700 : lt128( a.high, a.low, b.high, b.low );
5701
5702}
5703
5704
5705
5706
5707
5708
5709
5710int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
5711{
5712 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5713 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5714 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5715 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5716 ) {
5717 float_raise(float_flag_invalid, status);
5718 return 1;
5719 }
5720 return 0;
5721}
5722
5723
5724
5725
5726
5727
5728
5729
5730int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
5731{
5732
5733 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5734 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5735 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5736 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5737 ) {
5738 if ( floatx80_is_signaling_nan( a )
5739 || floatx80_is_signaling_nan( b ) ) {
5740 float_raise(float_flag_invalid, status);
5741 }
5742 return 0;
5743 }
5744 return
5745 ( a.low == b.low )
5746 && ( ( a.high == b.high )
5747 || ( ( a.low == 0 )
5748 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5749 );
5750
5751}
5752
5753
5754
5755
5756
5757
5758
5759
5760int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
5761{
5762 flag aSign, bSign;
5763
5764 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5765 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5766 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5767 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5768 ) {
5769 if ( floatx80_is_signaling_nan( a )
5770 || floatx80_is_signaling_nan( b ) ) {
5771 float_raise(float_flag_invalid, status);
5772 }
5773 return 0;
5774 }
5775 aSign = extractFloatx80Sign( a );
5776 bSign = extractFloatx80Sign( b );
5777 if ( aSign != bSign ) {
5778 return
5779 aSign
5780 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5781 == 0 );
5782 }
5783 return
5784 aSign ? le128( b.high, b.low, a.high, a.low )
5785 : le128( a.high, a.low, b.high, b.low );
5786
5787}
5788
5789
5790
5791
5792
5793
5794
5795
5796int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
5797{
5798 flag aSign, bSign;
5799
5800 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5801 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5802 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5803 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5804 ) {
5805 if ( floatx80_is_signaling_nan( a )
5806 || floatx80_is_signaling_nan( b ) ) {
5807 float_raise(float_flag_invalid, status);
5808 }
5809 return 0;
5810 }
5811 aSign = extractFloatx80Sign( a );
5812 bSign = extractFloatx80Sign( b );
5813 if ( aSign != bSign ) {
5814 return
5815 aSign
5816 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5817 != 0 );
5818 }
5819 return
5820 aSign ? lt128( b.high, b.low, a.high, a.low )
5821 : lt128( a.high, a.low, b.high, b.low );
5822
5823}
5824
5825
5826
5827
5828
5829
5830
5831int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
5832{
5833 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5834 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5835 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5836 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5837 ) {
5838 if ( floatx80_is_signaling_nan( a )
5839 || floatx80_is_signaling_nan( b ) ) {
5840 float_raise(float_flag_invalid, status);
5841 }
5842 return 1;
5843 }
5844 return 0;
5845}
5846
5847
5848
5849
5850
5851
5852
5853
5854
5855
5856
5857int32 float128_to_int32(float128 a, float_status *status)
5858{
5859 flag aSign;
5860 int32 aExp, shiftCount;
5861 uint64_t aSig0, aSig1;
5862
5863 aSig1 = extractFloat128Frac1( a );
5864 aSig0 = extractFloat128Frac0( a );
5865 aExp = extractFloat128Exp( a );
5866 aSign = extractFloat128Sign( a );
5867 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5868 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5869 aSig0 |= ( aSig1 != 0 );
5870 shiftCount = 0x4028 - aExp;
5871 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
5872 return roundAndPackInt32(aSign, aSig0, status);
5873
5874}
5875
5876
5877
5878
5879
5880
5881
5882
5883
5884
5885
5886int32 float128_to_int32_round_to_zero(float128 a, float_status *status)
5887{
5888 flag aSign;
5889 int32 aExp, shiftCount;
5890 uint64_t aSig0, aSig1, savedASig;
5891 int32_t z;
5892
5893 aSig1 = extractFloat128Frac1( a );
5894 aSig0 = extractFloat128Frac0( a );
5895 aExp = extractFloat128Exp( a );
5896 aSign = extractFloat128Sign( a );
5897 aSig0 |= ( aSig1 != 0 );
5898 if ( 0x401E < aExp ) {
5899 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5900 goto invalid;
5901 }
5902 else if ( aExp < 0x3FFF ) {
5903 if (aExp || aSig0) {
5904 status->float_exception_flags |= float_flag_inexact;
5905 }
5906 return 0;
5907 }
5908 aSig0 |= LIT64( 0x0001000000000000 );
5909 shiftCount = 0x402F - aExp;
5910 savedASig = aSig0;
5911 aSig0 >>= shiftCount;
5912 z = aSig0;
5913 if ( aSign ) z = - z;
5914 if ( ( z < 0 ) ^ aSign ) {
5915 invalid:
5916 float_raise(float_flag_invalid, status);
5917 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5918 }
5919 if ( ( aSig0<<shiftCount ) != savedASig ) {
5920 status->float_exception_flags |= float_flag_inexact;
5921 }
5922 return z;
5923
5924}
5925
5926
5927
5928
5929
5930
5931
5932
5933
5934
5935
5936int64 float128_to_int64(float128 a, float_status *status)
5937{
5938 flag aSign;
5939 int32 aExp, shiftCount;
5940 uint64_t aSig0, aSig1;
5941
5942 aSig1 = extractFloat128Frac1( a );
5943 aSig0 = extractFloat128Frac0( a );
5944 aExp = extractFloat128Exp( a );
5945 aSign = extractFloat128Sign( a );
5946 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5947 shiftCount = 0x402F - aExp;
5948 if ( shiftCount <= 0 ) {
5949 if ( 0x403E < aExp ) {
5950 float_raise(float_flag_invalid, status);
5951 if ( ! aSign
5952 || ( ( aExp == 0x7FFF )
5953 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5954 )
5955 ) {
5956 return LIT64( 0x7FFFFFFFFFFFFFFF );
5957 }
5958 return (int64_t) LIT64( 0x8000000000000000 );
5959 }
5960 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5961 }
5962 else {
5963 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5964 }
5965 return roundAndPackInt64(aSign, aSig0, aSig1, status);
5966
5967}
5968
5969
5970
5971
5972
5973
5974
5975
5976
5977
5978
5979int64 float128_to_int64_round_to_zero(float128 a, float_status *status)
5980{
5981 flag aSign;
5982 int32 aExp, shiftCount;
5983 uint64_t aSig0, aSig1;
5984 int64 z;
5985
5986 aSig1 = extractFloat128Frac1( a );
5987 aSig0 = extractFloat128Frac0( a );
5988 aExp = extractFloat128Exp( a );
5989 aSign = extractFloat128Sign( a );
5990 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5991 shiftCount = aExp - 0x402F;
5992 if ( 0 < shiftCount ) {
5993 if ( 0x403E <= aExp ) {
5994 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5995 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
5996 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
5997 if (aSig1) {
5998 status->float_exception_flags |= float_flag_inexact;
5999 }
6000 }
6001 else {
6002 float_raise(float_flag_invalid, status);
6003 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
6004 return LIT64( 0x7FFFFFFFFFFFFFFF );
6005 }
6006 }
6007 return (int64_t) LIT64( 0x8000000000000000 );
6008 }
6009 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
6010 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
6011 status->float_exception_flags |= float_flag_inexact;
6012 }
6013 }
6014 else {
6015 if ( aExp < 0x3FFF ) {
6016 if ( aExp | aSig0 | aSig1 ) {
6017 status->float_exception_flags |= float_flag_inexact;
6018 }
6019 return 0;
6020 }
6021 z = aSig0>>( - shiftCount );
6022 if ( aSig1
6023 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
6024 status->float_exception_flags |= float_flag_inexact;
6025 }
6026 }
6027 if ( aSign ) z = - z;
6028 return z;
6029
6030}
6031
6032
6033
6034
6035
6036
6037
6038
6039float32 float128_to_float32(float128 a, float_status *status)
6040{
6041 flag aSign;
6042 int32 aExp;
6043 uint64_t aSig0, aSig1;
6044 uint32_t zSig;
6045
6046 aSig1 = extractFloat128Frac1( a );
6047 aSig0 = extractFloat128Frac0( a );
6048 aExp = extractFloat128Exp( a );
6049 aSign = extractFloat128Sign( a );
6050 if ( aExp == 0x7FFF ) {
6051 if ( aSig0 | aSig1 ) {
6052 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
6053 }
6054 return packFloat32( aSign, 0xFF, 0 );
6055 }
6056 aSig0 |= ( aSig1 != 0 );
6057 shift64RightJamming( aSig0, 18, &aSig0 );
6058 zSig = aSig0;
6059 if ( aExp || zSig ) {
6060 zSig |= 0x40000000;
6061 aExp -= 0x3F81;
6062 }
6063 return roundAndPackFloat32(aSign, aExp, zSig, status);
6064
6065}
6066
6067
6068
6069
6070
6071
6072
6073
6074float64 float128_to_float64(float128 a, float_status *status)
6075{
6076 flag aSign;
6077 int32 aExp;
6078 uint64_t aSig0, aSig1;
6079
6080 aSig1 = extractFloat128Frac1( a );
6081 aSig0 = extractFloat128Frac0( a );
6082 aExp = extractFloat128Exp( a );
6083 aSign = extractFloat128Sign( a );
6084 if ( aExp == 0x7FFF ) {
6085 if ( aSig0 | aSig1 ) {
6086 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
6087 }
6088 return packFloat64( aSign, 0x7FF, 0 );
6089 }
6090 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6091 aSig0 |= ( aSig1 != 0 );
6092 if ( aExp || aSig0 ) {
6093 aSig0 |= LIT64( 0x4000000000000000 );
6094 aExp -= 0x3C01;
6095 }
6096 return roundAndPackFloat64(aSign, aExp, aSig0, status);
6097
6098}
6099
6100
6101
6102
6103
6104
6105
6106
6107floatx80 float128_to_floatx80(float128 a, float_status *status)
6108{
6109 flag aSign;
6110 int32 aExp;
6111 uint64_t aSig0, aSig1;
6112
6113 aSig1 = extractFloat128Frac1( a );
6114 aSig0 = extractFloat128Frac0( a );
6115 aExp = extractFloat128Exp( a );
6116 aSign = extractFloat128Sign( a );
6117 if ( aExp == 0x7FFF ) {
6118 if ( aSig0 | aSig1 ) {
6119 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
6120 }
6121 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
6122 }
6123 if ( aExp == 0 ) {
6124 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6125 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6126 }
6127 else {
6128 aSig0 |= LIT64( 0x0001000000000000 );
6129 }
6130 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6131 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6132
6133}
6134
6135
6136
6137
6138
6139
6140
6141
6142float128 float128_round_to_int(float128 a, float_status *status)
6143{
6144 flag aSign;
6145 int32 aExp;
6146 uint64_t lastBitMask, roundBitsMask;
6147 float128 z;
6148
6149 aExp = extractFloat128Exp( a );
6150 if ( 0x402F <= aExp ) {
6151 if ( 0x406F <= aExp ) {
6152 if ( ( aExp == 0x7FFF )
6153 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6154 ) {
6155 return propagateFloat128NaN(a, a, status);
6156 }
6157 return a;
6158 }
6159 lastBitMask = 1;
6160 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6161 roundBitsMask = lastBitMask - 1;
6162 z = a;
6163 switch (status->float_rounding_mode) {
6164 case float_round_nearest_even:
6165 if ( lastBitMask ) {
6166 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6167 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6168 }
6169 else {
6170 if ( (int64_t) z.low < 0 ) {
6171 ++z.high;
6172 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
6173 }
6174 }
6175 break;
6176 case float_round_ties_away:
6177 if (lastBitMask) {
6178 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6179 } else {
6180 if ((int64_t) z.low < 0) {
6181 ++z.high;
6182 }
6183 }
6184 break;
6185 case float_round_to_zero:
6186 break;
6187 case float_round_up:
6188 if (!extractFloat128Sign(z)) {
6189 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6190 }
6191 break;
6192 case float_round_down:
6193 if (extractFloat128Sign(z)) {
6194 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6195 }
6196 break;
6197 default:
6198 abort();
6199 }
6200 z.low &= ~ roundBitsMask;
6201 }
6202 else {
6203 if ( aExp < 0x3FFF ) {
6204 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
6205 status->float_exception_flags |= float_flag_inexact;
6206 aSign = extractFloat128Sign( a );
6207 switch (status->float_rounding_mode) {
6208 case float_round_nearest_even:
6209 if ( ( aExp == 0x3FFE )
6210 && ( extractFloat128Frac0( a )
6211 | extractFloat128Frac1( a ) )
6212 ) {
6213 return packFloat128( aSign, 0x3FFF, 0, 0 );
6214 }
6215 break;
6216 case float_round_ties_away:
6217 if (aExp == 0x3FFE) {
6218 return packFloat128(aSign, 0x3FFF, 0, 0);
6219 }
6220 break;
6221 case float_round_down:
6222 return
6223 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6224 : packFloat128( 0, 0, 0, 0 );
6225 case float_round_up:
6226 return
6227 aSign ? packFloat128( 1, 0, 0, 0 )
6228 : packFloat128( 0, 0x3FFF, 0, 0 );
6229 }
6230 return packFloat128( aSign, 0, 0, 0 );
6231 }
6232 lastBitMask = 1;
6233 lastBitMask <<= 0x402F - aExp;
6234 roundBitsMask = lastBitMask - 1;
6235 z.low = 0;
6236 z.high = a.high;
6237 switch (status->float_rounding_mode) {
6238 case float_round_nearest_even:
6239 z.high += lastBitMask>>1;
6240 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6241 z.high &= ~ lastBitMask;
6242 }
6243 break;
6244 case float_round_ties_away:
6245 z.high += lastBitMask>>1;
6246 break;
6247 case float_round_to_zero:
6248 break;
6249 case float_round_up:
6250 if (!extractFloat128Sign(z)) {
6251 z.high |= ( a.low != 0 );
6252 z.high += roundBitsMask;
6253 }
6254 break;
6255 case float_round_down:
6256 if (extractFloat128Sign(z)) {
6257 z.high |= (a.low != 0);
6258 z.high += roundBitsMask;
6259 }
6260 break;
6261 default:
6262 abort();
6263 }
6264 z.high &= ~ roundBitsMask;
6265 }
6266 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
6267 status->float_exception_flags |= float_flag_inexact;
6268 }
6269 return z;
6270
6271}
6272
6273
6274
6275
6276
6277
6278
6279
6280
6281static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
6282 float_status *status)
6283{
6284 int32 aExp, bExp, zExp;
6285 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6286 int32 expDiff;
6287
6288 aSig1 = extractFloat128Frac1( a );
6289 aSig0 = extractFloat128Frac0( a );
6290 aExp = extractFloat128Exp( a );
6291 bSig1 = extractFloat128Frac1( b );
6292 bSig0 = extractFloat128Frac0( b );
6293 bExp = extractFloat128Exp( b );
6294 expDiff = aExp - bExp;
6295 if ( 0 < expDiff ) {
6296 if ( aExp == 0x7FFF ) {
6297 if (aSig0 | aSig1) {
6298 return propagateFloat128NaN(a, b, status);
6299 }
6300 return a;
6301 }
6302 if ( bExp == 0 ) {
6303 --expDiff;
6304 }
6305 else {
6306 bSig0 |= LIT64( 0x0001000000000000 );
6307 }
6308 shift128ExtraRightJamming(
6309 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6310 zExp = aExp;
6311 }
6312 else if ( expDiff < 0 ) {
6313 if ( bExp == 0x7FFF ) {
6314 if (bSig0 | bSig1) {
6315 return propagateFloat128NaN(a, b, status);
6316 }
6317 return packFloat128( zSign, 0x7FFF, 0, 0 );
6318 }
6319 if ( aExp == 0 ) {
6320 ++expDiff;
6321 }
6322 else {
6323 aSig0 |= LIT64( 0x0001000000000000 );
6324 }
6325 shift128ExtraRightJamming(
6326 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6327 zExp = bExp;
6328 }
6329 else {
6330 if ( aExp == 0x7FFF ) {
6331 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
6332 return propagateFloat128NaN(a, b, status);
6333 }
6334 return a;
6335 }
6336 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6337 if ( aExp == 0 ) {
6338 if (status->flush_to_zero) {
6339 if (zSig0 | zSig1) {
6340 float_raise(float_flag_output_denormal, status);
6341 }
6342 return packFloat128(zSign, 0, 0, 0);
6343 }
6344 return packFloat128( zSign, 0, zSig0, zSig1 );
6345 }
6346 zSig2 = 0;
6347 zSig0 |= LIT64( 0x0002000000000000 );
6348 zExp = aExp;
6349 goto shiftRight1;
6350 }
6351 aSig0 |= LIT64( 0x0001000000000000 );
6352 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6353 --zExp;
6354 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
6355 ++zExp;
6356 shiftRight1:
6357 shift128ExtraRightJamming(
6358 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6359 roundAndPack:
6360 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6361
6362}
6363
6364
6365
6366
6367
6368
6369
6370
6371
6372static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
6373 float_status *status)
6374{
6375 int32 aExp, bExp, zExp;
6376 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
6377 int32 expDiff;
6378 float128 z;
6379
6380 aSig1 = extractFloat128Frac1( a );
6381 aSig0 = extractFloat128Frac0( a );
6382 aExp = extractFloat128Exp( a );
6383 bSig1 = extractFloat128Frac1( b );
6384 bSig0 = extractFloat128Frac0( b );
6385 bExp = extractFloat128Exp( b );
6386 expDiff = aExp - bExp;
6387 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6388 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
6389 if ( 0 < expDiff ) goto aExpBigger;
6390 if ( expDiff < 0 ) goto bExpBigger;
6391 if ( aExp == 0x7FFF ) {
6392 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
6393 return propagateFloat128NaN(a, b, status);
6394 }
6395 float_raise(float_flag_invalid, status);
6396 z.low = float128_default_nan_low;
6397 z.high = float128_default_nan_high;
6398 return z;
6399 }
6400 if ( aExp == 0 ) {
6401 aExp = 1;
6402 bExp = 1;
6403 }
6404 if ( bSig0 < aSig0 ) goto aBigger;
6405 if ( aSig0 < bSig0 ) goto bBigger;
6406 if ( bSig1 < aSig1 ) goto aBigger;
6407 if ( aSig1 < bSig1 ) goto bBigger;
6408 return packFloat128(status->float_rounding_mode == float_round_down,
6409 0, 0, 0);
6410 bExpBigger:
6411 if ( bExp == 0x7FFF ) {
6412 if (bSig0 | bSig1) {
6413 return propagateFloat128NaN(a, b, status);
6414 }
6415 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
6416 }
6417 if ( aExp == 0 ) {
6418 ++expDiff;
6419 }
6420 else {
6421 aSig0 |= LIT64( 0x4000000000000000 );
6422 }
6423 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6424 bSig0 |= LIT64( 0x4000000000000000 );
6425 bBigger:
6426 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
6427 zExp = bExp;
6428 zSign ^= 1;
6429 goto normalizeRoundAndPack;
6430 aExpBigger:
6431 if ( aExp == 0x7FFF ) {
6432 if (aSig0 | aSig1) {
6433 return propagateFloat128NaN(a, b, status);
6434 }
6435 return a;
6436 }
6437 if ( bExp == 0 ) {
6438 --expDiff;
6439 }
6440 else {
6441 bSig0 |= LIT64( 0x4000000000000000 );
6442 }
6443 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
6444 aSig0 |= LIT64( 0x4000000000000000 );
6445 aBigger:
6446 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6447 zExp = aExp;
6448 normalizeRoundAndPack:
6449 --zExp;
6450 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
6451 status);
6452
6453}
6454
6455
6456
6457
6458
6459
6460
6461float128 float128_add(float128 a, float128 b, float_status *status)
6462{
6463 flag aSign, bSign;
6464
6465 aSign = extractFloat128Sign( a );
6466 bSign = extractFloat128Sign( b );
6467 if ( aSign == bSign ) {
6468 return addFloat128Sigs(a, b, aSign, status);
6469 }
6470 else {
6471 return subFloat128Sigs(a, b, aSign, status);
6472 }
6473
6474}
6475
6476
6477
6478
6479
6480
6481
6482float128 float128_sub(float128 a, float128 b, float_status *status)
6483{
6484 flag aSign, bSign;
6485
6486 aSign = extractFloat128Sign( a );
6487 bSign = extractFloat128Sign( b );
6488 if ( aSign == bSign ) {
6489 return subFloat128Sigs(a, b, aSign, status);
6490 }
6491 else {
6492 return addFloat128Sigs(a, b, aSign, status);
6493 }
6494
6495}
6496
6497
6498
6499
6500
6501
6502
6503float128 float128_mul(float128 a, float128 b, float_status *status)
6504{
6505 flag aSign, bSign, zSign;
6506 int32 aExp, bExp, zExp;
6507 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
6508 float128 z;
6509
6510 aSig1 = extractFloat128Frac1( a );
6511 aSig0 = extractFloat128Frac0( a );
6512 aExp = extractFloat128Exp( a );
6513 aSign = extractFloat128Sign( a );
6514 bSig1 = extractFloat128Frac1( b );
6515 bSig0 = extractFloat128Frac0( b );
6516 bExp = extractFloat128Exp( b );
6517 bSign = extractFloat128Sign( b );
6518 zSign = aSign ^ bSign;
6519 if ( aExp == 0x7FFF ) {
6520 if ( ( aSig0 | aSig1 )
6521 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6522 return propagateFloat128NaN(a, b, status);
6523 }
6524 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6525 return packFloat128( zSign, 0x7FFF, 0, 0 );
6526 }
6527 if ( bExp == 0x7FFF ) {
6528 if (bSig0 | bSig1) {
6529 return propagateFloat128NaN(a, b, status);
6530 }
6531 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6532 invalid:
6533 float_raise(float_flag_invalid, status);
6534 z.low = float128_default_nan_low;
6535 z.high = float128_default_nan_high;
6536 return z;
6537 }
6538 return packFloat128( zSign, 0x7FFF, 0, 0 );
6539 }
6540 if ( aExp == 0 ) {
6541 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6542 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6543 }
6544 if ( bExp == 0 ) {
6545 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6546 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6547 }
6548 zExp = aExp + bExp - 0x4000;
6549 aSig0 |= LIT64( 0x0001000000000000 );
6550 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6551 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6552 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6553 zSig2 |= ( zSig3 != 0 );
6554 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6555 shift128ExtraRightJamming(
6556 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6557 ++zExp;
6558 }
6559 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6560
6561}
6562
6563
6564
6565
6566
6567
6568
6569float128 float128_div(float128 a, float128 b, float_status *status)
6570{
6571 flag aSign, bSign, zSign;
6572 int32 aExp, bExp, zExp;
6573 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6574 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6575 float128 z;
6576
6577 aSig1 = extractFloat128Frac1( a );
6578 aSig0 = extractFloat128Frac0( a );
6579 aExp = extractFloat128Exp( a );
6580 aSign = extractFloat128Sign( a );
6581 bSig1 = extractFloat128Frac1( b );
6582 bSig0 = extractFloat128Frac0( b );
6583 bExp = extractFloat128Exp( b );
6584 bSign = extractFloat128Sign( b );
6585 zSign = aSign ^ bSign;
6586 if ( aExp == 0x7FFF ) {
6587 if (aSig0 | aSig1) {
6588 return propagateFloat128NaN(a, b, status);
6589 }
6590 if ( bExp == 0x7FFF ) {
6591 if (bSig0 | bSig1) {
6592 return propagateFloat128NaN(a, b, status);
6593 }
6594 goto invalid;
6595 }
6596 return packFloat128( zSign, 0x7FFF, 0, 0 );
6597 }
6598 if ( bExp == 0x7FFF ) {
6599 if (bSig0 | bSig1) {
6600 return propagateFloat128NaN(a, b, status);
6601 }
6602 return packFloat128( zSign, 0, 0, 0 );
6603 }
6604 if ( bExp == 0 ) {
6605 if ( ( bSig0 | bSig1 ) == 0 ) {
6606 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6607 invalid:
6608 float_raise(float_flag_invalid, status);
6609 z.low = float128_default_nan_low;
6610 z.high = float128_default_nan_high;
6611 return z;
6612 }
6613 float_raise(float_flag_divbyzero, status);
6614 return packFloat128( zSign, 0x7FFF, 0, 0 );
6615 }
6616 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6617 }
6618 if ( aExp == 0 ) {
6619 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6620 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6621 }
6622 zExp = aExp - bExp + 0x3FFD;
6623 shortShift128Left(
6624 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6625 shortShift128Left(
6626 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6627 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6628 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6629 ++zExp;
6630 }
6631 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6632 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6633 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
6634 while ( (int64_t) rem0 < 0 ) {
6635 --zSig0;
6636 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6637 }
6638 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6639 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6640 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6641 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
6642 while ( (int64_t) rem1 < 0 ) {
6643 --zSig1;
6644 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6645 }
6646 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6647 }
6648 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
6649 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6650
6651}
6652
6653
6654
6655
6656
6657
6658
6659float128 float128_rem(float128 a, float128 b, float_status *status)
6660{
6661 flag aSign, zSign;
6662 int32 aExp, bExp, expDiff;
6663 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6664 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6665 int64_t sigMean0;
6666 float128 z;
6667
6668 aSig1 = extractFloat128Frac1( a );
6669 aSig0 = extractFloat128Frac0( a );
6670 aExp = extractFloat128Exp( a );
6671 aSign = extractFloat128Sign( a );
6672 bSig1 = extractFloat128Frac1( b );
6673 bSig0 = extractFloat128Frac0( b );
6674 bExp = extractFloat128Exp( b );
6675 if ( aExp == 0x7FFF ) {
6676 if ( ( aSig0 | aSig1 )
6677 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6678 return propagateFloat128NaN(a, b, status);
6679 }
6680 goto invalid;
6681 }
6682 if ( bExp == 0x7FFF ) {
6683 if (bSig0 | bSig1) {
6684 return propagateFloat128NaN(a, b, status);
6685 }
6686 return a;
6687 }
6688 if ( bExp == 0 ) {
6689 if ( ( bSig0 | bSig1 ) == 0 ) {
6690 invalid:
6691 float_raise(float_flag_invalid, status);
6692 z.low = float128_default_nan_low;
6693 z.high = float128_default_nan_high;
6694 return z;
6695 }
6696 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6697 }
6698 if ( aExp == 0 ) {
6699 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6700 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6701 }
6702 expDiff = aExp - bExp;
6703 if ( expDiff < -1 ) return a;
6704 shortShift128Left(
6705 aSig0 | LIT64( 0x0001000000000000 ),
6706 aSig1,
6707 15 - ( expDiff < 0 ),
6708 &aSig0,
6709 &aSig1
6710 );
6711 shortShift128Left(
6712 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6713 q = le128( bSig0, bSig1, aSig0, aSig1 );
6714 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6715 expDiff -= 64;
6716 while ( 0 < expDiff ) {
6717 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6718 q = ( 4 < q ) ? q - 4 : 0;
6719 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6720 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6721 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6722 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6723 expDiff -= 61;
6724 }
6725 if ( -64 < expDiff ) {
6726 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6727 q = ( 4 < q ) ? q - 4 : 0;
6728 q >>= - expDiff;
6729 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6730 expDiff += 52;
6731 if ( expDiff < 0 ) {
6732 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6733 }
6734 else {
6735 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6736 }
6737 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6738 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6739 }
6740 else {
6741 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6742 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6743 }
6744 do {
6745 alternateASig0 = aSig0;
6746 alternateASig1 = aSig1;
6747 ++q;
6748 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6749 } while ( 0 <= (int64_t) aSig0 );
6750 add128(
6751 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
6752 if ( ( sigMean0 < 0 )
6753 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6754 aSig0 = alternateASig0;
6755 aSig1 = alternateASig1;
6756 }
6757 zSign = ( (int64_t) aSig0 < 0 );
6758 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
6759 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6760 status);
6761}
6762
6763
6764
6765
6766
6767
6768
6769float128 float128_sqrt(float128 a, float_status *status)
6770{
6771 flag aSign;
6772 int32 aExp, zExp;
6773 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6774 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6775 float128 z;
6776
6777 aSig1 = extractFloat128Frac1( a );
6778 aSig0 = extractFloat128Frac0( a );
6779 aExp = extractFloat128Exp( a );
6780 aSign = extractFloat128Sign( a );
6781 if ( aExp == 0x7FFF ) {
6782 if (aSig0 | aSig1) {
6783 return propagateFloat128NaN(a, a, status);
6784 }
6785 if ( ! aSign ) return a;
6786 goto invalid;
6787 }
6788 if ( aSign ) {
6789 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6790 invalid:
6791 float_raise(float_flag_invalid, status);
6792 z.low = float128_default_nan_low;
6793 z.high = float128_default_nan_high;
6794 return z;
6795 }
6796 if ( aExp == 0 ) {
6797 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6798 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6799 }
6800 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6801 aSig0 |= LIT64( 0x0001000000000000 );
6802 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6803 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6804 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6805 doubleZSig0 = zSig0<<1;
6806 mul64To128( zSig0, zSig0, &term0, &term1 );
6807 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6808 while ( (int64_t) rem0 < 0 ) {
6809 --zSig0;
6810 doubleZSig0 -= 2;
6811 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6812 }
6813 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6814 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6815 if ( zSig1 == 0 ) zSig1 = 1;
6816 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6817 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6818 mul64To128( zSig1, zSig1, &term2, &term3 );
6819 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
6820 while ( (int64_t) rem1 < 0 ) {
6821 --zSig1;
6822 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6823 term3 |= 1;
6824 term2 |= doubleZSig0;
6825 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6826 }
6827 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6828 }
6829 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
6830 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
6831
6832}
6833
6834
6835
6836
6837
6838
6839
6840
6841int float128_eq(float128 a, float128 b, float_status *status)
6842{
6843
6844 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6845 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6846 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6847 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6848 ) {
6849 float_raise(float_flag_invalid, status);
6850 return 0;
6851 }
6852 return
6853 ( a.low == b.low )
6854 && ( ( a.high == b.high )
6855 || ( ( a.low == 0 )
6856 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
6857 );
6858
6859}
6860
6861
6862
6863
6864
6865
6866
6867
6868int float128_le(float128 a, float128 b, float_status *status)
6869{
6870 flag aSign, bSign;
6871
6872 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6873 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6874 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6875 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6876 ) {
6877 float_raise(float_flag_invalid, status);
6878 return 0;
6879 }
6880 aSign = extractFloat128Sign( a );
6881 bSign = extractFloat128Sign( b );
6882 if ( aSign != bSign ) {
6883 return
6884 aSign
6885 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6886 == 0 );
6887 }
6888 return
6889 aSign ? le128( b.high, b.low, a.high, a.low )
6890 : le128( a.high, a.low, b.high, b.low );
6891
6892}
6893
6894
6895
6896
6897
6898
6899
6900
6901int float128_lt(float128 a, float128 b, float_status *status)
6902{
6903 flag aSign, bSign;
6904
6905 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6906 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6907 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6908 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6909 ) {
6910 float_raise(float_flag_invalid, status);
6911 return 0;
6912 }
6913 aSign = extractFloat128Sign( a );
6914 bSign = extractFloat128Sign( b );
6915 if ( aSign != bSign ) {
6916 return
6917 aSign
6918 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6919 != 0 );
6920 }
6921 return
6922 aSign ? lt128( b.high, b.low, a.high, a.low )
6923 : lt128( a.high, a.low, b.high, b.low );
6924
6925}
6926
6927
6928
6929
6930
6931
6932
6933
6934int float128_unordered(float128 a, float128 b, float_status *status)
6935{
6936 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6937 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6938 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6939 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6940 ) {
6941 float_raise(float_flag_invalid, status);
6942 return 1;
6943 }
6944 return 0;
6945}
6946
6947
6948
6949
6950
6951
6952
6953
6954int float128_eq_quiet(float128 a, float128 b, float_status *status)
6955{
6956
6957 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6958 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6959 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6960 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6961 ) {
6962 if ( float128_is_signaling_nan( a )
6963 || float128_is_signaling_nan( b ) ) {
6964 float_raise(float_flag_invalid, status);
6965 }
6966 return 0;
6967 }
6968 return
6969 ( a.low == b.low )
6970 && ( ( a.high == b.high )
6971 || ( ( a.low == 0 )
6972 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
6973 );
6974
6975}
6976
6977
6978
6979
6980
6981
6982
6983
6984int float128_le_quiet(float128 a, float128 b, float_status *status)
6985{
6986 flag aSign, bSign;
6987
6988 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6989 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6990 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6991 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6992 ) {
6993 if ( float128_is_signaling_nan( a )
6994 || float128_is_signaling_nan( b ) ) {
6995 float_raise(float_flag_invalid, status);
6996 }
6997 return 0;
6998 }
6999 aSign = extractFloat128Sign( a );
7000 bSign = extractFloat128Sign( b );
7001 if ( aSign != bSign ) {
7002 return
7003 aSign
7004 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
7005 == 0 );
7006 }
7007 return
7008 aSign ? le128( b.high, b.low, a.high, a.low )
7009 : le128( a.high, a.low, b.high, b.low );
7010
7011}
7012
7013
7014
7015
7016
7017
7018
7019
7020int float128_lt_quiet(float128 a, float128 b, float_status *status)
7021{
7022 flag aSign, bSign;
7023
7024 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7025 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7026 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7027 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7028 ) {
7029 if ( float128_is_signaling_nan( a )
7030 || float128_is_signaling_nan( b ) ) {
7031 float_raise(float_flag_invalid, status);
7032 }
7033 return 0;
7034 }
7035 aSign = extractFloat128Sign( a );
7036 bSign = extractFloat128Sign( b );
7037 if ( aSign != bSign ) {
7038 return
7039 aSign
7040 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
7041 != 0 );
7042 }
7043 return
7044 aSign ? lt128( b.high, b.low, a.high, a.low )
7045 : lt128( a.high, a.low, b.high, b.low );
7046
7047}
7048
7049
7050
7051
7052
7053
7054
7055
7056int float128_unordered_quiet(float128 a, float128 b, float_status *status)
7057{
7058 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7059 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7060 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7061 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7062 ) {
7063 if ( float128_is_signaling_nan( a )
7064 || float128_is_signaling_nan( b ) ) {
7065 float_raise(float_flag_invalid, status);
7066 }
7067 return 1;
7068 }
7069 return 0;
7070}
7071
7072
7073float32 uint32_to_float32(uint32_t a, float_status *status)
7074{
7075 return int64_to_float32(a, status);
7076}
7077
7078float64 uint32_to_float64(uint32_t a, float_status *status)
7079{
7080 return int64_to_float64(a, status);
7081}
7082
7083uint32 float32_to_uint32(float32 a, float_status *status)
7084{
7085 int64_t v;
7086 uint32 res;
7087 int old_exc_flags = get_float_exception_flags(status);
7088
7089 v = float32_to_int64(a, status);
7090 if (v < 0) {
7091 res = 0;
7092 } else if (v > 0xffffffff) {
7093 res = 0xffffffff;
7094 } else {
7095 return v;
7096 }
7097 set_float_exception_flags(old_exc_flags, status);
7098 float_raise(float_flag_invalid, status);
7099 return res;
7100}
7101
7102uint32 float32_to_uint32_round_to_zero(float32 a, float_status *status)
7103{
7104 int64_t v;
7105 uint32 res;
7106 int old_exc_flags = get_float_exception_flags(status);
7107
7108 v = float32_to_int64_round_to_zero(a, status);
7109 if (v < 0) {
7110 res = 0;
7111 } else if (v > 0xffffffff) {
7112 res = 0xffffffff;
7113 } else {
7114 return v;
7115 }
7116 set_float_exception_flags(old_exc_flags, status);
7117 float_raise(float_flag_invalid, status);
7118 return res;
7119}
7120
7121int_fast16_t float32_to_int16(float32 a, float_status *status)
7122{
7123 int32_t v;
7124 int_fast16_t res;
7125 int old_exc_flags = get_float_exception_flags(status);
7126
7127 v = float32_to_int32(a, status);
7128 if (v < -0x8000) {
7129 res = -0x8000;
7130 } else if (v > 0x7fff) {
7131 res = 0x7fff;
7132 } else {
7133 return v;
7134 }
7135
7136 set_float_exception_flags(old_exc_flags, status);
7137 float_raise(float_flag_invalid, status);
7138 return res;
7139}
7140
7141uint_fast16_t float32_to_uint16(float32 a, float_status *status)
7142{
7143 int32_t v;
7144 uint_fast16_t res;
7145 int old_exc_flags = get_float_exception_flags(status);
7146
7147 v = float32_to_int32(a, status);
7148 if (v < 0) {
7149 res = 0;
7150 } else if (v > 0xffff) {
7151 res = 0xffff;
7152 } else {
7153 return v;
7154 }
7155
7156 set_float_exception_flags(old_exc_flags, status);
7157 float_raise(float_flag_invalid, status);
7158 return res;
7159}
7160
7161uint_fast16_t float32_to_uint16_round_to_zero(float32 a, float_status *status)
7162{
7163 int64_t v;
7164 uint_fast16_t res;
7165 int old_exc_flags = get_float_exception_flags(status);
7166
7167 v = float32_to_int64_round_to_zero(a, status);
7168 if (v < 0) {
7169 res = 0;
7170 } else if (v > 0xffff) {
7171 res = 0xffff;
7172 } else {
7173 return v;
7174 }
7175 set_float_exception_flags(old_exc_flags, status);
7176 float_raise(float_flag_invalid, status);
7177 return res;
7178}
7179
7180uint32 float64_to_uint32(float64 a, float_status *status)
7181{
7182 uint64_t v;
7183 uint32 res;
7184 int old_exc_flags = get_float_exception_flags(status);
7185
7186 v = float64_to_uint64(a, status);
7187 if (v > 0xffffffff) {
7188 res = 0xffffffff;
7189 } else {
7190 return v;
7191 }
7192 set_float_exception_flags(old_exc_flags, status);
7193 float_raise(float_flag_invalid, status);
7194 return res;
7195}
7196
7197uint32 float64_to_uint32_round_to_zero(float64 a, float_status *status)
7198{
7199 uint64_t v;
7200 uint32 res;
7201 int old_exc_flags = get_float_exception_flags(status);
7202
7203 v = float64_to_uint64_round_to_zero(a, status);
7204 if (v > 0xffffffff) {
7205 res = 0xffffffff;
7206 } else {
7207 return v;
7208 }
7209 set_float_exception_flags(old_exc_flags, status);
7210 float_raise(float_flag_invalid, status);
7211 return res;
7212}
7213
7214int_fast16_t float64_to_int16(float64 a, float_status *status)
7215{
7216 int64_t v;
7217 int_fast16_t res;
7218 int old_exc_flags = get_float_exception_flags(status);
7219
7220 v = float64_to_int32(a, status);
7221 if (v < -0x8000) {
7222 res = -0x8000;
7223 } else if (v > 0x7fff) {
7224 res = 0x7fff;
7225 } else {
7226 return v;
7227 }
7228
7229 set_float_exception_flags(old_exc_flags, status);
7230 float_raise(float_flag_invalid, status);
7231 return res;
7232}
7233
7234uint_fast16_t float64_to_uint16(float64 a, float_status *status)
7235{
7236 int64_t v;
7237 uint_fast16_t res;
7238 int old_exc_flags = get_float_exception_flags(status);
7239
7240 v = float64_to_int32(a, status);
7241 if (v < 0) {
7242 res = 0;
7243 } else if (v > 0xffff) {
7244 res = 0xffff;
7245 } else {
7246 return v;
7247 }
7248
7249 set_float_exception_flags(old_exc_flags, status);
7250 float_raise(float_flag_invalid, status);
7251 return res;
7252}
7253
7254uint_fast16_t float64_to_uint16_round_to_zero(float64 a, float_status *status)
7255{
7256 int64_t v;
7257 uint_fast16_t res;
7258 int old_exc_flags = get_float_exception_flags(status);
7259
7260 v = float64_to_int64_round_to_zero(a, status);
7261 if (v < 0) {
7262 res = 0;
7263 } else if (v > 0xffff) {
7264 res = 0xffff;
7265 } else {
7266 return v;
7267 }
7268 set_float_exception_flags(old_exc_flags, status);
7269 float_raise(float_flag_invalid, status);
7270 return res;
7271}
7272
7273
7274
7275
7276
7277
7278
7279
7280
7281
7282
7283
7284
7285uint64_t float64_to_uint64(float64 a, float_status *status)
7286{
7287 flag aSign;
7288 int_fast16_t aExp, shiftCount;
7289 uint64_t aSig, aSigExtra;
7290 a = float64_squash_input_denormal(a, status);
7291
7292 aSig = extractFloat64Frac(a);
7293 aExp = extractFloat64Exp(a);
7294 aSign = extractFloat64Sign(a);
7295 if (aSign && (aExp > 1022)) {
7296 float_raise(float_flag_invalid, status);
7297 if (float64_is_any_nan(a)) {
7298 return LIT64(0xFFFFFFFFFFFFFFFF);
7299 } else {
7300 return 0;
7301 }
7302 }
7303 if (aExp) {
7304 aSig |= LIT64(0x0010000000000000);
7305 }
7306 shiftCount = 0x433 - aExp;
7307 if (shiftCount <= 0) {
7308 if (0x43E < aExp) {
7309 float_raise(float_flag_invalid, status);
7310 return LIT64(0xFFFFFFFFFFFFFFFF);
7311 }
7312 aSigExtra = 0;
7313 aSig <<= -shiftCount;
7314 } else {
7315 shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra);
7316 }
7317 return roundAndPackUint64(aSign, aSig, aSigExtra, status);
7318}
7319
7320uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *status)
7321{
7322 signed char current_rounding_mode = status->float_rounding_mode;
7323 set_float_rounding_mode(float_round_to_zero, status);
7324 int64_t v = float64_to_uint64(a, status);
7325 set_float_rounding_mode(current_rounding_mode, status);
7326 return v;
7327}
7328
7329#define COMPARE(s, nan_exp) \
7330static inline int float ## s ## _compare_internal(float ## s a, float ## s b,\
7331 int is_quiet, float_status *status) \
7332{ \
7333 flag aSign, bSign; \
7334 uint ## s ## _t av, bv; \
7335 a = float ## s ## _squash_input_denormal(a, status); \
7336 b = float ## s ## _squash_input_denormal(b, status); \
7337 \
7338 if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
7339 extractFloat ## s ## Frac( a ) ) || \
7340 ( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
7341 extractFloat ## s ## Frac( b ) )) { \
7342 if (!is_quiet || \
7343 float ## s ## _is_signaling_nan( a ) || \
7344 float ## s ## _is_signaling_nan( b ) ) { \
7345 float_raise(float_flag_invalid, status); \
7346 } \
7347 return float_relation_unordered; \
7348 } \
7349 aSign = extractFloat ## s ## Sign( a ); \
7350 bSign = extractFloat ## s ## Sign( b ); \
7351 av = float ## s ## _val(a); \
7352 bv = float ## s ## _val(b); \
7353 if ( aSign != bSign ) { \
7354 if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) { \
7355 \
7356 return float_relation_equal; \
7357 } else { \
7358 return 1 - (2 * aSign); \
7359 } \
7360 } else { \
7361 if (av == bv) { \
7362 return float_relation_equal; \
7363 } else { \
7364 return 1 - 2 * (aSign ^ ( av < bv )); \
7365 } \
7366 } \
7367} \
7368 \
7369int float ## s ## _compare(float ## s a, float ## s b, float_status *status) \
7370{ \
7371 return float ## s ## _compare_internal(a, b, 0, status); \
7372} \
7373 \
7374int float ## s ## _compare_quiet(float ## s a, float ## s b, \
7375 float_status *status) \
7376{ \
7377 return float ## s ## _compare_internal(a, b, 1, status); \
7378}
7379
7380COMPARE(32, 0xff)
7381COMPARE(64, 0x7ff)
7382
7383static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
7384 int is_quiet, float_status *status)
7385{
7386 flag aSign, bSign;
7387
7388 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7389 ( extractFloatx80Frac( a )<<1 ) ) ||
7390 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7391 ( extractFloatx80Frac( b )<<1 ) )) {
7392 if (!is_quiet ||
7393 floatx80_is_signaling_nan( a ) ||
7394 floatx80_is_signaling_nan( b ) ) {
7395 float_raise(float_flag_invalid, status);
7396 }
7397 return float_relation_unordered;
7398 }
7399 aSign = extractFloatx80Sign( a );
7400 bSign = extractFloatx80Sign( b );
7401 if ( aSign != bSign ) {
7402
7403 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7404 ( ( a.low | b.low ) == 0 ) ) {
7405
7406 return float_relation_equal;
7407 } else {
7408 return 1 - (2 * aSign);
7409 }
7410 } else {
7411 if (a.low == b.low && a.high == b.high) {
7412 return float_relation_equal;
7413 } else {
7414 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7415 }
7416 }
7417}
7418
7419int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
7420{
7421 return floatx80_compare_internal(a, b, 0, status);
7422}
7423
7424int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
7425{
7426 return floatx80_compare_internal(a, b, 1, status);
7427}
7428
7429static inline int float128_compare_internal(float128 a, float128 b,
7430 int is_quiet, float_status *status)
7431{
7432 flag aSign, bSign;
7433
7434 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7435 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7436 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7437 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7438 if (!is_quiet ||
7439 float128_is_signaling_nan( a ) ||
7440 float128_is_signaling_nan( b ) ) {
7441 float_raise(float_flag_invalid, status);
7442 }
7443 return float_relation_unordered;
7444 }
7445 aSign = extractFloat128Sign( a );
7446 bSign = extractFloat128Sign( b );
7447 if ( aSign != bSign ) {
7448 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7449
7450 return float_relation_equal;
7451 } else {
7452 return 1 - (2 * aSign);
7453 }
7454 } else {
7455 if (a.low == b.low && a.high == b.high) {
7456 return float_relation_equal;
7457 } else {
7458 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7459 }
7460 }
7461}
7462
7463int float128_compare(float128 a, float128 b, float_status *status)
7464{
7465 return float128_compare_internal(a, b, 0, status);
7466}
7467
7468int float128_compare_quiet(float128 a, float128 b, float_status *status)
7469{
7470 return float128_compare_internal(a, b, 1, status);
7471}
7472
7473
7474
7475
7476
7477
7478
7479
7480
7481
7482
7483
7484
7485
7486
7487#define MINMAX(s) \
7488static inline float ## s float ## s ## _minmax(float ## s a, float ## s b, \
7489 int ismin, int isieee, \
7490 int ismag, \
7491 float_status *status) \
7492{ \
7493 flag aSign, bSign; \
7494 uint ## s ## _t av, bv, aav, abv; \
7495 a = float ## s ## _squash_input_denormal(a, status); \
7496 b = float ## s ## _squash_input_denormal(b, status); \
7497 if (float ## s ## _is_any_nan(a) || \
7498 float ## s ## _is_any_nan(b)) { \
7499 if (isieee) { \
7500 if (float ## s ## _is_quiet_nan(a) && \
7501 !float ## s ##_is_any_nan(b)) { \
7502 return b; \
7503 } else if (float ## s ## _is_quiet_nan(b) && \
7504 !float ## s ## _is_any_nan(a)) { \
7505 return a; \
7506 } \
7507 } \
7508 return propagateFloat ## s ## NaN(a, b, status); \
7509 } \
7510 aSign = extractFloat ## s ## Sign(a); \
7511 bSign = extractFloat ## s ## Sign(b); \
7512 av = float ## s ## _val(a); \
7513 bv = float ## s ## _val(b); \
7514 if (ismag) { \
7515 aav = float ## s ## _abs(av); \
7516 abv = float ## s ## _abs(bv); \
7517 if (aav != abv) { \
7518 if (ismin) { \
7519 return (aav < abv) ? a : b; \
7520 } else { \
7521 return (aav < abv) ? b : a; \
7522 } \
7523 } \
7524 } \
7525 if (aSign != bSign) { \
7526 if (ismin) { \
7527 return aSign ? a : b; \
7528 } else { \
7529 return aSign ? b : a; \
7530 } \
7531 } else { \
7532 if (ismin) { \
7533 return (aSign ^ (av < bv)) ? a : b; \
7534 } else { \
7535 return (aSign ^ (av < bv)) ? b : a; \
7536 } \
7537 } \
7538} \
7539 \
7540float ## s float ## s ## _min(float ## s a, float ## s b, \
7541 float_status *status) \
7542{ \
7543 return float ## s ## _minmax(a, b, 1, 0, 0, status); \
7544} \
7545 \
7546float ## s float ## s ## _max(float ## s a, float ## s b, \
7547 float_status *status) \
7548{ \
7549 return float ## s ## _minmax(a, b, 0, 0, 0, status); \
7550} \
7551 \
7552float ## s float ## s ## _minnum(float ## s a, float ## s b, \
7553 float_status *status) \
7554{ \
7555 return float ## s ## _minmax(a, b, 1, 1, 0, status); \
7556} \
7557 \
7558float ## s float ## s ## _maxnum(float ## s a, float ## s b, \
7559 float_status *status) \
7560{ \
7561 return float ## s ## _minmax(a, b, 0, 1, 0, status); \
7562} \
7563 \
7564float ## s float ## s ## _minnummag(float ## s a, float ## s b, \
7565 float_status *status) \
7566{ \
7567 return float ## s ## _minmax(a, b, 1, 1, 1, status); \
7568} \
7569 \
7570float ## s float ## s ## _maxnummag(float ## s a, float ## s b, \
7571 float_status *status) \
7572{ \
7573 return float ## s ## _minmax(a, b, 0, 1, 1, status); \
7574}
7575
7576MINMAX(32)
7577MINMAX(64)
7578
7579
7580
7581float32 float32_scalbn(float32 a, int n, float_status *status)
7582{
7583 flag aSign;
7584 int16_t aExp;
7585 uint32_t aSig;
7586
7587 a = float32_squash_input_denormal(a, status);
7588 aSig = extractFloat32Frac( a );
7589 aExp = extractFloat32Exp( a );
7590 aSign = extractFloat32Sign( a );
7591
7592 if ( aExp == 0xFF ) {
7593 if ( aSig ) {
7594 return propagateFloat32NaN(a, a, status);
7595 }
7596 return a;
7597 }
7598 if (aExp != 0) {
7599 aSig |= 0x00800000;
7600 } else if (aSig == 0) {
7601 return a;
7602 } else {
7603 aExp++;
7604 }
7605
7606 if (n > 0x200) {
7607 n = 0x200;
7608 } else if (n < -0x200) {
7609 n = -0x200;
7610 }
7611
7612 aExp += n - 1;
7613 aSig <<= 7;
7614 return normalizeRoundAndPackFloat32(aSign, aExp, aSig, status);
7615}
7616
7617float64 float64_scalbn(float64 a, int n, float_status *status)
7618{
7619 flag aSign;
7620 int16_t aExp;
7621 uint64_t aSig;
7622
7623 a = float64_squash_input_denormal(a, status);
7624 aSig = extractFloat64Frac( a );
7625 aExp = extractFloat64Exp( a );
7626 aSign = extractFloat64Sign( a );
7627
7628 if ( aExp == 0x7FF ) {
7629 if ( aSig ) {
7630 return propagateFloat64NaN(a, a, status);
7631 }
7632 return a;
7633 }
7634 if (aExp != 0) {
7635 aSig |= LIT64( 0x0010000000000000 );
7636 } else if (aSig == 0) {
7637 return a;
7638 } else {
7639 aExp++;
7640 }
7641
7642 if (n > 0x1000) {
7643 n = 0x1000;
7644 } else if (n < -0x1000) {
7645 n = -0x1000;
7646 }
7647
7648 aExp += n - 1;
7649 aSig <<= 10;
7650 return normalizeRoundAndPackFloat64(aSign, aExp, aSig, status);
7651}
7652
7653floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
7654{
7655 flag aSign;
7656 int32_t aExp;
7657 uint64_t aSig;
7658
7659 aSig = extractFloatx80Frac( a );
7660 aExp = extractFloatx80Exp( a );
7661 aSign = extractFloatx80Sign( a );
7662
7663 if ( aExp == 0x7FFF ) {
7664 if ( aSig<<1 ) {
7665 return propagateFloatx80NaN(a, a, status);
7666 }
7667 return a;
7668 }
7669
7670 if (aExp == 0) {
7671 if (aSig == 0) {
7672 return a;
7673 }
7674 aExp++;
7675 }
7676
7677 if (n > 0x10000) {
7678 n = 0x10000;
7679 } else if (n < -0x10000) {
7680 n = -0x10000;
7681 }
7682
7683 aExp += n;
7684 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7685 aSign, aExp, aSig, 0, status);
7686}
7687
7688float128 float128_scalbn(float128 a, int n, float_status *status)
7689{
7690 flag aSign;
7691 int32_t aExp;
7692 uint64_t aSig0, aSig1;
7693
7694 aSig1 = extractFloat128Frac1( a );
7695 aSig0 = extractFloat128Frac0( a );
7696 aExp = extractFloat128Exp( a );
7697 aSign = extractFloat128Sign( a );
7698 if ( aExp == 0x7FFF ) {
7699 if ( aSig0 | aSig1 ) {
7700 return propagateFloat128NaN(a, a, status);
7701 }
7702 return a;
7703 }
7704 if (aExp != 0) {
7705 aSig0 |= LIT64( 0x0001000000000000 );
7706 } else if (aSig0 == 0 && aSig1 == 0) {
7707 return a;
7708 } else {
7709 aExp++;
7710 }
7711
7712 if (n > 0x10000) {
7713 n = 0x10000;
7714 } else if (n < -0x10000) {
7715 n = -0x10000;
7716 }
7717
7718 aExp += n - 1;
7719 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
7720 , status);
7721
7722}
7723