1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85#include "qemu/osdep.h"
86
87#include "fpu/softfloat.h"
88
89
90
91
92
93
94
95
96#include "softfloat-macros.h"
97
98
99
100
101
102
103
104
105
106#include "softfloat-specialize.h"
107
108
109
110
111
112static inline uint32_t extractFloat16Frac(float16 a)
113{
114 return float16_val(a) & 0x3ff;
115}
116
117
118
119
120
121static inline int extractFloat16Exp(float16 a)
122{
123 return (float16_val(a) >> 10) & 0x1f;
124}
125
126
127
128
129
130static inline flag extractFloat16Sign(float16 a)
131{
132 return float16_val(a)>>15;
133}
134
135
136
137
138
139
140
141
142
143
144
145
146static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
147{
148 int8_t roundingMode;
149 flag roundNearestEven;
150 int8_t roundIncrement, roundBits;
151 int32_t z;
152
153 roundingMode = status->float_rounding_mode;
154 roundNearestEven = ( roundingMode == float_round_nearest_even );
155 switch (roundingMode) {
156 case float_round_nearest_even:
157 case float_round_ties_away:
158 roundIncrement = 0x40;
159 break;
160 case float_round_to_zero:
161 roundIncrement = 0;
162 break;
163 case float_round_up:
164 roundIncrement = zSign ? 0 : 0x7f;
165 break;
166 case float_round_down:
167 roundIncrement = zSign ? 0x7f : 0;
168 break;
169 default:
170 abort();
171 }
172 roundBits = absZ & 0x7F;
173 absZ = ( absZ + roundIncrement )>>7;
174 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
175 z = absZ;
176 if ( zSign ) z = - z;
177 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
178 float_raise(float_flag_invalid, status);
179 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
180 }
181 if (roundBits) {
182 status->float_exception_flags |= float_flag_inexact;
183 }
184 return z;
185
186}
187
188
189
190
191
192
193
194
195
196
197
198
199
200static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
201 float_status *status)
202{
203 int8_t roundingMode;
204 flag roundNearestEven, increment;
205 int64_t z;
206
207 roundingMode = status->float_rounding_mode;
208 roundNearestEven = ( roundingMode == float_round_nearest_even );
209 switch (roundingMode) {
210 case float_round_nearest_even:
211 case float_round_ties_away:
212 increment = ((int64_t) absZ1 < 0);
213 break;
214 case float_round_to_zero:
215 increment = 0;
216 break;
217 case float_round_up:
218 increment = !zSign && absZ1;
219 break;
220 case float_round_down:
221 increment = zSign && absZ1;
222 break;
223 default:
224 abort();
225 }
226 if ( increment ) {
227 ++absZ0;
228 if ( absZ0 == 0 ) goto overflow;
229 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
230 }
231 z = absZ0;
232 if ( zSign ) z = - z;
233 if ( z && ( ( z < 0 ) ^ zSign ) ) {
234 overflow:
235 float_raise(float_flag_invalid, status);
236 return
237 zSign ? (int64_t) LIT64( 0x8000000000000000 )
238 : LIT64( 0x7FFFFFFFFFFFFFFF );
239 }
240 if (absZ1) {
241 status->float_exception_flags |= float_flag_inexact;
242 }
243 return z;
244
245}
246
247
248
249
250
251
252
253
254
255
256
257static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
258 uint64_t absZ1, float_status *status)
259{
260 int8_t roundingMode;
261 flag roundNearestEven, increment;
262
263 roundingMode = status->float_rounding_mode;
264 roundNearestEven = (roundingMode == float_round_nearest_even);
265 switch (roundingMode) {
266 case float_round_nearest_even:
267 case float_round_ties_away:
268 increment = ((int64_t)absZ1 < 0);
269 break;
270 case float_round_to_zero:
271 increment = 0;
272 break;
273 case float_round_up:
274 increment = !zSign && absZ1;
275 break;
276 case float_round_down:
277 increment = zSign && absZ1;
278 break;
279 default:
280 abort();
281 }
282 if (increment) {
283 ++absZ0;
284 if (absZ0 == 0) {
285 float_raise(float_flag_invalid, status);
286 return LIT64(0xFFFFFFFFFFFFFFFF);
287 }
288 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
289 }
290
291 if (zSign && absZ0) {
292 float_raise(float_flag_invalid, status);
293 return 0;
294 }
295
296 if (absZ1) {
297 status->float_exception_flags |= float_flag_inexact;
298 }
299 return absZ0;
300}
301
302
303
304
305
306static inline uint32_t extractFloat32Frac( float32 a )
307{
308
309 return float32_val(a) & 0x007FFFFF;
310
311}
312
313
314
315
316
317static inline int extractFloat32Exp(float32 a)
318{
319
320 return ( float32_val(a)>>23 ) & 0xFF;
321
322}
323
324
325
326
327
328static inline flag extractFloat32Sign( float32 a )
329{
330
331 return float32_val(a)>>31;
332
333}
334
335
336
337
338
339float32 float32_squash_input_denormal(float32 a, float_status *status)
340{
341 if (status->flush_inputs_to_zero) {
342 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
343 float_raise(float_flag_input_denormal, status);
344 return make_float32(float32_val(a) & 0x80000000);
345 }
346 }
347 return a;
348}
349
350
351
352
353
354
355
356
357static void
358 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
359{
360 int8_t shiftCount;
361
362 shiftCount = countLeadingZeros32( aSig ) - 8;
363 *zSigPtr = aSig<<shiftCount;
364 *zExpPtr = 1 - shiftCount;
365
366}
367
368
369
370
371
372
373
374
375
376
377
378
379static inline float32 packFloat32(flag zSign, int zExp, uint32_t zSig)
380{
381
382 return make_float32(
383 ( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
384
385}
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
410 float_status *status)
411{
412 int8_t roundingMode;
413 flag roundNearestEven;
414 int8_t roundIncrement, roundBits;
415 flag isTiny;
416
417 roundingMode = status->float_rounding_mode;
418 roundNearestEven = ( roundingMode == float_round_nearest_even );
419 switch (roundingMode) {
420 case float_round_nearest_even:
421 case float_round_ties_away:
422 roundIncrement = 0x40;
423 break;
424 case float_round_to_zero:
425 roundIncrement = 0;
426 break;
427 case float_round_up:
428 roundIncrement = zSign ? 0 : 0x7f;
429 break;
430 case float_round_down:
431 roundIncrement = zSign ? 0x7f : 0;
432 break;
433 default:
434 abort();
435 break;
436 }
437 roundBits = zSig & 0x7F;
438 if ( 0xFD <= (uint16_t) zExp ) {
439 if ( ( 0xFD < zExp )
440 || ( ( zExp == 0xFD )
441 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
442 ) {
443 float_raise(float_flag_overflow | float_flag_inexact, status);
444 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
445 }
446 if ( zExp < 0 ) {
447 if (status->flush_to_zero) {
448 float_raise(float_flag_output_denormal, status);
449 return packFloat32(zSign, 0, 0);
450 }
451 isTiny =
452 (status->float_detect_tininess
453 == float_tininess_before_rounding)
454 || ( zExp < -1 )
455 || ( zSig + roundIncrement < 0x80000000 );
456 shift32RightJamming( zSig, - zExp, &zSig );
457 zExp = 0;
458 roundBits = zSig & 0x7F;
459 if (isTiny && roundBits) {
460 float_raise(float_flag_underflow, status);
461 }
462 }
463 }
464 if (roundBits) {
465 status->float_exception_flags |= float_flag_inexact;
466 }
467 zSig = ( zSig + roundIncrement )>>7;
468 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
469 if ( zSig == 0 ) zExp = 0;
470 return packFloat32( zSign, zExp, zSig );
471
472}
473
474
475
476
477
478
479
480
481
482
483static float32
484 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
485 float_status *status)
486{
487 int8_t shiftCount;
488
489 shiftCount = countLeadingZeros32( zSig ) - 1;
490 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
491 status);
492
493}
494
495
496
497
498
499static inline uint64_t extractFloat64Frac( float64 a )
500{
501
502 return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
503
504}
505
506
507
508
509
510static inline int extractFloat64Exp(float64 a)
511{
512
513 return ( float64_val(a)>>52 ) & 0x7FF;
514
515}
516
517
518
519
520
521static inline flag extractFloat64Sign( float64 a )
522{
523
524 return float64_val(a)>>63;
525
526}
527
528
529
530
531
532float64 float64_squash_input_denormal(float64 a, float_status *status)
533{
534 if (status->flush_inputs_to_zero) {
535 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
536 float_raise(float_flag_input_denormal, status);
537 return make_float64(float64_val(a) & (1ULL << 63));
538 }
539 }
540 return a;
541}
542
543
544
545
546
547
548
549
550static void
551 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
552{
553 int8_t shiftCount;
554
555 shiftCount = countLeadingZeros64( aSig ) - 11;
556 *zSigPtr = aSig<<shiftCount;
557 *zExpPtr = 1 - shiftCount;
558
559}
560
561
562
563
564
565
566
567
568
569
570
571
572static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
573{
574
575 return make_float64(
576 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
577
578}
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
603 float_status *status)
604{
605 int8_t roundingMode;
606 flag roundNearestEven;
607 int roundIncrement, roundBits;
608 flag isTiny;
609
610 roundingMode = status->float_rounding_mode;
611 roundNearestEven = ( roundingMode == float_round_nearest_even );
612 switch (roundingMode) {
613 case float_round_nearest_even:
614 case float_round_ties_away:
615 roundIncrement = 0x200;
616 break;
617 case float_round_to_zero:
618 roundIncrement = 0;
619 break;
620 case float_round_up:
621 roundIncrement = zSign ? 0 : 0x3ff;
622 break;
623 case float_round_down:
624 roundIncrement = zSign ? 0x3ff : 0;
625 break;
626 default:
627 abort();
628 }
629 roundBits = zSig & 0x3FF;
630 if ( 0x7FD <= (uint16_t) zExp ) {
631 if ( ( 0x7FD < zExp )
632 || ( ( zExp == 0x7FD )
633 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
634 ) {
635 float_raise(float_flag_overflow | float_flag_inexact, status);
636 return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
637 }
638 if ( zExp < 0 ) {
639 if (status->flush_to_zero) {
640 float_raise(float_flag_output_denormal, status);
641 return packFloat64(zSign, 0, 0);
642 }
643 isTiny =
644 (status->float_detect_tininess
645 == float_tininess_before_rounding)
646 || ( zExp < -1 )
647 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
648 shift64RightJamming( zSig, - zExp, &zSig );
649 zExp = 0;
650 roundBits = zSig & 0x3FF;
651 if (isTiny && roundBits) {
652 float_raise(float_flag_underflow, status);
653 }
654 }
655 }
656 if (roundBits) {
657 status->float_exception_flags |= float_flag_inexact;
658 }
659 zSig = ( zSig + roundIncrement )>>10;
660 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
661 if ( zSig == 0 ) zExp = 0;
662 return packFloat64( zSign, zExp, zSig );
663
664}
665
666
667
668
669
670
671
672
673
674
675static float64
676 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
677 float_status *status)
678{
679 int8_t shiftCount;
680
681 shiftCount = countLeadingZeros64( zSig ) - 1;
682 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
683 status);
684
685}
686
687
688
689
690
691
692static inline uint64_t extractFloatx80Frac( floatx80 a )
693{
694
695 return a.low;
696
697}
698
699
700
701
702
703
704static inline int32_t extractFloatx80Exp( floatx80 a )
705{
706
707 return a.high & 0x7FFF;
708
709}
710
711
712
713
714
715
716static inline flag extractFloatx80Sign( floatx80 a )
717{
718
719 return a.high>>15;
720
721}
722
723
724
725
726
727
728
729
730static void
731 normalizeFloatx80Subnormal( uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr )
732{
733 int8_t shiftCount;
734
735 shiftCount = countLeadingZeros64( aSig );
736 *zSigPtr = aSig<<shiftCount;
737 *zExpPtr = 1 - shiftCount;
738
739}
740
741
742
743
744
745
746static inline floatx80 packFloatx80( flag zSign, int32_t zExp, uint64_t zSig )
747{
748 floatx80 z;
749
750 z.low = zSig;
751 z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
752 return z;
753
754}
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780static floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
781 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
782 float_status *status)
783{
784 int8_t roundingMode;
785 flag roundNearestEven, increment, isTiny;
786 int64_t roundIncrement, roundMask, roundBits;
787
788 roundingMode = status->float_rounding_mode;
789 roundNearestEven = ( roundingMode == float_round_nearest_even );
790 if ( roundingPrecision == 80 ) goto precision80;
791 if ( roundingPrecision == 64 ) {
792 roundIncrement = LIT64( 0x0000000000000400 );
793 roundMask = LIT64( 0x00000000000007FF );
794 }
795 else if ( roundingPrecision == 32 ) {
796 roundIncrement = LIT64( 0x0000008000000000 );
797 roundMask = LIT64( 0x000000FFFFFFFFFF );
798 }
799 else {
800 goto precision80;
801 }
802 zSig0 |= ( zSig1 != 0 );
803 switch (roundingMode) {
804 case float_round_nearest_even:
805 case float_round_ties_away:
806 break;
807 case float_round_to_zero:
808 roundIncrement = 0;
809 break;
810 case float_round_up:
811 roundIncrement = zSign ? 0 : roundMask;
812 break;
813 case float_round_down:
814 roundIncrement = zSign ? roundMask : 0;
815 break;
816 default:
817 abort();
818 }
819 roundBits = zSig0 & roundMask;
820 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
821 if ( ( 0x7FFE < zExp )
822 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
823 ) {
824 goto overflow;
825 }
826 if ( zExp <= 0 ) {
827 if (status->flush_to_zero) {
828 float_raise(float_flag_output_denormal, status);
829 return packFloatx80(zSign, 0, 0);
830 }
831 isTiny =
832 (status->float_detect_tininess
833 == float_tininess_before_rounding)
834 || ( zExp < 0 )
835 || ( zSig0 <= zSig0 + roundIncrement );
836 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
837 zExp = 0;
838 roundBits = zSig0 & roundMask;
839 if (isTiny && roundBits) {
840 float_raise(float_flag_underflow, status);
841 }
842 if (roundBits) {
843 status->float_exception_flags |= float_flag_inexact;
844 }
845 zSig0 += roundIncrement;
846 if ( (int64_t) zSig0 < 0 ) zExp = 1;
847 roundIncrement = roundMask + 1;
848 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
849 roundMask |= roundIncrement;
850 }
851 zSig0 &= ~ roundMask;
852 return packFloatx80( zSign, zExp, zSig0 );
853 }
854 }
855 if (roundBits) {
856 status->float_exception_flags |= float_flag_inexact;
857 }
858 zSig0 += roundIncrement;
859 if ( zSig0 < roundIncrement ) {
860 ++zExp;
861 zSig0 = LIT64( 0x8000000000000000 );
862 }
863 roundIncrement = roundMask + 1;
864 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
865 roundMask |= roundIncrement;
866 }
867 zSig0 &= ~ roundMask;
868 if ( zSig0 == 0 ) zExp = 0;
869 return packFloatx80( zSign, zExp, zSig0 );
870 precision80:
871 switch (roundingMode) {
872 case float_round_nearest_even:
873 case float_round_ties_away:
874 increment = ((int64_t)zSig1 < 0);
875 break;
876 case float_round_to_zero:
877 increment = 0;
878 break;
879 case float_round_up:
880 increment = !zSign && zSig1;
881 break;
882 case float_round_down:
883 increment = zSign && zSig1;
884 break;
885 default:
886 abort();
887 }
888 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
889 if ( ( 0x7FFE < zExp )
890 || ( ( zExp == 0x7FFE )
891 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
892 && increment
893 )
894 ) {
895 roundMask = 0;
896 overflow:
897 float_raise(float_flag_overflow | float_flag_inexact, status);
898 if ( ( roundingMode == float_round_to_zero )
899 || ( zSign && ( roundingMode == float_round_up ) )
900 || ( ! zSign && ( roundingMode == float_round_down ) )
901 ) {
902 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
903 }
904 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
905 }
906 if ( zExp <= 0 ) {
907 isTiny =
908 (status->float_detect_tininess
909 == float_tininess_before_rounding)
910 || ( zExp < 0 )
911 || ! increment
912 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
913 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
914 zExp = 0;
915 if (isTiny && zSig1) {
916 float_raise(float_flag_underflow, status);
917 }
918 if (zSig1) {
919 status->float_exception_flags |= float_flag_inexact;
920 }
921 switch (roundingMode) {
922 case float_round_nearest_even:
923 case float_round_ties_away:
924 increment = ((int64_t)zSig1 < 0);
925 break;
926 case float_round_to_zero:
927 increment = 0;
928 break;
929 case float_round_up:
930 increment = !zSign && zSig1;
931 break;
932 case float_round_down:
933 increment = zSign && zSig1;
934 break;
935 default:
936 abort();
937 }
938 if ( increment ) {
939 ++zSig0;
940 zSig0 &=
941 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
942 if ( (int64_t) zSig0 < 0 ) zExp = 1;
943 }
944 return packFloatx80( zSign, zExp, zSig0 );
945 }
946 }
947 if (zSig1) {
948 status->float_exception_flags |= float_flag_inexact;
949 }
950 if ( increment ) {
951 ++zSig0;
952 if ( zSig0 == 0 ) {
953 ++zExp;
954 zSig0 = LIT64( 0x8000000000000000 );
955 }
956 else {
957 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
958 }
959 }
960 else {
961 if ( zSig0 == 0 ) zExp = 0;
962 }
963 return packFloatx80( zSign, zExp, zSig0 );
964
965}
966
967
968
969
970
971
972
973
974
975
976static floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
977 flag zSign, int32_t zExp,
978 uint64_t zSig0, uint64_t zSig1,
979 float_status *status)
980{
981 int8_t shiftCount;
982
983 if ( zSig0 == 0 ) {
984 zSig0 = zSig1;
985 zSig1 = 0;
986 zExp -= 64;
987 }
988 shiftCount = countLeadingZeros64( zSig0 );
989 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
990 zExp -= shiftCount;
991 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
992 zSig0, zSig1, status);
993
994}
995
996
997
998
999
1000
1001static inline uint64_t extractFloat128Frac1( float128 a )
1002{
1003
1004 return a.low;
1005
1006}
1007
1008
1009
1010
1011
1012
1013static inline uint64_t extractFloat128Frac0( float128 a )
1014{
1015
1016 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
1017
1018}
1019
1020
1021
1022
1023
1024
1025static inline int32_t extractFloat128Exp( float128 a )
1026{
1027
1028 return ( a.high>>48 ) & 0x7FFF;
1029
1030}
1031
1032
1033
1034
1035
1036static inline flag extractFloat128Sign( float128 a )
1037{
1038
1039 return a.high>>63;
1040
1041}
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053static void
1054 normalizeFloat128Subnormal(
1055 uint64_t aSig0,
1056 uint64_t aSig1,
1057 int32_t *zExpPtr,
1058 uint64_t *zSig0Ptr,
1059 uint64_t *zSig1Ptr
1060 )
1061{
1062 int8_t shiftCount;
1063
1064 if ( aSig0 == 0 ) {
1065 shiftCount = countLeadingZeros64( aSig1 ) - 15;
1066 if ( shiftCount < 0 ) {
1067 *zSig0Ptr = aSig1>>( - shiftCount );
1068 *zSig1Ptr = aSig1<<( shiftCount & 63 );
1069 }
1070 else {
1071 *zSig0Ptr = aSig1<<shiftCount;
1072 *zSig1Ptr = 0;
1073 }
1074 *zExpPtr = - shiftCount - 63;
1075 }
1076 else {
1077 shiftCount = countLeadingZeros64( aSig0 ) - 15;
1078 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
1079 *zExpPtr = 1 - shiftCount;
1080 }
1081
1082}
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097static inline float128
1098 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
1099{
1100 float128 z;
1101
1102 z.low = zSig1;
1103 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
1104 return z;
1105
1106}
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
1130 uint64_t zSig0, uint64_t zSig1,
1131 uint64_t zSig2, float_status *status)
1132{
1133 int8_t roundingMode;
1134 flag roundNearestEven, increment, isTiny;
1135
1136 roundingMode = status->float_rounding_mode;
1137 roundNearestEven = ( roundingMode == float_round_nearest_even );
1138 switch (roundingMode) {
1139 case float_round_nearest_even:
1140 case float_round_ties_away:
1141 increment = ((int64_t)zSig2 < 0);
1142 break;
1143 case float_round_to_zero:
1144 increment = 0;
1145 break;
1146 case float_round_up:
1147 increment = !zSign && zSig2;
1148 break;
1149 case float_round_down:
1150 increment = zSign && zSig2;
1151 break;
1152 default:
1153 abort();
1154 }
1155 if ( 0x7FFD <= (uint32_t) zExp ) {
1156 if ( ( 0x7FFD < zExp )
1157 || ( ( zExp == 0x7FFD )
1158 && eq128(
1159 LIT64( 0x0001FFFFFFFFFFFF ),
1160 LIT64( 0xFFFFFFFFFFFFFFFF ),
1161 zSig0,
1162 zSig1
1163 )
1164 && increment
1165 )
1166 ) {
1167 float_raise(float_flag_overflow | float_flag_inexact, status);
1168 if ( ( roundingMode == float_round_to_zero )
1169 || ( zSign && ( roundingMode == float_round_up ) )
1170 || ( ! zSign && ( roundingMode == float_round_down ) )
1171 ) {
1172 return
1173 packFloat128(
1174 zSign,
1175 0x7FFE,
1176 LIT64( 0x0000FFFFFFFFFFFF ),
1177 LIT64( 0xFFFFFFFFFFFFFFFF )
1178 );
1179 }
1180 return packFloat128( zSign, 0x7FFF, 0, 0 );
1181 }
1182 if ( zExp < 0 ) {
1183 if (status->flush_to_zero) {
1184 float_raise(float_flag_output_denormal, status);
1185 return packFloat128(zSign, 0, 0, 0);
1186 }
1187 isTiny =
1188 (status->float_detect_tininess
1189 == float_tininess_before_rounding)
1190 || ( zExp < -1 )
1191 || ! increment
1192 || lt128(
1193 zSig0,
1194 zSig1,
1195 LIT64( 0x0001FFFFFFFFFFFF ),
1196 LIT64( 0xFFFFFFFFFFFFFFFF )
1197 );
1198 shift128ExtraRightJamming(
1199 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1200 zExp = 0;
1201 if (isTiny && zSig2) {
1202 float_raise(float_flag_underflow, status);
1203 }
1204 switch (roundingMode) {
1205 case float_round_nearest_even:
1206 case float_round_ties_away:
1207 increment = ((int64_t)zSig2 < 0);
1208 break;
1209 case float_round_to_zero:
1210 increment = 0;
1211 break;
1212 case float_round_up:
1213 increment = !zSign && zSig2;
1214 break;
1215 case float_round_down:
1216 increment = zSign && zSig2;
1217 break;
1218 default:
1219 abort();
1220 }
1221 }
1222 }
1223 if (zSig2) {
1224 status->float_exception_flags |= float_flag_inexact;
1225 }
1226 if ( increment ) {
1227 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1228 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1229 }
1230 else {
1231 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1232 }
1233 return packFloat128( zSign, zExp, zSig0, zSig1 );
1234
1235}
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
1248 uint64_t zSig0, uint64_t zSig1,
1249 float_status *status)
1250{
1251 int8_t shiftCount;
1252 uint64_t zSig2;
1253
1254 if ( zSig0 == 0 ) {
1255 zSig0 = zSig1;
1256 zSig1 = 0;
1257 zExp -= 64;
1258 }
1259 shiftCount = countLeadingZeros64( zSig0 ) - 15;
1260 if ( 0 <= shiftCount ) {
1261 zSig2 = 0;
1262 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1263 }
1264 else {
1265 shift128ExtraRightJamming(
1266 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1267 }
1268 zExp -= shiftCount;
1269 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
1270
1271}
1272
1273
1274
1275
1276
1277
1278
1279float32 int32_to_float32(int32_t a, float_status *status)
1280{
1281 flag zSign;
1282
1283 if ( a == 0 ) return float32_zero;
1284 if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1285 zSign = ( a < 0 );
1286 return normalizeRoundAndPackFloat32(zSign, 0x9C, zSign ? -a : a, status);
1287}
1288
1289
1290
1291
1292
1293
1294
1295float64 int32_to_float64(int32_t a, float_status *status)
1296{
1297 flag zSign;
1298 uint32_t absA;
1299 int8_t shiftCount;
1300 uint64_t zSig;
1301
1302 if ( a == 0 ) return float64_zero;
1303 zSign = ( a < 0 );
1304 absA = zSign ? - a : a;
1305 shiftCount = countLeadingZeros32( absA ) + 21;
1306 zSig = absA;
1307 return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1308
1309}
1310
1311
1312
1313
1314
1315
1316
1317
1318floatx80 int32_to_floatx80(int32_t a, float_status *status)
1319{
1320 flag zSign;
1321 uint32_t absA;
1322 int8_t shiftCount;
1323 uint64_t zSig;
1324
1325 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1326 zSign = ( a < 0 );
1327 absA = zSign ? - a : a;
1328 shiftCount = countLeadingZeros32( absA ) + 32;
1329 zSig = absA;
1330 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1331
1332}
1333
1334
1335
1336
1337
1338
1339
1340float128 int32_to_float128(int32_t a, float_status *status)
1341{
1342 flag zSign;
1343 uint32_t absA;
1344 int8_t shiftCount;
1345 uint64_t zSig0;
1346
1347 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1348 zSign = ( a < 0 );
1349 absA = zSign ? - a : a;
1350 shiftCount = countLeadingZeros32( absA ) + 17;
1351 zSig0 = absA;
1352 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1353
1354}
1355
1356
1357
1358
1359
1360
1361
1362float32 int64_to_float32(int64_t a, float_status *status)
1363{
1364 flag zSign;
1365 uint64_t absA;
1366 int8_t shiftCount;
1367
1368 if ( a == 0 ) return float32_zero;
1369 zSign = ( a < 0 );
1370 absA = zSign ? - a : a;
1371 shiftCount = countLeadingZeros64( absA ) - 40;
1372 if ( 0 <= shiftCount ) {
1373 return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1374 }
1375 else {
1376 shiftCount += 7;
1377 if ( shiftCount < 0 ) {
1378 shift64RightJamming( absA, - shiftCount, &absA );
1379 }
1380 else {
1381 absA <<= shiftCount;
1382 }
1383 return roundAndPackFloat32(zSign, 0x9C - shiftCount, absA, status);
1384 }
1385
1386}
1387
1388
1389
1390
1391
1392
1393
1394float64 int64_to_float64(int64_t a, float_status *status)
1395{
1396 flag zSign;
1397
1398 if ( a == 0 ) return float64_zero;
1399 if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
1400 return packFloat64( 1, 0x43E, 0 );
1401 }
1402 zSign = ( a < 0 );
1403 return normalizeRoundAndPackFloat64(zSign, 0x43C, zSign ? -a : a, status);
1404}
1405
1406
1407
1408
1409
1410
1411
1412
1413floatx80 int64_to_floatx80(int64_t a, float_status *status)
1414{
1415 flag zSign;
1416 uint64_t absA;
1417 int8_t shiftCount;
1418
1419 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1420 zSign = ( a < 0 );
1421 absA = zSign ? - a : a;
1422 shiftCount = countLeadingZeros64( absA );
1423 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1424
1425}
1426
1427
1428
1429
1430
1431
1432
1433float128 int64_to_float128(int64_t a, float_status *status)
1434{
1435 flag zSign;
1436 uint64_t absA;
1437 int8_t shiftCount;
1438 int32_t zExp;
1439 uint64_t zSig0, zSig1;
1440
1441 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1442 zSign = ( a < 0 );
1443 absA = zSign ? - a : a;
1444 shiftCount = countLeadingZeros64( absA ) + 49;
1445 zExp = 0x406E - shiftCount;
1446 if ( 64 <= shiftCount ) {
1447 zSig1 = 0;
1448 zSig0 = absA;
1449 shiftCount -= 64;
1450 }
1451 else {
1452 zSig1 = absA;
1453 zSig0 = 0;
1454 }
1455 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1456 return packFloat128( zSign, zExp, zSig0, zSig1 );
1457
1458}
1459
1460
1461
1462
1463
1464
1465
1466float32 uint64_to_float32(uint64_t a, float_status *status)
1467{
1468 int shiftcount;
1469
1470 if (a == 0) {
1471 return float32_zero;
1472 }
1473
1474
1475
1476
1477
1478 shiftcount = countLeadingZeros64(a) - 40;
1479 if (shiftcount >= 0) {
1480 return packFloat32(0, 0x95 - shiftcount, a << shiftcount);
1481 }
1482
1483
1484
1485 shiftcount += 7;
1486 if (shiftcount < 0) {
1487 shift64RightJamming(a, -shiftcount, &a);
1488 } else {
1489 a <<= shiftcount;
1490 }
1491
1492 return roundAndPackFloat32(0, 0x9c - shiftcount, a, status);
1493}
1494
1495
1496
1497
1498
1499
1500
1501float64 uint64_to_float64(uint64_t a, float_status *status)
1502{
1503 int exp = 0x43C;
1504 int shiftcount;
1505
1506 if (a == 0) {
1507 return float64_zero;
1508 }
1509
1510 shiftcount = countLeadingZeros64(a) - 1;
1511 if (shiftcount < 0) {
1512 shift64RightJamming(a, -shiftcount, &a);
1513 } else {
1514 a <<= shiftcount;
1515 }
1516 return roundAndPackFloat64(0, exp - shiftcount, a, status);
1517}
1518
1519
1520
1521
1522
1523
1524
1525float128 uint64_to_float128(uint64_t a, float_status *status)
1526{
1527 if (a == 0) {
1528 return float128_zero;
1529 }
1530 return normalizeRoundAndPackFloat128(0, 0x406E, a, 0, status);
1531}
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543int32_t float32_to_int32(float32 a, float_status *status)
1544{
1545 flag aSign;
1546 int aExp;
1547 int shiftCount;
1548 uint32_t aSig;
1549 uint64_t aSig64;
1550
1551 a = float32_squash_input_denormal(a, status);
1552 aSig = extractFloat32Frac( a );
1553 aExp = extractFloat32Exp( a );
1554 aSign = extractFloat32Sign( a );
1555 if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1556 if ( aExp ) aSig |= 0x00800000;
1557 shiftCount = 0xAF - aExp;
1558 aSig64 = aSig;
1559 aSig64 <<= 32;
1560 if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1561 return roundAndPackInt32(aSign, aSig64, status);
1562
1563}
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575int32_t float32_to_int32_round_to_zero(float32 a, float_status *status)
1576{
1577 flag aSign;
1578 int aExp;
1579 int shiftCount;
1580 uint32_t aSig;
1581 int32_t z;
1582 a = float32_squash_input_denormal(a, status);
1583
1584 aSig = extractFloat32Frac( a );
1585 aExp = extractFloat32Exp( a );
1586 aSign = extractFloat32Sign( a );
1587 shiftCount = aExp - 0x9E;
1588 if ( 0 <= shiftCount ) {
1589 if ( float32_val(a) != 0xCF000000 ) {
1590 float_raise(float_flag_invalid, status);
1591 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1592 }
1593 return (int32_t) 0x80000000;
1594 }
1595 else if ( aExp <= 0x7E ) {
1596 if (aExp | aSig) {
1597 status->float_exception_flags |= float_flag_inexact;
1598 }
1599 return 0;
1600 }
1601 aSig = ( aSig | 0x00800000 )<<8;
1602 z = aSig>>( - shiftCount );
1603 if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
1604 status->float_exception_flags |= float_flag_inexact;
1605 }
1606 if ( aSign ) z = - z;
1607 return z;
1608
1609}
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621int16_t float32_to_int16_round_to_zero(float32 a, float_status *status)
1622{
1623 flag aSign;
1624 int aExp;
1625 int shiftCount;
1626 uint32_t aSig;
1627 int32_t z;
1628
1629 aSig = extractFloat32Frac( a );
1630 aExp = extractFloat32Exp( a );
1631 aSign = extractFloat32Sign( a );
1632 shiftCount = aExp - 0x8E;
1633 if ( 0 <= shiftCount ) {
1634 if ( float32_val(a) != 0xC7000000 ) {
1635 float_raise(float_flag_invalid, status);
1636 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1637 return 0x7FFF;
1638 }
1639 }
1640 return (int32_t) 0xffff8000;
1641 }
1642 else if ( aExp <= 0x7E ) {
1643 if ( aExp | aSig ) {
1644 status->float_exception_flags |= float_flag_inexact;
1645 }
1646 return 0;
1647 }
1648 shiftCount -= 0x10;
1649 aSig = ( aSig | 0x00800000 )<<8;
1650 z = aSig>>( - shiftCount );
1651 if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
1652 status->float_exception_flags |= float_flag_inexact;
1653 }
1654 if ( aSign ) {
1655 z = - z;
1656 }
1657 return z;
1658
1659}
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671int64_t float32_to_int64(float32 a, float_status *status)
1672{
1673 flag aSign;
1674 int aExp;
1675 int shiftCount;
1676 uint32_t aSig;
1677 uint64_t aSig64, aSigExtra;
1678 a = float32_squash_input_denormal(a, status);
1679
1680 aSig = extractFloat32Frac( a );
1681 aExp = extractFloat32Exp( a );
1682 aSign = extractFloat32Sign( a );
1683 shiftCount = 0xBE - aExp;
1684 if ( shiftCount < 0 ) {
1685 float_raise(float_flag_invalid, status);
1686 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1687 return LIT64( 0x7FFFFFFFFFFFFFFF );
1688 }
1689 return (int64_t) LIT64( 0x8000000000000000 );
1690 }
1691 if ( aExp ) aSig |= 0x00800000;
1692 aSig64 = aSig;
1693 aSig64 <<= 40;
1694 shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1695 return roundAndPackInt64(aSign, aSig64, aSigExtra, status);
1696
1697}
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711uint64_t float32_to_uint64(float32 a, float_status *status)
1712{
1713 flag aSign;
1714 int aExp;
1715 int shiftCount;
1716 uint32_t aSig;
1717 uint64_t aSig64, aSigExtra;
1718 a = float32_squash_input_denormal(a, status);
1719
1720 aSig = extractFloat32Frac(a);
1721 aExp = extractFloat32Exp(a);
1722 aSign = extractFloat32Sign(a);
1723 if ((aSign) && (aExp > 126)) {
1724 float_raise(float_flag_invalid, status);
1725 if (float32_is_any_nan(a)) {
1726 return LIT64(0xFFFFFFFFFFFFFFFF);
1727 } else {
1728 return 0;
1729 }
1730 }
1731 shiftCount = 0xBE - aExp;
1732 if (aExp) {
1733 aSig |= 0x00800000;
1734 }
1735 if (shiftCount < 0) {
1736 float_raise(float_flag_invalid, status);
1737 return LIT64(0xFFFFFFFFFFFFFFFF);
1738 }
1739
1740 aSig64 = aSig;
1741 aSig64 <<= 40;
1742 shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra);
1743 return roundAndPackUint64(aSign, aSig64, aSigExtra, status);
1744}
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *status)
1758{
1759 signed char current_rounding_mode = status->float_rounding_mode;
1760 set_float_rounding_mode(float_round_to_zero, status);
1761 int64_t v = float32_to_uint64(a, status);
1762 set_float_rounding_mode(current_rounding_mode, status);
1763 return v;
1764}
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776int64_t float32_to_int64_round_to_zero(float32 a, float_status *status)
1777{
1778 flag aSign;
1779 int aExp;
1780 int shiftCount;
1781 uint32_t aSig;
1782 uint64_t aSig64;
1783 int64_t z;
1784 a = float32_squash_input_denormal(a, status);
1785
1786 aSig = extractFloat32Frac( a );
1787 aExp = extractFloat32Exp( a );
1788 aSign = extractFloat32Sign( a );
1789 shiftCount = aExp - 0xBE;
1790 if ( 0 <= shiftCount ) {
1791 if ( float32_val(a) != 0xDF000000 ) {
1792 float_raise(float_flag_invalid, status);
1793 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1794 return LIT64( 0x7FFFFFFFFFFFFFFF );
1795 }
1796 }
1797 return (int64_t) LIT64( 0x8000000000000000 );
1798 }
1799 else if ( aExp <= 0x7E ) {
1800 if (aExp | aSig) {
1801 status->float_exception_flags |= float_flag_inexact;
1802 }
1803 return 0;
1804 }
1805 aSig64 = aSig | 0x00800000;
1806 aSig64 <<= 40;
1807 z = aSig64>>( - shiftCount );
1808 if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) {
1809 status->float_exception_flags |= float_flag_inexact;
1810 }
1811 if ( aSign ) z = - z;
1812 return z;
1813
1814}
1815
1816
1817
1818
1819
1820
1821
1822
1823float64 float32_to_float64(float32 a, float_status *status)
1824{
1825 flag aSign;
1826 int aExp;
1827 uint32_t aSig;
1828 a = float32_squash_input_denormal(a, status);
1829
1830 aSig = extractFloat32Frac( a );
1831 aExp = extractFloat32Exp( a );
1832 aSign = extractFloat32Sign( a );
1833 if ( aExp == 0xFF ) {
1834 if (aSig) {
1835 return commonNaNToFloat64(float32ToCommonNaN(a, status), status);
1836 }
1837 return packFloat64( aSign, 0x7FF, 0 );
1838 }
1839 if ( aExp == 0 ) {
1840 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1841 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1842 --aExp;
1843 }
1844 return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
1845
1846}
1847
1848
1849
1850
1851
1852
1853
1854
1855floatx80 float32_to_floatx80(float32 a, float_status *status)
1856{
1857 flag aSign;
1858 int aExp;
1859 uint32_t aSig;
1860
1861 a = float32_squash_input_denormal(a, status);
1862 aSig = extractFloat32Frac( a );
1863 aExp = extractFloat32Exp( a );
1864 aSign = extractFloat32Sign( a );
1865 if ( aExp == 0xFF ) {
1866 if (aSig) {
1867 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
1868 }
1869 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1870 }
1871 if ( aExp == 0 ) {
1872 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1873 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1874 }
1875 aSig |= 0x00800000;
1876 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
1877
1878}
1879
1880
1881
1882
1883
1884
1885
1886
1887float128 float32_to_float128(float32 a, float_status *status)
1888{
1889 flag aSign;
1890 int aExp;
1891 uint32_t aSig;
1892
1893 a = float32_squash_input_denormal(a, status);
1894 aSig = extractFloat32Frac( a );
1895 aExp = extractFloat32Exp( a );
1896 aSign = extractFloat32Sign( a );
1897 if ( aExp == 0xFF ) {
1898 if (aSig) {
1899 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
1900 }
1901 return packFloat128( aSign, 0x7FFF, 0, 0 );
1902 }
1903 if ( aExp == 0 ) {
1904 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1905 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1906 --aExp;
1907 }
1908 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
1909
1910}
1911
1912
1913
1914
1915
1916
1917
1918
1919float32 float32_round_to_int(float32 a, float_status *status)
1920{
1921 flag aSign;
1922 int aExp;
1923 uint32_t lastBitMask, roundBitsMask;
1924 uint32_t z;
1925 a = float32_squash_input_denormal(a, status);
1926
1927 aExp = extractFloat32Exp( a );
1928 if ( 0x96 <= aExp ) {
1929 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1930 return propagateFloat32NaN(a, a, status);
1931 }
1932 return a;
1933 }
1934 if ( aExp <= 0x7E ) {
1935 if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a;
1936 status->float_exception_flags |= float_flag_inexact;
1937 aSign = extractFloat32Sign( a );
1938 switch (status->float_rounding_mode) {
1939 case float_round_nearest_even:
1940 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1941 return packFloat32( aSign, 0x7F, 0 );
1942 }
1943 break;
1944 case float_round_ties_away:
1945 if (aExp == 0x7E) {
1946 return packFloat32(aSign, 0x7F, 0);
1947 }
1948 break;
1949 case float_round_down:
1950 return make_float32(aSign ? 0xBF800000 : 0);
1951 case float_round_up:
1952 return make_float32(aSign ? 0x80000000 : 0x3F800000);
1953 }
1954 return packFloat32( aSign, 0, 0 );
1955 }
1956 lastBitMask = 1;
1957 lastBitMask <<= 0x96 - aExp;
1958 roundBitsMask = lastBitMask - 1;
1959 z = float32_val(a);
1960 switch (status->float_rounding_mode) {
1961 case float_round_nearest_even:
1962 z += lastBitMask>>1;
1963 if ((z & roundBitsMask) == 0) {
1964 z &= ~lastBitMask;
1965 }
1966 break;
1967 case float_round_ties_away:
1968 z += lastBitMask >> 1;
1969 break;
1970 case float_round_to_zero:
1971 break;
1972 case float_round_up:
1973 if (!extractFloat32Sign(make_float32(z))) {
1974 z += roundBitsMask;
1975 }
1976 break;
1977 case float_round_down:
1978 if (extractFloat32Sign(make_float32(z))) {
1979 z += roundBitsMask;
1980 }
1981 break;
1982 default:
1983 abort();
1984 }
1985 z &= ~ roundBitsMask;
1986 if (z != float32_val(a)) {
1987 status->float_exception_flags |= float_flag_inexact;
1988 }
1989 return make_float32(z);
1990
1991}
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001static float32 addFloat32Sigs(float32 a, float32 b, flag zSign,
2002 float_status *status)
2003{
2004 int aExp, bExp, zExp;
2005 uint32_t aSig, bSig, zSig;
2006 int expDiff;
2007
2008 aSig = extractFloat32Frac( a );
2009 aExp = extractFloat32Exp( a );
2010 bSig = extractFloat32Frac( b );
2011 bExp = extractFloat32Exp( b );
2012 expDiff = aExp - bExp;
2013 aSig <<= 6;
2014 bSig <<= 6;
2015 if ( 0 < expDiff ) {
2016 if ( aExp == 0xFF ) {
2017 if (aSig) {
2018 return propagateFloat32NaN(a, b, status);
2019 }
2020 return a;
2021 }
2022 if ( bExp == 0 ) {
2023 --expDiff;
2024 }
2025 else {
2026 bSig |= 0x20000000;
2027 }
2028 shift32RightJamming( bSig, expDiff, &bSig );
2029 zExp = aExp;
2030 }
2031 else if ( expDiff < 0 ) {
2032 if ( bExp == 0xFF ) {
2033 if (bSig) {
2034 return propagateFloat32NaN(a, b, status);
2035 }
2036 return packFloat32( zSign, 0xFF, 0 );
2037 }
2038 if ( aExp == 0 ) {
2039 ++expDiff;
2040 }
2041 else {
2042 aSig |= 0x20000000;
2043 }
2044 shift32RightJamming( aSig, - expDiff, &aSig );
2045 zExp = bExp;
2046 }
2047 else {
2048 if ( aExp == 0xFF ) {
2049 if (aSig | bSig) {
2050 return propagateFloat32NaN(a, b, status);
2051 }
2052 return a;
2053 }
2054 if ( aExp == 0 ) {
2055 if (status->flush_to_zero) {
2056 if (aSig | bSig) {
2057 float_raise(float_flag_output_denormal, status);
2058 }
2059 return packFloat32(zSign, 0, 0);
2060 }
2061 return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
2062 }
2063 zSig = 0x40000000 + aSig + bSig;
2064 zExp = aExp;
2065 goto roundAndPack;
2066 }
2067 aSig |= 0x20000000;
2068 zSig = ( aSig + bSig )<<1;
2069 --zExp;
2070 if ( (int32_t) zSig < 0 ) {
2071 zSig = aSig + bSig;
2072 ++zExp;
2073 }
2074 roundAndPack:
2075 return roundAndPackFloat32(zSign, zExp, zSig, status);
2076
2077}
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087static float32 subFloat32Sigs(float32 a, float32 b, flag zSign,
2088 float_status *status)
2089{
2090 int aExp, bExp, zExp;
2091 uint32_t aSig, bSig, zSig;
2092 int expDiff;
2093
2094 aSig = extractFloat32Frac( a );
2095 aExp = extractFloat32Exp( a );
2096 bSig = extractFloat32Frac( b );
2097 bExp = extractFloat32Exp( b );
2098 expDiff = aExp - bExp;
2099 aSig <<= 7;
2100 bSig <<= 7;
2101 if ( 0 < expDiff ) goto aExpBigger;
2102 if ( expDiff < 0 ) goto bExpBigger;
2103 if ( aExp == 0xFF ) {
2104 if (aSig | bSig) {
2105 return propagateFloat32NaN(a, b, status);
2106 }
2107 float_raise(float_flag_invalid, status);
2108 return float32_default_nan(status);
2109 }
2110 if ( aExp == 0 ) {
2111 aExp = 1;
2112 bExp = 1;
2113 }
2114 if ( bSig < aSig ) goto aBigger;
2115 if ( aSig < bSig ) goto bBigger;
2116 return packFloat32(status->float_rounding_mode == float_round_down, 0, 0);
2117 bExpBigger:
2118 if ( bExp == 0xFF ) {
2119 if (bSig) {
2120 return propagateFloat32NaN(a, b, status);
2121 }
2122 return packFloat32( zSign ^ 1, 0xFF, 0 );
2123 }
2124 if ( aExp == 0 ) {
2125 ++expDiff;
2126 }
2127 else {
2128 aSig |= 0x40000000;
2129 }
2130 shift32RightJamming( aSig, - expDiff, &aSig );
2131 bSig |= 0x40000000;
2132 bBigger:
2133 zSig = bSig - aSig;
2134 zExp = bExp;
2135 zSign ^= 1;
2136 goto normalizeRoundAndPack;
2137 aExpBigger:
2138 if ( aExp == 0xFF ) {
2139 if (aSig) {
2140 return propagateFloat32NaN(a, b, status);
2141 }
2142 return a;
2143 }
2144 if ( bExp == 0 ) {
2145 --expDiff;
2146 }
2147 else {
2148 bSig |= 0x40000000;
2149 }
2150 shift32RightJamming( bSig, expDiff, &bSig );
2151 aSig |= 0x40000000;
2152 aBigger:
2153 zSig = aSig - bSig;
2154 zExp = aExp;
2155 normalizeRoundAndPack:
2156 --zExp;
2157 return normalizeRoundAndPackFloat32(zSign, zExp, zSig, status);
2158
2159}
2160
2161
2162
2163
2164
2165
2166
2167float32 float32_add(float32 a, float32 b, float_status *status)
2168{
2169 flag aSign, bSign;
2170 a = float32_squash_input_denormal(a, status);
2171 b = float32_squash_input_denormal(b, status);
2172
2173 aSign = extractFloat32Sign( a );
2174 bSign = extractFloat32Sign( b );
2175 if ( aSign == bSign ) {
2176 return addFloat32Sigs(a, b, aSign, status);
2177 }
2178 else {
2179 return subFloat32Sigs(a, b, aSign, status);
2180 }
2181
2182}
2183
2184
2185
2186
2187
2188
2189
2190float32 float32_sub(float32 a, float32 b, float_status *status)
2191{
2192 flag aSign, bSign;
2193 a = float32_squash_input_denormal(a, status);
2194 b = float32_squash_input_denormal(b, status);
2195
2196 aSign = extractFloat32Sign( a );
2197 bSign = extractFloat32Sign( b );
2198 if ( aSign == bSign ) {
2199 return subFloat32Sigs(a, b, aSign, status);
2200 }
2201 else {
2202 return addFloat32Sigs(a, b, aSign, status);
2203 }
2204
2205}
2206
2207
2208
2209
2210
2211
2212
2213float32 float32_mul(float32 a, float32 b, float_status *status)
2214{
2215 flag aSign, bSign, zSign;
2216 int aExp, bExp, zExp;
2217 uint32_t aSig, bSig;
2218 uint64_t zSig64;
2219 uint32_t zSig;
2220
2221 a = float32_squash_input_denormal(a, status);
2222 b = float32_squash_input_denormal(b, status);
2223
2224 aSig = extractFloat32Frac( a );
2225 aExp = extractFloat32Exp( a );
2226 aSign = extractFloat32Sign( a );
2227 bSig = extractFloat32Frac( b );
2228 bExp = extractFloat32Exp( b );
2229 bSign = extractFloat32Sign( b );
2230 zSign = aSign ^ bSign;
2231 if ( aExp == 0xFF ) {
2232 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2233 return propagateFloat32NaN(a, b, status);
2234 }
2235 if ( ( bExp | bSig ) == 0 ) {
2236 float_raise(float_flag_invalid, status);
2237 return float32_default_nan(status);
2238 }
2239 return packFloat32( zSign, 0xFF, 0 );
2240 }
2241 if ( bExp == 0xFF ) {
2242 if (bSig) {
2243 return propagateFloat32NaN(a, b, status);
2244 }
2245 if ( ( aExp | aSig ) == 0 ) {
2246 float_raise(float_flag_invalid, status);
2247 return float32_default_nan(status);
2248 }
2249 return packFloat32( zSign, 0xFF, 0 );
2250 }
2251 if ( aExp == 0 ) {
2252 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2253 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2254 }
2255 if ( bExp == 0 ) {
2256 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
2257 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2258 }
2259 zExp = aExp + bExp - 0x7F;
2260 aSig = ( aSig | 0x00800000 )<<7;
2261 bSig = ( bSig | 0x00800000 )<<8;
2262 shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 );
2263 zSig = zSig64;
2264 if ( 0 <= (int32_t) ( zSig<<1 ) ) {
2265 zSig <<= 1;
2266 --zExp;
2267 }
2268 return roundAndPackFloat32(zSign, zExp, zSig, status);
2269
2270}
2271
2272
2273
2274
2275
2276
2277
2278float32 float32_div(float32 a, float32 b, float_status *status)
2279{
2280 flag aSign, bSign, zSign;
2281 int aExp, bExp, zExp;
2282 uint32_t aSig, bSig, zSig;
2283 a = float32_squash_input_denormal(a, status);
2284 b = float32_squash_input_denormal(b, status);
2285
2286 aSig = extractFloat32Frac( a );
2287 aExp = extractFloat32Exp( a );
2288 aSign = extractFloat32Sign( a );
2289 bSig = extractFloat32Frac( b );
2290 bExp = extractFloat32Exp( b );
2291 bSign = extractFloat32Sign( b );
2292 zSign = aSign ^ bSign;
2293 if ( aExp == 0xFF ) {
2294 if (aSig) {
2295 return propagateFloat32NaN(a, b, status);
2296 }
2297 if ( bExp == 0xFF ) {
2298 if (bSig) {
2299 return propagateFloat32NaN(a, b, status);
2300 }
2301 float_raise(float_flag_invalid, status);
2302 return float32_default_nan(status);
2303 }
2304 return packFloat32( zSign, 0xFF, 0 );
2305 }
2306 if ( bExp == 0xFF ) {
2307 if (bSig) {
2308 return propagateFloat32NaN(a, b, status);
2309 }
2310 return packFloat32( zSign, 0, 0 );
2311 }
2312 if ( bExp == 0 ) {
2313 if ( bSig == 0 ) {
2314 if ( ( aExp | aSig ) == 0 ) {
2315 float_raise(float_flag_invalid, status);
2316 return float32_default_nan(status);
2317 }
2318 float_raise(float_flag_divbyzero, status);
2319 return packFloat32( zSign, 0xFF, 0 );
2320 }
2321 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2322 }
2323 if ( aExp == 0 ) {
2324 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2325 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2326 }
2327 zExp = aExp - bExp + 0x7D;
2328 aSig = ( aSig | 0x00800000 )<<7;
2329 bSig = ( bSig | 0x00800000 )<<8;
2330 if ( bSig <= ( aSig + aSig ) ) {
2331 aSig >>= 1;
2332 ++zExp;
2333 }
2334 zSig = ( ( (uint64_t) aSig )<<32 ) / bSig;
2335 if ( ( zSig & 0x3F ) == 0 ) {
2336 zSig |= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 );
2337 }
2338 return roundAndPackFloat32(zSign, zExp, zSig, status);
2339
2340}
2341
2342
2343
2344
2345
2346
2347
2348float32 float32_rem(float32 a, float32 b, float_status *status)
2349{
2350 flag aSign, zSign;
2351 int aExp, bExp, expDiff;
2352 uint32_t aSig, bSig;
2353 uint32_t q;
2354 uint64_t aSig64, bSig64, q64;
2355 uint32_t alternateASig;
2356 int32_t sigMean;
2357 a = float32_squash_input_denormal(a, status);
2358 b = float32_squash_input_denormal(b, status);
2359
2360 aSig = extractFloat32Frac( a );
2361 aExp = extractFloat32Exp( a );
2362 aSign = extractFloat32Sign( a );
2363 bSig = extractFloat32Frac( b );
2364 bExp = extractFloat32Exp( b );
2365 if ( aExp == 0xFF ) {
2366 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2367 return propagateFloat32NaN(a, b, status);
2368 }
2369 float_raise(float_flag_invalid, status);
2370 return float32_default_nan(status);
2371 }
2372 if ( bExp == 0xFF ) {
2373 if (bSig) {
2374 return propagateFloat32NaN(a, b, status);
2375 }
2376 return a;
2377 }
2378 if ( bExp == 0 ) {
2379 if ( bSig == 0 ) {
2380 float_raise(float_flag_invalid, status);
2381 return float32_default_nan(status);
2382 }
2383 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2384 }
2385 if ( aExp == 0 ) {
2386 if ( aSig == 0 ) return a;
2387 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2388 }
2389 expDiff = aExp - bExp;
2390 aSig |= 0x00800000;
2391 bSig |= 0x00800000;
2392 if ( expDiff < 32 ) {
2393 aSig <<= 8;
2394 bSig <<= 8;
2395 if ( expDiff < 0 ) {
2396 if ( expDiff < -1 ) return a;
2397 aSig >>= 1;
2398 }
2399 q = ( bSig <= aSig );
2400 if ( q ) aSig -= bSig;
2401 if ( 0 < expDiff ) {
2402 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
2403 q >>= 32 - expDiff;
2404 bSig >>= 2;
2405 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2406 }
2407 else {
2408 aSig >>= 2;
2409 bSig >>= 2;
2410 }
2411 }
2412 else {
2413 if ( bSig <= aSig ) aSig -= bSig;
2414 aSig64 = ( (uint64_t) aSig )<<40;
2415 bSig64 = ( (uint64_t) bSig )<<40;
2416 expDiff -= 64;
2417 while ( 0 < expDiff ) {
2418 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2419 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2420 aSig64 = - ( ( bSig * q64 )<<38 );
2421 expDiff -= 62;
2422 }
2423 expDiff += 64;
2424 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2425 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2426 q = q64>>( 64 - expDiff );
2427 bSig <<= 6;
2428 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2429 }
2430 do {
2431 alternateASig = aSig;
2432 ++q;
2433 aSig -= bSig;
2434 } while ( 0 <= (int32_t) aSig );
2435 sigMean = aSig + alternateASig;
2436 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2437 aSig = alternateASig;
2438 }
2439 zSign = ( (int32_t) aSig < 0 );
2440 if ( zSign ) aSig = - aSig;
2441 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
2442}
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455float32 float32_muladd(float32 a, float32 b, float32 c, int flags,
2456 float_status *status)
2457{
2458 flag aSign, bSign, cSign, zSign;
2459 int aExp, bExp, cExp, pExp, zExp, expDiff;
2460 uint32_t aSig, bSig, cSig;
2461 flag pInf, pZero, pSign;
2462 uint64_t pSig64, cSig64, zSig64;
2463 uint32_t pSig;
2464 int shiftcount;
2465 flag signflip, infzero;
2466
2467 a = float32_squash_input_denormal(a, status);
2468 b = float32_squash_input_denormal(b, status);
2469 c = float32_squash_input_denormal(c, status);
2470 aSig = extractFloat32Frac(a);
2471 aExp = extractFloat32Exp(a);
2472 aSign = extractFloat32Sign(a);
2473 bSig = extractFloat32Frac(b);
2474 bExp = extractFloat32Exp(b);
2475 bSign = extractFloat32Sign(b);
2476 cSig = extractFloat32Frac(c);
2477 cExp = extractFloat32Exp(c);
2478 cSign = extractFloat32Sign(c);
2479
2480 infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) ||
2481 (aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));
2482
2483
2484
2485
2486
2487
2488 if (((aExp == 0xff) && aSig) ||
2489 ((bExp == 0xff) && bSig) ||
2490 ((cExp == 0xff) && cSig)) {
2491 return propagateFloat32MulAddNaN(a, b, c, infzero, status);
2492 }
2493
2494 if (infzero) {
2495 float_raise(float_flag_invalid, status);
2496 return float32_default_nan(status);
2497 }
2498
2499 if (flags & float_muladd_negate_c) {
2500 cSign ^= 1;
2501 }
2502
2503 signflip = (flags & float_muladd_negate_result) ? 1 : 0;
2504
2505
2506 pSign = aSign ^ bSign;
2507 if (flags & float_muladd_negate_product) {
2508 pSign ^= 1;
2509 }
2510 pInf = (aExp == 0xff) || (bExp == 0xff);
2511 pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
2512
2513 if (cExp == 0xff) {
2514 if (pInf && (pSign ^ cSign)) {
2515
2516 float_raise(float_flag_invalid, status);
2517 return float32_default_nan(status);
2518 }
2519
2520 return packFloat32(cSign ^ signflip, 0xff, 0);
2521 }
2522
2523 if (pInf) {
2524 return packFloat32(pSign ^ signflip, 0xff, 0);
2525 }
2526
2527 if (pZero) {
2528 if (cExp == 0) {
2529 if (cSig == 0) {
2530
2531 if (pSign == cSign) {
2532 zSign = pSign;
2533 } else if (status->float_rounding_mode == float_round_down) {
2534 zSign = 1;
2535 } else {
2536 zSign = 0;
2537 }
2538 return packFloat32(zSign ^ signflip, 0, 0);
2539 }
2540
2541 if (status->flush_to_zero) {
2542 float_raise(float_flag_output_denormal, status);
2543 return packFloat32(cSign ^ signflip, 0, 0);
2544 }
2545 }
2546
2547 if (flags & float_muladd_halve_result) {
2548 if (cExp == 0) {
2549 normalizeFloat32Subnormal(cSig, &cExp, &cSig);
2550 }
2551
2552
2553
2554 cExp -= 2;
2555 cSig = (cSig | 0x00800000) << 7;
2556 return roundAndPackFloat32(cSign ^ signflip, cExp, cSig, status);
2557 }
2558 return packFloat32(cSign ^ signflip, cExp, cSig);
2559 }
2560
2561 if (aExp == 0) {
2562 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
2563 }
2564 if (bExp == 0) {
2565 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
2566 }
2567
2568
2569
2570
2571
2572
2573
2574
2575 pExp = aExp + bExp - 0x7e;
2576 aSig = (aSig | 0x00800000) << 7;
2577 bSig = (bSig | 0x00800000) << 8;
2578 pSig64 = (uint64_t)aSig * bSig;
2579 if ((int64_t)(pSig64 << 1) >= 0) {
2580 pSig64 <<= 1;
2581 pExp--;
2582 }
2583
2584 zSign = pSign ^ signflip;
2585
2586
2587
2588
2589 if (cExp == 0) {
2590 if (!cSig) {
2591
2592 shift64RightJamming(pSig64, 32, &pSig64);
2593 pSig = pSig64;
2594 if (flags & float_muladd_halve_result) {
2595 pExp--;
2596 }
2597 return roundAndPackFloat32(zSign, pExp - 1,
2598 pSig, status);
2599 }
2600 normalizeFloat32Subnormal(cSig, &cExp, &cSig);
2601 }
2602
2603 cSig64 = (uint64_t)cSig << (62 - 23);
2604 cSig64 |= LIT64(0x4000000000000000);
2605 expDiff = pExp - cExp;
2606
2607 if (pSign == cSign) {
2608
2609 if (expDiff > 0) {
2610
2611 shift64RightJamming(cSig64, expDiff, &cSig64);
2612 zExp = pExp;
2613 } else if (expDiff < 0) {
2614
2615 shift64RightJamming(pSig64, -expDiff, &pSig64);
2616 zExp = cExp;
2617 } else {
2618
2619 zExp = cExp;
2620 }
2621
2622 zSig64 = pSig64 + cSig64;
2623 if ((int64_t)zSig64 < 0) {
2624 shift64RightJamming(zSig64, 1, &zSig64);
2625 } else {
2626 zExp--;
2627 }
2628 } else {
2629
2630 if (expDiff > 0) {
2631 shift64RightJamming(cSig64, expDiff, &cSig64);
2632 zSig64 = pSig64 - cSig64;
2633 zExp = pExp;
2634 } else if (expDiff < 0) {
2635 shift64RightJamming(pSig64, -expDiff, &pSig64);
2636 zSig64 = cSig64 - pSig64;
2637 zExp = cExp;
2638 zSign ^= 1;
2639 } else {
2640 zExp = pExp;
2641 if (cSig64 < pSig64) {
2642 zSig64 = pSig64 - cSig64;
2643 } else if (pSig64 < cSig64) {
2644 zSig64 = cSig64 - pSig64;
2645 zSign ^= 1;
2646 } else {
2647
2648 zSign = signflip;
2649 if (status->float_rounding_mode == float_round_down) {
2650 zSign ^= 1;
2651 }
2652 return packFloat32(zSign, 0, 0);
2653 }
2654 }
2655 --zExp;
2656
2657 shiftcount = countLeadingZeros64(zSig64) - 1;
2658 zSig64 <<= shiftcount;
2659 zExp -= shiftcount;
2660 }
2661 if (flags & float_muladd_halve_result) {
2662 zExp--;
2663 }
2664
2665 shift64RightJamming(zSig64, 32, &zSig64);
2666 return roundAndPackFloat32(zSign, zExp, zSig64, status);
2667}
2668
2669
2670
2671
2672
2673
2674
2675
2676float32 float32_sqrt(float32 a, float_status *status)
2677{
2678 flag aSign;
2679 int aExp, zExp;
2680 uint32_t aSig, zSig;
2681 uint64_t rem, term;
2682 a = float32_squash_input_denormal(a, status);
2683
2684 aSig = extractFloat32Frac( a );
2685 aExp = extractFloat32Exp( a );
2686 aSign = extractFloat32Sign( a );
2687 if ( aExp == 0xFF ) {
2688 if (aSig) {
2689 return propagateFloat32NaN(a, float32_zero, status);
2690 }
2691 if ( ! aSign ) return a;
2692 float_raise(float_flag_invalid, status);
2693 return float32_default_nan(status);
2694 }
2695 if ( aSign ) {
2696 if ( ( aExp | aSig ) == 0 ) return a;
2697 float_raise(float_flag_invalid, status);
2698 return float32_default_nan(status);
2699 }
2700 if ( aExp == 0 ) {
2701 if ( aSig == 0 ) return float32_zero;
2702 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2703 }
2704 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2705 aSig = ( aSig | 0x00800000 )<<8;
2706 zSig = estimateSqrt32( aExp, aSig ) + 2;
2707 if ( ( zSig & 0x7F ) <= 5 ) {
2708 if ( zSig < 2 ) {
2709 zSig = 0x7FFFFFFF;
2710 goto roundAndPack;
2711 }
2712 aSig >>= aExp & 1;
2713 term = ( (uint64_t) zSig ) * zSig;
2714 rem = ( ( (uint64_t) aSig )<<32 ) - term;
2715 while ( (int64_t) rem < 0 ) {
2716 --zSig;
2717 rem += ( ( (uint64_t) zSig )<<1 ) | 1;
2718 }
2719 zSig |= ( rem != 0 );
2720 }
2721 shift32RightJamming( zSig, 1, &zSig );
2722 roundAndPack:
2723 return roundAndPackFloat32(0, zExp, zSig, status);
2724
2725}
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745static const float64 float32_exp2_coefficients[15] =
2746{
2747 const_float64( 0x3ff0000000000000ll ),
2748 const_float64( 0x3fe0000000000000ll ),
2749 const_float64( 0x3fc5555555555555ll ),
2750 const_float64( 0x3fa5555555555555ll ),
2751 const_float64( 0x3f81111111111111ll ),
2752 const_float64( 0x3f56c16c16c16c17ll ),
2753 const_float64( 0x3f2a01a01a01a01all ),
2754 const_float64( 0x3efa01a01a01a01all ),
2755 const_float64( 0x3ec71de3a556c734ll ),
2756 const_float64( 0x3e927e4fb7789f5cll ),
2757 const_float64( 0x3e5ae64567f544e4ll ),
2758 const_float64( 0x3e21eed8eff8d898ll ),
2759 const_float64( 0x3de6124613a86d09ll ),
2760 const_float64( 0x3da93974a8c07c9dll ),
2761 const_float64( 0x3d6ae7f3e733b81fll ),
2762};
2763
2764float32 float32_exp2(float32 a, float_status *status)
2765{
2766 flag aSign;
2767 int aExp;
2768 uint32_t aSig;
2769 float64 r, x, xn;
2770 int i;
2771 a = float32_squash_input_denormal(a, status);
2772
2773 aSig = extractFloat32Frac( a );
2774 aExp = extractFloat32Exp( a );
2775 aSign = extractFloat32Sign( a );
2776
2777 if ( aExp == 0xFF) {
2778 if (aSig) {
2779 return propagateFloat32NaN(a, float32_zero, status);
2780 }
2781 return (aSign) ? float32_zero : a;
2782 }
2783 if (aExp == 0) {
2784 if (aSig == 0) return float32_one;
2785 }
2786
2787 float_raise(float_flag_inexact, status);
2788
2789
2790
2791
2792 x = float32_to_float64(a, status);
2793 x = float64_mul(x, float64_ln2, status);
2794
2795 xn = x;
2796 r = float64_one;
2797 for (i = 0 ; i < 15 ; i++) {
2798 float64 f;
2799
2800 f = float64_mul(xn, float32_exp2_coefficients[i], status);
2801 r = float64_add(r, f, status);
2802
2803 xn = float64_mul(xn, x, status);
2804 }
2805
2806 return float64_to_float32(r, status);
2807}
2808
2809
2810
2811
2812
2813
2814float32 float32_log2(float32 a, float_status *status)
2815{
2816 flag aSign, zSign;
2817 int aExp;
2818 uint32_t aSig, zSig, i;
2819
2820 a = float32_squash_input_denormal(a, status);
2821 aSig = extractFloat32Frac( a );
2822 aExp = extractFloat32Exp( a );
2823 aSign = extractFloat32Sign( a );
2824
2825 if ( aExp == 0 ) {
2826 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
2827 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2828 }
2829 if ( aSign ) {
2830 float_raise(float_flag_invalid, status);
2831 return float32_default_nan(status);
2832 }
2833 if ( aExp == 0xFF ) {
2834 if (aSig) {
2835 return propagateFloat32NaN(a, float32_zero, status);
2836 }
2837 return a;
2838 }
2839
2840 aExp -= 0x7F;
2841 aSig |= 0x00800000;
2842 zSign = aExp < 0;
2843 zSig = aExp << 23;
2844
2845 for (i = 1 << 22; i > 0; i >>= 1) {
2846 aSig = ( (uint64_t)aSig * aSig ) >> 23;
2847 if ( aSig & 0x01000000 ) {
2848 aSig >>= 1;
2849 zSig |= i;
2850 }
2851 }
2852
2853 if ( zSign )
2854 zSig = -zSig;
2855
2856 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
2857}
2858
2859
2860
2861
2862
2863
2864
2865
2866int float32_eq(float32 a, float32 b, float_status *status)
2867{
2868 uint32_t av, bv;
2869 a = float32_squash_input_denormal(a, status);
2870 b = float32_squash_input_denormal(b, status);
2871
2872 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2873 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2874 ) {
2875 float_raise(float_flag_invalid, status);
2876 return 0;
2877 }
2878 av = float32_val(a);
2879 bv = float32_val(b);
2880 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
2881}
2882
2883
2884
2885
2886
2887
2888
2889
2890int float32_le(float32 a, float32 b, float_status *status)
2891{
2892 flag aSign, bSign;
2893 uint32_t av, bv;
2894 a = float32_squash_input_denormal(a, status);
2895 b = float32_squash_input_denormal(b, status);
2896
2897 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2898 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2899 ) {
2900 float_raise(float_flag_invalid, status);
2901 return 0;
2902 }
2903 aSign = extractFloat32Sign( a );
2904 bSign = extractFloat32Sign( b );
2905 av = float32_val(a);
2906 bv = float32_val(b);
2907 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
2908 return ( av == bv ) || ( aSign ^ ( av < bv ) );
2909
2910}
2911
2912
2913
2914
2915
2916
2917
2918
2919int float32_lt(float32 a, float32 b, float_status *status)
2920{
2921 flag aSign, bSign;
2922 uint32_t av, bv;
2923 a = float32_squash_input_denormal(a, status);
2924 b = float32_squash_input_denormal(b, status);
2925
2926 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2927 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2928 ) {
2929 float_raise(float_flag_invalid, status);
2930 return 0;
2931 }
2932 aSign = extractFloat32Sign( a );
2933 bSign = extractFloat32Sign( b );
2934 av = float32_val(a);
2935 bv = float32_val(b);
2936 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
2937 return ( av != bv ) && ( aSign ^ ( av < bv ) );
2938
2939}
2940
2941
2942
2943
2944
2945
2946
2947
2948int float32_unordered(float32 a, float32 b, float_status *status)
2949{
2950 a = float32_squash_input_denormal(a, status);
2951 b = float32_squash_input_denormal(b, status);
2952
2953 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2954 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2955 ) {
2956 float_raise(float_flag_invalid, status);
2957 return 1;
2958 }
2959 return 0;
2960}
2961
2962
2963
2964
2965
2966
2967
2968
2969int float32_eq_quiet(float32 a, float32 b, float_status *status)
2970{
2971 a = float32_squash_input_denormal(a, status);
2972 b = float32_squash_input_denormal(b, status);
2973
2974 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2975 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2976 ) {
2977 if (float32_is_signaling_nan(a, status)
2978 || float32_is_signaling_nan(b, status)) {
2979 float_raise(float_flag_invalid, status);
2980 }
2981 return 0;
2982 }
2983 return ( float32_val(a) == float32_val(b) ) ||
2984 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
2985}
2986
2987
2988
2989
2990
2991
2992
2993
2994int float32_le_quiet(float32 a, float32 b, float_status *status)
2995{
2996 flag aSign, bSign;
2997 uint32_t av, bv;
2998 a = float32_squash_input_denormal(a, status);
2999 b = float32_squash_input_denormal(b, status);
3000
3001 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3002 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3003 ) {
3004 if (float32_is_signaling_nan(a, status)
3005 || float32_is_signaling_nan(b, status)) {
3006 float_raise(float_flag_invalid, status);
3007 }
3008 return 0;
3009 }
3010 aSign = extractFloat32Sign( a );
3011 bSign = extractFloat32Sign( b );
3012 av = float32_val(a);
3013 bv = float32_val(b);
3014 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
3015 return ( av == bv ) || ( aSign ^ ( av < bv ) );
3016
3017}
3018
3019
3020
3021
3022
3023
3024
3025
3026int float32_lt_quiet(float32 a, float32 b, float_status *status)
3027{
3028 flag aSign, bSign;
3029 uint32_t av, bv;
3030 a = float32_squash_input_denormal(a, status);
3031 b = float32_squash_input_denormal(b, status);
3032
3033 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3034 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3035 ) {
3036 if (float32_is_signaling_nan(a, status)
3037 || float32_is_signaling_nan(b, status)) {
3038 float_raise(float_flag_invalid, status);
3039 }
3040 return 0;
3041 }
3042 aSign = extractFloat32Sign( a );
3043 bSign = extractFloat32Sign( b );
3044 av = float32_val(a);
3045 bv = float32_val(b);
3046 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
3047 return ( av != bv ) && ( aSign ^ ( av < bv ) );
3048
3049}
3050
3051
3052
3053
3054
3055
3056
3057
3058int float32_unordered_quiet(float32 a, float32 b, float_status *status)
3059{
3060 a = float32_squash_input_denormal(a, status);
3061 b = float32_squash_input_denormal(b, status);
3062
3063 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3064 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3065 ) {
3066 if (float32_is_signaling_nan(a, status)
3067 || float32_is_signaling_nan(b, status)) {
3068 float_raise(float_flag_invalid, status);
3069 }
3070 return 1;
3071 }
3072 return 0;
3073}
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085int32_t float64_to_int32(float64 a, float_status *status)
3086{
3087 flag aSign;
3088 int aExp;
3089 int shiftCount;
3090 uint64_t aSig;
3091 a = float64_squash_input_denormal(a, status);
3092
3093 aSig = extractFloat64Frac( a );
3094 aExp = extractFloat64Exp( a );
3095 aSign = extractFloat64Sign( a );
3096 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
3097 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3098 shiftCount = 0x42C - aExp;
3099 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
3100 return roundAndPackInt32(aSign, aSig, status);
3101
3102}
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114int32_t float64_to_int32_round_to_zero(float64 a, float_status *status)
3115{
3116 flag aSign;
3117 int aExp;
3118 int shiftCount;
3119 uint64_t aSig, savedASig;
3120 int32_t z;
3121 a = float64_squash_input_denormal(a, status);
3122
3123 aSig = extractFloat64Frac( a );
3124 aExp = extractFloat64Exp( a );
3125 aSign = extractFloat64Sign( a );
3126 if ( 0x41E < aExp ) {
3127 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
3128 goto invalid;
3129 }
3130 else if ( aExp < 0x3FF ) {
3131 if (aExp || aSig) {
3132 status->float_exception_flags |= float_flag_inexact;
3133 }
3134 return 0;
3135 }
3136 aSig |= LIT64( 0x0010000000000000 );
3137 shiftCount = 0x433 - aExp;
3138 savedASig = aSig;
3139 aSig >>= shiftCount;
3140 z = aSig;
3141 if ( aSign ) z = - z;
3142 if ( ( z < 0 ) ^ aSign ) {
3143 invalid:
3144 float_raise(float_flag_invalid, status);
3145 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
3146 }
3147 if ( ( aSig<<shiftCount ) != savedASig ) {
3148 status->float_exception_flags |= float_flag_inexact;
3149 }
3150 return z;
3151
3152}
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164int16_t float64_to_int16_round_to_zero(float64 a, float_status *status)
3165{
3166 flag aSign;
3167 int aExp;
3168 int shiftCount;
3169 uint64_t aSig, savedASig;
3170 int32_t z;
3171
3172 aSig = extractFloat64Frac( a );
3173 aExp = extractFloat64Exp( a );
3174 aSign = extractFloat64Sign( a );
3175 if ( 0x40E < aExp ) {
3176 if ( ( aExp == 0x7FF ) && aSig ) {
3177 aSign = 0;
3178 }
3179 goto invalid;
3180 }
3181 else if ( aExp < 0x3FF ) {
3182 if ( aExp || aSig ) {
3183 status->float_exception_flags |= float_flag_inexact;
3184 }
3185 return 0;
3186 }
3187 aSig |= LIT64( 0x0010000000000000 );
3188 shiftCount = 0x433 - aExp;
3189 savedASig = aSig;
3190 aSig >>= shiftCount;
3191 z = aSig;
3192 if ( aSign ) {
3193 z = - z;
3194 }
3195 if ( ( (int16_t)z < 0 ) ^ aSign ) {
3196 invalid:
3197 float_raise(float_flag_invalid, status);
3198 return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
3199 }
3200 if ( ( aSig<<shiftCount ) != savedASig ) {
3201 status->float_exception_flags |= float_flag_inexact;
3202 }
3203 return z;
3204}
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216int64_t float64_to_int64(float64 a, float_status *status)
3217{
3218 flag aSign;
3219 int aExp;
3220 int shiftCount;
3221 uint64_t aSig, aSigExtra;
3222 a = float64_squash_input_denormal(a, status);
3223
3224 aSig = extractFloat64Frac( a );
3225 aExp = extractFloat64Exp( a );
3226 aSign = extractFloat64Sign( a );
3227 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3228 shiftCount = 0x433 - aExp;
3229 if ( shiftCount <= 0 ) {
3230 if ( 0x43E < aExp ) {
3231 float_raise(float_flag_invalid, status);
3232 if ( ! aSign
3233 || ( ( aExp == 0x7FF )
3234 && ( aSig != LIT64( 0x0010000000000000 ) ) )
3235 ) {
3236 return LIT64( 0x7FFFFFFFFFFFFFFF );
3237 }
3238 return (int64_t) LIT64( 0x8000000000000000 );
3239 }
3240 aSigExtra = 0;
3241 aSig <<= - shiftCount;
3242 }
3243 else {
3244 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3245 }
3246 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
3247
3248}
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260int64_t float64_to_int64_round_to_zero(float64 a, float_status *status)
3261{
3262 flag aSign;
3263 int aExp;
3264 int shiftCount;
3265 uint64_t aSig;
3266 int64_t z;
3267 a = float64_squash_input_denormal(a, status);
3268
3269 aSig = extractFloat64Frac( a );
3270 aExp = extractFloat64Exp( a );
3271 aSign = extractFloat64Sign( a );
3272 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3273 shiftCount = aExp - 0x433;
3274 if ( 0 <= shiftCount ) {
3275 if ( 0x43E <= aExp ) {
3276 if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
3277 float_raise(float_flag_invalid, status);
3278 if ( ! aSign
3279 || ( ( aExp == 0x7FF )
3280 && ( aSig != LIT64( 0x0010000000000000 ) ) )
3281 ) {
3282 return LIT64( 0x7FFFFFFFFFFFFFFF );
3283 }
3284 }
3285 return (int64_t) LIT64( 0x8000000000000000 );
3286 }
3287 z = aSig<<shiftCount;
3288 }
3289 else {
3290 if ( aExp < 0x3FE ) {
3291 if (aExp | aSig) {
3292 status->float_exception_flags |= float_flag_inexact;
3293 }
3294 return 0;
3295 }
3296 z = aSig>>( - shiftCount );
3297 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
3298 status->float_exception_flags |= float_flag_inexact;
3299 }
3300 }
3301 if ( aSign ) z = - z;
3302 return z;
3303
3304}
3305
3306
3307
3308
3309
3310
3311
3312
3313float32 float64_to_float32(float64 a, float_status *status)
3314{
3315 flag aSign;
3316 int aExp;
3317 uint64_t aSig;
3318 uint32_t zSig;
3319 a = float64_squash_input_denormal(a, status);
3320
3321 aSig = extractFloat64Frac( a );
3322 aExp = extractFloat64Exp( a );
3323 aSign = extractFloat64Sign( a );
3324 if ( aExp == 0x7FF ) {
3325 if (aSig) {
3326 return commonNaNToFloat32(float64ToCommonNaN(a, status), status);
3327 }
3328 return packFloat32( aSign, 0xFF, 0 );
3329 }
3330 shift64RightJamming( aSig, 22, &aSig );
3331 zSig = aSig;
3332 if ( aExp || zSig ) {
3333 zSig |= 0x40000000;
3334 aExp -= 0x381;
3335 }
3336 return roundAndPackFloat32(aSign, aExp, zSig, status);
3337
3338}
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351static float16 packFloat16(flag zSign, int zExp, uint16_t zSig)
3352{
3353 return make_float16(
3354 (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
3355}
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385static float16 roundAndPackFloat16(flag zSign, int zExp,
3386 uint32_t zSig, flag ieee,
3387 float_status *status)
3388{
3389 int maxexp = ieee ? 29 : 30;
3390 uint32_t mask;
3391 uint32_t increment;
3392 bool rounding_bumps_exp;
3393 bool is_tiny = false;
3394
3395
3396
3397
3398 if (zExp < 1) {
3399
3400 mask = 0x00ffffff;
3401 if (zExp >= -11) {
3402 mask >>= 11 + zExp;
3403 }
3404 } else {
3405
3406 mask = 0x00001fff;
3407 }
3408
3409 switch (status->float_rounding_mode) {
3410 case float_round_nearest_even:
3411 increment = (mask + 1) >> 1;
3412 if ((zSig & mask) == increment) {
3413 increment = zSig & (increment << 1);
3414 }
3415 break;
3416 case float_round_ties_away:
3417 increment = (mask + 1) >> 1;
3418 break;
3419 case float_round_up:
3420 increment = zSign ? 0 : mask;
3421 break;
3422 case float_round_down:
3423 increment = zSign ? mask : 0;
3424 break;
3425 default:
3426 increment = 0;
3427 break;
3428 }
3429
3430 rounding_bumps_exp = (zSig + increment >= 0x01000000);
3431
3432 if (zExp > maxexp || (zExp == maxexp && rounding_bumps_exp)) {
3433 if (ieee) {
3434 float_raise(float_flag_overflow | float_flag_inexact, status);
3435 return packFloat16(zSign, 0x1f, 0);
3436 } else {
3437 float_raise(float_flag_invalid, status);
3438 return packFloat16(zSign, 0x1f, 0x3ff);
3439 }
3440 }
3441
3442 if (zExp < 0) {
3443
3444 is_tiny =
3445 (status->float_detect_tininess == float_tininess_before_rounding)
3446 || (zExp < -1)
3447 || (!rounding_bumps_exp);
3448 }
3449 if (zSig & mask) {
3450 float_raise(float_flag_inexact, status);
3451 if (is_tiny) {
3452 float_raise(float_flag_underflow, status);
3453 }
3454 }
3455
3456 zSig += increment;
3457 if (rounding_bumps_exp) {
3458 zSig >>= 1;
3459 zExp++;
3460 }
3461
3462 if (zExp < -10) {
3463 return packFloat16(zSign, 0, 0);
3464 }
3465 if (zExp < 0) {
3466 zSig >>= -zExp;
3467 zExp = 0;
3468 }
3469 return packFloat16(zSign, zExp, zSig >> 13);
3470}
3471
3472static void normalizeFloat16Subnormal(uint32_t aSig, int *zExpPtr,
3473 uint32_t *zSigPtr)
3474{
3475 int8_t shiftCount = countLeadingZeros32(aSig) - 21;
3476 *zSigPtr = aSig << shiftCount;
3477 *zExpPtr = 1 - shiftCount;
3478}
3479
3480
3481
3482
3483float32 float16_to_float32(float16 a, flag ieee, float_status *status)
3484{
3485 flag aSign;
3486 int aExp;
3487 uint32_t aSig;
3488
3489 aSign = extractFloat16Sign(a);
3490 aExp = extractFloat16Exp(a);
3491 aSig = extractFloat16Frac(a);
3492
3493 if (aExp == 0x1f && ieee) {
3494 if (aSig) {
3495 return commonNaNToFloat32(float16ToCommonNaN(a, status), status);
3496 }
3497 return packFloat32(aSign, 0xff, 0);
3498 }
3499 if (aExp == 0) {
3500 if (aSig == 0) {
3501 return packFloat32(aSign, 0, 0);
3502 }
3503
3504 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3505 aExp--;
3506 }
3507 return packFloat32( aSign, aExp + 0x70, aSig << 13);
3508}
3509
3510float16 float32_to_float16(float32 a, flag ieee, float_status *status)
3511{
3512 flag aSign;
3513 int aExp;
3514 uint32_t aSig;
3515
3516 a = float32_squash_input_denormal(a, status);
3517
3518 aSig = extractFloat32Frac( a );
3519 aExp = extractFloat32Exp( a );
3520 aSign = extractFloat32Sign( a );
3521 if ( aExp == 0xFF ) {
3522 if (aSig) {
3523
3524 if (!ieee) {
3525 float_raise(float_flag_invalid, status);
3526 return packFloat16(aSign, 0, 0);
3527 }
3528 return commonNaNToFloat16(
3529 float32ToCommonNaN(a, status), status);
3530 }
3531
3532 if (!ieee) {
3533 float_raise(float_flag_invalid, status);
3534 return packFloat16(aSign, 0x1f, 0x3ff);
3535 }
3536 return packFloat16(aSign, 0x1f, 0);
3537 }
3538 if (aExp == 0 && aSig == 0) {
3539 return packFloat16(aSign, 0, 0);
3540 }
3541
3542
3543
3544
3545
3546
3547
3548 aSig |= 0x00800000;
3549 aExp -= 0x71;
3550
3551 return roundAndPackFloat16(aSign, aExp, aSig, ieee, status);
3552}
3553
3554float64 float16_to_float64(float16 a, flag ieee, float_status *status)
3555{
3556 flag aSign;
3557 int aExp;
3558 uint32_t aSig;
3559
3560 aSign = extractFloat16Sign(a);
3561 aExp = extractFloat16Exp(a);
3562 aSig = extractFloat16Frac(a);
3563
3564 if (aExp == 0x1f && ieee) {
3565 if (aSig) {
3566 return commonNaNToFloat64(
3567 float16ToCommonNaN(a, status), status);
3568 }
3569 return packFloat64(aSign, 0x7ff, 0);
3570 }
3571 if (aExp == 0) {
3572 if (aSig == 0) {
3573 return packFloat64(aSign, 0, 0);
3574 }
3575
3576 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3577 aExp--;
3578 }
3579 return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42);
3580}
3581
3582float16 float64_to_float16(float64 a, flag ieee, float_status *status)
3583{
3584 flag aSign;
3585 int aExp;
3586 uint64_t aSig;
3587 uint32_t zSig;
3588
3589 a = float64_squash_input_denormal(a, status);
3590
3591 aSig = extractFloat64Frac(a);
3592 aExp = extractFloat64Exp(a);
3593 aSign = extractFloat64Sign(a);
3594 if (aExp == 0x7FF) {
3595 if (aSig) {
3596
3597 if (!ieee) {
3598 float_raise(float_flag_invalid, status);
3599 return packFloat16(aSign, 0, 0);
3600 }
3601 return commonNaNToFloat16(
3602 float64ToCommonNaN(a, status), status);
3603 }
3604
3605 if (!ieee) {
3606 float_raise(float_flag_invalid, status);
3607 return packFloat16(aSign, 0x1f, 0x3ff);
3608 }
3609 return packFloat16(aSign, 0x1f, 0);
3610 }
3611 shift64RightJamming(aSig, 29, &aSig);
3612 zSig = aSig;
3613 if (aExp == 0 && zSig == 0) {
3614 return packFloat16(aSign, 0, 0);
3615 }
3616
3617
3618
3619
3620
3621
3622
3623 zSig |= 0x00800000;
3624 aExp -= 0x3F1;
3625
3626 return roundAndPackFloat16(aSign, aExp, zSig, ieee, status);
3627}
3628
3629
3630
3631
3632
3633
3634
3635
3636floatx80 float64_to_floatx80(float64 a, float_status *status)
3637{
3638 flag aSign;
3639 int aExp;
3640 uint64_t aSig;
3641
3642 a = float64_squash_input_denormal(a, status);
3643 aSig = extractFloat64Frac( a );
3644 aExp = extractFloat64Exp( a );
3645 aSign = extractFloat64Sign( a );
3646 if ( aExp == 0x7FF ) {
3647 if (aSig) {
3648 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
3649 }
3650 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3651 }
3652 if ( aExp == 0 ) {
3653 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3654 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3655 }
3656 return
3657 packFloatx80(
3658 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
3659
3660}
3661
3662
3663
3664
3665
3666
3667
3668
3669float128 float64_to_float128(float64 a, float_status *status)
3670{
3671 flag aSign;
3672 int aExp;
3673 uint64_t aSig, zSig0, zSig1;
3674
3675 a = float64_squash_input_denormal(a, status);
3676 aSig = extractFloat64Frac( a );
3677 aExp = extractFloat64Exp( a );
3678 aSign = extractFloat64Sign( a );
3679 if ( aExp == 0x7FF ) {
3680 if (aSig) {
3681 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
3682 }
3683 return packFloat128( aSign, 0x7FFF, 0, 0 );
3684 }
3685 if ( aExp == 0 ) {
3686 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3687 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3688 --aExp;
3689 }
3690 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
3691 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
3692
3693}
3694
3695
3696
3697
3698
3699
3700
3701
3702float64 float64_round_to_int(float64 a, float_status *status)
3703{
3704 flag aSign;
3705 int aExp;
3706 uint64_t lastBitMask, roundBitsMask;
3707 uint64_t z;
3708 a = float64_squash_input_denormal(a, status);
3709
3710 aExp = extractFloat64Exp( a );
3711 if ( 0x433 <= aExp ) {
3712 if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
3713 return propagateFloat64NaN(a, a, status);
3714 }
3715 return a;
3716 }
3717 if ( aExp < 0x3FF ) {
3718 if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a;
3719 status->float_exception_flags |= float_flag_inexact;
3720 aSign = extractFloat64Sign( a );
3721 switch (status->float_rounding_mode) {
3722 case float_round_nearest_even:
3723 if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
3724 return packFloat64( aSign, 0x3FF, 0 );
3725 }
3726 break;
3727 case float_round_ties_away:
3728 if (aExp == 0x3FE) {
3729 return packFloat64(aSign, 0x3ff, 0);
3730 }
3731 break;
3732 case float_round_down:
3733 return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
3734 case float_round_up:
3735 return make_float64(
3736 aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
3737 }
3738 return packFloat64( aSign, 0, 0 );
3739 }
3740 lastBitMask = 1;
3741 lastBitMask <<= 0x433 - aExp;
3742 roundBitsMask = lastBitMask - 1;
3743 z = float64_val(a);
3744 switch (status->float_rounding_mode) {
3745 case float_round_nearest_even:
3746 z += lastBitMask >> 1;
3747 if ((z & roundBitsMask) == 0) {
3748 z &= ~lastBitMask;
3749 }
3750 break;
3751 case float_round_ties_away:
3752 z += lastBitMask >> 1;
3753 break;
3754 case float_round_to_zero:
3755 break;
3756 case float_round_up:
3757 if (!extractFloat64Sign(make_float64(z))) {
3758 z += roundBitsMask;
3759 }
3760 break;
3761 case float_round_down:
3762 if (extractFloat64Sign(make_float64(z))) {
3763 z += roundBitsMask;
3764 }
3765 break;
3766 default:
3767 abort();
3768 }
3769 z &= ~ roundBitsMask;
3770 if (z != float64_val(a)) {
3771 status->float_exception_flags |= float_flag_inexact;
3772 }
3773 return make_float64(z);
3774
3775}
3776
3777float64 float64_trunc_to_int(float64 a, float_status *status)
3778{
3779 int oldmode;
3780 float64 res;
3781 oldmode = status->float_rounding_mode;
3782 status->float_rounding_mode = float_round_to_zero;
3783 res = float64_round_to_int(a, status);
3784 status->float_rounding_mode = oldmode;
3785 return res;
3786}
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796static float64 addFloat64Sigs(float64 a, float64 b, flag zSign,
3797 float_status *status)
3798{
3799 int aExp, bExp, zExp;
3800 uint64_t aSig, bSig, zSig;
3801 int expDiff;
3802
3803 aSig = extractFloat64Frac( a );
3804 aExp = extractFloat64Exp( a );
3805 bSig = extractFloat64Frac( b );
3806 bExp = extractFloat64Exp( b );
3807 expDiff = aExp - bExp;
3808 aSig <<= 9;
3809 bSig <<= 9;
3810 if ( 0 < expDiff ) {
3811 if ( aExp == 0x7FF ) {
3812 if (aSig) {
3813 return propagateFloat64NaN(a, b, status);
3814 }
3815 return a;
3816 }
3817 if ( bExp == 0 ) {
3818 --expDiff;
3819 }
3820 else {
3821 bSig |= LIT64( 0x2000000000000000 );
3822 }
3823 shift64RightJamming( bSig, expDiff, &bSig );
3824 zExp = aExp;
3825 }
3826 else if ( expDiff < 0 ) {
3827 if ( bExp == 0x7FF ) {
3828 if (bSig) {
3829 return propagateFloat64NaN(a, b, status);
3830 }
3831 return packFloat64( zSign, 0x7FF, 0 );
3832 }
3833 if ( aExp == 0 ) {
3834 ++expDiff;
3835 }
3836 else {
3837 aSig |= LIT64( 0x2000000000000000 );
3838 }
3839 shift64RightJamming( aSig, - expDiff, &aSig );
3840 zExp = bExp;
3841 }
3842 else {
3843 if ( aExp == 0x7FF ) {
3844 if (aSig | bSig) {
3845 return propagateFloat64NaN(a, b, status);
3846 }
3847 return a;
3848 }
3849 if ( aExp == 0 ) {
3850 if (status->flush_to_zero) {
3851 if (aSig | bSig) {
3852 float_raise(float_flag_output_denormal, status);
3853 }
3854 return packFloat64(zSign, 0, 0);
3855 }
3856 return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
3857 }
3858 zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
3859 zExp = aExp;
3860 goto roundAndPack;
3861 }
3862 aSig |= LIT64( 0x2000000000000000 );
3863 zSig = ( aSig + bSig )<<1;
3864 --zExp;
3865 if ( (int64_t) zSig < 0 ) {
3866 zSig = aSig + bSig;
3867 ++zExp;
3868 }
3869 roundAndPack:
3870 return roundAndPackFloat64(zSign, zExp, zSig, status);
3871
3872}
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882static float64 subFloat64Sigs(float64 a, float64 b, flag zSign,
3883 float_status *status)
3884{
3885 int aExp, bExp, zExp;
3886 uint64_t aSig, bSig, zSig;
3887 int expDiff;
3888
3889 aSig = extractFloat64Frac( a );
3890 aExp = extractFloat64Exp( a );
3891 bSig = extractFloat64Frac( b );
3892 bExp = extractFloat64Exp( b );
3893 expDiff = aExp - bExp;
3894 aSig <<= 10;
3895 bSig <<= 10;
3896 if ( 0 < expDiff ) goto aExpBigger;
3897 if ( expDiff < 0 ) goto bExpBigger;
3898 if ( aExp == 0x7FF ) {
3899 if (aSig | bSig) {
3900 return propagateFloat64NaN(a, b, status);
3901 }
3902 float_raise(float_flag_invalid, status);
3903 return float64_default_nan(status);
3904 }
3905 if ( aExp == 0 ) {
3906 aExp = 1;
3907 bExp = 1;
3908 }
3909 if ( bSig < aSig ) goto aBigger;
3910 if ( aSig < bSig ) goto bBigger;
3911 return packFloat64(status->float_rounding_mode == float_round_down, 0, 0);
3912 bExpBigger:
3913 if ( bExp == 0x7FF ) {
3914 if (bSig) {
3915 return propagateFloat64NaN(a, b, status);
3916 }
3917 return packFloat64( zSign ^ 1, 0x7FF, 0 );
3918 }
3919 if ( aExp == 0 ) {
3920 ++expDiff;
3921 }
3922 else {
3923 aSig |= LIT64( 0x4000000000000000 );
3924 }
3925 shift64RightJamming( aSig, - expDiff, &aSig );
3926 bSig |= LIT64( 0x4000000000000000 );
3927 bBigger:
3928 zSig = bSig - aSig;
3929 zExp = bExp;
3930 zSign ^= 1;
3931 goto normalizeRoundAndPack;
3932 aExpBigger:
3933 if ( aExp == 0x7FF ) {
3934 if (aSig) {
3935 return propagateFloat64NaN(a, b, status);
3936 }
3937 return a;
3938 }
3939 if ( bExp == 0 ) {
3940 --expDiff;
3941 }
3942 else {
3943 bSig |= LIT64( 0x4000000000000000 );
3944 }
3945 shift64RightJamming( bSig, expDiff, &bSig );
3946 aSig |= LIT64( 0x4000000000000000 );
3947 aBigger:
3948 zSig = aSig - bSig;
3949 zExp = aExp;
3950 normalizeRoundAndPack:
3951 --zExp;
3952 return normalizeRoundAndPackFloat64(zSign, zExp, zSig, status);
3953
3954}
3955
3956
3957
3958
3959
3960
3961
3962float64 float64_add(float64 a, float64 b, float_status *status)
3963{
3964 flag aSign, bSign;
3965 a = float64_squash_input_denormal(a, status);
3966 b = float64_squash_input_denormal(b, status);
3967
3968 aSign = extractFloat64Sign( a );
3969 bSign = extractFloat64Sign( b );
3970 if ( aSign == bSign ) {
3971 return addFloat64Sigs(a, b, aSign, status);
3972 }
3973 else {
3974 return subFloat64Sigs(a, b, aSign, status);
3975 }
3976
3977}
3978
3979
3980
3981
3982
3983
3984
3985float64 float64_sub(float64 a, float64 b, float_status *status)
3986{
3987 flag aSign, bSign;
3988 a = float64_squash_input_denormal(a, status);
3989 b = float64_squash_input_denormal(b, status);
3990
3991 aSign = extractFloat64Sign( a );
3992 bSign = extractFloat64Sign( b );
3993 if ( aSign == bSign ) {
3994 return subFloat64Sigs(a, b, aSign, status);
3995 }
3996 else {
3997 return addFloat64Sigs(a, b, aSign, status);
3998 }
3999
4000}
4001
4002
4003
4004
4005
4006
4007
4008float64 float64_mul(float64 a, float64 b, float_status *status)
4009{
4010 flag aSign, bSign, zSign;
4011 int aExp, bExp, zExp;
4012 uint64_t aSig, bSig, zSig0, zSig1;
4013
4014 a = float64_squash_input_denormal(a, status);
4015 b = float64_squash_input_denormal(b, status);
4016
4017 aSig = extractFloat64Frac( a );
4018 aExp = extractFloat64Exp( a );
4019 aSign = extractFloat64Sign( a );
4020 bSig = extractFloat64Frac( b );
4021 bExp = extractFloat64Exp( b );
4022 bSign = extractFloat64Sign( b );
4023 zSign = aSign ^ bSign;
4024 if ( aExp == 0x7FF ) {
4025 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
4026 return propagateFloat64NaN(a, b, status);
4027 }
4028 if ( ( bExp | bSig ) == 0 ) {
4029 float_raise(float_flag_invalid, status);
4030 return float64_default_nan(status);
4031 }
4032 return packFloat64( zSign, 0x7FF, 0 );
4033 }
4034 if ( bExp == 0x7FF ) {
4035 if (bSig) {
4036 return propagateFloat64NaN(a, b, status);
4037 }
4038 if ( ( aExp | aSig ) == 0 ) {
4039 float_raise(float_flag_invalid, status);
4040 return float64_default_nan(status);
4041 }
4042 return packFloat64( zSign, 0x7FF, 0 );
4043 }
4044 if ( aExp == 0 ) {
4045 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
4046 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4047 }
4048 if ( bExp == 0 ) {
4049 if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
4050 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4051 }
4052 zExp = aExp + bExp - 0x3FF;
4053 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
4054 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4055 mul64To128( aSig, bSig, &zSig0, &zSig1 );
4056 zSig0 |= ( zSig1 != 0 );
4057 if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
4058 zSig0 <<= 1;
4059 --zExp;
4060 }
4061 return roundAndPackFloat64(zSign, zExp, zSig0, status);
4062
4063}
4064
4065
4066
4067
4068
4069
4070
4071float64 float64_div(float64 a, float64 b, float_status *status)
4072{
4073 flag aSign, bSign, zSign;
4074 int aExp, bExp, zExp;
4075 uint64_t aSig, bSig, zSig;
4076 uint64_t rem0, rem1;
4077 uint64_t term0, term1;
4078 a = float64_squash_input_denormal(a, status);
4079 b = float64_squash_input_denormal(b, status);
4080
4081 aSig = extractFloat64Frac( a );
4082 aExp = extractFloat64Exp( a );
4083 aSign = extractFloat64Sign( a );
4084 bSig = extractFloat64Frac( b );
4085 bExp = extractFloat64Exp( b );
4086 bSign = extractFloat64Sign( b );
4087 zSign = aSign ^ bSign;
4088 if ( aExp == 0x7FF ) {
4089 if (aSig) {
4090 return propagateFloat64NaN(a, b, status);
4091 }
4092 if ( bExp == 0x7FF ) {
4093 if (bSig) {
4094 return propagateFloat64NaN(a, b, status);
4095 }
4096 float_raise(float_flag_invalid, status);
4097 return float64_default_nan(status);
4098 }
4099 return packFloat64( zSign, 0x7FF, 0 );
4100 }
4101 if ( bExp == 0x7FF ) {
4102 if (bSig) {
4103 return propagateFloat64NaN(a, b, status);
4104 }
4105 return packFloat64( zSign, 0, 0 );
4106 }
4107 if ( bExp == 0 ) {
4108 if ( bSig == 0 ) {
4109 if ( ( aExp | aSig ) == 0 ) {
4110 float_raise(float_flag_invalid, status);
4111 return float64_default_nan(status);
4112 }
4113 float_raise(float_flag_divbyzero, status);
4114 return packFloat64( zSign, 0x7FF, 0 );
4115 }
4116 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4117 }
4118 if ( aExp == 0 ) {
4119 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
4120 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4121 }
4122 zExp = aExp - bExp + 0x3FD;
4123 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
4124 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4125 if ( bSig <= ( aSig + aSig ) ) {
4126 aSig >>= 1;
4127 ++zExp;
4128 }
4129 zSig = estimateDiv128To64( aSig, 0, bSig );
4130 if ( ( zSig & 0x1FF ) <= 2 ) {
4131 mul64To128( bSig, zSig, &term0, &term1 );
4132 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
4133 while ( (int64_t) rem0 < 0 ) {
4134 --zSig;
4135 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4136 }
4137 zSig |= ( rem1 != 0 );
4138 }
4139 return roundAndPackFloat64(zSign, zExp, zSig, status);
4140
4141}
4142
4143
4144
4145
4146
4147
4148
4149float64 float64_rem(float64 a, float64 b, float_status *status)
4150{
4151 flag aSign, zSign;
4152 int aExp, bExp, expDiff;
4153 uint64_t aSig, bSig;
4154 uint64_t q, alternateASig;
4155 int64_t sigMean;
4156
4157 a = float64_squash_input_denormal(a, status);
4158 b = float64_squash_input_denormal(b, status);
4159 aSig = extractFloat64Frac( a );
4160 aExp = extractFloat64Exp( a );
4161 aSign = extractFloat64Sign( a );
4162 bSig = extractFloat64Frac( b );
4163 bExp = extractFloat64Exp( b );
4164 if ( aExp == 0x7FF ) {
4165 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
4166 return propagateFloat64NaN(a, b, status);
4167 }
4168 float_raise(float_flag_invalid, status);
4169 return float64_default_nan(status);
4170 }
4171 if ( bExp == 0x7FF ) {
4172 if (bSig) {
4173 return propagateFloat64NaN(a, b, status);
4174 }
4175 return a;
4176 }
4177 if ( bExp == 0 ) {
4178 if ( bSig == 0 ) {
4179 float_raise(float_flag_invalid, status);
4180 return float64_default_nan(status);
4181 }
4182 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4183 }
4184 if ( aExp == 0 ) {
4185 if ( aSig == 0 ) return a;
4186 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4187 }
4188 expDiff = aExp - bExp;
4189 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
4190 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4191 if ( expDiff < 0 ) {
4192 if ( expDiff < -1 ) return a;
4193 aSig >>= 1;
4194 }
4195 q = ( bSig <= aSig );
4196 if ( q ) aSig -= bSig;
4197 expDiff -= 64;
4198 while ( 0 < expDiff ) {
4199 q = estimateDiv128To64( aSig, 0, bSig );
4200 q = ( 2 < q ) ? q - 2 : 0;
4201 aSig = - ( ( bSig>>2 ) * q );
4202 expDiff -= 62;
4203 }
4204 expDiff += 64;
4205 if ( 0 < expDiff ) {
4206 q = estimateDiv128To64( aSig, 0, bSig );
4207 q = ( 2 < q ) ? q - 2 : 0;
4208 q >>= 64 - expDiff;
4209 bSig >>= 2;
4210 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4211 }
4212 else {
4213 aSig >>= 2;
4214 bSig >>= 2;
4215 }
4216 do {
4217 alternateASig = aSig;
4218 ++q;
4219 aSig -= bSig;
4220 } while ( 0 <= (int64_t) aSig );
4221 sigMean = aSig + alternateASig;
4222 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4223 aSig = alternateASig;
4224 }
4225 zSign = ( (int64_t) aSig < 0 );
4226 if ( zSign ) aSig = - aSig;
4227 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
4228
4229}
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242float64 float64_muladd(float64 a, float64 b, float64 c, int flags,
4243 float_status *status)
4244{
4245 flag aSign, bSign, cSign, zSign;
4246 int aExp, bExp, cExp, pExp, zExp, expDiff;
4247 uint64_t aSig, bSig, cSig;
4248 flag pInf, pZero, pSign;
4249 uint64_t pSig0, pSig1, cSig0, cSig1, zSig0, zSig1;
4250 int shiftcount;
4251 flag signflip, infzero;
4252
4253 a = float64_squash_input_denormal(a, status);
4254 b = float64_squash_input_denormal(b, status);
4255 c = float64_squash_input_denormal(c, status);
4256 aSig = extractFloat64Frac(a);
4257 aExp = extractFloat64Exp(a);
4258 aSign = extractFloat64Sign(a);
4259 bSig = extractFloat64Frac(b);
4260 bExp = extractFloat64Exp(b);
4261 bSign = extractFloat64Sign(b);
4262 cSig = extractFloat64Frac(c);
4263 cExp = extractFloat64Exp(c);
4264 cSign = extractFloat64Sign(c);
4265
4266 infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) ||
4267 (aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0));
4268
4269
4270
4271
4272
4273
4274 if (((aExp == 0x7ff) && aSig) ||
4275 ((bExp == 0x7ff) && bSig) ||
4276 ((cExp == 0x7ff) && cSig)) {
4277 return propagateFloat64MulAddNaN(a, b, c, infzero, status);
4278 }
4279
4280 if (infzero) {
4281 float_raise(float_flag_invalid, status);
4282 return float64_default_nan(status);
4283 }
4284
4285 if (flags & float_muladd_negate_c) {
4286 cSign ^= 1;
4287 }
4288
4289 signflip = (flags & float_muladd_negate_result) ? 1 : 0;
4290
4291
4292 pSign = aSign ^ bSign;
4293 if (flags & float_muladd_negate_product) {
4294 pSign ^= 1;
4295 }
4296 pInf = (aExp == 0x7ff) || (bExp == 0x7ff);
4297 pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
4298
4299 if (cExp == 0x7ff) {
4300 if (pInf && (pSign ^ cSign)) {
4301
4302 float_raise(float_flag_invalid, status);
4303 return float64_default_nan(status);
4304 }
4305
4306 return packFloat64(cSign ^ signflip, 0x7ff, 0);
4307 }
4308
4309 if (pInf) {
4310 return packFloat64(pSign ^ signflip, 0x7ff, 0);
4311 }
4312
4313 if (pZero) {
4314 if (cExp == 0) {
4315 if (cSig == 0) {
4316
4317 if (pSign == cSign) {
4318 zSign = pSign;
4319 } else if (status->float_rounding_mode == float_round_down) {
4320 zSign = 1;
4321 } else {
4322 zSign = 0;
4323 }
4324 return packFloat64(zSign ^ signflip, 0, 0);
4325 }
4326
4327 if (status->flush_to_zero) {
4328 float_raise(float_flag_output_denormal, status);
4329 return packFloat64(cSign ^ signflip, 0, 0);
4330 }
4331 }
4332
4333 if (flags & float_muladd_halve_result) {
4334 if (cExp == 0) {
4335 normalizeFloat64Subnormal(cSig, &cExp, &cSig);
4336 }
4337
4338
4339
4340 cExp -= 2;
4341 cSig = (cSig | 0x0010000000000000ULL) << 10;
4342 return roundAndPackFloat64(cSign ^ signflip, cExp, cSig, status);
4343 }
4344 return packFloat64(cSign ^ signflip, cExp, cSig);
4345 }
4346
4347 if (aExp == 0) {
4348 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
4349 }
4350 if (bExp == 0) {
4351 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
4352 }
4353
4354
4355
4356
4357
4358
4359
4360
4361 pExp = aExp + bExp - 0x3fe;
4362 aSig = (aSig | LIT64(0x0010000000000000))<<10;
4363 bSig = (bSig | LIT64(0x0010000000000000))<<11;
4364 mul64To128(aSig, bSig, &pSig0, &pSig1);
4365 if ((int64_t)(pSig0 << 1) >= 0) {
4366 shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
4367 pExp--;
4368 }
4369
4370 zSign = pSign ^ signflip;
4371
4372
4373
4374
4375 if (cExp == 0) {
4376 if (!cSig) {
4377
4378 shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
4379 if (flags & float_muladd_halve_result) {
4380 pExp--;
4381 }
4382 return roundAndPackFloat64(zSign, pExp - 1,
4383 pSig1, status);
4384 }
4385 normalizeFloat64Subnormal(cSig, &cExp, &cSig);
4386 }
4387
4388
4389
4390
4391 cSig0 = cSig << (126 - 64 - 52);
4392 cSig1 = 0;
4393 cSig0 |= LIT64(0x4000000000000000);
4394 expDiff = pExp - cExp;
4395
4396 if (pSign == cSign) {
4397
4398 if (expDiff > 0) {
4399
4400 shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
4401 zExp = pExp;
4402 } else if (expDiff < 0) {
4403
4404 shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
4405 zExp = cExp;
4406 } else {
4407
4408 zExp = cExp;
4409 }
4410
4411 add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4412 if ((int64_t)zSig0 < 0) {
4413 shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
4414 } else {
4415 zExp--;
4416 }
4417 shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
4418 if (flags & float_muladd_halve_result) {
4419 zExp--;
4420 }
4421 return roundAndPackFloat64(zSign, zExp, zSig1, status);
4422 } else {
4423
4424 if (expDiff > 0) {
4425 shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
4426 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4427 zExp = pExp;
4428 } else if (expDiff < 0) {
4429 shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
4430 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
4431 zExp = cExp;
4432 zSign ^= 1;
4433 } else {
4434 zExp = pExp;
4435 if (lt128(cSig0, cSig1, pSig0, pSig1)) {
4436 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4437 } else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
4438 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
4439 zSign ^= 1;
4440 } else {
4441
4442 zSign = signflip;
4443 if (status->float_rounding_mode == float_round_down) {
4444 zSign ^= 1;
4445 }
4446 return packFloat64(zSign, 0, 0);
4447 }
4448 }
4449 --zExp;
4450
4451
4452
4453 if (zSig0) {
4454 shiftcount = countLeadingZeros64(zSig0) - 1;
4455 shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1);
4456 if (zSig1) {
4457 zSig0 |= 1;
4458 }
4459 zExp -= shiftcount;
4460 } else {
4461 shiftcount = countLeadingZeros64(zSig1);
4462 if (shiftcount == 0) {
4463 zSig0 = (zSig1 >> 1) | (zSig1 & 1);
4464 zExp -= 63;
4465 } else {
4466 shiftcount--;
4467 zSig0 = zSig1 << shiftcount;
4468 zExp -= (shiftcount + 64);
4469 }
4470 }
4471 if (flags & float_muladd_halve_result) {
4472 zExp--;
4473 }
4474 return roundAndPackFloat64(zSign, zExp, zSig0, status);
4475 }
4476}
4477
4478
4479
4480
4481
4482
4483
4484float64 float64_sqrt(float64 a, float_status *status)
4485{
4486 flag aSign;
4487 int aExp, zExp;
4488 uint64_t aSig, zSig, doubleZSig;
4489 uint64_t rem0, rem1, term0, term1;
4490 a = float64_squash_input_denormal(a, status);
4491
4492 aSig = extractFloat64Frac( a );
4493 aExp = extractFloat64Exp( a );
4494 aSign = extractFloat64Sign( a );
4495 if ( aExp == 0x7FF ) {
4496 if (aSig) {
4497 return propagateFloat64NaN(a, a, status);
4498 }
4499 if ( ! aSign ) return a;
4500 float_raise(float_flag_invalid, status);
4501 return float64_default_nan(status);
4502 }
4503 if ( aSign ) {
4504 if ( ( aExp | aSig ) == 0 ) return a;
4505 float_raise(float_flag_invalid, status);
4506 return float64_default_nan(status);
4507 }
4508 if ( aExp == 0 ) {
4509 if ( aSig == 0 ) return float64_zero;
4510 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4511 }
4512 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
4513 aSig |= LIT64( 0x0010000000000000 );
4514 zSig = estimateSqrt32( aExp, aSig>>21 );
4515 aSig <<= 9 - ( aExp & 1 );
4516 zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
4517 if ( ( zSig & 0x1FF ) <= 5 ) {
4518 doubleZSig = zSig<<1;
4519 mul64To128( zSig, zSig, &term0, &term1 );
4520 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
4521 while ( (int64_t) rem0 < 0 ) {
4522 --zSig;
4523 doubleZSig -= 2;
4524 add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
4525 }
4526 zSig |= ( ( rem0 | rem1 ) != 0 );
4527 }
4528 return roundAndPackFloat64(0, zExp, zSig, status);
4529
4530}
4531
4532
4533
4534
4535
4536
4537float64 float64_log2(float64 a, float_status *status)
4538{
4539 flag aSign, zSign;
4540 int aExp;
4541 uint64_t aSig, aSig0, aSig1, zSig, i;
4542 a = float64_squash_input_denormal(a, status);
4543
4544 aSig = extractFloat64Frac( a );
4545 aExp = extractFloat64Exp( a );
4546 aSign = extractFloat64Sign( a );
4547
4548 if ( aExp == 0 ) {
4549 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
4550 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4551 }
4552 if ( aSign ) {
4553 float_raise(float_flag_invalid, status);
4554 return float64_default_nan(status);
4555 }
4556 if ( aExp == 0x7FF ) {
4557 if (aSig) {
4558 return propagateFloat64NaN(a, float64_zero, status);
4559 }
4560 return a;
4561 }
4562
4563 aExp -= 0x3FF;
4564 aSig |= LIT64( 0x0010000000000000 );
4565 zSign = aExp < 0;
4566 zSig = (uint64_t)aExp << 52;
4567 for (i = 1LL << 51; i > 0; i >>= 1) {
4568 mul64To128( aSig, aSig, &aSig0, &aSig1 );
4569 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
4570 if ( aSig & LIT64( 0x0020000000000000 ) ) {
4571 aSig >>= 1;
4572 zSig |= i;
4573 }
4574 }
4575
4576 if ( zSign )
4577 zSig = -zSig;
4578 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
4579}
4580
4581
4582
4583
4584
4585
4586
4587
4588int float64_eq(float64 a, float64 b, float_status *status)
4589{
4590 uint64_t av, bv;
4591 a = float64_squash_input_denormal(a, status);
4592 b = float64_squash_input_denormal(b, status);
4593
4594 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4595 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4596 ) {
4597 float_raise(float_flag_invalid, status);
4598 return 0;
4599 }
4600 av = float64_val(a);
4601 bv = float64_val(b);
4602 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4603
4604}
4605
4606
4607
4608
4609
4610
4611
4612
4613int float64_le(float64 a, float64 b, float_status *status)
4614{
4615 flag aSign, bSign;
4616 uint64_t av, bv;
4617 a = float64_squash_input_denormal(a, status);
4618 b = float64_squash_input_denormal(b, status);
4619
4620 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4621 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4622 ) {
4623 float_raise(float_flag_invalid, status);
4624 return 0;
4625 }
4626 aSign = extractFloat64Sign( a );
4627 bSign = extractFloat64Sign( b );
4628 av = float64_val(a);
4629 bv = float64_val(b);
4630 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4631 return ( av == bv ) || ( aSign ^ ( av < bv ) );
4632
4633}
4634
4635
4636
4637
4638
4639
4640
4641
4642int float64_lt(float64 a, float64 b, float_status *status)
4643{
4644 flag aSign, bSign;
4645 uint64_t av, bv;
4646
4647 a = float64_squash_input_denormal(a, status);
4648 b = float64_squash_input_denormal(b, status);
4649 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4650 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4651 ) {
4652 float_raise(float_flag_invalid, status);
4653 return 0;
4654 }
4655 aSign = extractFloat64Sign( a );
4656 bSign = extractFloat64Sign( b );
4657 av = float64_val(a);
4658 bv = float64_val(b);
4659 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
4660 return ( av != bv ) && ( aSign ^ ( av < bv ) );
4661
4662}
4663
4664
4665
4666
4667
4668
4669
4670
4671int float64_unordered(float64 a, float64 b, float_status *status)
4672{
4673 a = float64_squash_input_denormal(a, status);
4674 b = float64_squash_input_denormal(b, status);
4675
4676 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4677 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4678 ) {
4679 float_raise(float_flag_invalid, status);
4680 return 1;
4681 }
4682 return 0;
4683}
4684
4685
4686
4687
4688
4689
4690
4691
4692int float64_eq_quiet(float64 a, float64 b, float_status *status)
4693{
4694 uint64_t av, bv;
4695 a = float64_squash_input_denormal(a, status);
4696 b = float64_squash_input_denormal(b, status);
4697
4698 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4699 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4700 ) {
4701 if (float64_is_signaling_nan(a, status)
4702 || float64_is_signaling_nan(b, status)) {
4703 float_raise(float_flag_invalid, status);
4704 }
4705 return 0;
4706 }
4707 av = float64_val(a);
4708 bv = float64_val(b);
4709 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4710
4711}
4712
4713
4714
4715
4716
4717
4718
4719
4720int float64_le_quiet(float64 a, float64 b, float_status *status)
4721{
4722 flag aSign, bSign;
4723 uint64_t av, bv;
4724 a = float64_squash_input_denormal(a, status);
4725 b = float64_squash_input_denormal(b, status);
4726
4727 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4728 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4729 ) {
4730 if (float64_is_signaling_nan(a, status)
4731 || float64_is_signaling_nan(b, status)) {
4732 float_raise(float_flag_invalid, status);
4733 }
4734 return 0;
4735 }
4736 aSign = extractFloat64Sign( a );
4737 bSign = extractFloat64Sign( b );
4738 av = float64_val(a);
4739 bv = float64_val(b);
4740 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4741 return ( av == bv ) || ( aSign ^ ( av < bv ) );
4742
4743}
4744
4745
4746
4747
4748
4749
4750
4751
4752int float64_lt_quiet(float64 a, float64 b, float_status *status)
4753{
4754 flag aSign, bSign;
4755 uint64_t av, bv;
4756 a = float64_squash_input_denormal(a, status);
4757 b = float64_squash_input_denormal(b, status);
4758
4759 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4760 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4761 ) {
4762 if (float64_is_signaling_nan(a, status)
4763 || float64_is_signaling_nan(b, status)) {
4764 float_raise(float_flag_invalid, status);
4765 }
4766 return 0;
4767 }
4768 aSign = extractFloat64Sign( a );
4769 bSign = extractFloat64Sign( b );
4770 av = float64_val(a);
4771 bv = float64_val(b);
4772 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
4773 return ( av != bv ) && ( aSign ^ ( av < bv ) );
4774
4775}
4776
4777
4778
4779
4780
4781
4782
4783
4784int float64_unordered_quiet(float64 a, float64 b, float_status *status)
4785{
4786 a = float64_squash_input_denormal(a, status);
4787 b = float64_squash_input_denormal(b, status);
4788
4789 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4790 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4791 ) {
4792 if (float64_is_signaling_nan(a, status)
4793 || float64_is_signaling_nan(b, status)) {
4794 float_raise(float_flag_invalid, status);
4795 }
4796 return 1;
4797 }
4798 return 0;
4799}
4800
4801
4802
4803
4804
4805
4806
4807
4808
4809
4810
4811int32_t floatx80_to_int32(floatx80 a, float_status *status)
4812{
4813 flag aSign;
4814 int32_t aExp, shiftCount;
4815 uint64_t aSig;
4816
4817 if (floatx80_invalid_encoding(a)) {
4818 float_raise(float_flag_invalid, status);
4819 return 1 << 31;
4820 }
4821 aSig = extractFloatx80Frac( a );
4822 aExp = extractFloatx80Exp( a );
4823 aSign = extractFloatx80Sign( a );
4824 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
4825 shiftCount = 0x4037 - aExp;
4826 if ( shiftCount <= 0 ) shiftCount = 1;
4827 shift64RightJamming( aSig, shiftCount, &aSig );
4828 return roundAndPackInt32(aSign, aSig, status);
4829
4830}
4831
4832
4833
4834
4835
4836
4837
4838
4839
4840
4841
4842int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
4843{
4844 flag aSign;
4845 int32_t aExp, shiftCount;
4846 uint64_t aSig, savedASig;
4847 int32_t z;
4848
4849 if (floatx80_invalid_encoding(a)) {
4850 float_raise(float_flag_invalid, status);
4851 return 1 << 31;
4852 }
4853 aSig = extractFloatx80Frac( a );
4854 aExp = extractFloatx80Exp( a );
4855 aSign = extractFloatx80Sign( a );
4856 if ( 0x401E < aExp ) {
4857 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
4858 goto invalid;
4859 }
4860 else if ( aExp < 0x3FFF ) {
4861 if (aExp || aSig) {
4862 status->float_exception_flags |= float_flag_inexact;
4863 }
4864 return 0;
4865 }
4866 shiftCount = 0x403E - aExp;
4867 savedASig = aSig;
4868 aSig >>= shiftCount;
4869 z = aSig;
4870 if ( aSign ) z = - z;
4871 if ( ( z < 0 ) ^ aSign ) {
4872 invalid:
4873 float_raise(float_flag_invalid, status);
4874 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
4875 }
4876 if ( ( aSig<<shiftCount ) != savedASig ) {
4877 status->float_exception_flags |= float_flag_inexact;
4878 }
4879 return z;
4880
4881}
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893int64_t floatx80_to_int64(floatx80 a, float_status *status)
4894{
4895 flag aSign;
4896 int32_t aExp, shiftCount;
4897 uint64_t aSig, aSigExtra;
4898
4899 if (floatx80_invalid_encoding(a)) {
4900 float_raise(float_flag_invalid, status);
4901 return 1ULL << 63;
4902 }
4903 aSig = extractFloatx80Frac( a );
4904 aExp = extractFloatx80Exp( a );
4905 aSign = extractFloatx80Sign( a );
4906 shiftCount = 0x403E - aExp;
4907 if ( shiftCount <= 0 ) {
4908 if ( shiftCount ) {
4909 float_raise(float_flag_invalid, status);
4910 if ( ! aSign
4911 || ( ( aExp == 0x7FFF )
4912 && ( aSig != LIT64( 0x8000000000000000 ) ) )
4913 ) {
4914 return LIT64( 0x7FFFFFFFFFFFFFFF );
4915 }
4916 return (int64_t) LIT64( 0x8000000000000000 );
4917 }
4918 aSigExtra = 0;
4919 }
4920 else {
4921 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4922 }
4923 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
4924
4925}
4926
4927
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
4938{
4939 flag aSign;
4940 int32_t aExp, shiftCount;
4941 uint64_t aSig;
4942 int64_t z;
4943
4944 if (floatx80_invalid_encoding(a)) {
4945 float_raise(float_flag_invalid, status);
4946 return 1ULL << 63;
4947 }
4948 aSig = extractFloatx80Frac( a );
4949 aExp = extractFloatx80Exp( a );
4950 aSign = extractFloatx80Sign( a );
4951 shiftCount = aExp - 0x403E;
4952 if ( 0 <= shiftCount ) {
4953 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4954 if ( ( a.high != 0xC03E ) || aSig ) {
4955 float_raise(float_flag_invalid, status);
4956 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4957 return LIT64( 0x7FFFFFFFFFFFFFFF );
4958 }
4959 }
4960 return (int64_t) LIT64( 0x8000000000000000 );
4961 }
4962 else if ( aExp < 0x3FFF ) {
4963 if (aExp | aSig) {
4964 status->float_exception_flags |= float_flag_inexact;
4965 }
4966 return 0;
4967 }
4968 z = aSig>>( - shiftCount );
4969 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
4970 status->float_exception_flags |= float_flag_inexact;
4971 }
4972 if ( aSign ) z = - z;
4973 return z;
4974
4975}
4976
4977
4978
4979
4980
4981
4982
4983
4984float32 floatx80_to_float32(floatx80 a, float_status *status)
4985{
4986 flag aSign;
4987 int32_t aExp;
4988 uint64_t aSig;
4989
4990 if (floatx80_invalid_encoding(a)) {
4991 float_raise(float_flag_invalid, status);
4992 return float32_default_nan(status);
4993 }
4994 aSig = extractFloatx80Frac( a );
4995 aExp = extractFloatx80Exp( a );
4996 aSign = extractFloatx80Sign( a );
4997 if ( aExp == 0x7FFF ) {
4998 if ( (uint64_t) ( aSig<<1 ) ) {
4999 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
5000 }
5001 return packFloat32( aSign, 0xFF, 0 );
5002 }
5003 shift64RightJamming( aSig, 33, &aSig );
5004 if ( aExp || aSig ) aExp -= 0x3F81;
5005 return roundAndPackFloat32(aSign, aExp, aSig, status);
5006
5007}
5008
5009
5010
5011
5012
5013
5014
5015
5016float64 floatx80_to_float64(floatx80 a, float_status *status)
5017{
5018 flag aSign;
5019 int32_t aExp;
5020 uint64_t aSig, zSig;
5021
5022 if (floatx80_invalid_encoding(a)) {
5023 float_raise(float_flag_invalid, status);
5024 return float64_default_nan(status);
5025 }
5026 aSig = extractFloatx80Frac( a );
5027 aExp = extractFloatx80Exp( a );
5028 aSign = extractFloatx80Sign( a );
5029 if ( aExp == 0x7FFF ) {
5030 if ( (uint64_t) ( aSig<<1 ) ) {
5031 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
5032 }
5033 return packFloat64( aSign, 0x7FF, 0 );
5034 }
5035 shift64RightJamming( aSig, 1, &zSig );
5036 if ( aExp || aSig ) aExp -= 0x3C01;
5037 return roundAndPackFloat64(aSign, aExp, zSig, status);
5038
5039}
5040
5041
5042
5043
5044
5045
5046
5047
5048float128 floatx80_to_float128(floatx80 a, float_status *status)
5049{
5050 flag aSign;
5051 int aExp;
5052 uint64_t aSig, zSig0, zSig1;
5053
5054 if (floatx80_invalid_encoding(a)) {
5055 float_raise(float_flag_invalid, status);
5056 return float128_default_nan(status);
5057 }
5058 aSig = extractFloatx80Frac( a );
5059 aExp = extractFloatx80Exp( a );
5060 aSign = extractFloatx80Sign( a );
5061 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5062 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
5063 }
5064 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5065 return packFloat128( aSign, aExp, zSig0, zSig1 );
5066
5067}
5068
5069
5070
5071
5072
5073
5074
5075
5076floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5077{
5078 flag aSign;
5079 int32_t aExp;
5080 uint64_t lastBitMask, roundBitsMask;
5081 floatx80 z;
5082
5083 if (floatx80_invalid_encoding(a)) {
5084 float_raise(float_flag_invalid, status);
5085 return floatx80_default_nan(status);
5086 }
5087 aExp = extractFloatx80Exp( a );
5088 if ( 0x403E <= aExp ) {
5089 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5090 return propagateFloatx80NaN(a, a, status);
5091 }
5092 return a;
5093 }
5094 if ( aExp < 0x3FFF ) {
5095 if ( ( aExp == 0 )
5096 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
5097 return a;
5098 }
5099 status->float_exception_flags |= float_flag_inexact;
5100 aSign = extractFloatx80Sign( a );
5101 switch (status->float_rounding_mode) {
5102 case float_round_nearest_even:
5103 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5104 ) {
5105 return
5106 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
5107 }
5108 break;
5109 case float_round_ties_away:
5110 if (aExp == 0x3FFE) {
5111 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
5112 }
5113 break;
5114 case float_round_down:
5115 return
5116 aSign ?
5117 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
5118 : packFloatx80( 0, 0, 0 );
5119 case float_round_up:
5120 return
5121 aSign ? packFloatx80( 1, 0, 0 )
5122 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
5123 }
5124 return packFloatx80( aSign, 0, 0 );
5125 }
5126 lastBitMask = 1;
5127 lastBitMask <<= 0x403E - aExp;
5128 roundBitsMask = lastBitMask - 1;
5129 z = a;
5130 switch (status->float_rounding_mode) {
5131 case float_round_nearest_even:
5132 z.low += lastBitMask>>1;
5133 if ((z.low & roundBitsMask) == 0) {
5134 z.low &= ~lastBitMask;
5135 }
5136 break;
5137 case float_round_ties_away:
5138 z.low += lastBitMask >> 1;
5139 break;
5140 case float_round_to_zero:
5141 break;
5142 case float_round_up:
5143 if (!extractFloatx80Sign(z)) {
5144 z.low += roundBitsMask;
5145 }
5146 break;
5147 case float_round_down:
5148 if (extractFloatx80Sign(z)) {
5149 z.low += roundBitsMask;
5150 }
5151 break;
5152 default:
5153 abort();
5154 }
5155 z.low &= ~ roundBitsMask;
5156 if ( z.low == 0 ) {
5157 ++z.high;
5158 z.low = LIT64( 0x8000000000000000 );
5159 }
5160 if (z.low != a.low) {
5161 status->float_exception_flags |= float_flag_inexact;
5162 }
5163 return z;
5164
5165}
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5176 float_status *status)
5177{
5178 int32_t aExp, bExp, zExp;
5179 uint64_t aSig, bSig, zSig0, zSig1;
5180 int32_t expDiff;
5181
5182 aSig = extractFloatx80Frac( a );
5183 aExp = extractFloatx80Exp( a );
5184 bSig = extractFloatx80Frac( b );
5185 bExp = extractFloatx80Exp( b );
5186 expDiff = aExp - bExp;
5187 if ( 0 < expDiff ) {
5188 if ( aExp == 0x7FFF ) {
5189 if ((uint64_t)(aSig << 1)) {
5190 return propagateFloatx80NaN(a, b, status);
5191 }
5192 return a;
5193 }
5194 if ( bExp == 0 ) --expDiff;
5195 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5196 zExp = aExp;
5197 }
5198 else if ( expDiff < 0 ) {
5199 if ( bExp == 0x7FFF ) {
5200 if ((uint64_t)(bSig << 1)) {
5201 return propagateFloatx80NaN(a, b, status);
5202 }
5203 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5204 }
5205 if ( aExp == 0 ) ++expDiff;
5206 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5207 zExp = bExp;
5208 }
5209 else {
5210 if ( aExp == 0x7FFF ) {
5211 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
5212 return propagateFloatx80NaN(a, b, status);
5213 }
5214 return a;
5215 }
5216 zSig1 = 0;
5217 zSig0 = aSig + bSig;
5218 if ( aExp == 0 ) {
5219 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5220 goto roundAndPack;
5221 }
5222 zExp = aExp;
5223 goto shiftRight1;
5224 }
5225 zSig0 = aSig + bSig;
5226 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
5227 shiftRight1:
5228 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
5229 zSig0 |= LIT64( 0x8000000000000000 );
5230 ++zExp;
5231 roundAndPack:
5232 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5233 zSign, zExp, zSig0, zSig1, status);
5234}
5235
5236
5237
5238
5239
5240
5241
5242
5243
5244static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5245 float_status *status)
5246{
5247 int32_t aExp, bExp, zExp;
5248 uint64_t aSig, bSig, zSig0, zSig1;
5249 int32_t expDiff;
5250
5251 aSig = extractFloatx80Frac( a );
5252 aExp = extractFloatx80Exp( a );
5253 bSig = extractFloatx80Frac( b );
5254 bExp = extractFloatx80Exp( b );
5255 expDiff = aExp - bExp;
5256 if ( 0 < expDiff ) goto aExpBigger;
5257 if ( expDiff < 0 ) goto bExpBigger;
5258 if ( aExp == 0x7FFF ) {
5259 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
5260 return propagateFloatx80NaN(a, b, status);
5261 }
5262 float_raise(float_flag_invalid, status);
5263 return floatx80_default_nan(status);
5264 }
5265 if ( aExp == 0 ) {
5266 aExp = 1;
5267 bExp = 1;
5268 }
5269 zSig1 = 0;
5270 if ( bSig < aSig ) goto aBigger;
5271 if ( aSig < bSig ) goto bBigger;
5272 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
5273 bExpBigger:
5274 if ( bExp == 0x7FFF ) {
5275 if ((uint64_t)(bSig << 1)) {
5276 return propagateFloatx80NaN(a, b, status);
5277 }
5278 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
5279 }
5280 if ( aExp == 0 ) ++expDiff;
5281 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5282 bBigger:
5283 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5284 zExp = bExp;
5285 zSign ^= 1;
5286 goto normalizeRoundAndPack;
5287 aExpBigger:
5288 if ( aExp == 0x7FFF ) {
5289 if ((uint64_t)(aSig << 1)) {
5290 return propagateFloatx80NaN(a, b, status);
5291 }
5292 return a;
5293 }
5294 if ( bExp == 0 ) --expDiff;
5295 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5296 aBigger:
5297 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5298 zExp = aExp;
5299 normalizeRoundAndPack:
5300 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
5301 zSign, zExp, zSig0, zSig1, status);
5302}
5303
5304
5305
5306
5307
5308
5309
5310floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
5311{
5312 flag aSign, bSign;
5313
5314 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5315 float_raise(float_flag_invalid, status);
5316 return floatx80_default_nan(status);
5317 }
5318 aSign = extractFloatx80Sign( a );
5319 bSign = extractFloatx80Sign( b );
5320 if ( aSign == bSign ) {
5321 return addFloatx80Sigs(a, b, aSign, status);
5322 }
5323 else {
5324 return subFloatx80Sigs(a, b, aSign, status);
5325 }
5326
5327}
5328
5329
5330
5331
5332
5333
5334
5335floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
5336{
5337 flag aSign, bSign;
5338
5339 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5340 float_raise(float_flag_invalid, status);
5341 return floatx80_default_nan(status);
5342 }
5343 aSign = extractFloatx80Sign( a );
5344 bSign = extractFloatx80Sign( b );
5345 if ( aSign == bSign ) {
5346 return subFloatx80Sigs(a, b, aSign, status);
5347 }
5348 else {
5349 return addFloatx80Sigs(a, b, aSign, status);
5350 }
5351
5352}
5353
5354
5355
5356
5357
5358
5359
5360floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
5361{
5362 flag aSign, bSign, zSign;
5363 int32_t aExp, bExp, zExp;
5364 uint64_t aSig, bSig, zSig0, zSig1;
5365
5366 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5367 float_raise(float_flag_invalid, status);
5368 return floatx80_default_nan(status);
5369 }
5370 aSig = extractFloatx80Frac( a );
5371 aExp = extractFloatx80Exp( a );
5372 aSign = extractFloatx80Sign( a );
5373 bSig = extractFloatx80Frac( b );
5374 bExp = extractFloatx80Exp( b );
5375 bSign = extractFloatx80Sign( b );
5376 zSign = aSign ^ bSign;
5377 if ( aExp == 0x7FFF ) {
5378 if ( (uint64_t) ( aSig<<1 )
5379 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
5380 return propagateFloatx80NaN(a, b, status);
5381 }
5382 if ( ( bExp | bSig ) == 0 ) goto invalid;
5383 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5384 }
5385 if ( bExp == 0x7FFF ) {
5386 if ((uint64_t)(bSig << 1)) {
5387 return propagateFloatx80NaN(a, b, status);
5388 }
5389 if ( ( aExp | aSig ) == 0 ) {
5390 invalid:
5391 float_raise(float_flag_invalid, status);
5392 return floatx80_default_nan(status);
5393 }
5394 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5395 }
5396 if ( aExp == 0 ) {
5397 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5398 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5399 }
5400 if ( bExp == 0 ) {
5401 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5402 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5403 }
5404 zExp = aExp + bExp - 0x3FFE;
5405 mul64To128( aSig, bSig, &zSig0, &zSig1 );
5406 if ( 0 < (int64_t) zSig0 ) {
5407 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5408 --zExp;
5409 }
5410 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5411 zSign, zExp, zSig0, zSig1, status);
5412}
5413
5414
5415
5416
5417
5418
5419
5420floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
5421{
5422 flag aSign, bSign, zSign;
5423 int32_t aExp, bExp, zExp;
5424 uint64_t aSig, bSig, zSig0, zSig1;
5425 uint64_t rem0, rem1, rem2, term0, term1, term2;
5426
5427 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5428 float_raise(float_flag_invalid, status);
5429 return floatx80_default_nan(status);
5430 }
5431 aSig = extractFloatx80Frac( a );
5432 aExp = extractFloatx80Exp( a );
5433 aSign = extractFloatx80Sign( a );
5434 bSig = extractFloatx80Frac( b );
5435 bExp = extractFloatx80Exp( b );
5436 bSign = extractFloatx80Sign( b );
5437 zSign = aSign ^ bSign;
5438 if ( aExp == 0x7FFF ) {
5439 if ((uint64_t)(aSig << 1)) {
5440 return propagateFloatx80NaN(a, b, status);
5441 }
5442 if ( bExp == 0x7FFF ) {
5443 if ((uint64_t)(bSig << 1)) {
5444 return propagateFloatx80NaN(a, b, status);
5445 }
5446 goto invalid;
5447 }
5448 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5449 }
5450 if ( bExp == 0x7FFF ) {
5451 if ((uint64_t)(bSig << 1)) {
5452 return propagateFloatx80NaN(a, b, status);
5453 }
5454 return packFloatx80( zSign, 0, 0 );
5455 }
5456 if ( bExp == 0 ) {
5457 if ( bSig == 0 ) {
5458 if ( ( aExp | aSig ) == 0 ) {
5459 invalid:
5460 float_raise(float_flag_invalid, status);
5461 return floatx80_default_nan(status);
5462 }
5463 float_raise(float_flag_divbyzero, status);
5464 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5465 }
5466 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5467 }
5468 if ( aExp == 0 ) {
5469 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5470 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5471 }
5472 zExp = aExp - bExp + 0x3FFE;
5473 rem1 = 0;
5474 if ( bSig <= aSig ) {
5475 shift128Right( aSig, 0, 1, &aSig, &rem1 );
5476 ++zExp;
5477 }
5478 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
5479 mul64To128( bSig, zSig0, &term0, &term1 );
5480 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
5481 while ( (int64_t) rem0 < 0 ) {
5482 --zSig0;
5483 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
5484 }
5485 zSig1 = estimateDiv128To64( rem1, 0, bSig );
5486 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
5487 mul64To128( bSig, zSig1, &term1, &term2 );
5488 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5489 while ( (int64_t) rem1 < 0 ) {
5490 --zSig1;
5491 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
5492 }
5493 zSig1 |= ( ( rem1 | rem2 ) != 0 );
5494 }
5495 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5496 zSign, zExp, zSig0, zSig1, status);
5497}
5498
5499
5500
5501
5502
5503
5504
5505floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
5506{
5507 flag aSign, zSign;
5508 int32_t aExp, bExp, expDiff;
5509 uint64_t aSig0, aSig1, bSig;
5510 uint64_t q, term0, term1, alternateASig0, alternateASig1;
5511
5512 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5513 float_raise(float_flag_invalid, status);
5514 return floatx80_default_nan(status);
5515 }
5516 aSig0 = extractFloatx80Frac( a );
5517 aExp = extractFloatx80Exp( a );
5518 aSign = extractFloatx80Sign( a );
5519 bSig = extractFloatx80Frac( b );
5520 bExp = extractFloatx80Exp( b );
5521 if ( aExp == 0x7FFF ) {
5522 if ( (uint64_t) ( aSig0<<1 )
5523 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
5524 return propagateFloatx80NaN(a, b, status);
5525 }
5526 goto invalid;
5527 }
5528 if ( bExp == 0x7FFF ) {
5529 if ((uint64_t)(bSig << 1)) {
5530 return propagateFloatx80NaN(a, b, status);
5531 }
5532 return a;
5533 }
5534 if ( bExp == 0 ) {
5535 if ( bSig == 0 ) {
5536 invalid:
5537 float_raise(float_flag_invalid, status);
5538 return floatx80_default_nan(status);
5539 }
5540 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5541 }
5542 if ( aExp == 0 ) {
5543 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
5544 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5545 }
5546 bSig |= LIT64( 0x8000000000000000 );
5547 zSign = aSign;
5548 expDiff = aExp - bExp;
5549 aSig1 = 0;
5550 if ( expDiff < 0 ) {
5551 if ( expDiff < -1 ) return a;
5552 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5553 expDiff = 0;
5554 }
5555 q = ( bSig <= aSig0 );
5556 if ( q ) aSig0 -= bSig;
5557 expDiff -= 64;
5558 while ( 0 < expDiff ) {
5559 q = estimateDiv128To64( aSig0, aSig1, bSig );
5560 q = ( 2 < q ) ? q - 2 : 0;
5561 mul64To128( bSig, q, &term0, &term1 );
5562 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5563 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5564 expDiff -= 62;
5565 }
5566 expDiff += 64;
5567 if ( 0 < expDiff ) {
5568 q = estimateDiv128To64( aSig0, aSig1, bSig );
5569 q = ( 2 < q ) ? q - 2 : 0;
5570 q >>= 64 - expDiff;
5571 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5572 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5573 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5574 while ( le128( term0, term1, aSig0, aSig1 ) ) {
5575 ++q;
5576 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5577 }
5578 }
5579 else {
5580 term1 = 0;
5581 term0 = bSig;
5582 }
5583 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
5584 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
5585 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
5586 && ( q & 1 ) )
5587 ) {
5588 aSig0 = alternateASig0;
5589 aSig1 = alternateASig1;
5590 zSign = ! zSign;
5591 }
5592 return
5593 normalizeRoundAndPackFloatx80(
5594 80, zSign, bExp + expDiff, aSig0, aSig1, status);
5595
5596}
5597
5598
5599
5600
5601
5602
5603
5604floatx80 floatx80_sqrt(floatx80 a, float_status *status)
5605{
5606 flag aSign;
5607 int32_t aExp, zExp;
5608 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5609 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5610
5611 if (floatx80_invalid_encoding(a)) {
5612 float_raise(float_flag_invalid, status);
5613 return floatx80_default_nan(status);
5614 }
5615 aSig0 = extractFloatx80Frac( a );
5616 aExp = extractFloatx80Exp( a );
5617 aSign = extractFloatx80Sign( a );
5618 if ( aExp == 0x7FFF ) {
5619 if ((uint64_t)(aSig0 << 1)) {
5620 return propagateFloatx80NaN(a, a, status);
5621 }
5622 if ( ! aSign ) return a;
5623 goto invalid;
5624 }
5625 if ( aSign ) {
5626 if ( ( aExp | aSig0 ) == 0 ) return a;
5627 invalid:
5628 float_raise(float_flag_invalid, status);
5629 return floatx80_default_nan(status);
5630 }
5631 if ( aExp == 0 ) {
5632 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5633 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5634 }
5635 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5636 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5637 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5638 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5639 doubleZSig0 = zSig0<<1;
5640 mul64To128( zSig0, zSig0, &term0, &term1 );
5641 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5642 while ( (int64_t) rem0 < 0 ) {
5643 --zSig0;
5644 doubleZSig0 -= 2;
5645 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5646 }
5647 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5648 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5649 if ( zSig1 == 0 ) zSig1 = 1;
5650 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5651 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5652 mul64To128( zSig1, zSig1, &term2, &term3 );
5653 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5654 while ( (int64_t) rem1 < 0 ) {
5655 --zSig1;
5656 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5657 term3 |= 1;
5658 term2 |= doubleZSig0;
5659 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5660 }
5661 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5662 }
5663 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5664 zSig0 |= doubleZSig0;
5665 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5666 0, zExp, zSig0, zSig1, status);
5667}
5668
5669
5670
5671
5672
5673
5674
5675
5676int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
5677{
5678
5679 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5680 || (extractFloatx80Exp(a) == 0x7FFF
5681 && (uint64_t) (extractFloatx80Frac(a) << 1))
5682 || (extractFloatx80Exp(b) == 0x7FFF
5683 && (uint64_t) (extractFloatx80Frac(b) << 1))
5684 ) {
5685 float_raise(float_flag_invalid, status);
5686 return 0;
5687 }
5688 return
5689 ( a.low == b.low )
5690 && ( ( a.high == b.high )
5691 || ( ( a.low == 0 )
5692 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5693 );
5694
5695}
5696
5697
5698
5699
5700
5701
5702
5703
5704
5705int floatx80_le(floatx80 a, floatx80 b, float_status *status)
5706{
5707 flag aSign, bSign;
5708
5709 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5710 || (extractFloatx80Exp(a) == 0x7FFF
5711 && (uint64_t) (extractFloatx80Frac(a) << 1))
5712 || (extractFloatx80Exp(b) == 0x7FFF
5713 && (uint64_t) (extractFloatx80Frac(b) << 1))
5714 ) {
5715 float_raise(float_flag_invalid, status);
5716 return 0;
5717 }
5718 aSign = extractFloatx80Sign( a );
5719 bSign = extractFloatx80Sign( b );
5720 if ( aSign != bSign ) {
5721 return
5722 aSign
5723 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5724 == 0 );
5725 }
5726 return
5727 aSign ? le128( b.high, b.low, a.high, a.low )
5728 : le128( a.high, a.low, b.high, b.low );
5729
5730}
5731
5732
5733
5734
5735
5736
5737
5738
5739int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
5740{
5741 flag aSign, bSign;
5742
5743 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5744 || (extractFloatx80Exp(a) == 0x7FFF
5745 && (uint64_t) (extractFloatx80Frac(a) << 1))
5746 || (extractFloatx80Exp(b) == 0x7FFF
5747 && (uint64_t) (extractFloatx80Frac(b) << 1))
5748 ) {
5749 float_raise(float_flag_invalid, status);
5750 return 0;
5751 }
5752 aSign = extractFloatx80Sign( a );
5753 bSign = extractFloatx80Sign( b );
5754 if ( aSign != bSign ) {
5755 return
5756 aSign
5757 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5758 != 0 );
5759 }
5760 return
5761 aSign ? lt128( b.high, b.low, a.high, a.low )
5762 : lt128( a.high, a.low, b.high, b.low );
5763
5764}
5765
5766
5767
5768
5769
5770
5771
5772int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
5773{
5774 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5775 || (extractFloatx80Exp(a) == 0x7FFF
5776 && (uint64_t) (extractFloatx80Frac(a) << 1))
5777 || (extractFloatx80Exp(b) == 0x7FFF
5778 && (uint64_t) (extractFloatx80Frac(b) << 1))
5779 ) {
5780 float_raise(float_flag_invalid, status);
5781 return 1;
5782 }
5783 return 0;
5784}
5785
5786
5787
5788
5789
5790
5791
5792
5793int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
5794{
5795
5796 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5797 float_raise(float_flag_invalid, status);
5798 return 0;
5799 }
5800 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5801 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5802 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5803 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5804 ) {
5805 if (floatx80_is_signaling_nan(a, status)
5806 || floatx80_is_signaling_nan(b, status)) {
5807 float_raise(float_flag_invalid, status);
5808 }
5809 return 0;
5810 }
5811 return
5812 ( a.low == b.low )
5813 && ( ( a.high == b.high )
5814 || ( ( a.low == 0 )
5815 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5816 );
5817
5818}
5819
5820
5821
5822
5823
5824
5825
5826
5827int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
5828{
5829 flag aSign, bSign;
5830
5831 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5832 float_raise(float_flag_invalid, status);
5833 return 0;
5834 }
5835 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5836 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5837 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5838 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5839 ) {
5840 if (floatx80_is_signaling_nan(a, status)
5841 || floatx80_is_signaling_nan(b, status)) {
5842 float_raise(float_flag_invalid, status);
5843 }
5844 return 0;
5845 }
5846 aSign = extractFloatx80Sign( a );
5847 bSign = extractFloatx80Sign( b );
5848 if ( aSign != bSign ) {
5849 return
5850 aSign
5851 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5852 == 0 );
5853 }
5854 return
5855 aSign ? le128( b.high, b.low, a.high, a.low )
5856 : le128( a.high, a.low, b.high, b.low );
5857
5858}
5859
5860
5861
5862
5863
5864
5865
5866
5867int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
5868{
5869 flag aSign, bSign;
5870
5871 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5872 float_raise(float_flag_invalid, status);
5873 return 0;
5874 }
5875 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5876 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5877 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5878 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5879 ) {
5880 if (floatx80_is_signaling_nan(a, status)
5881 || floatx80_is_signaling_nan(b, status)) {
5882 float_raise(float_flag_invalid, status);
5883 }
5884 return 0;
5885 }
5886 aSign = extractFloatx80Sign( a );
5887 bSign = extractFloatx80Sign( b );
5888 if ( aSign != bSign ) {
5889 return
5890 aSign
5891 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5892 != 0 );
5893 }
5894 return
5895 aSign ? lt128( b.high, b.low, a.high, a.low )
5896 : lt128( a.high, a.low, b.high, b.low );
5897
5898}
5899
5900
5901
5902
5903
5904
5905
5906int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
5907{
5908 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5909 float_raise(float_flag_invalid, status);
5910 return 1;
5911 }
5912 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5913 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5914 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5915 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5916 ) {
5917 if (floatx80_is_signaling_nan(a, status)
5918 || floatx80_is_signaling_nan(b, status)) {
5919 float_raise(float_flag_invalid, status);
5920 }
5921 return 1;
5922 }
5923 return 0;
5924}
5925
5926
5927
5928
5929
5930
5931
5932
5933
5934
5935
5936int32_t float128_to_int32(float128 a, float_status *status)
5937{
5938 flag aSign;
5939 int32_t aExp, shiftCount;
5940 uint64_t aSig0, aSig1;
5941
5942 aSig1 = extractFloat128Frac1( a );
5943 aSig0 = extractFloat128Frac0( a );
5944 aExp = extractFloat128Exp( a );
5945 aSign = extractFloat128Sign( a );
5946 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5947 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5948 aSig0 |= ( aSig1 != 0 );
5949 shiftCount = 0x4028 - aExp;
5950 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
5951 return roundAndPackInt32(aSign, aSig0, status);
5952
5953}
5954
5955
5956
5957
5958
5959
5960
5961
5962
5963
5964
5965int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
5966{
5967 flag aSign;
5968 int32_t aExp, shiftCount;
5969 uint64_t aSig0, aSig1, savedASig;
5970 int32_t z;
5971
5972 aSig1 = extractFloat128Frac1( a );
5973 aSig0 = extractFloat128Frac0( a );
5974 aExp = extractFloat128Exp( a );
5975 aSign = extractFloat128Sign( a );
5976 aSig0 |= ( aSig1 != 0 );
5977 if ( 0x401E < aExp ) {
5978 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5979 goto invalid;
5980 }
5981 else if ( aExp < 0x3FFF ) {
5982 if (aExp || aSig0) {
5983 status->float_exception_flags |= float_flag_inexact;
5984 }
5985 return 0;
5986 }
5987 aSig0 |= LIT64( 0x0001000000000000 );
5988 shiftCount = 0x402F - aExp;
5989 savedASig = aSig0;
5990 aSig0 >>= shiftCount;
5991 z = aSig0;
5992 if ( aSign ) z = - z;
5993 if ( ( z < 0 ) ^ aSign ) {
5994 invalid:
5995 float_raise(float_flag_invalid, status);
5996 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5997 }
5998 if ( ( aSig0<<shiftCount ) != savedASig ) {
5999 status->float_exception_flags |= float_flag_inexact;
6000 }
6001 return z;
6002
6003}
6004
6005
6006
6007
6008
6009
6010
6011
6012
6013
6014
6015int64_t float128_to_int64(float128 a, float_status *status)
6016{
6017 flag aSign;
6018 int32_t aExp, shiftCount;
6019 uint64_t aSig0, aSig1;
6020
6021 aSig1 = extractFloat128Frac1( a );
6022 aSig0 = extractFloat128Frac0( a );
6023 aExp = extractFloat128Exp( a );
6024 aSign = extractFloat128Sign( a );
6025 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
6026 shiftCount = 0x402F - aExp;
6027 if ( shiftCount <= 0 ) {
6028 if ( 0x403E < aExp ) {
6029 float_raise(float_flag_invalid, status);
6030 if ( ! aSign
6031 || ( ( aExp == 0x7FFF )
6032 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
6033 )
6034 ) {
6035 return LIT64( 0x7FFFFFFFFFFFFFFF );
6036 }
6037 return (int64_t) LIT64( 0x8000000000000000 );
6038 }
6039 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6040 }
6041 else {
6042 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6043 }
6044 return roundAndPackInt64(aSign, aSig0, aSig1, status);
6045
6046}
6047
6048
6049
6050
6051
6052
6053
6054
6055
6056
6057
6058int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
6059{
6060 flag aSign;
6061 int32_t aExp, shiftCount;
6062 uint64_t aSig0, aSig1;
6063 int64_t z;
6064
6065 aSig1 = extractFloat128Frac1( a );
6066 aSig0 = extractFloat128Frac0( a );
6067 aExp = extractFloat128Exp( a );
6068 aSign = extractFloat128Sign( a );
6069 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
6070 shiftCount = aExp - 0x402F;
6071 if ( 0 < shiftCount ) {
6072 if ( 0x403E <= aExp ) {
6073 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
6074 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
6075 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
6076 if (aSig1) {
6077 status->float_exception_flags |= float_flag_inexact;
6078 }
6079 }
6080 else {
6081 float_raise(float_flag_invalid, status);
6082 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
6083 return LIT64( 0x7FFFFFFFFFFFFFFF );
6084 }
6085 }
6086 return (int64_t) LIT64( 0x8000000000000000 );
6087 }
6088 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
6089 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
6090 status->float_exception_flags |= float_flag_inexact;
6091 }
6092 }
6093 else {
6094 if ( aExp < 0x3FFF ) {
6095 if ( aExp | aSig0 | aSig1 ) {
6096 status->float_exception_flags |= float_flag_inexact;
6097 }
6098 return 0;
6099 }
6100 z = aSig0>>( - shiftCount );
6101 if ( aSig1
6102 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
6103 status->float_exception_flags |= float_flag_inexact;
6104 }
6105 }
6106 if ( aSign ) z = - z;
6107 return z;
6108
6109}
6110
6111
6112
6113
6114
6115
6116
6117
6118float32 float128_to_float32(float128 a, float_status *status)
6119{
6120 flag aSign;
6121 int32_t aExp;
6122 uint64_t aSig0, aSig1;
6123 uint32_t zSig;
6124
6125 aSig1 = extractFloat128Frac1( a );
6126 aSig0 = extractFloat128Frac0( a );
6127 aExp = extractFloat128Exp( a );
6128 aSign = extractFloat128Sign( a );
6129 if ( aExp == 0x7FFF ) {
6130 if ( aSig0 | aSig1 ) {
6131 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
6132 }
6133 return packFloat32( aSign, 0xFF, 0 );
6134 }
6135 aSig0 |= ( aSig1 != 0 );
6136 shift64RightJamming( aSig0, 18, &aSig0 );
6137 zSig = aSig0;
6138 if ( aExp || zSig ) {
6139 zSig |= 0x40000000;
6140 aExp -= 0x3F81;
6141 }
6142 return roundAndPackFloat32(aSign, aExp, zSig, status);
6143
6144}
6145
6146
6147
6148
6149
6150
6151
6152
6153float64 float128_to_float64(float128 a, float_status *status)
6154{
6155 flag aSign;
6156 int32_t aExp;
6157 uint64_t aSig0, aSig1;
6158
6159 aSig1 = extractFloat128Frac1( a );
6160 aSig0 = extractFloat128Frac0( a );
6161 aExp = extractFloat128Exp( a );
6162 aSign = extractFloat128Sign( a );
6163 if ( aExp == 0x7FFF ) {
6164 if ( aSig0 | aSig1 ) {
6165 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
6166 }
6167 return packFloat64( aSign, 0x7FF, 0 );
6168 }
6169 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6170 aSig0 |= ( aSig1 != 0 );
6171 if ( aExp || aSig0 ) {
6172 aSig0 |= LIT64( 0x4000000000000000 );
6173 aExp -= 0x3C01;
6174 }
6175 return roundAndPackFloat64(aSign, aExp, aSig0, status);
6176
6177}
6178
6179
6180
6181
6182
6183
6184
6185
6186floatx80 float128_to_floatx80(float128 a, float_status *status)
6187{
6188 flag aSign;
6189 int32_t aExp;
6190 uint64_t aSig0, aSig1;
6191
6192 aSig1 = extractFloat128Frac1( a );
6193 aSig0 = extractFloat128Frac0( a );
6194 aExp = extractFloat128Exp( a );
6195 aSign = extractFloat128Sign( a );
6196 if ( aExp == 0x7FFF ) {
6197 if ( aSig0 | aSig1 ) {
6198 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
6199 }
6200 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
6201 }
6202 if ( aExp == 0 ) {
6203 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6204 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6205 }
6206 else {
6207 aSig0 |= LIT64( 0x0001000000000000 );
6208 }
6209 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6210 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6211
6212}
6213
6214
6215
6216
6217
6218
6219
6220
6221float128 float128_round_to_int(float128 a, float_status *status)
6222{
6223 flag aSign;
6224 int32_t aExp;
6225 uint64_t lastBitMask, roundBitsMask;
6226 float128 z;
6227
6228 aExp = extractFloat128Exp( a );
6229 if ( 0x402F <= aExp ) {
6230 if ( 0x406F <= aExp ) {
6231 if ( ( aExp == 0x7FFF )
6232 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6233 ) {
6234 return propagateFloat128NaN(a, a, status);
6235 }
6236 return a;
6237 }
6238 lastBitMask = 1;
6239 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6240 roundBitsMask = lastBitMask - 1;
6241 z = a;
6242 switch (status->float_rounding_mode) {
6243 case float_round_nearest_even:
6244 if ( lastBitMask ) {
6245 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6246 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6247 }
6248 else {
6249 if ( (int64_t) z.low < 0 ) {
6250 ++z.high;
6251 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
6252 }
6253 }
6254 break;
6255 case float_round_ties_away:
6256 if (lastBitMask) {
6257 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6258 } else {
6259 if ((int64_t) z.low < 0) {
6260 ++z.high;
6261 }
6262 }
6263 break;
6264 case float_round_to_zero:
6265 break;
6266 case float_round_up:
6267 if (!extractFloat128Sign(z)) {
6268 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6269 }
6270 break;
6271 case float_round_down:
6272 if (extractFloat128Sign(z)) {
6273 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6274 }
6275 break;
6276 default:
6277 abort();
6278 }
6279 z.low &= ~ roundBitsMask;
6280 }
6281 else {
6282 if ( aExp < 0x3FFF ) {
6283 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
6284 status->float_exception_flags |= float_flag_inexact;
6285 aSign = extractFloat128Sign( a );
6286 switch (status->float_rounding_mode) {
6287 case float_round_nearest_even:
6288 if ( ( aExp == 0x3FFE )
6289 && ( extractFloat128Frac0( a )
6290 | extractFloat128Frac1( a ) )
6291 ) {
6292 return packFloat128( aSign, 0x3FFF, 0, 0 );
6293 }
6294 break;
6295 case float_round_ties_away:
6296 if (aExp == 0x3FFE) {
6297 return packFloat128(aSign, 0x3FFF, 0, 0);
6298 }
6299 break;
6300 case float_round_down:
6301 return
6302 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6303 : packFloat128( 0, 0, 0, 0 );
6304 case float_round_up:
6305 return
6306 aSign ? packFloat128( 1, 0, 0, 0 )
6307 : packFloat128( 0, 0x3FFF, 0, 0 );
6308 }
6309 return packFloat128( aSign, 0, 0, 0 );
6310 }
6311 lastBitMask = 1;
6312 lastBitMask <<= 0x402F - aExp;
6313 roundBitsMask = lastBitMask - 1;
6314 z.low = 0;
6315 z.high = a.high;
6316 switch (status->float_rounding_mode) {
6317 case float_round_nearest_even:
6318 z.high += lastBitMask>>1;
6319 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6320 z.high &= ~ lastBitMask;
6321 }
6322 break;
6323 case float_round_ties_away:
6324 z.high += lastBitMask>>1;
6325 break;
6326 case float_round_to_zero:
6327 break;
6328 case float_round_up:
6329 if (!extractFloat128Sign(z)) {
6330 z.high |= ( a.low != 0 );
6331 z.high += roundBitsMask;
6332 }
6333 break;
6334 case float_round_down:
6335 if (extractFloat128Sign(z)) {
6336 z.high |= (a.low != 0);
6337 z.high += roundBitsMask;
6338 }
6339 break;
6340 default:
6341 abort();
6342 }
6343 z.high &= ~ roundBitsMask;
6344 }
6345 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
6346 status->float_exception_flags |= float_flag_inexact;
6347 }
6348 return z;
6349
6350}
6351
6352
6353
6354
6355
6356
6357
6358
6359
6360static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
6361 float_status *status)
6362{
6363 int32_t aExp, bExp, zExp;
6364 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6365 int32_t expDiff;
6366
6367 aSig1 = extractFloat128Frac1( a );
6368 aSig0 = extractFloat128Frac0( a );
6369 aExp = extractFloat128Exp( a );
6370 bSig1 = extractFloat128Frac1( b );
6371 bSig0 = extractFloat128Frac0( b );
6372 bExp = extractFloat128Exp( b );
6373 expDiff = aExp - bExp;
6374 if ( 0 < expDiff ) {
6375 if ( aExp == 0x7FFF ) {
6376 if (aSig0 | aSig1) {
6377 return propagateFloat128NaN(a, b, status);
6378 }
6379 return a;
6380 }
6381 if ( bExp == 0 ) {
6382 --expDiff;
6383 }
6384 else {
6385 bSig0 |= LIT64( 0x0001000000000000 );
6386 }
6387 shift128ExtraRightJamming(
6388 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6389 zExp = aExp;
6390 }
6391 else if ( expDiff < 0 ) {
6392 if ( bExp == 0x7FFF ) {
6393 if (bSig0 | bSig1) {
6394 return propagateFloat128NaN(a, b, status);
6395 }
6396 return packFloat128( zSign, 0x7FFF, 0, 0 );
6397 }
6398 if ( aExp == 0 ) {
6399 ++expDiff;
6400 }
6401 else {
6402 aSig0 |= LIT64( 0x0001000000000000 );
6403 }
6404 shift128ExtraRightJamming(
6405 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6406 zExp = bExp;
6407 }
6408 else {
6409 if ( aExp == 0x7FFF ) {
6410 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
6411 return propagateFloat128NaN(a, b, status);
6412 }
6413 return a;
6414 }
6415 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6416 if ( aExp == 0 ) {
6417 if (status->flush_to_zero) {
6418 if (zSig0 | zSig1) {
6419 float_raise(float_flag_output_denormal, status);
6420 }
6421 return packFloat128(zSign, 0, 0, 0);
6422 }
6423 return packFloat128( zSign, 0, zSig0, zSig1 );
6424 }
6425 zSig2 = 0;
6426 zSig0 |= LIT64( 0x0002000000000000 );
6427 zExp = aExp;
6428 goto shiftRight1;
6429 }
6430 aSig0 |= LIT64( 0x0001000000000000 );
6431 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6432 --zExp;
6433 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
6434 ++zExp;
6435 shiftRight1:
6436 shift128ExtraRightJamming(
6437 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6438 roundAndPack:
6439 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6440
6441}
6442
6443
6444
6445
6446
6447
6448
6449
6450
6451static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
6452 float_status *status)
6453{
6454 int32_t aExp, bExp, zExp;
6455 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
6456 int32_t expDiff;
6457
6458 aSig1 = extractFloat128Frac1( a );
6459 aSig0 = extractFloat128Frac0( a );
6460 aExp = extractFloat128Exp( a );
6461 bSig1 = extractFloat128Frac1( b );
6462 bSig0 = extractFloat128Frac0( b );
6463 bExp = extractFloat128Exp( b );
6464 expDiff = aExp - bExp;
6465 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6466 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
6467 if ( 0 < expDiff ) goto aExpBigger;
6468 if ( expDiff < 0 ) goto bExpBigger;
6469 if ( aExp == 0x7FFF ) {
6470 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
6471 return propagateFloat128NaN(a, b, status);
6472 }
6473 float_raise(float_flag_invalid, status);
6474 return float128_default_nan(status);
6475 }
6476 if ( aExp == 0 ) {
6477 aExp = 1;
6478 bExp = 1;
6479 }
6480 if ( bSig0 < aSig0 ) goto aBigger;
6481 if ( aSig0 < bSig0 ) goto bBigger;
6482 if ( bSig1 < aSig1 ) goto aBigger;
6483 if ( aSig1 < bSig1 ) goto bBigger;
6484 return packFloat128(status->float_rounding_mode == float_round_down,
6485 0, 0, 0);
6486 bExpBigger:
6487 if ( bExp == 0x7FFF ) {
6488 if (bSig0 | bSig1) {
6489 return propagateFloat128NaN(a, b, status);
6490 }
6491 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
6492 }
6493 if ( aExp == 0 ) {
6494 ++expDiff;
6495 }
6496 else {
6497 aSig0 |= LIT64( 0x4000000000000000 );
6498 }
6499 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6500 bSig0 |= LIT64( 0x4000000000000000 );
6501 bBigger:
6502 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
6503 zExp = bExp;
6504 zSign ^= 1;
6505 goto normalizeRoundAndPack;
6506 aExpBigger:
6507 if ( aExp == 0x7FFF ) {
6508 if (aSig0 | aSig1) {
6509 return propagateFloat128NaN(a, b, status);
6510 }
6511 return a;
6512 }
6513 if ( bExp == 0 ) {
6514 --expDiff;
6515 }
6516 else {
6517 bSig0 |= LIT64( 0x4000000000000000 );
6518 }
6519 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
6520 aSig0 |= LIT64( 0x4000000000000000 );
6521 aBigger:
6522 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6523 zExp = aExp;
6524 normalizeRoundAndPack:
6525 --zExp;
6526 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
6527 status);
6528
6529}
6530
6531
6532
6533
6534
6535
6536
6537float128 float128_add(float128 a, float128 b, float_status *status)
6538{
6539 flag aSign, bSign;
6540
6541 aSign = extractFloat128Sign( a );
6542 bSign = extractFloat128Sign( b );
6543 if ( aSign == bSign ) {
6544 return addFloat128Sigs(a, b, aSign, status);
6545 }
6546 else {
6547 return subFloat128Sigs(a, b, aSign, status);
6548 }
6549
6550}
6551
6552
6553
6554
6555
6556
6557
6558float128 float128_sub(float128 a, float128 b, float_status *status)
6559{
6560 flag aSign, bSign;
6561
6562 aSign = extractFloat128Sign( a );
6563 bSign = extractFloat128Sign( b );
6564 if ( aSign == bSign ) {
6565 return subFloat128Sigs(a, b, aSign, status);
6566 }
6567 else {
6568 return addFloat128Sigs(a, b, aSign, status);
6569 }
6570
6571}
6572
6573
6574
6575
6576
6577
6578
6579float128 float128_mul(float128 a, float128 b, float_status *status)
6580{
6581 flag aSign, bSign, zSign;
6582 int32_t aExp, bExp, zExp;
6583 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
6584
6585 aSig1 = extractFloat128Frac1( a );
6586 aSig0 = extractFloat128Frac0( a );
6587 aExp = extractFloat128Exp( a );
6588 aSign = extractFloat128Sign( a );
6589 bSig1 = extractFloat128Frac1( b );
6590 bSig0 = extractFloat128Frac0( b );
6591 bExp = extractFloat128Exp( b );
6592 bSign = extractFloat128Sign( b );
6593 zSign = aSign ^ bSign;
6594 if ( aExp == 0x7FFF ) {
6595 if ( ( aSig0 | aSig1 )
6596 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6597 return propagateFloat128NaN(a, b, status);
6598 }
6599 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6600 return packFloat128( zSign, 0x7FFF, 0, 0 );
6601 }
6602 if ( bExp == 0x7FFF ) {
6603 if (bSig0 | bSig1) {
6604 return propagateFloat128NaN(a, b, status);
6605 }
6606 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6607 invalid:
6608 float_raise(float_flag_invalid, status);
6609 return float128_default_nan(status);
6610 }
6611 return packFloat128( zSign, 0x7FFF, 0, 0 );
6612 }
6613 if ( aExp == 0 ) {
6614 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6615 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6616 }
6617 if ( bExp == 0 ) {
6618 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6619 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6620 }
6621 zExp = aExp + bExp - 0x4000;
6622 aSig0 |= LIT64( 0x0001000000000000 );
6623 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6624 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6625 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6626 zSig2 |= ( zSig3 != 0 );
6627 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6628 shift128ExtraRightJamming(
6629 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6630 ++zExp;
6631 }
6632 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6633
6634}
6635
6636
6637
6638
6639
6640
6641
6642float128 float128_div(float128 a, float128 b, float_status *status)
6643{
6644 flag aSign, bSign, zSign;
6645 int32_t aExp, bExp, zExp;
6646 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6647 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6648
6649 aSig1 = extractFloat128Frac1( a );
6650 aSig0 = extractFloat128Frac0( a );
6651 aExp = extractFloat128Exp( a );
6652 aSign = extractFloat128Sign( a );
6653 bSig1 = extractFloat128Frac1( b );
6654 bSig0 = extractFloat128Frac0( b );
6655 bExp = extractFloat128Exp( b );
6656 bSign = extractFloat128Sign( b );
6657 zSign = aSign ^ bSign;
6658 if ( aExp == 0x7FFF ) {
6659 if (aSig0 | aSig1) {
6660 return propagateFloat128NaN(a, b, status);
6661 }
6662 if ( bExp == 0x7FFF ) {
6663 if (bSig0 | bSig1) {
6664 return propagateFloat128NaN(a, b, status);
6665 }
6666 goto invalid;
6667 }
6668 return packFloat128( zSign, 0x7FFF, 0, 0 );
6669 }
6670 if ( bExp == 0x7FFF ) {
6671 if (bSig0 | bSig1) {
6672 return propagateFloat128NaN(a, b, status);
6673 }
6674 return packFloat128( zSign, 0, 0, 0 );
6675 }
6676 if ( bExp == 0 ) {
6677 if ( ( bSig0 | bSig1 ) == 0 ) {
6678 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6679 invalid:
6680 float_raise(float_flag_invalid, status);
6681 return float128_default_nan(status);
6682 }
6683 float_raise(float_flag_divbyzero, status);
6684 return packFloat128( zSign, 0x7FFF, 0, 0 );
6685 }
6686 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6687 }
6688 if ( aExp == 0 ) {
6689 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6690 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6691 }
6692 zExp = aExp - bExp + 0x3FFD;
6693 shortShift128Left(
6694 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6695 shortShift128Left(
6696 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6697 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6698 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6699 ++zExp;
6700 }
6701 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6702 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6703 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
6704 while ( (int64_t) rem0 < 0 ) {
6705 --zSig0;
6706 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6707 }
6708 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6709 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6710 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6711 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
6712 while ( (int64_t) rem1 < 0 ) {
6713 --zSig1;
6714 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6715 }
6716 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6717 }
6718 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
6719 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6720
6721}
6722
6723
6724
6725
6726
6727
6728
6729float128 float128_rem(float128 a, float128 b, float_status *status)
6730{
6731 flag aSign, zSign;
6732 int32_t aExp, bExp, expDiff;
6733 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6734 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6735 int64_t sigMean0;
6736
6737 aSig1 = extractFloat128Frac1( a );
6738 aSig0 = extractFloat128Frac0( a );
6739 aExp = extractFloat128Exp( a );
6740 aSign = extractFloat128Sign( a );
6741 bSig1 = extractFloat128Frac1( b );
6742 bSig0 = extractFloat128Frac0( b );
6743 bExp = extractFloat128Exp( b );
6744 if ( aExp == 0x7FFF ) {
6745 if ( ( aSig0 | aSig1 )
6746 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6747 return propagateFloat128NaN(a, b, status);
6748 }
6749 goto invalid;
6750 }
6751 if ( bExp == 0x7FFF ) {
6752 if (bSig0 | bSig1) {
6753 return propagateFloat128NaN(a, b, status);
6754 }
6755 return a;
6756 }
6757 if ( bExp == 0 ) {
6758 if ( ( bSig0 | bSig1 ) == 0 ) {
6759 invalid:
6760 float_raise(float_flag_invalid, status);
6761 return float128_default_nan(status);
6762 }
6763 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6764 }
6765 if ( aExp == 0 ) {
6766 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6767 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6768 }
6769 expDiff = aExp - bExp;
6770 if ( expDiff < -1 ) return a;
6771 shortShift128Left(
6772 aSig0 | LIT64( 0x0001000000000000 ),
6773 aSig1,
6774 15 - ( expDiff < 0 ),
6775 &aSig0,
6776 &aSig1
6777 );
6778 shortShift128Left(
6779 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6780 q = le128( bSig0, bSig1, aSig0, aSig1 );
6781 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6782 expDiff -= 64;
6783 while ( 0 < expDiff ) {
6784 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6785 q = ( 4 < q ) ? q - 4 : 0;
6786 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6787 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6788 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6789 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6790 expDiff -= 61;
6791 }
6792 if ( -64 < expDiff ) {
6793 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6794 q = ( 4 < q ) ? q - 4 : 0;
6795 q >>= - expDiff;
6796 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6797 expDiff += 52;
6798 if ( expDiff < 0 ) {
6799 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6800 }
6801 else {
6802 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6803 }
6804 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6805 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6806 }
6807 else {
6808 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6809 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6810 }
6811 do {
6812 alternateASig0 = aSig0;
6813 alternateASig1 = aSig1;
6814 ++q;
6815 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6816 } while ( 0 <= (int64_t) aSig0 );
6817 add128(
6818 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
6819 if ( ( sigMean0 < 0 )
6820 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6821 aSig0 = alternateASig0;
6822 aSig1 = alternateASig1;
6823 }
6824 zSign = ( (int64_t) aSig0 < 0 );
6825 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
6826 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6827 status);
6828}
6829
6830
6831
6832
6833
6834
6835
6836float128 float128_sqrt(float128 a, float_status *status)
6837{
6838 flag aSign;
6839 int32_t aExp, zExp;
6840 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6841 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6842
6843 aSig1 = extractFloat128Frac1( a );
6844 aSig0 = extractFloat128Frac0( a );
6845 aExp = extractFloat128Exp( a );
6846 aSign = extractFloat128Sign( a );
6847 if ( aExp == 0x7FFF ) {
6848 if (aSig0 | aSig1) {
6849 return propagateFloat128NaN(a, a, status);
6850 }
6851 if ( ! aSign ) return a;
6852 goto invalid;
6853 }
6854 if ( aSign ) {
6855 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6856 invalid:
6857 float_raise(float_flag_invalid, status);
6858 return float128_default_nan(status);
6859 }
6860 if ( aExp == 0 ) {
6861 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6862 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6863 }
6864 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6865 aSig0 |= LIT64( 0x0001000000000000 );
6866 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6867 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6868 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6869 doubleZSig0 = zSig0<<1;
6870 mul64To128( zSig0, zSig0, &term0, &term1 );
6871 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6872 while ( (int64_t) rem0 < 0 ) {
6873 --zSig0;
6874 doubleZSig0 -= 2;
6875 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6876 }
6877 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6878 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6879 if ( zSig1 == 0 ) zSig1 = 1;
6880 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6881 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6882 mul64To128( zSig1, zSig1, &term2, &term3 );
6883 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
6884 while ( (int64_t) rem1 < 0 ) {
6885 --zSig1;
6886 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6887 term3 |= 1;
6888 term2 |= doubleZSig0;
6889 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6890 }
6891 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6892 }
6893 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
6894 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
6895
6896}
6897
6898
6899
6900
6901
6902
6903
6904
6905int float128_eq(float128 a, float128 b, float_status *status)
6906{
6907
6908 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6909 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6910 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6911 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6912 ) {
6913 float_raise(float_flag_invalid, status);
6914 return 0;
6915 }
6916 return
6917 ( a.low == b.low )
6918 && ( ( a.high == b.high )
6919 || ( ( a.low == 0 )
6920 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
6921 );
6922
6923}
6924
6925
6926
6927
6928
6929
6930
6931
6932int float128_le(float128 a, float128 b, float_status *status)
6933{
6934 flag aSign, bSign;
6935
6936 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6937 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6938 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6939 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6940 ) {
6941 float_raise(float_flag_invalid, status);
6942 return 0;
6943 }
6944 aSign = extractFloat128Sign( a );
6945 bSign = extractFloat128Sign( b );
6946 if ( aSign != bSign ) {
6947 return
6948 aSign
6949 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6950 == 0 );
6951 }
6952 return
6953 aSign ? le128( b.high, b.low, a.high, a.low )
6954 : le128( a.high, a.low, b.high, b.low );
6955
6956}
6957
6958
6959
6960
6961
6962
6963
6964
6965int float128_lt(float128 a, float128 b, float_status *status)
6966{
6967 flag aSign, bSign;
6968
6969 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6970 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6971 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6972 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6973 ) {
6974 float_raise(float_flag_invalid, status);
6975 return 0;
6976 }
6977 aSign = extractFloat128Sign( a );
6978 bSign = extractFloat128Sign( b );
6979 if ( aSign != bSign ) {
6980 return
6981 aSign
6982 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6983 != 0 );
6984 }
6985 return
6986 aSign ? lt128( b.high, b.low, a.high, a.low )
6987 : lt128( a.high, a.low, b.high, b.low );
6988
6989}
6990
6991
6992
6993
6994
6995
6996
6997
6998int float128_unordered(float128 a, float128 b, float_status *status)
6999{
7000 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7001 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7002 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7003 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7004 ) {
7005 float_raise(float_flag_invalid, status);
7006 return 1;
7007 }
7008 return 0;
7009}
7010
7011
7012
7013
7014
7015
7016
7017
7018int float128_eq_quiet(float128 a, float128 b, float_status *status)
7019{
7020
7021 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7022 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7023 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7024 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7025 ) {
7026 if (float128_is_signaling_nan(a, status)
7027 || float128_is_signaling_nan(b, status)) {
7028 float_raise(float_flag_invalid, status);
7029 }
7030 return 0;
7031 }
7032 return
7033 ( a.low == b.low )
7034 && ( ( a.high == b.high )
7035 || ( ( a.low == 0 )
7036 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
7037 );
7038
7039}
7040
7041
7042
7043
7044
7045
7046
7047
7048int float128_le_quiet(float128 a, float128 b, float_status *status)
7049{
7050 flag aSign, bSign;
7051
7052 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7053 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7054 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7055 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7056 ) {
7057 if (float128_is_signaling_nan(a, status)
7058 || float128_is_signaling_nan(b, status)) {
7059 float_raise(float_flag_invalid, status);
7060 }
7061 return 0;
7062 }
7063 aSign = extractFloat128Sign( a );
7064 bSign = extractFloat128Sign( b );
7065 if ( aSign != bSign ) {
7066 return
7067 aSign
7068 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
7069 == 0 );
7070 }
7071 return
7072 aSign ? le128( b.high, b.low, a.high, a.low )
7073 : le128( a.high, a.low, b.high, b.low );
7074
7075}
7076
7077
7078
7079
7080
7081
7082
7083
7084int float128_lt_quiet(float128 a, float128 b, float_status *status)
7085{
7086 flag aSign, bSign;
7087
7088 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7089 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7090 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7091 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7092 ) {
7093 if (float128_is_signaling_nan(a, status)
7094 || float128_is_signaling_nan(b, status)) {
7095 float_raise(float_flag_invalid, status);
7096 }
7097 return 0;
7098 }
7099 aSign = extractFloat128Sign( a );
7100 bSign = extractFloat128Sign( b );
7101 if ( aSign != bSign ) {
7102 return
7103 aSign
7104 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
7105 != 0 );
7106 }
7107 return
7108 aSign ? lt128( b.high, b.low, a.high, a.low )
7109 : lt128( a.high, a.low, b.high, b.low );
7110
7111}
7112
7113
7114
7115
7116
7117
7118
7119
7120int float128_unordered_quiet(float128 a, float128 b, float_status *status)
7121{
7122 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7123 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7124 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7125 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7126 ) {
7127 if (float128_is_signaling_nan(a, status)
7128 || float128_is_signaling_nan(b, status)) {
7129 float_raise(float_flag_invalid, status);
7130 }
7131 return 1;
7132 }
7133 return 0;
7134}
7135
7136
7137float32 uint32_to_float32(uint32_t a, float_status *status)
7138{
7139 return int64_to_float32(a, status);
7140}
7141
7142float64 uint32_to_float64(uint32_t a, float_status *status)
7143{
7144 return int64_to_float64(a, status);
7145}
7146
7147uint32_t float32_to_uint32(float32 a, float_status *status)
7148{
7149 int64_t v;
7150 uint32_t res;
7151 int old_exc_flags = get_float_exception_flags(status);
7152
7153 v = float32_to_int64(a, status);
7154 if (v < 0) {
7155 res = 0;
7156 } else if (v > 0xffffffff) {
7157 res = 0xffffffff;
7158 } else {
7159 return v;
7160 }
7161 set_float_exception_flags(old_exc_flags, status);
7162 float_raise(float_flag_invalid, status);
7163 return res;
7164}
7165
7166uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *status)
7167{
7168 int64_t v;
7169 uint32_t res;
7170 int old_exc_flags = get_float_exception_flags(status);
7171
7172 v = float32_to_int64_round_to_zero(a, status);
7173 if (v < 0) {
7174 res = 0;
7175 } else if (v > 0xffffffff) {
7176 res = 0xffffffff;
7177 } else {
7178 return v;
7179 }
7180 set_float_exception_flags(old_exc_flags, status);
7181 float_raise(float_flag_invalid, status);
7182 return res;
7183}
7184
7185int16_t float32_to_int16(float32 a, float_status *status)
7186{
7187 int32_t v;
7188 int16_t res;
7189 int old_exc_flags = get_float_exception_flags(status);
7190
7191 v = float32_to_int32(a, status);
7192 if (v < -0x8000) {
7193 res = -0x8000;
7194 } else if (v > 0x7fff) {
7195 res = 0x7fff;
7196 } else {
7197 return v;
7198 }
7199
7200 set_float_exception_flags(old_exc_flags, status);
7201 float_raise(float_flag_invalid, status);
7202 return res;
7203}
7204
7205uint16_t float32_to_uint16(float32 a, float_status *status)
7206{
7207 int32_t v;
7208 uint16_t res;
7209 int old_exc_flags = get_float_exception_flags(status);
7210
7211 v = float32_to_int32(a, status);
7212 if (v < 0) {
7213 res = 0;
7214 } else if (v > 0xffff) {
7215 res = 0xffff;
7216 } else {
7217 return v;
7218 }
7219
7220 set_float_exception_flags(old_exc_flags, status);
7221 float_raise(float_flag_invalid, status);
7222 return res;
7223}
7224
7225uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *status)
7226{
7227 int64_t v;
7228 uint16_t res;
7229 int old_exc_flags = get_float_exception_flags(status);
7230
7231 v = float32_to_int64_round_to_zero(a, status);
7232 if (v < 0) {
7233 res = 0;
7234 } else if (v > 0xffff) {
7235 res = 0xffff;
7236 } else {
7237 return v;
7238 }
7239 set_float_exception_flags(old_exc_flags, status);
7240 float_raise(float_flag_invalid, status);
7241 return res;
7242}
7243
7244uint32_t float64_to_uint32(float64 a, float_status *status)
7245{
7246 uint64_t v;
7247 uint32_t res;
7248 int old_exc_flags = get_float_exception_flags(status);
7249
7250 v = float64_to_uint64(a, status);
7251 if (v > 0xffffffff) {
7252 res = 0xffffffff;
7253 } else {
7254 return v;
7255 }
7256 set_float_exception_flags(old_exc_flags, status);
7257 float_raise(float_flag_invalid, status);
7258 return res;
7259}
7260
7261uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *status)
7262{
7263 uint64_t v;
7264 uint32_t res;
7265 int old_exc_flags = get_float_exception_flags(status);
7266
7267 v = float64_to_uint64_round_to_zero(a, status);
7268 if (v > 0xffffffff) {
7269 res = 0xffffffff;
7270 } else {
7271 return v;
7272 }
7273 set_float_exception_flags(old_exc_flags, status);
7274 float_raise(float_flag_invalid, status);
7275 return res;
7276}
7277
7278int16_t float64_to_int16(float64 a, float_status *status)
7279{
7280 int64_t v;
7281 int16_t res;
7282 int old_exc_flags = get_float_exception_flags(status);
7283
7284 v = float64_to_int32(a, status);
7285 if (v < -0x8000) {
7286 res = -0x8000;
7287 } else if (v > 0x7fff) {
7288 res = 0x7fff;
7289 } else {
7290 return v;
7291 }
7292
7293 set_float_exception_flags(old_exc_flags, status);
7294 float_raise(float_flag_invalid, status);
7295 return res;
7296}
7297
7298uint16_t float64_to_uint16(float64 a, float_status *status)
7299{
7300 int64_t v;
7301 uint16_t res;
7302 int old_exc_flags = get_float_exception_flags(status);
7303
7304 v = float64_to_int32(a, status);
7305 if (v < 0) {
7306 res = 0;
7307 } else if (v > 0xffff) {
7308 res = 0xffff;
7309 } else {
7310 return v;
7311 }
7312
7313 set_float_exception_flags(old_exc_flags, status);
7314 float_raise(float_flag_invalid, status);
7315 return res;
7316}
7317
7318uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *status)
7319{
7320 int64_t v;
7321 uint16_t res;
7322 int old_exc_flags = get_float_exception_flags(status);
7323
7324 v = float64_to_int64_round_to_zero(a, status);
7325 if (v < 0) {
7326 res = 0;
7327 } else if (v > 0xffff) {
7328 res = 0xffff;
7329 } else {
7330 return v;
7331 }
7332 set_float_exception_flags(old_exc_flags, status);
7333 float_raise(float_flag_invalid, status);
7334 return res;
7335}
7336
7337
7338
7339
7340
7341
7342
7343
7344
7345
7346
7347
7348
7349uint64_t float64_to_uint64(float64 a, float_status *status)
7350{
7351 flag aSign;
7352 int aExp;
7353 int shiftCount;
7354 uint64_t aSig, aSigExtra;
7355 a = float64_squash_input_denormal(a, status);
7356
7357 aSig = extractFloat64Frac(a);
7358 aExp = extractFloat64Exp(a);
7359 aSign = extractFloat64Sign(a);
7360 if (aSign && (aExp > 1022)) {
7361 float_raise(float_flag_invalid, status);
7362 if (float64_is_any_nan(a)) {
7363 return LIT64(0xFFFFFFFFFFFFFFFF);
7364 } else {
7365 return 0;
7366 }
7367 }
7368 if (aExp) {
7369 aSig |= LIT64(0x0010000000000000);
7370 }
7371 shiftCount = 0x433 - aExp;
7372 if (shiftCount <= 0) {
7373 if (0x43E < aExp) {
7374 float_raise(float_flag_invalid, status);
7375 return LIT64(0xFFFFFFFFFFFFFFFF);
7376 }
7377 aSigExtra = 0;
7378 aSig <<= -shiftCount;
7379 } else {
7380 shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra);
7381 }
7382 return roundAndPackUint64(aSign, aSig, aSigExtra, status);
7383}
7384
7385uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *status)
7386{
7387 signed char current_rounding_mode = status->float_rounding_mode;
7388 set_float_rounding_mode(float_round_to_zero, status);
7389 int64_t v = float64_to_uint64(a, status);
7390 set_float_rounding_mode(current_rounding_mode, status);
7391 return v;
7392}
7393
7394#define COMPARE(s, nan_exp) \
7395static inline int float ## s ## _compare_internal(float ## s a, float ## s b,\
7396 int is_quiet, float_status *status) \
7397{ \
7398 flag aSign, bSign; \
7399 uint ## s ## _t av, bv; \
7400 a = float ## s ## _squash_input_denormal(a, status); \
7401 b = float ## s ## _squash_input_denormal(b, status); \
7402 \
7403 if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
7404 extractFloat ## s ## Frac( a ) ) || \
7405 ( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
7406 extractFloat ## s ## Frac( b ) )) { \
7407 if (!is_quiet || \
7408 float ## s ## _is_signaling_nan(a, status) || \
7409 float ## s ## _is_signaling_nan(b, status)) { \
7410 float_raise(float_flag_invalid, status); \
7411 } \
7412 return float_relation_unordered; \
7413 } \
7414 aSign = extractFloat ## s ## Sign( a ); \
7415 bSign = extractFloat ## s ## Sign( b ); \
7416 av = float ## s ## _val(a); \
7417 bv = float ## s ## _val(b); \
7418 if ( aSign != bSign ) { \
7419 if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) { \
7420 \
7421 return float_relation_equal; \
7422 } else { \
7423 return 1 - (2 * aSign); \
7424 } \
7425 } else { \
7426 if (av == bv) { \
7427 return float_relation_equal; \
7428 } else { \
7429 return 1 - 2 * (aSign ^ ( av < bv )); \
7430 } \
7431 } \
7432} \
7433 \
7434int float ## s ## _compare(float ## s a, float ## s b, float_status *status) \
7435{ \
7436 return float ## s ## _compare_internal(a, b, 0, status); \
7437} \
7438 \
7439int float ## s ## _compare_quiet(float ## s a, float ## s b, \
7440 float_status *status) \
7441{ \
7442 return float ## s ## _compare_internal(a, b, 1, status); \
7443}
7444
7445COMPARE(32, 0xff)
7446COMPARE(64, 0x7ff)
7447
7448static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
7449 int is_quiet, float_status *status)
7450{
7451 flag aSign, bSign;
7452
7453 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7454 float_raise(float_flag_invalid, status);
7455 return float_relation_unordered;
7456 }
7457 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7458 ( extractFloatx80Frac( a )<<1 ) ) ||
7459 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7460 ( extractFloatx80Frac( b )<<1 ) )) {
7461 if (!is_quiet ||
7462 floatx80_is_signaling_nan(a, status) ||
7463 floatx80_is_signaling_nan(b, status)) {
7464 float_raise(float_flag_invalid, status);
7465 }
7466 return float_relation_unordered;
7467 }
7468 aSign = extractFloatx80Sign( a );
7469 bSign = extractFloatx80Sign( b );
7470 if ( aSign != bSign ) {
7471
7472 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7473 ( ( a.low | b.low ) == 0 ) ) {
7474
7475 return float_relation_equal;
7476 } else {
7477 return 1 - (2 * aSign);
7478 }
7479 } else {
7480 if (a.low == b.low && a.high == b.high) {
7481 return float_relation_equal;
7482 } else {
7483 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7484 }
7485 }
7486}
7487
7488int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
7489{
7490 return floatx80_compare_internal(a, b, 0, status);
7491}
7492
7493int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
7494{
7495 return floatx80_compare_internal(a, b, 1, status);
7496}
7497
7498static inline int float128_compare_internal(float128 a, float128 b,
7499 int is_quiet, float_status *status)
7500{
7501 flag aSign, bSign;
7502
7503 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7504 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7505 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7506 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7507 if (!is_quiet ||
7508 float128_is_signaling_nan(a, status) ||
7509 float128_is_signaling_nan(b, status)) {
7510 float_raise(float_flag_invalid, status);
7511 }
7512 return float_relation_unordered;
7513 }
7514 aSign = extractFloat128Sign( a );
7515 bSign = extractFloat128Sign( b );
7516 if ( aSign != bSign ) {
7517 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7518
7519 return float_relation_equal;
7520 } else {
7521 return 1 - (2 * aSign);
7522 }
7523 } else {
7524 if (a.low == b.low && a.high == b.high) {
7525 return float_relation_equal;
7526 } else {
7527 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7528 }
7529 }
7530}
7531
7532int float128_compare(float128 a, float128 b, float_status *status)
7533{
7534 return float128_compare_internal(a, b, 0, status);
7535}
7536
7537int float128_compare_quiet(float128 a, float128 b, float_status *status)
7538{
7539 return float128_compare_internal(a, b, 1, status);
7540}
7541
7542
7543
7544
7545
7546
7547
7548
7549
7550
7551
7552
7553
7554
7555
7556#define MINMAX(s) \
7557static inline float ## s float ## s ## _minmax(float ## s a, float ## s b, \
7558 int ismin, int isieee, \
7559 int ismag, \
7560 float_status *status) \
7561{ \
7562 flag aSign, bSign; \
7563 uint ## s ## _t av, bv, aav, abv; \
7564 a = float ## s ## _squash_input_denormal(a, status); \
7565 b = float ## s ## _squash_input_denormal(b, status); \
7566 if (float ## s ## _is_any_nan(a) || \
7567 float ## s ## _is_any_nan(b)) { \
7568 if (isieee) { \
7569 if (float ## s ## _is_quiet_nan(a, status) && \
7570 !float ## s ##_is_any_nan(b)) { \
7571 return b; \
7572 } else if (float ## s ## _is_quiet_nan(b, status) && \
7573 !float ## s ## _is_any_nan(a)) { \
7574 return a; \
7575 } \
7576 } \
7577 return propagateFloat ## s ## NaN(a, b, status); \
7578 } \
7579 aSign = extractFloat ## s ## Sign(a); \
7580 bSign = extractFloat ## s ## Sign(b); \
7581 av = float ## s ## _val(a); \
7582 bv = float ## s ## _val(b); \
7583 if (ismag) { \
7584 aav = float ## s ## _abs(av); \
7585 abv = float ## s ## _abs(bv); \
7586 if (aav != abv) { \
7587 if (ismin) { \
7588 return (aav < abv) ? a : b; \
7589 } else { \
7590 return (aav < abv) ? b : a; \
7591 } \
7592 } \
7593 } \
7594 if (aSign != bSign) { \
7595 if (ismin) { \
7596 return aSign ? a : b; \
7597 } else { \
7598 return aSign ? b : a; \
7599 } \
7600 } else { \
7601 if (ismin) { \
7602 return (aSign ^ (av < bv)) ? a : b; \
7603 } else { \
7604 return (aSign ^ (av < bv)) ? b : a; \
7605 } \
7606 } \
7607} \
7608 \
7609float ## s float ## s ## _min(float ## s a, float ## s b, \
7610 float_status *status) \
7611{ \
7612 return float ## s ## _minmax(a, b, 1, 0, 0, status); \
7613} \
7614 \
7615float ## s float ## s ## _max(float ## s a, float ## s b, \
7616 float_status *status) \
7617{ \
7618 return float ## s ## _minmax(a, b, 0, 0, 0, status); \
7619} \
7620 \
7621float ## s float ## s ## _minnum(float ## s a, float ## s b, \
7622 float_status *status) \
7623{ \
7624 return float ## s ## _minmax(a, b, 1, 1, 0, status); \
7625} \
7626 \
7627float ## s float ## s ## _maxnum(float ## s a, float ## s b, \
7628 float_status *status) \
7629{ \
7630 return float ## s ## _minmax(a, b, 0, 1, 0, status); \
7631} \
7632 \
7633float ## s float ## s ## _minnummag(float ## s a, float ## s b, \
7634 float_status *status) \
7635{ \
7636 return float ## s ## _minmax(a, b, 1, 1, 1, status); \
7637} \
7638 \
7639float ## s float ## s ## _maxnummag(float ## s a, float ## s b, \
7640 float_status *status) \
7641{ \
7642 return float ## s ## _minmax(a, b, 0, 1, 1, status); \
7643}
7644
7645MINMAX(32)
7646MINMAX(64)
7647
7648
7649
7650float32 float32_scalbn(float32 a, int n, float_status *status)
7651{
7652 flag aSign;
7653 int16_t aExp;
7654 uint32_t aSig;
7655
7656 a = float32_squash_input_denormal(a, status);
7657 aSig = extractFloat32Frac( a );
7658 aExp = extractFloat32Exp( a );
7659 aSign = extractFloat32Sign( a );
7660
7661 if ( aExp == 0xFF ) {
7662 if ( aSig ) {
7663 return propagateFloat32NaN(a, a, status);
7664 }
7665 return a;
7666 }
7667 if (aExp != 0) {
7668 aSig |= 0x00800000;
7669 } else if (aSig == 0) {
7670 return a;
7671 } else {
7672 aExp++;
7673 }
7674
7675 if (n > 0x200) {
7676 n = 0x200;
7677 } else if (n < -0x200) {
7678 n = -0x200;
7679 }
7680
7681 aExp += n - 1;
7682 aSig <<= 7;
7683 return normalizeRoundAndPackFloat32(aSign, aExp, aSig, status);
7684}
7685
7686float64 float64_scalbn(float64 a, int n, float_status *status)
7687{
7688 flag aSign;
7689 int16_t aExp;
7690 uint64_t aSig;
7691
7692 a = float64_squash_input_denormal(a, status);
7693 aSig = extractFloat64Frac( a );
7694 aExp = extractFloat64Exp( a );
7695 aSign = extractFloat64Sign( a );
7696
7697 if ( aExp == 0x7FF ) {
7698 if ( aSig ) {
7699 return propagateFloat64NaN(a, a, status);
7700 }
7701 return a;
7702 }
7703 if (aExp != 0) {
7704 aSig |= LIT64( 0x0010000000000000 );
7705 } else if (aSig == 0) {
7706 return a;
7707 } else {
7708 aExp++;
7709 }
7710
7711 if (n > 0x1000) {
7712 n = 0x1000;
7713 } else if (n < -0x1000) {
7714 n = -0x1000;
7715 }
7716
7717 aExp += n - 1;
7718 aSig <<= 10;
7719 return normalizeRoundAndPackFloat64(aSign, aExp, aSig, status);
7720}
7721
7722floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
7723{
7724 flag aSign;
7725 int32_t aExp;
7726 uint64_t aSig;
7727
7728 if (floatx80_invalid_encoding(a)) {
7729 float_raise(float_flag_invalid, status);
7730 return floatx80_default_nan(status);
7731 }
7732 aSig = extractFloatx80Frac( a );
7733 aExp = extractFloatx80Exp( a );
7734 aSign = extractFloatx80Sign( a );
7735
7736 if ( aExp == 0x7FFF ) {
7737 if ( aSig<<1 ) {
7738 return propagateFloatx80NaN(a, a, status);
7739 }
7740 return a;
7741 }
7742
7743 if (aExp == 0) {
7744 if (aSig == 0) {
7745 return a;
7746 }
7747 aExp++;
7748 }
7749
7750 if (n > 0x10000) {
7751 n = 0x10000;
7752 } else if (n < -0x10000) {
7753 n = -0x10000;
7754 }
7755
7756 aExp += n;
7757 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7758 aSign, aExp, aSig, 0, status);
7759}
7760
7761float128 float128_scalbn(float128 a, int n, float_status *status)
7762{
7763 flag aSign;
7764 int32_t aExp;
7765 uint64_t aSig0, aSig1;
7766
7767 aSig1 = extractFloat128Frac1( a );
7768 aSig0 = extractFloat128Frac0( a );
7769 aExp = extractFloat128Exp( a );
7770 aSign = extractFloat128Sign( a );
7771 if ( aExp == 0x7FFF ) {
7772 if ( aSig0 | aSig1 ) {
7773 return propagateFloat128NaN(a, a, status);
7774 }
7775 return a;
7776 }
7777 if (aExp != 0) {
7778 aSig0 |= LIT64( 0x0001000000000000 );
7779 } else if (aSig0 == 0 && aSig1 == 0) {
7780 return a;
7781 } else {
7782 aExp++;
7783 }
7784
7785 if (n > 0x10000) {
7786 n = 0x10000;
7787 } else if (n < -0x10000) {
7788 n = -0x10000;
7789 }
7790
7791 aExp += n - 1;
7792 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
7793 , status);
7794
7795}
7796