1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85#include "qemu/osdep.h"
86
87#include "fpu/softfloat.h"
88
89
90
91
92
93
94
95
96#include "softfloat-macros.h"
97
98
99
100
101
102
103
104
105
106#include "softfloat-specialize.h"
107
108
109
110
111
112static inline uint32_t extractFloat16Frac(float16 a)
113{
114 return float16_val(a) & 0x3ff;
115}
116
117
118
119
120
121static inline int extractFloat16Exp(float16 a)
122{
123 return (float16_val(a) >> 10) & 0x1f;
124}
125
126
127
128
129
130static inline flag extractFloat16Sign(float16 a)
131{
132 return float16_val(a)>>15;
133}
134
135
136
137
138
139
140
141
142
143
144
145
146static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
147{
148 int8_t roundingMode;
149 flag roundNearestEven;
150 int8_t roundIncrement, roundBits;
151 int32_t z;
152
153 roundingMode = status->float_rounding_mode;
154 roundNearestEven = ( roundingMode == float_round_nearest_even );
155 switch (roundingMode) {
156 case float_round_nearest_even:
157 case float_round_ties_away:
158 roundIncrement = 0x40;
159 break;
160 case float_round_to_zero:
161 roundIncrement = 0;
162 break;
163 case float_round_up:
164 roundIncrement = zSign ? 0 : 0x7f;
165 break;
166 case float_round_down:
167 roundIncrement = zSign ? 0x7f : 0;
168 break;
169 default:
170 abort();
171 }
172 roundBits = absZ & 0x7F;
173 absZ = ( absZ + roundIncrement )>>7;
174 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
175 z = absZ;
176 if ( zSign ) z = - z;
177 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
178 float_raise(float_flag_invalid, status);
179 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
180 }
181 if (roundBits) {
182 status->float_exception_flags |= float_flag_inexact;
183 }
184 return z;
185
186}
187
188
189
190
191
192
193
194
195
196
197
198
199
200static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
201 float_status *status)
202{
203 int8_t roundingMode;
204 flag roundNearestEven, increment;
205 int64_t z;
206
207 roundingMode = status->float_rounding_mode;
208 roundNearestEven = ( roundingMode == float_round_nearest_even );
209 switch (roundingMode) {
210 case float_round_nearest_even:
211 case float_round_ties_away:
212 increment = ((int64_t) absZ1 < 0);
213 break;
214 case float_round_to_zero:
215 increment = 0;
216 break;
217 case float_round_up:
218 increment = !zSign && absZ1;
219 break;
220 case float_round_down:
221 increment = zSign && absZ1;
222 break;
223 default:
224 abort();
225 }
226 if ( increment ) {
227 ++absZ0;
228 if ( absZ0 == 0 ) goto overflow;
229 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
230 }
231 z = absZ0;
232 if ( zSign ) z = - z;
233 if ( z && ( ( z < 0 ) ^ zSign ) ) {
234 overflow:
235 float_raise(float_flag_invalid, status);
236 return
237 zSign ? (int64_t) LIT64( 0x8000000000000000 )
238 : LIT64( 0x7FFFFFFFFFFFFFFF );
239 }
240 if (absZ1) {
241 status->float_exception_flags |= float_flag_inexact;
242 }
243 return z;
244
245}
246
247
248
249
250
251
252
253
254
255
256
257static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
258 uint64_t absZ1, float_status *status)
259{
260 int8_t roundingMode;
261 flag roundNearestEven, increment;
262
263 roundingMode = status->float_rounding_mode;
264 roundNearestEven = (roundingMode == float_round_nearest_even);
265 switch (roundingMode) {
266 case float_round_nearest_even:
267 case float_round_ties_away:
268 increment = ((int64_t)absZ1 < 0);
269 break;
270 case float_round_to_zero:
271 increment = 0;
272 break;
273 case float_round_up:
274 increment = !zSign && absZ1;
275 break;
276 case float_round_down:
277 increment = zSign && absZ1;
278 break;
279 default:
280 abort();
281 }
282 if (increment) {
283 ++absZ0;
284 if (absZ0 == 0) {
285 float_raise(float_flag_invalid, status);
286 return LIT64(0xFFFFFFFFFFFFFFFF);
287 }
288 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
289 }
290
291 if (zSign && absZ0) {
292 float_raise(float_flag_invalid, status);
293 return 0;
294 }
295
296 if (absZ1) {
297 status->float_exception_flags |= float_flag_inexact;
298 }
299 return absZ0;
300}
301
302
303
304
305
306static inline uint32_t extractFloat32Frac( float32 a )
307{
308
309 return float32_val(a) & 0x007FFFFF;
310
311}
312
313
314
315
316
317static inline int extractFloat32Exp(float32 a)
318{
319
320 return ( float32_val(a)>>23 ) & 0xFF;
321
322}
323
324
325
326
327
328static inline flag extractFloat32Sign( float32 a )
329{
330
331 return float32_val(a)>>31;
332
333}
334
335
336
337
338
339float32 float32_squash_input_denormal(float32 a, float_status *status)
340{
341 if (status->flush_inputs_to_zero) {
342 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
343 float_raise(float_flag_input_denormal, status);
344 return make_float32(float32_val(a) & 0x80000000);
345 }
346 }
347 return a;
348}
349
350
351
352
353
354
355
356
357static void
358 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
359{
360 int8_t shiftCount;
361
362 shiftCount = countLeadingZeros32( aSig ) - 8;
363 *zSigPtr = aSig<<shiftCount;
364 *zExpPtr = 1 - shiftCount;
365
366}
367
368
369
370
371
372
373
374
375
376
377
378
379static inline float32 packFloat32(flag zSign, int zExp, uint32_t zSig)
380{
381
382 return make_float32(
383 ( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
384
385}
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
410 float_status *status)
411{
412 int8_t roundingMode;
413 flag roundNearestEven;
414 int8_t roundIncrement, roundBits;
415 flag isTiny;
416
417 roundingMode = status->float_rounding_mode;
418 roundNearestEven = ( roundingMode == float_round_nearest_even );
419 switch (roundingMode) {
420 case float_round_nearest_even:
421 case float_round_ties_away:
422 roundIncrement = 0x40;
423 break;
424 case float_round_to_zero:
425 roundIncrement = 0;
426 break;
427 case float_round_up:
428 roundIncrement = zSign ? 0 : 0x7f;
429 break;
430 case float_round_down:
431 roundIncrement = zSign ? 0x7f : 0;
432 break;
433 default:
434 abort();
435 break;
436 }
437 roundBits = zSig & 0x7F;
438 if ( 0xFD <= (uint16_t) zExp ) {
439 if ( ( 0xFD < zExp )
440 || ( ( zExp == 0xFD )
441 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
442 ) {
443 float_raise(float_flag_overflow | float_flag_inexact, status);
444 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
445 }
446 if ( zExp < 0 ) {
447 if (status->flush_to_zero) {
448 float_raise(float_flag_output_denormal, status);
449 return packFloat32(zSign, 0, 0);
450 }
451 isTiny =
452 (status->float_detect_tininess
453 == float_tininess_before_rounding)
454 || ( zExp < -1 )
455 || ( zSig + roundIncrement < 0x80000000 );
456 shift32RightJamming( zSig, - zExp, &zSig );
457 zExp = 0;
458 roundBits = zSig & 0x7F;
459 if (isTiny && roundBits) {
460 float_raise(float_flag_underflow, status);
461 }
462 }
463 }
464 if (roundBits) {
465 status->float_exception_flags |= float_flag_inexact;
466 }
467 zSig = ( zSig + roundIncrement )>>7;
468 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
469 if ( zSig == 0 ) zExp = 0;
470 return packFloat32( zSign, zExp, zSig );
471
472}
473
474
475
476
477
478
479
480
481
482
483static float32
484 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
485 float_status *status)
486{
487 int8_t shiftCount;
488
489 shiftCount = countLeadingZeros32( zSig ) - 1;
490 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
491 status);
492
493}
494
495
496
497
498
499static inline uint64_t extractFloat64Frac( float64 a )
500{
501
502 return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
503
504}
505
506
507
508
509
510static inline int extractFloat64Exp(float64 a)
511{
512
513 return ( float64_val(a)>>52 ) & 0x7FF;
514
515}
516
517
518
519
520
521static inline flag extractFloat64Sign( float64 a )
522{
523
524 return float64_val(a)>>63;
525
526}
527
528
529
530
531
532float64 float64_squash_input_denormal(float64 a, float_status *status)
533{
534 if (status->flush_inputs_to_zero) {
535 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
536 float_raise(float_flag_input_denormal, status);
537 return make_float64(float64_val(a) & (1ULL << 63));
538 }
539 }
540 return a;
541}
542
543
544
545
546
547
548
549
550static void
551 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
552{
553 int8_t shiftCount;
554
555 shiftCount = countLeadingZeros64( aSig ) - 11;
556 *zSigPtr = aSig<<shiftCount;
557 *zExpPtr = 1 - shiftCount;
558
559}
560
561
562
563
564
565
566
567
568
569
570
571
572static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
573{
574
575 return make_float64(
576 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
577
578}
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
603 float_status *status)
604{
605 int8_t roundingMode;
606 flag roundNearestEven;
607 int roundIncrement, roundBits;
608 flag isTiny;
609
610 roundingMode = status->float_rounding_mode;
611 roundNearestEven = ( roundingMode == float_round_nearest_even );
612 switch (roundingMode) {
613 case float_round_nearest_even:
614 case float_round_ties_away:
615 roundIncrement = 0x200;
616 break;
617 case float_round_to_zero:
618 roundIncrement = 0;
619 break;
620 case float_round_up:
621 roundIncrement = zSign ? 0 : 0x3ff;
622 break;
623 case float_round_down:
624 roundIncrement = zSign ? 0x3ff : 0;
625 break;
626 default:
627 abort();
628 }
629 roundBits = zSig & 0x3FF;
630 if ( 0x7FD <= (uint16_t) zExp ) {
631 if ( ( 0x7FD < zExp )
632 || ( ( zExp == 0x7FD )
633 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
634 ) {
635 float_raise(float_flag_overflow | float_flag_inexact, status);
636 return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
637 }
638 if ( zExp < 0 ) {
639 if (status->flush_to_zero) {
640 float_raise(float_flag_output_denormal, status);
641 return packFloat64(zSign, 0, 0);
642 }
643 isTiny =
644 (status->float_detect_tininess
645 == float_tininess_before_rounding)
646 || ( zExp < -1 )
647 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
648 shift64RightJamming( zSig, - zExp, &zSig );
649 zExp = 0;
650 roundBits = zSig & 0x3FF;
651 if (isTiny && roundBits) {
652 float_raise(float_flag_underflow, status);
653 }
654 }
655 }
656 if (roundBits) {
657 status->float_exception_flags |= float_flag_inexact;
658 }
659 zSig = ( zSig + roundIncrement )>>10;
660 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
661 if ( zSig == 0 ) zExp = 0;
662 return packFloat64( zSign, zExp, zSig );
663
664}
665
666
667
668
669
670
671
672
673
674
675static float64
676 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
677 float_status *status)
678{
679 int8_t shiftCount;
680
681 shiftCount = countLeadingZeros64( zSig ) - 1;
682 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
683 status);
684
685}
686
687
688
689
690
691
692static inline uint64_t extractFloatx80Frac( floatx80 a )
693{
694
695 return a.low;
696
697}
698
699
700
701
702
703
704static inline int32_t extractFloatx80Exp( floatx80 a )
705{
706
707 return a.high & 0x7FFF;
708
709}
710
711
712
713
714
715
716static inline flag extractFloatx80Sign( floatx80 a )
717{
718
719 return a.high>>15;
720
721}
722
723
724
725
726
727
728
729
730static void
731 normalizeFloatx80Subnormal( uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr )
732{
733 int8_t shiftCount;
734
735 shiftCount = countLeadingZeros64( aSig );
736 *zSigPtr = aSig<<shiftCount;
737 *zExpPtr = 1 - shiftCount;
738
739}
740
741
742
743
744
745
746static inline floatx80 packFloatx80( flag zSign, int32_t zExp, uint64_t zSig )
747{
748 floatx80 z;
749
750 z.low = zSig;
751 z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
752 return z;
753
754}
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780static floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
781 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
782 float_status *status)
783{
784 int8_t roundingMode;
785 flag roundNearestEven, increment, isTiny;
786 int64_t roundIncrement, roundMask, roundBits;
787
788 roundingMode = status->float_rounding_mode;
789 roundNearestEven = ( roundingMode == float_round_nearest_even );
790 if ( roundingPrecision == 80 ) goto precision80;
791 if ( roundingPrecision == 64 ) {
792 roundIncrement = LIT64( 0x0000000000000400 );
793 roundMask = LIT64( 0x00000000000007FF );
794 }
795 else if ( roundingPrecision == 32 ) {
796 roundIncrement = LIT64( 0x0000008000000000 );
797 roundMask = LIT64( 0x000000FFFFFFFFFF );
798 }
799 else {
800 goto precision80;
801 }
802 zSig0 |= ( zSig1 != 0 );
803 switch (roundingMode) {
804 case float_round_nearest_even:
805 case float_round_ties_away:
806 break;
807 case float_round_to_zero:
808 roundIncrement = 0;
809 break;
810 case float_round_up:
811 roundIncrement = zSign ? 0 : roundMask;
812 break;
813 case float_round_down:
814 roundIncrement = zSign ? roundMask : 0;
815 break;
816 default:
817 abort();
818 }
819 roundBits = zSig0 & roundMask;
820 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
821 if ( ( 0x7FFE < zExp )
822 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
823 ) {
824 goto overflow;
825 }
826 if ( zExp <= 0 ) {
827 if (status->flush_to_zero) {
828 float_raise(float_flag_output_denormal, status);
829 return packFloatx80(zSign, 0, 0);
830 }
831 isTiny =
832 (status->float_detect_tininess
833 == float_tininess_before_rounding)
834 || ( zExp < 0 )
835 || ( zSig0 <= zSig0 + roundIncrement );
836 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
837 zExp = 0;
838 roundBits = zSig0 & roundMask;
839 if (isTiny && roundBits) {
840 float_raise(float_flag_underflow, status);
841 }
842 if (roundBits) {
843 status->float_exception_flags |= float_flag_inexact;
844 }
845 zSig0 += roundIncrement;
846 if ( (int64_t) zSig0 < 0 ) zExp = 1;
847 roundIncrement = roundMask + 1;
848 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
849 roundMask |= roundIncrement;
850 }
851 zSig0 &= ~ roundMask;
852 return packFloatx80( zSign, zExp, zSig0 );
853 }
854 }
855 if (roundBits) {
856 status->float_exception_flags |= float_flag_inexact;
857 }
858 zSig0 += roundIncrement;
859 if ( zSig0 < roundIncrement ) {
860 ++zExp;
861 zSig0 = LIT64( 0x8000000000000000 );
862 }
863 roundIncrement = roundMask + 1;
864 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
865 roundMask |= roundIncrement;
866 }
867 zSig0 &= ~ roundMask;
868 if ( zSig0 == 0 ) zExp = 0;
869 return packFloatx80( zSign, zExp, zSig0 );
870 precision80:
871 switch (roundingMode) {
872 case float_round_nearest_even:
873 case float_round_ties_away:
874 increment = ((int64_t)zSig1 < 0);
875 break;
876 case float_round_to_zero:
877 increment = 0;
878 break;
879 case float_round_up:
880 increment = !zSign && zSig1;
881 break;
882 case float_round_down:
883 increment = zSign && zSig1;
884 break;
885 default:
886 abort();
887 }
888 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
889 if ( ( 0x7FFE < zExp )
890 || ( ( zExp == 0x7FFE )
891 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
892 && increment
893 )
894 ) {
895 roundMask = 0;
896 overflow:
897 float_raise(float_flag_overflow | float_flag_inexact, status);
898 if ( ( roundingMode == float_round_to_zero )
899 || ( zSign && ( roundingMode == float_round_up ) )
900 || ( ! zSign && ( roundingMode == float_round_down ) )
901 ) {
902 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
903 }
904 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
905 }
906 if ( zExp <= 0 ) {
907 isTiny =
908 (status->float_detect_tininess
909 == float_tininess_before_rounding)
910 || ( zExp < 0 )
911 || ! increment
912 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
913 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
914 zExp = 0;
915 if (isTiny && zSig1) {
916 float_raise(float_flag_underflow, status);
917 }
918 if (zSig1) {
919 status->float_exception_flags |= float_flag_inexact;
920 }
921 switch (roundingMode) {
922 case float_round_nearest_even:
923 case float_round_ties_away:
924 increment = ((int64_t)zSig1 < 0);
925 break;
926 case float_round_to_zero:
927 increment = 0;
928 break;
929 case float_round_up:
930 increment = !zSign && zSig1;
931 break;
932 case float_round_down:
933 increment = zSign && zSig1;
934 break;
935 default:
936 abort();
937 }
938 if ( increment ) {
939 ++zSig0;
940 zSig0 &=
941 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
942 if ( (int64_t) zSig0 < 0 ) zExp = 1;
943 }
944 return packFloatx80( zSign, zExp, zSig0 );
945 }
946 }
947 if (zSig1) {
948 status->float_exception_flags |= float_flag_inexact;
949 }
950 if ( increment ) {
951 ++zSig0;
952 if ( zSig0 == 0 ) {
953 ++zExp;
954 zSig0 = LIT64( 0x8000000000000000 );
955 }
956 else {
957 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
958 }
959 }
960 else {
961 if ( zSig0 == 0 ) zExp = 0;
962 }
963 return packFloatx80( zSign, zExp, zSig0 );
964
965}
966
967
968
969
970
971
972
973
974
975
976static floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
977 flag zSign, int32_t zExp,
978 uint64_t zSig0, uint64_t zSig1,
979 float_status *status)
980{
981 int8_t shiftCount;
982
983 if ( zSig0 == 0 ) {
984 zSig0 = zSig1;
985 zSig1 = 0;
986 zExp -= 64;
987 }
988 shiftCount = countLeadingZeros64( zSig0 );
989 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
990 zExp -= shiftCount;
991 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
992 zSig0, zSig1, status);
993
994}
995
996
997
998
999
1000
1001static inline uint64_t extractFloat128Frac1( float128 a )
1002{
1003
1004 return a.low;
1005
1006}
1007
1008
1009
1010
1011
1012
1013static inline uint64_t extractFloat128Frac0( float128 a )
1014{
1015
1016 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
1017
1018}
1019
1020
1021
1022
1023
1024
1025static inline int32_t extractFloat128Exp( float128 a )
1026{
1027
1028 return ( a.high>>48 ) & 0x7FFF;
1029
1030}
1031
1032
1033
1034
1035
1036static inline flag extractFloat128Sign( float128 a )
1037{
1038
1039 return a.high>>63;
1040
1041}
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053static void
1054 normalizeFloat128Subnormal(
1055 uint64_t aSig0,
1056 uint64_t aSig1,
1057 int32_t *zExpPtr,
1058 uint64_t *zSig0Ptr,
1059 uint64_t *zSig1Ptr
1060 )
1061{
1062 int8_t shiftCount;
1063
1064 if ( aSig0 == 0 ) {
1065 shiftCount = countLeadingZeros64( aSig1 ) - 15;
1066 if ( shiftCount < 0 ) {
1067 *zSig0Ptr = aSig1>>( - shiftCount );
1068 *zSig1Ptr = aSig1<<( shiftCount & 63 );
1069 }
1070 else {
1071 *zSig0Ptr = aSig1<<shiftCount;
1072 *zSig1Ptr = 0;
1073 }
1074 *zExpPtr = - shiftCount - 63;
1075 }
1076 else {
1077 shiftCount = countLeadingZeros64( aSig0 ) - 15;
1078 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
1079 *zExpPtr = 1 - shiftCount;
1080 }
1081
1082}
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097static inline float128
1098 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
1099{
1100 float128 z;
1101
1102 z.low = zSig1;
1103 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
1104 return z;
1105
1106}
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
1130 uint64_t zSig0, uint64_t zSig1,
1131 uint64_t zSig2, float_status *status)
1132{
1133 int8_t roundingMode;
1134 flag roundNearestEven, increment, isTiny;
1135
1136 roundingMode = status->float_rounding_mode;
1137 roundNearestEven = ( roundingMode == float_round_nearest_even );
1138 switch (roundingMode) {
1139 case float_round_nearest_even:
1140 case float_round_ties_away:
1141 increment = ((int64_t)zSig2 < 0);
1142 break;
1143 case float_round_to_zero:
1144 increment = 0;
1145 break;
1146 case float_round_up:
1147 increment = !zSign && zSig2;
1148 break;
1149 case float_round_down:
1150 increment = zSign && zSig2;
1151 break;
1152 default:
1153 abort();
1154 }
1155 if ( 0x7FFD <= (uint32_t) zExp ) {
1156 if ( ( 0x7FFD < zExp )
1157 || ( ( zExp == 0x7FFD )
1158 && eq128(
1159 LIT64( 0x0001FFFFFFFFFFFF ),
1160 LIT64( 0xFFFFFFFFFFFFFFFF ),
1161 zSig0,
1162 zSig1
1163 )
1164 && increment
1165 )
1166 ) {
1167 float_raise(float_flag_overflow | float_flag_inexact, status);
1168 if ( ( roundingMode == float_round_to_zero )
1169 || ( zSign && ( roundingMode == float_round_up ) )
1170 || ( ! zSign && ( roundingMode == float_round_down ) )
1171 ) {
1172 return
1173 packFloat128(
1174 zSign,
1175 0x7FFE,
1176 LIT64( 0x0000FFFFFFFFFFFF ),
1177 LIT64( 0xFFFFFFFFFFFFFFFF )
1178 );
1179 }
1180 return packFloat128( zSign, 0x7FFF, 0, 0 );
1181 }
1182 if ( zExp < 0 ) {
1183 if (status->flush_to_zero) {
1184 float_raise(float_flag_output_denormal, status);
1185 return packFloat128(zSign, 0, 0, 0);
1186 }
1187 isTiny =
1188 (status->float_detect_tininess
1189 == float_tininess_before_rounding)
1190 || ( zExp < -1 )
1191 || ! increment
1192 || lt128(
1193 zSig0,
1194 zSig1,
1195 LIT64( 0x0001FFFFFFFFFFFF ),
1196 LIT64( 0xFFFFFFFFFFFFFFFF )
1197 );
1198 shift128ExtraRightJamming(
1199 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1200 zExp = 0;
1201 if (isTiny && zSig2) {
1202 float_raise(float_flag_underflow, status);
1203 }
1204 switch (roundingMode) {
1205 case float_round_nearest_even:
1206 case float_round_ties_away:
1207 increment = ((int64_t)zSig2 < 0);
1208 break;
1209 case float_round_to_zero:
1210 increment = 0;
1211 break;
1212 case float_round_up:
1213 increment = !zSign && zSig2;
1214 break;
1215 case float_round_down:
1216 increment = zSign && zSig2;
1217 break;
1218 default:
1219 abort();
1220 }
1221 }
1222 }
1223 if (zSig2) {
1224 status->float_exception_flags |= float_flag_inexact;
1225 }
1226 if ( increment ) {
1227 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1228 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1229 }
1230 else {
1231 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1232 }
1233 return packFloat128( zSign, zExp, zSig0, zSig1 );
1234
1235}
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
1248 uint64_t zSig0, uint64_t zSig1,
1249 float_status *status)
1250{
1251 int8_t shiftCount;
1252 uint64_t zSig2;
1253
1254 if ( zSig0 == 0 ) {
1255 zSig0 = zSig1;
1256 zSig1 = 0;
1257 zExp -= 64;
1258 }
1259 shiftCount = countLeadingZeros64( zSig0 ) - 15;
1260 if ( 0 <= shiftCount ) {
1261 zSig2 = 0;
1262 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1263 }
1264 else {
1265 shift128ExtraRightJamming(
1266 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1267 }
1268 zExp -= shiftCount;
1269 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
1270
1271}
1272
1273
1274
1275
1276
1277
1278
1279float32 int32_to_float32(int32_t a, float_status *status)
1280{
1281 flag zSign;
1282
1283 if ( a == 0 ) return float32_zero;
1284 if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1285 zSign = ( a < 0 );
1286 return normalizeRoundAndPackFloat32(zSign, 0x9C, zSign ? -a : a, status);
1287}
1288
1289
1290
1291
1292
1293
1294
1295float64 int32_to_float64(int32_t a, float_status *status)
1296{
1297 flag zSign;
1298 uint32_t absA;
1299 int8_t shiftCount;
1300 uint64_t zSig;
1301
1302 if ( a == 0 ) return float64_zero;
1303 zSign = ( a < 0 );
1304 absA = zSign ? - a : a;
1305 shiftCount = countLeadingZeros32( absA ) + 21;
1306 zSig = absA;
1307 return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1308
1309}
1310
1311
1312
1313
1314
1315
1316
1317
1318floatx80 int32_to_floatx80(int32_t a, float_status *status)
1319{
1320 flag zSign;
1321 uint32_t absA;
1322 int8_t shiftCount;
1323 uint64_t zSig;
1324
1325 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1326 zSign = ( a < 0 );
1327 absA = zSign ? - a : a;
1328 shiftCount = countLeadingZeros32( absA ) + 32;
1329 zSig = absA;
1330 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1331
1332}
1333
1334
1335
1336
1337
1338
1339
1340float128 int32_to_float128(int32_t a, float_status *status)
1341{
1342 flag zSign;
1343 uint32_t absA;
1344 int8_t shiftCount;
1345 uint64_t zSig0;
1346
1347 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1348 zSign = ( a < 0 );
1349 absA = zSign ? - a : a;
1350 shiftCount = countLeadingZeros32( absA ) + 17;
1351 zSig0 = absA;
1352 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1353
1354}
1355
1356
1357
1358
1359
1360
1361
1362float32 int64_to_float32(int64_t a, float_status *status)
1363{
1364 flag zSign;
1365 uint64_t absA;
1366 int8_t shiftCount;
1367
1368 if ( a == 0 ) return float32_zero;
1369 zSign = ( a < 0 );
1370 absA = zSign ? - a : a;
1371 shiftCount = countLeadingZeros64( absA ) - 40;
1372 if ( 0 <= shiftCount ) {
1373 return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1374 }
1375 else {
1376 shiftCount += 7;
1377 if ( shiftCount < 0 ) {
1378 shift64RightJamming( absA, - shiftCount, &absA );
1379 }
1380 else {
1381 absA <<= shiftCount;
1382 }
1383 return roundAndPackFloat32(zSign, 0x9C - shiftCount, absA, status);
1384 }
1385
1386}
1387
1388
1389
1390
1391
1392
1393
1394float64 int64_to_float64(int64_t a, float_status *status)
1395{
1396 flag zSign;
1397
1398 if ( a == 0 ) return float64_zero;
1399 if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
1400 return packFloat64( 1, 0x43E, 0 );
1401 }
1402 zSign = ( a < 0 );
1403 return normalizeRoundAndPackFloat64(zSign, 0x43C, zSign ? -a : a, status);
1404}
1405
1406
1407
1408
1409
1410
1411
1412
1413floatx80 int64_to_floatx80(int64_t a, float_status *status)
1414{
1415 flag zSign;
1416 uint64_t absA;
1417 int8_t shiftCount;
1418
1419 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1420 zSign = ( a < 0 );
1421 absA = zSign ? - a : a;
1422 shiftCount = countLeadingZeros64( absA );
1423 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1424
1425}
1426
1427
1428
1429
1430
1431
1432
1433float128 int64_to_float128(int64_t a, float_status *status)
1434{
1435 flag zSign;
1436 uint64_t absA;
1437 int8_t shiftCount;
1438 int32_t zExp;
1439 uint64_t zSig0, zSig1;
1440
1441 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1442 zSign = ( a < 0 );
1443 absA = zSign ? - a : a;
1444 shiftCount = countLeadingZeros64( absA ) + 49;
1445 zExp = 0x406E - shiftCount;
1446 if ( 64 <= shiftCount ) {
1447 zSig1 = 0;
1448 zSig0 = absA;
1449 shiftCount -= 64;
1450 }
1451 else {
1452 zSig1 = absA;
1453 zSig0 = 0;
1454 }
1455 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1456 return packFloat128( zSign, zExp, zSig0, zSig1 );
1457
1458}
1459
1460
1461
1462
1463
1464
1465
1466float32 uint64_to_float32(uint64_t a, float_status *status)
1467{
1468 int shiftcount;
1469
1470 if (a == 0) {
1471 return float32_zero;
1472 }
1473
1474
1475
1476
1477
1478 shiftcount = countLeadingZeros64(a) - 40;
1479 if (shiftcount >= 0) {
1480 return packFloat32(0, 0x95 - shiftcount, a << shiftcount);
1481 }
1482
1483
1484
1485 shiftcount += 7;
1486 if (shiftcount < 0) {
1487 shift64RightJamming(a, -shiftcount, &a);
1488 } else {
1489 a <<= shiftcount;
1490 }
1491
1492 return roundAndPackFloat32(0, 0x9c - shiftcount, a, status);
1493}
1494
1495
1496
1497
1498
1499
1500
1501float64 uint64_to_float64(uint64_t a, float_status *status)
1502{
1503 int exp = 0x43C;
1504 int shiftcount;
1505
1506 if (a == 0) {
1507 return float64_zero;
1508 }
1509
1510 shiftcount = countLeadingZeros64(a) - 1;
1511 if (shiftcount < 0) {
1512 shift64RightJamming(a, -shiftcount, &a);
1513 } else {
1514 a <<= shiftcount;
1515 }
1516 return roundAndPackFloat64(0, exp - shiftcount, a, status);
1517}
1518
1519
1520
1521
1522
1523
1524
1525float128 uint64_to_float128(uint64_t a, float_status *status)
1526{
1527 if (a == 0) {
1528 return float128_zero;
1529 }
1530 return normalizeRoundAndPackFloat128(0, 0x406E, a, 0, status);
1531}
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543int32_t float32_to_int32(float32 a, float_status *status)
1544{
1545 flag aSign;
1546 int aExp;
1547 int shiftCount;
1548 uint32_t aSig;
1549 uint64_t aSig64;
1550
1551 a = float32_squash_input_denormal(a, status);
1552 aSig = extractFloat32Frac( a );
1553 aExp = extractFloat32Exp( a );
1554 aSign = extractFloat32Sign( a );
1555 if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1556 if ( aExp ) aSig |= 0x00800000;
1557 shiftCount = 0xAF - aExp;
1558 aSig64 = aSig;
1559 aSig64 <<= 32;
1560 if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1561 return roundAndPackInt32(aSign, aSig64, status);
1562
1563}
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575int32_t float32_to_int32_round_to_zero(float32 a, float_status *status)
1576{
1577 flag aSign;
1578 int aExp;
1579 int shiftCount;
1580 uint32_t aSig;
1581 int32_t z;
1582 a = float32_squash_input_denormal(a, status);
1583
1584 aSig = extractFloat32Frac( a );
1585 aExp = extractFloat32Exp( a );
1586 aSign = extractFloat32Sign( a );
1587 shiftCount = aExp - 0x9E;
1588 if ( 0 <= shiftCount ) {
1589 if ( float32_val(a) != 0xCF000000 ) {
1590 float_raise(float_flag_invalid, status);
1591 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1592 }
1593 return (int32_t) 0x80000000;
1594 }
1595 else if ( aExp <= 0x7E ) {
1596 if (aExp | aSig) {
1597 status->float_exception_flags |= float_flag_inexact;
1598 }
1599 return 0;
1600 }
1601 aSig = ( aSig | 0x00800000 )<<8;
1602 z = aSig>>( - shiftCount );
1603 if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
1604 status->float_exception_flags |= float_flag_inexact;
1605 }
1606 if ( aSign ) z = - z;
1607 return z;
1608
1609}
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621int16_t float32_to_int16_round_to_zero(float32 a, float_status *status)
1622{
1623 flag aSign;
1624 int aExp;
1625 int shiftCount;
1626 uint32_t aSig;
1627 int32_t z;
1628
1629 aSig = extractFloat32Frac( a );
1630 aExp = extractFloat32Exp( a );
1631 aSign = extractFloat32Sign( a );
1632 shiftCount = aExp - 0x8E;
1633 if ( 0 <= shiftCount ) {
1634 if ( float32_val(a) != 0xC7000000 ) {
1635 float_raise(float_flag_invalid, status);
1636 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1637 return 0x7FFF;
1638 }
1639 }
1640 return (int32_t) 0xffff8000;
1641 }
1642 else if ( aExp <= 0x7E ) {
1643 if ( aExp | aSig ) {
1644 status->float_exception_flags |= float_flag_inexact;
1645 }
1646 return 0;
1647 }
1648 shiftCount -= 0x10;
1649 aSig = ( aSig | 0x00800000 )<<8;
1650 z = aSig>>( - shiftCount );
1651 if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
1652 status->float_exception_flags |= float_flag_inexact;
1653 }
1654 if ( aSign ) {
1655 z = - z;
1656 }
1657 return z;
1658
1659}
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671int64_t float32_to_int64(float32 a, float_status *status)
1672{
1673 flag aSign;
1674 int aExp;
1675 int shiftCount;
1676 uint32_t aSig;
1677 uint64_t aSig64, aSigExtra;
1678 a = float32_squash_input_denormal(a, status);
1679
1680 aSig = extractFloat32Frac( a );
1681 aExp = extractFloat32Exp( a );
1682 aSign = extractFloat32Sign( a );
1683 shiftCount = 0xBE - aExp;
1684 if ( shiftCount < 0 ) {
1685 float_raise(float_flag_invalid, status);
1686 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1687 return LIT64( 0x7FFFFFFFFFFFFFFF );
1688 }
1689 return (int64_t) LIT64( 0x8000000000000000 );
1690 }
1691 if ( aExp ) aSig |= 0x00800000;
1692 aSig64 = aSig;
1693 aSig64 <<= 40;
1694 shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1695 return roundAndPackInt64(aSign, aSig64, aSigExtra, status);
1696
1697}
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711uint64_t float32_to_uint64(float32 a, float_status *status)
1712{
1713 flag aSign;
1714 int aExp;
1715 int shiftCount;
1716 uint32_t aSig;
1717 uint64_t aSig64, aSigExtra;
1718 a = float32_squash_input_denormal(a, status);
1719
1720 aSig = extractFloat32Frac(a);
1721 aExp = extractFloat32Exp(a);
1722 aSign = extractFloat32Sign(a);
1723 if ((aSign) && (aExp > 126)) {
1724 float_raise(float_flag_invalid, status);
1725 if (float32_is_any_nan(a)) {
1726 return LIT64(0xFFFFFFFFFFFFFFFF);
1727 } else {
1728 return 0;
1729 }
1730 }
1731 shiftCount = 0xBE - aExp;
1732 if (aExp) {
1733 aSig |= 0x00800000;
1734 }
1735 if (shiftCount < 0) {
1736 float_raise(float_flag_invalid, status);
1737 return LIT64(0xFFFFFFFFFFFFFFFF);
1738 }
1739
1740 aSig64 = aSig;
1741 aSig64 <<= 40;
1742 shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra);
1743 return roundAndPackUint64(aSign, aSig64, aSigExtra, status);
1744}
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *status)
1758{
1759 signed char current_rounding_mode = status->float_rounding_mode;
1760 set_float_rounding_mode(float_round_to_zero, status);
1761 int64_t v = float32_to_uint64(a, status);
1762 set_float_rounding_mode(current_rounding_mode, status);
1763 return v;
1764}
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776int64_t float32_to_int64_round_to_zero(float32 a, float_status *status)
1777{
1778 flag aSign;
1779 int aExp;
1780 int shiftCount;
1781 uint32_t aSig;
1782 uint64_t aSig64;
1783 int64_t z;
1784 a = float32_squash_input_denormal(a, status);
1785
1786 aSig = extractFloat32Frac( a );
1787 aExp = extractFloat32Exp( a );
1788 aSign = extractFloat32Sign( a );
1789 shiftCount = aExp - 0xBE;
1790 if ( 0 <= shiftCount ) {
1791 if ( float32_val(a) != 0xDF000000 ) {
1792 float_raise(float_flag_invalid, status);
1793 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1794 return LIT64( 0x7FFFFFFFFFFFFFFF );
1795 }
1796 }
1797 return (int64_t) LIT64( 0x8000000000000000 );
1798 }
1799 else if ( aExp <= 0x7E ) {
1800 if (aExp | aSig) {
1801 status->float_exception_flags |= float_flag_inexact;
1802 }
1803 return 0;
1804 }
1805 aSig64 = aSig | 0x00800000;
1806 aSig64 <<= 40;
1807 z = aSig64>>( - shiftCount );
1808 if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) {
1809 status->float_exception_flags |= float_flag_inexact;
1810 }
1811 if ( aSign ) z = - z;
1812 return z;
1813
1814}
1815
1816
1817
1818
1819
1820
1821
1822
1823float64 float32_to_float64(float32 a, float_status *status)
1824{
1825 flag aSign;
1826 int aExp;
1827 uint32_t aSig;
1828 a = float32_squash_input_denormal(a, status);
1829
1830 aSig = extractFloat32Frac( a );
1831 aExp = extractFloat32Exp( a );
1832 aSign = extractFloat32Sign( a );
1833 if ( aExp == 0xFF ) {
1834 if (aSig) {
1835 return commonNaNToFloat64(float32ToCommonNaN(a, status), status);
1836 }
1837 return packFloat64( aSign, 0x7FF, 0 );
1838 }
1839 if ( aExp == 0 ) {
1840 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1841 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1842 --aExp;
1843 }
1844 return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
1845
1846}
1847
1848
1849
1850
1851
1852
1853
1854
1855floatx80 float32_to_floatx80(float32 a, float_status *status)
1856{
1857 flag aSign;
1858 int aExp;
1859 uint32_t aSig;
1860
1861 a = float32_squash_input_denormal(a, status);
1862 aSig = extractFloat32Frac( a );
1863 aExp = extractFloat32Exp( a );
1864 aSign = extractFloat32Sign( a );
1865 if ( aExp == 0xFF ) {
1866 if (aSig) {
1867 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
1868 }
1869 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1870 }
1871 if ( aExp == 0 ) {
1872 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1873 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1874 }
1875 aSig |= 0x00800000;
1876 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
1877
1878}
1879
1880
1881
1882
1883
1884
1885
1886
1887float128 float32_to_float128(float32 a, float_status *status)
1888{
1889 flag aSign;
1890 int aExp;
1891 uint32_t aSig;
1892
1893 a = float32_squash_input_denormal(a, status);
1894 aSig = extractFloat32Frac( a );
1895 aExp = extractFloat32Exp( a );
1896 aSign = extractFloat32Sign( a );
1897 if ( aExp == 0xFF ) {
1898 if (aSig) {
1899 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
1900 }
1901 return packFloat128( aSign, 0x7FFF, 0, 0 );
1902 }
1903 if ( aExp == 0 ) {
1904 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1905 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1906 --aExp;
1907 }
1908 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
1909
1910}
1911
1912
1913
1914
1915
1916
1917
1918
1919float32 float32_round_to_int(float32 a, float_status *status)
1920{
1921 flag aSign;
1922 int aExp;
1923 uint32_t lastBitMask, roundBitsMask;
1924 uint32_t z;
1925 a = float32_squash_input_denormal(a, status);
1926
1927 aExp = extractFloat32Exp( a );
1928 if ( 0x96 <= aExp ) {
1929 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1930 return propagateFloat32NaN(a, a, status);
1931 }
1932 return a;
1933 }
1934 if ( aExp <= 0x7E ) {
1935 if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a;
1936 status->float_exception_flags |= float_flag_inexact;
1937 aSign = extractFloat32Sign( a );
1938 switch (status->float_rounding_mode) {
1939 case float_round_nearest_even:
1940 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1941 return packFloat32( aSign, 0x7F, 0 );
1942 }
1943 break;
1944 case float_round_ties_away:
1945 if (aExp == 0x7E) {
1946 return packFloat32(aSign, 0x7F, 0);
1947 }
1948 break;
1949 case float_round_down:
1950 return make_float32(aSign ? 0xBF800000 : 0);
1951 case float_round_up:
1952 return make_float32(aSign ? 0x80000000 : 0x3F800000);
1953 }
1954 return packFloat32( aSign, 0, 0 );
1955 }
1956 lastBitMask = 1;
1957 lastBitMask <<= 0x96 - aExp;
1958 roundBitsMask = lastBitMask - 1;
1959 z = float32_val(a);
1960 switch (status->float_rounding_mode) {
1961 case float_round_nearest_even:
1962 z += lastBitMask>>1;
1963 if ((z & roundBitsMask) == 0) {
1964 z &= ~lastBitMask;
1965 }
1966 break;
1967 case float_round_ties_away:
1968 z += lastBitMask >> 1;
1969 break;
1970 case float_round_to_zero:
1971 break;
1972 case float_round_up:
1973 if (!extractFloat32Sign(make_float32(z))) {
1974 z += roundBitsMask;
1975 }
1976 break;
1977 case float_round_down:
1978 if (extractFloat32Sign(make_float32(z))) {
1979 z += roundBitsMask;
1980 }
1981 break;
1982 default:
1983 abort();
1984 }
1985 z &= ~ roundBitsMask;
1986 if (z != float32_val(a)) {
1987 status->float_exception_flags |= float_flag_inexact;
1988 }
1989 return make_float32(z);
1990
1991}
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001static float32 addFloat32Sigs(float32 a, float32 b, flag zSign,
2002 float_status *status)
2003{
2004 int aExp, bExp, zExp;
2005 uint32_t aSig, bSig, zSig;
2006 int expDiff;
2007
2008 aSig = extractFloat32Frac( a );
2009 aExp = extractFloat32Exp( a );
2010 bSig = extractFloat32Frac( b );
2011 bExp = extractFloat32Exp( b );
2012 expDiff = aExp - bExp;
2013 aSig <<= 6;
2014 bSig <<= 6;
2015 if ( 0 < expDiff ) {
2016 if ( aExp == 0xFF ) {
2017 if (aSig) {
2018 return propagateFloat32NaN(a, b, status);
2019 }
2020 return a;
2021 }
2022 if ( bExp == 0 ) {
2023 --expDiff;
2024 }
2025 else {
2026 bSig |= 0x20000000;
2027 }
2028 shift32RightJamming( bSig, expDiff, &bSig );
2029 zExp = aExp;
2030 }
2031 else if ( expDiff < 0 ) {
2032 if ( bExp == 0xFF ) {
2033 if (bSig) {
2034 return propagateFloat32NaN(a, b, status);
2035 }
2036 return packFloat32( zSign, 0xFF, 0 );
2037 }
2038 if ( aExp == 0 ) {
2039 ++expDiff;
2040 }
2041 else {
2042 aSig |= 0x20000000;
2043 }
2044 shift32RightJamming( aSig, - expDiff, &aSig );
2045 zExp = bExp;
2046 }
2047 else {
2048 if ( aExp == 0xFF ) {
2049 if (aSig | bSig) {
2050 return propagateFloat32NaN(a, b, status);
2051 }
2052 return a;
2053 }
2054 if ( aExp == 0 ) {
2055 if (status->flush_to_zero) {
2056 if (aSig | bSig) {
2057 float_raise(float_flag_output_denormal, status);
2058 }
2059 return packFloat32(zSign, 0, 0);
2060 }
2061 return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
2062 }
2063 zSig = 0x40000000 + aSig + bSig;
2064 zExp = aExp;
2065 goto roundAndPack;
2066 }
2067 aSig |= 0x20000000;
2068 zSig = ( aSig + bSig )<<1;
2069 --zExp;
2070 if ( (int32_t) zSig < 0 ) {
2071 zSig = aSig + bSig;
2072 ++zExp;
2073 }
2074 roundAndPack:
2075 return roundAndPackFloat32(zSign, zExp, zSig, status);
2076
2077}
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087static float32 subFloat32Sigs(float32 a, float32 b, flag zSign,
2088 float_status *status)
2089{
2090 int aExp, bExp, zExp;
2091 uint32_t aSig, bSig, zSig;
2092 int expDiff;
2093
2094 aSig = extractFloat32Frac( a );
2095 aExp = extractFloat32Exp( a );
2096 bSig = extractFloat32Frac( b );
2097 bExp = extractFloat32Exp( b );
2098 expDiff = aExp - bExp;
2099 aSig <<= 7;
2100 bSig <<= 7;
2101 if ( 0 < expDiff ) goto aExpBigger;
2102 if ( expDiff < 0 ) goto bExpBigger;
2103 if ( aExp == 0xFF ) {
2104 if (aSig | bSig) {
2105 return propagateFloat32NaN(a, b, status);
2106 }
2107 float_raise(float_flag_invalid, status);
2108 return float32_default_nan;
2109 }
2110 if ( aExp == 0 ) {
2111 aExp = 1;
2112 bExp = 1;
2113 }
2114 if ( bSig < aSig ) goto aBigger;
2115 if ( aSig < bSig ) goto bBigger;
2116 return packFloat32(status->float_rounding_mode == float_round_down, 0, 0);
2117 bExpBigger:
2118 if ( bExp == 0xFF ) {
2119 if (bSig) {
2120 return propagateFloat32NaN(a, b, status);
2121 }
2122 return packFloat32( zSign ^ 1, 0xFF, 0 );
2123 }
2124 if ( aExp == 0 ) {
2125 ++expDiff;
2126 }
2127 else {
2128 aSig |= 0x40000000;
2129 }
2130 shift32RightJamming( aSig, - expDiff, &aSig );
2131 bSig |= 0x40000000;
2132 bBigger:
2133 zSig = bSig - aSig;
2134 zExp = bExp;
2135 zSign ^= 1;
2136 goto normalizeRoundAndPack;
2137 aExpBigger:
2138 if ( aExp == 0xFF ) {
2139 if (aSig) {
2140 return propagateFloat32NaN(a, b, status);
2141 }
2142 return a;
2143 }
2144 if ( bExp == 0 ) {
2145 --expDiff;
2146 }
2147 else {
2148 bSig |= 0x40000000;
2149 }
2150 shift32RightJamming( bSig, expDiff, &bSig );
2151 aSig |= 0x40000000;
2152 aBigger:
2153 zSig = aSig - bSig;
2154 zExp = aExp;
2155 normalizeRoundAndPack:
2156 --zExp;
2157 return normalizeRoundAndPackFloat32(zSign, zExp, zSig, status);
2158
2159}
2160
2161
2162
2163
2164
2165
2166
2167float32 float32_add(float32 a, float32 b, float_status *status)
2168{
2169 flag aSign, bSign;
2170 a = float32_squash_input_denormal(a, status);
2171 b = float32_squash_input_denormal(b, status);
2172
2173 aSign = extractFloat32Sign( a );
2174 bSign = extractFloat32Sign( b );
2175 if ( aSign == bSign ) {
2176 return addFloat32Sigs(a, b, aSign, status);
2177 }
2178 else {
2179 return subFloat32Sigs(a, b, aSign, status);
2180 }
2181
2182}
2183
2184
2185
2186
2187
2188
2189
2190float32 float32_sub(float32 a, float32 b, float_status *status)
2191{
2192 flag aSign, bSign;
2193 a = float32_squash_input_denormal(a, status);
2194 b = float32_squash_input_denormal(b, status);
2195
2196 aSign = extractFloat32Sign( a );
2197 bSign = extractFloat32Sign( b );
2198 if ( aSign == bSign ) {
2199 return subFloat32Sigs(a, b, aSign, status);
2200 }
2201 else {
2202 return addFloat32Sigs(a, b, aSign, status);
2203 }
2204
2205}
2206
2207
2208
2209
2210
2211
2212
2213float32 float32_mul(float32 a, float32 b, float_status *status)
2214{
2215 flag aSign, bSign, zSign;
2216 int aExp, bExp, zExp;
2217 uint32_t aSig, bSig;
2218 uint64_t zSig64;
2219 uint32_t zSig;
2220
2221 a = float32_squash_input_denormal(a, status);
2222 b = float32_squash_input_denormal(b, status);
2223
2224 aSig = extractFloat32Frac( a );
2225 aExp = extractFloat32Exp( a );
2226 aSign = extractFloat32Sign( a );
2227 bSig = extractFloat32Frac( b );
2228 bExp = extractFloat32Exp( b );
2229 bSign = extractFloat32Sign( b );
2230 zSign = aSign ^ bSign;
2231 if ( aExp == 0xFF ) {
2232 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2233 return propagateFloat32NaN(a, b, status);
2234 }
2235 if ( ( bExp | bSig ) == 0 ) {
2236 float_raise(float_flag_invalid, status);
2237 return float32_default_nan;
2238 }
2239 return packFloat32( zSign, 0xFF, 0 );
2240 }
2241 if ( bExp == 0xFF ) {
2242 if (bSig) {
2243 return propagateFloat32NaN(a, b, status);
2244 }
2245 if ( ( aExp | aSig ) == 0 ) {
2246 float_raise(float_flag_invalid, status);
2247 return float32_default_nan;
2248 }
2249 return packFloat32( zSign, 0xFF, 0 );
2250 }
2251 if ( aExp == 0 ) {
2252 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2253 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2254 }
2255 if ( bExp == 0 ) {
2256 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
2257 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2258 }
2259 zExp = aExp + bExp - 0x7F;
2260 aSig = ( aSig | 0x00800000 )<<7;
2261 bSig = ( bSig | 0x00800000 )<<8;
2262 shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 );
2263 zSig = zSig64;
2264 if ( 0 <= (int32_t) ( zSig<<1 ) ) {
2265 zSig <<= 1;
2266 --zExp;
2267 }
2268 return roundAndPackFloat32(zSign, zExp, zSig, status);
2269
2270}
2271
2272
2273
2274
2275
2276
2277
2278float32 float32_div(float32 a, float32 b, float_status *status)
2279{
2280 flag aSign, bSign, zSign;
2281 int aExp, bExp, zExp;
2282 uint32_t aSig, bSig, zSig;
2283 a = float32_squash_input_denormal(a, status);
2284 b = float32_squash_input_denormal(b, status);
2285
2286 aSig = extractFloat32Frac( a );
2287 aExp = extractFloat32Exp( a );
2288 aSign = extractFloat32Sign( a );
2289 bSig = extractFloat32Frac( b );
2290 bExp = extractFloat32Exp( b );
2291 bSign = extractFloat32Sign( b );
2292 zSign = aSign ^ bSign;
2293 if ( aExp == 0xFF ) {
2294 if (aSig) {
2295 return propagateFloat32NaN(a, b, status);
2296 }
2297 if ( bExp == 0xFF ) {
2298 if (bSig) {
2299 return propagateFloat32NaN(a, b, status);
2300 }
2301 float_raise(float_flag_invalid, status);
2302 return float32_default_nan;
2303 }
2304 return packFloat32( zSign, 0xFF, 0 );
2305 }
2306 if ( bExp == 0xFF ) {
2307 if (bSig) {
2308 return propagateFloat32NaN(a, b, status);
2309 }
2310 return packFloat32( zSign, 0, 0 );
2311 }
2312 if ( bExp == 0 ) {
2313 if ( bSig == 0 ) {
2314 if ( ( aExp | aSig ) == 0 ) {
2315 float_raise(float_flag_invalid, status);
2316 return float32_default_nan;
2317 }
2318 float_raise(float_flag_divbyzero, status);
2319 return packFloat32( zSign, 0xFF, 0 );
2320 }
2321 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2322 }
2323 if ( aExp == 0 ) {
2324 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2325 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2326 }
2327 zExp = aExp - bExp + 0x7D;
2328 aSig = ( aSig | 0x00800000 )<<7;
2329 bSig = ( bSig | 0x00800000 )<<8;
2330 if ( bSig <= ( aSig + aSig ) ) {
2331 aSig >>= 1;
2332 ++zExp;
2333 }
2334 zSig = ( ( (uint64_t) aSig )<<32 ) / bSig;
2335 if ( ( zSig & 0x3F ) == 0 ) {
2336 zSig |= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 );
2337 }
2338 return roundAndPackFloat32(zSign, zExp, zSig, status);
2339
2340}
2341
2342
2343
2344
2345
2346
2347
2348float32 float32_rem(float32 a, float32 b, float_status *status)
2349{
2350 flag aSign, zSign;
2351 int aExp, bExp, expDiff;
2352 uint32_t aSig, bSig;
2353 uint32_t q;
2354 uint64_t aSig64, bSig64, q64;
2355 uint32_t alternateASig;
2356 int32_t sigMean;
2357 a = float32_squash_input_denormal(a, status);
2358 b = float32_squash_input_denormal(b, status);
2359
2360 aSig = extractFloat32Frac( a );
2361 aExp = extractFloat32Exp( a );
2362 aSign = extractFloat32Sign( a );
2363 bSig = extractFloat32Frac( b );
2364 bExp = extractFloat32Exp( b );
2365 if ( aExp == 0xFF ) {
2366 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2367 return propagateFloat32NaN(a, b, status);
2368 }
2369 float_raise(float_flag_invalid, status);
2370 return float32_default_nan;
2371 }
2372 if ( bExp == 0xFF ) {
2373 if (bSig) {
2374 return propagateFloat32NaN(a, b, status);
2375 }
2376 return a;
2377 }
2378 if ( bExp == 0 ) {
2379 if ( bSig == 0 ) {
2380 float_raise(float_flag_invalid, status);
2381 return float32_default_nan;
2382 }
2383 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2384 }
2385 if ( aExp == 0 ) {
2386 if ( aSig == 0 ) return a;
2387 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2388 }
2389 expDiff = aExp - bExp;
2390 aSig |= 0x00800000;
2391 bSig |= 0x00800000;
2392 if ( expDiff < 32 ) {
2393 aSig <<= 8;
2394 bSig <<= 8;
2395 if ( expDiff < 0 ) {
2396 if ( expDiff < -1 ) return a;
2397 aSig >>= 1;
2398 }
2399 q = ( bSig <= aSig );
2400 if ( q ) aSig -= bSig;
2401 if ( 0 < expDiff ) {
2402 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
2403 q >>= 32 - expDiff;
2404 bSig >>= 2;
2405 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2406 }
2407 else {
2408 aSig >>= 2;
2409 bSig >>= 2;
2410 }
2411 }
2412 else {
2413 if ( bSig <= aSig ) aSig -= bSig;
2414 aSig64 = ( (uint64_t) aSig )<<40;
2415 bSig64 = ( (uint64_t) bSig )<<40;
2416 expDiff -= 64;
2417 while ( 0 < expDiff ) {
2418 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2419 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2420 aSig64 = - ( ( bSig * q64 )<<38 );
2421 expDiff -= 62;
2422 }
2423 expDiff += 64;
2424 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2425 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2426 q = q64>>( 64 - expDiff );
2427 bSig <<= 6;
2428 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2429 }
2430 do {
2431 alternateASig = aSig;
2432 ++q;
2433 aSig -= bSig;
2434 } while ( 0 <= (int32_t) aSig );
2435 sigMean = aSig + alternateASig;
2436 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2437 aSig = alternateASig;
2438 }
2439 zSign = ( (int32_t) aSig < 0 );
2440 if ( zSign ) aSig = - aSig;
2441 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
2442}
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455float32 float32_muladd(float32 a, float32 b, float32 c, int flags,
2456 float_status *status)
2457{
2458 flag aSign, bSign, cSign, zSign;
2459 int aExp, bExp, cExp, pExp, zExp, expDiff;
2460 uint32_t aSig, bSig, cSig;
2461 flag pInf, pZero, pSign;
2462 uint64_t pSig64, cSig64, zSig64;
2463 uint32_t pSig;
2464 int shiftcount;
2465 flag signflip, infzero;
2466
2467 a = float32_squash_input_denormal(a, status);
2468 b = float32_squash_input_denormal(b, status);
2469 c = float32_squash_input_denormal(c, status);
2470 aSig = extractFloat32Frac(a);
2471 aExp = extractFloat32Exp(a);
2472 aSign = extractFloat32Sign(a);
2473 bSig = extractFloat32Frac(b);
2474 bExp = extractFloat32Exp(b);
2475 bSign = extractFloat32Sign(b);
2476 cSig = extractFloat32Frac(c);
2477 cExp = extractFloat32Exp(c);
2478 cSign = extractFloat32Sign(c);
2479
2480 infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) ||
2481 (aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));
2482
2483
2484
2485
2486
2487
2488 if (((aExp == 0xff) && aSig) ||
2489 ((bExp == 0xff) && bSig) ||
2490 ((cExp == 0xff) && cSig)) {
2491 return propagateFloat32MulAddNaN(a, b, c, infzero, status);
2492 }
2493
2494 if (infzero) {
2495 float_raise(float_flag_invalid, status);
2496 return float32_default_nan;
2497 }
2498
2499 if (flags & float_muladd_negate_c) {
2500 cSign ^= 1;
2501 }
2502
2503 signflip = (flags & float_muladd_negate_result) ? 1 : 0;
2504
2505
2506 pSign = aSign ^ bSign;
2507 if (flags & float_muladd_negate_product) {
2508 pSign ^= 1;
2509 }
2510 pInf = (aExp == 0xff) || (bExp == 0xff);
2511 pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
2512
2513 if (cExp == 0xff) {
2514 if (pInf && (pSign ^ cSign)) {
2515
2516 float_raise(float_flag_invalid, status);
2517 return float32_default_nan;
2518 }
2519
2520 return packFloat32(cSign ^ signflip, 0xff, 0);
2521 }
2522
2523 if (pInf) {
2524 return packFloat32(pSign ^ signflip, 0xff, 0);
2525 }
2526
2527 if (pZero) {
2528 if (cExp == 0) {
2529 if (cSig == 0) {
2530
2531 if (pSign == cSign) {
2532 zSign = pSign;
2533 } else if (status->float_rounding_mode == float_round_down) {
2534 zSign = 1;
2535 } else {
2536 zSign = 0;
2537 }
2538 return packFloat32(zSign ^ signflip, 0, 0);
2539 }
2540
2541 if (status->flush_to_zero) {
2542 float_raise(float_flag_output_denormal, status);
2543 return packFloat32(cSign ^ signflip, 0, 0);
2544 }
2545 }
2546
2547 if (flags & float_muladd_halve_result) {
2548 if (cExp == 0) {
2549 normalizeFloat32Subnormal(cSig, &cExp, &cSig);
2550 }
2551
2552
2553
2554 cExp -= 2;
2555 cSig = (cSig | 0x00800000) << 7;
2556 return roundAndPackFloat32(cSign ^ signflip, cExp, cSig, status);
2557 }
2558 return packFloat32(cSign ^ signflip, cExp, cSig);
2559 }
2560
2561 if (aExp == 0) {
2562 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
2563 }
2564 if (bExp == 0) {
2565 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
2566 }
2567
2568
2569
2570
2571
2572
2573
2574
2575 pExp = aExp + bExp - 0x7e;
2576 aSig = (aSig | 0x00800000) << 7;
2577 bSig = (bSig | 0x00800000) << 8;
2578 pSig64 = (uint64_t)aSig * bSig;
2579 if ((int64_t)(pSig64 << 1) >= 0) {
2580 pSig64 <<= 1;
2581 pExp--;
2582 }
2583
2584 zSign = pSign ^ signflip;
2585
2586
2587
2588
2589 if (cExp == 0) {
2590 if (!cSig) {
2591
2592 shift64RightJamming(pSig64, 32, &pSig64);
2593 pSig = pSig64;
2594 if (flags & float_muladd_halve_result) {
2595 pExp--;
2596 }
2597 return roundAndPackFloat32(zSign, pExp - 1,
2598 pSig, status);
2599 }
2600 normalizeFloat32Subnormal(cSig, &cExp, &cSig);
2601 }
2602
2603 cSig64 = (uint64_t)cSig << (62 - 23);
2604 cSig64 |= LIT64(0x4000000000000000);
2605 expDiff = pExp - cExp;
2606
2607 if (pSign == cSign) {
2608
2609 if (expDiff > 0) {
2610
2611 shift64RightJamming(cSig64, expDiff, &cSig64);
2612 zExp = pExp;
2613 } else if (expDiff < 0) {
2614
2615 shift64RightJamming(pSig64, -expDiff, &pSig64);
2616 zExp = cExp;
2617 } else {
2618
2619 zExp = cExp;
2620 }
2621
2622 zSig64 = pSig64 + cSig64;
2623 if ((int64_t)zSig64 < 0) {
2624 shift64RightJamming(zSig64, 1, &zSig64);
2625 } else {
2626 zExp--;
2627 }
2628 } else {
2629
2630 if (expDiff > 0) {
2631 shift64RightJamming(cSig64, expDiff, &cSig64);
2632 zSig64 = pSig64 - cSig64;
2633 zExp = pExp;
2634 } else if (expDiff < 0) {
2635 shift64RightJamming(pSig64, -expDiff, &pSig64);
2636 zSig64 = cSig64 - pSig64;
2637 zExp = cExp;
2638 zSign ^= 1;
2639 } else {
2640 zExp = pExp;
2641 if (cSig64 < pSig64) {
2642 zSig64 = pSig64 - cSig64;
2643 } else if (pSig64 < cSig64) {
2644 zSig64 = cSig64 - pSig64;
2645 zSign ^= 1;
2646 } else {
2647
2648 zSign = signflip;
2649 if (status->float_rounding_mode == float_round_down) {
2650 zSign ^= 1;
2651 }
2652 return packFloat32(zSign, 0, 0);
2653 }
2654 }
2655 --zExp;
2656
2657 shiftcount = countLeadingZeros64(zSig64) - 1;
2658 zSig64 <<= shiftcount;
2659 zExp -= shiftcount;
2660 }
2661 if (flags & float_muladd_halve_result) {
2662 zExp--;
2663 }
2664
2665 shift64RightJamming(zSig64, 32, &zSig64);
2666 return roundAndPackFloat32(zSign, zExp, zSig64, status);
2667}
2668
2669
2670
2671
2672
2673
2674
2675
2676float32 float32_sqrt(float32 a, float_status *status)
2677{
2678 flag aSign;
2679 int aExp, zExp;
2680 uint32_t aSig, zSig;
2681 uint64_t rem, term;
2682 a = float32_squash_input_denormal(a, status);
2683
2684 aSig = extractFloat32Frac( a );
2685 aExp = extractFloat32Exp( a );
2686 aSign = extractFloat32Sign( a );
2687 if ( aExp == 0xFF ) {
2688 if (aSig) {
2689 return propagateFloat32NaN(a, float32_zero, status);
2690 }
2691 if ( ! aSign ) return a;
2692 float_raise(float_flag_invalid, status);
2693 return float32_default_nan;
2694 }
2695 if ( aSign ) {
2696 if ( ( aExp | aSig ) == 0 ) return a;
2697 float_raise(float_flag_invalid, status);
2698 return float32_default_nan;
2699 }
2700 if ( aExp == 0 ) {
2701 if ( aSig == 0 ) return float32_zero;
2702 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2703 }
2704 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2705 aSig = ( aSig | 0x00800000 )<<8;
2706 zSig = estimateSqrt32( aExp, aSig ) + 2;
2707 if ( ( zSig & 0x7F ) <= 5 ) {
2708 if ( zSig < 2 ) {
2709 zSig = 0x7FFFFFFF;
2710 goto roundAndPack;
2711 }
2712 aSig >>= aExp & 1;
2713 term = ( (uint64_t) zSig ) * zSig;
2714 rem = ( ( (uint64_t) aSig )<<32 ) - term;
2715 while ( (int64_t) rem < 0 ) {
2716 --zSig;
2717 rem += ( ( (uint64_t) zSig )<<1 ) | 1;
2718 }
2719 zSig |= ( rem != 0 );
2720 }
2721 shift32RightJamming( zSig, 1, &zSig );
2722 roundAndPack:
2723 return roundAndPackFloat32(0, zExp, zSig, status);
2724
2725}
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745static const float64 float32_exp2_coefficients[15] =
2746{
2747 const_float64( 0x3ff0000000000000ll ),
2748 const_float64( 0x3fe0000000000000ll ),
2749 const_float64( 0x3fc5555555555555ll ),
2750 const_float64( 0x3fa5555555555555ll ),
2751 const_float64( 0x3f81111111111111ll ),
2752 const_float64( 0x3f56c16c16c16c17ll ),
2753 const_float64( 0x3f2a01a01a01a01all ),
2754 const_float64( 0x3efa01a01a01a01all ),
2755 const_float64( 0x3ec71de3a556c734ll ),
2756 const_float64( 0x3e927e4fb7789f5cll ),
2757 const_float64( 0x3e5ae64567f544e4ll ),
2758 const_float64( 0x3e21eed8eff8d898ll ),
2759 const_float64( 0x3de6124613a86d09ll ),
2760 const_float64( 0x3da93974a8c07c9dll ),
2761 const_float64( 0x3d6ae7f3e733b81fll ),
2762};
2763
2764float32 float32_exp2(float32 a, float_status *status)
2765{
2766 flag aSign;
2767 int aExp;
2768 uint32_t aSig;
2769 float64 r, x, xn;
2770 int i;
2771 a = float32_squash_input_denormal(a, status);
2772
2773 aSig = extractFloat32Frac( a );
2774 aExp = extractFloat32Exp( a );
2775 aSign = extractFloat32Sign( a );
2776
2777 if ( aExp == 0xFF) {
2778 if (aSig) {
2779 return propagateFloat32NaN(a, float32_zero, status);
2780 }
2781 return (aSign) ? float32_zero : a;
2782 }
2783 if (aExp == 0) {
2784 if (aSig == 0) return float32_one;
2785 }
2786
2787 float_raise(float_flag_inexact, status);
2788
2789
2790
2791
2792 x = float32_to_float64(a, status);
2793 x = float64_mul(x, float64_ln2, status);
2794
2795 xn = x;
2796 r = float64_one;
2797 for (i = 0 ; i < 15 ; i++) {
2798 float64 f;
2799
2800 f = float64_mul(xn, float32_exp2_coefficients[i], status);
2801 r = float64_add(r, f, status);
2802
2803 xn = float64_mul(xn, x, status);
2804 }
2805
2806 return float64_to_float32(r, status);
2807}
2808
2809
2810
2811
2812
2813
2814float32 float32_log2(float32 a, float_status *status)
2815{
2816 flag aSign, zSign;
2817 int aExp;
2818 uint32_t aSig, zSig, i;
2819
2820 a = float32_squash_input_denormal(a, status);
2821 aSig = extractFloat32Frac( a );
2822 aExp = extractFloat32Exp( a );
2823 aSign = extractFloat32Sign( a );
2824
2825 if ( aExp == 0 ) {
2826 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
2827 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2828 }
2829 if ( aSign ) {
2830 float_raise(float_flag_invalid, status);
2831 return float32_default_nan;
2832 }
2833 if ( aExp == 0xFF ) {
2834 if (aSig) {
2835 return propagateFloat32NaN(a, float32_zero, status);
2836 }
2837 return a;
2838 }
2839
2840 aExp -= 0x7F;
2841 aSig |= 0x00800000;
2842 zSign = aExp < 0;
2843 zSig = aExp << 23;
2844
2845 for (i = 1 << 22; i > 0; i >>= 1) {
2846 aSig = ( (uint64_t)aSig * aSig ) >> 23;
2847 if ( aSig & 0x01000000 ) {
2848 aSig >>= 1;
2849 zSig |= i;
2850 }
2851 }
2852
2853 if ( zSign )
2854 zSig = -zSig;
2855
2856 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
2857}
2858
2859
2860
2861
2862
2863
2864
2865
2866int float32_eq(float32 a, float32 b, float_status *status)
2867{
2868 uint32_t av, bv;
2869 a = float32_squash_input_denormal(a, status);
2870 b = float32_squash_input_denormal(b, status);
2871
2872 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2873 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2874 ) {
2875 float_raise(float_flag_invalid, status);
2876 return 0;
2877 }
2878 av = float32_val(a);
2879 bv = float32_val(b);
2880 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
2881}
2882
2883
2884
2885
2886
2887
2888
2889
2890int float32_le(float32 a, float32 b, float_status *status)
2891{
2892 flag aSign, bSign;
2893 uint32_t av, bv;
2894 a = float32_squash_input_denormal(a, status);
2895 b = float32_squash_input_denormal(b, status);
2896
2897 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2898 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2899 ) {
2900 float_raise(float_flag_invalid, status);
2901 return 0;
2902 }
2903 aSign = extractFloat32Sign( a );
2904 bSign = extractFloat32Sign( b );
2905 av = float32_val(a);
2906 bv = float32_val(b);
2907 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
2908 return ( av == bv ) || ( aSign ^ ( av < bv ) );
2909
2910}
2911
2912
2913
2914
2915
2916
2917
2918
2919int float32_lt(float32 a, float32 b, float_status *status)
2920{
2921 flag aSign, bSign;
2922 uint32_t av, bv;
2923 a = float32_squash_input_denormal(a, status);
2924 b = float32_squash_input_denormal(b, status);
2925
2926 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2927 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2928 ) {
2929 float_raise(float_flag_invalid, status);
2930 return 0;
2931 }
2932 aSign = extractFloat32Sign( a );
2933 bSign = extractFloat32Sign( b );
2934 av = float32_val(a);
2935 bv = float32_val(b);
2936 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
2937 return ( av != bv ) && ( aSign ^ ( av < bv ) );
2938
2939}
2940
2941
2942
2943
2944
2945
2946
2947
2948int float32_unordered(float32 a, float32 b, float_status *status)
2949{
2950 a = float32_squash_input_denormal(a, status);
2951 b = float32_squash_input_denormal(b, status);
2952
2953 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2954 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2955 ) {
2956 float_raise(float_flag_invalid, status);
2957 return 1;
2958 }
2959 return 0;
2960}
2961
2962
2963
2964
2965
2966
2967
2968
2969int float32_eq_quiet(float32 a, float32 b, float_status *status)
2970{
2971 a = float32_squash_input_denormal(a, status);
2972 b = float32_squash_input_denormal(b, status);
2973
2974 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2975 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2976 ) {
2977 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2978 float_raise(float_flag_invalid, status);
2979 }
2980 return 0;
2981 }
2982 return ( float32_val(a) == float32_val(b) ) ||
2983 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
2984}
2985
2986
2987
2988
2989
2990
2991
2992
2993int float32_le_quiet(float32 a, float32 b, float_status *status)
2994{
2995 flag aSign, bSign;
2996 uint32_t av, bv;
2997 a = float32_squash_input_denormal(a, status);
2998 b = float32_squash_input_denormal(b, status);
2999
3000 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3001 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3002 ) {
3003 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
3004 float_raise(float_flag_invalid, status);
3005 }
3006 return 0;
3007 }
3008 aSign = extractFloat32Sign( a );
3009 bSign = extractFloat32Sign( b );
3010 av = float32_val(a);
3011 bv = float32_val(b);
3012 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
3013 return ( av == bv ) || ( aSign ^ ( av < bv ) );
3014
3015}
3016
3017
3018
3019
3020
3021
3022
3023
3024int float32_lt_quiet(float32 a, float32 b, float_status *status)
3025{
3026 flag aSign, bSign;
3027 uint32_t av, bv;
3028 a = float32_squash_input_denormal(a, status);
3029 b = float32_squash_input_denormal(b, status);
3030
3031 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3032 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3033 ) {
3034 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
3035 float_raise(float_flag_invalid, status);
3036 }
3037 return 0;
3038 }
3039 aSign = extractFloat32Sign( a );
3040 bSign = extractFloat32Sign( b );
3041 av = float32_val(a);
3042 bv = float32_val(b);
3043 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
3044 return ( av != bv ) && ( aSign ^ ( av < bv ) );
3045
3046}
3047
3048
3049
3050
3051
3052
3053
3054
3055int float32_unordered_quiet(float32 a, float32 b, float_status *status)
3056{
3057 a = float32_squash_input_denormal(a, status);
3058 b = float32_squash_input_denormal(b, status);
3059
3060 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3061 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3062 ) {
3063 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
3064 float_raise(float_flag_invalid, status);
3065 }
3066 return 1;
3067 }
3068 return 0;
3069}
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081int32_t float64_to_int32(float64 a, float_status *status)
3082{
3083 flag aSign;
3084 int aExp;
3085 int shiftCount;
3086 uint64_t aSig;
3087 a = float64_squash_input_denormal(a, status);
3088
3089 aSig = extractFloat64Frac( a );
3090 aExp = extractFloat64Exp( a );
3091 aSign = extractFloat64Sign( a );
3092 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
3093 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3094 shiftCount = 0x42C - aExp;
3095 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
3096 return roundAndPackInt32(aSign, aSig, status);
3097
3098}
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110int32_t float64_to_int32_round_to_zero(float64 a, float_status *status)
3111{
3112 flag aSign;
3113 int aExp;
3114 int shiftCount;
3115 uint64_t aSig, savedASig;
3116 int32_t z;
3117 a = float64_squash_input_denormal(a, status);
3118
3119 aSig = extractFloat64Frac( a );
3120 aExp = extractFloat64Exp( a );
3121 aSign = extractFloat64Sign( a );
3122 if ( 0x41E < aExp ) {
3123 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
3124 goto invalid;
3125 }
3126 else if ( aExp < 0x3FF ) {
3127 if (aExp || aSig) {
3128 status->float_exception_flags |= float_flag_inexact;
3129 }
3130 return 0;
3131 }
3132 aSig |= LIT64( 0x0010000000000000 );
3133 shiftCount = 0x433 - aExp;
3134 savedASig = aSig;
3135 aSig >>= shiftCount;
3136 z = aSig;
3137 if ( aSign ) z = - z;
3138 if ( ( z < 0 ) ^ aSign ) {
3139 invalid:
3140 float_raise(float_flag_invalid, status);
3141 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
3142 }
3143 if ( ( aSig<<shiftCount ) != savedASig ) {
3144 status->float_exception_flags |= float_flag_inexact;
3145 }
3146 return z;
3147
3148}
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160int16_t float64_to_int16_round_to_zero(float64 a, float_status *status)
3161{
3162 flag aSign;
3163 int aExp;
3164 int shiftCount;
3165 uint64_t aSig, savedASig;
3166 int32_t z;
3167
3168 aSig = extractFloat64Frac( a );
3169 aExp = extractFloat64Exp( a );
3170 aSign = extractFloat64Sign( a );
3171 if ( 0x40E < aExp ) {
3172 if ( ( aExp == 0x7FF ) && aSig ) {
3173 aSign = 0;
3174 }
3175 goto invalid;
3176 }
3177 else if ( aExp < 0x3FF ) {
3178 if ( aExp || aSig ) {
3179 status->float_exception_flags |= float_flag_inexact;
3180 }
3181 return 0;
3182 }
3183 aSig |= LIT64( 0x0010000000000000 );
3184 shiftCount = 0x433 - aExp;
3185 savedASig = aSig;
3186 aSig >>= shiftCount;
3187 z = aSig;
3188 if ( aSign ) {
3189 z = - z;
3190 }
3191 if ( ( (int16_t)z < 0 ) ^ aSign ) {
3192 invalid:
3193 float_raise(float_flag_invalid, status);
3194 return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
3195 }
3196 if ( ( aSig<<shiftCount ) != savedASig ) {
3197 status->float_exception_flags |= float_flag_inexact;
3198 }
3199 return z;
3200}
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212int64_t float64_to_int64(float64 a, float_status *status)
3213{
3214 flag aSign;
3215 int aExp;
3216 int shiftCount;
3217 uint64_t aSig, aSigExtra;
3218 a = float64_squash_input_denormal(a, status);
3219
3220 aSig = extractFloat64Frac( a );
3221 aExp = extractFloat64Exp( a );
3222 aSign = extractFloat64Sign( a );
3223 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3224 shiftCount = 0x433 - aExp;
3225 if ( shiftCount <= 0 ) {
3226 if ( 0x43E < aExp ) {
3227 float_raise(float_flag_invalid, status);
3228 if ( ! aSign
3229 || ( ( aExp == 0x7FF )
3230 && ( aSig != LIT64( 0x0010000000000000 ) ) )
3231 ) {
3232 return LIT64( 0x7FFFFFFFFFFFFFFF );
3233 }
3234 return (int64_t) LIT64( 0x8000000000000000 );
3235 }
3236 aSigExtra = 0;
3237 aSig <<= - shiftCount;
3238 }
3239 else {
3240 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3241 }
3242 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
3243
3244}
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256int64_t float64_to_int64_round_to_zero(float64 a, float_status *status)
3257{
3258 flag aSign;
3259 int aExp;
3260 int shiftCount;
3261 uint64_t aSig;
3262 int64_t z;
3263 a = float64_squash_input_denormal(a, status);
3264
3265 aSig = extractFloat64Frac( a );
3266 aExp = extractFloat64Exp( a );
3267 aSign = extractFloat64Sign( a );
3268 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3269 shiftCount = aExp - 0x433;
3270 if ( 0 <= shiftCount ) {
3271 if ( 0x43E <= aExp ) {
3272 if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
3273 float_raise(float_flag_invalid, status);
3274 if ( ! aSign
3275 || ( ( aExp == 0x7FF )
3276 && ( aSig != LIT64( 0x0010000000000000 ) ) )
3277 ) {
3278 return LIT64( 0x7FFFFFFFFFFFFFFF );
3279 }
3280 }
3281 return (int64_t) LIT64( 0x8000000000000000 );
3282 }
3283 z = aSig<<shiftCount;
3284 }
3285 else {
3286 if ( aExp < 0x3FE ) {
3287 if (aExp | aSig) {
3288 status->float_exception_flags |= float_flag_inexact;
3289 }
3290 return 0;
3291 }
3292 z = aSig>>( - shiftCount );
3293 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
3294 status->float_exception_flags |= float_flag_inexact;
3295 }
3296 }
3297 if ( aSign ) z = - z;
3298 return z;
3299
3300}
3301
3302
3303
3304
3305
3306
3307
3308
3309float32 float64_to_float32(float64 a, float_status *status)
3310{
3311 flag aSign;
3312 int aExp;
3313 uint64_t aSig;
3314 uint32_t zSig;
3315 a = float64_squash_input_denormal(a, status);
3316
3317 aSig = extractFloat64Frac( a );
3318 aExp = extractFloat64Exp( a );
3319 aSign = extractFloat64Sign( a );
3320 if ( aExp == 0x7FF ) {
3321 if (aSig) {
3322 return commonNaNToFloat32(float64ToCommonNaN(a, status), status);
3323 }
3324 return packFloat32( aSign, 0xFF, 0 );
3325 }
3326 shift64RightJamming( aSig, 22, &aSig );
3327 zSig = aSig;
3328 if ( aExp || zSig ) {
3329 zSig |= 0x40000000;
3330 aExp -= 0x381;
3331 }
3332 return roundAndPackFloat32(aSign, aExp, zSig, status);
3333
3334}
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347static float16 packFloat16(flag zSign, int zExp, uint16_t zSig)
3348{
3349 return make_float16(
3350 (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
3351}
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381static float16 roundAndPackFloat16(flag zSign, int zExp,
3382 uint32_t zSig, flag ieee,
3383 float_status *status)
3384{
3385 int maxexp = ieee ? 29 : 30;
3386 uint32_t mask;
3387 uint32_t increment;
3388 bool rounding_bumps_exp;
3389 bool is_tiny = false;
3390
3391
3392
3393
3394 if (zExp < 1) {
3395
3396 mask = 0x00ffffff;
3397 if (zExp >= -11) {
3398 mask >>= 11 + zExp;
3399 }
3400 } else {
3401
3402 mask = 0x00001fff;
3403 }
3404
3405 switch (status->float_rounding_mode) {
3406 case float_round_nearest_even:
3407 increment = (mask + 1) >> 1;
3408 if ((zSig & mask) == increment) {
3409 increment = zSig & (increment << 1);
3410 }
3411 break;
3412 case float_round_ties_away:
3413 increment = (mask + 1) >> 1;
3414 break;
3415 case float_round_up:
3416 increment = zSign ? 0 : mask;
3417 break;
3418 case float_round_down:
3419 increment = zSign ? mask : 0;
3420 break;
3421 default:
3422 increment = 0;
3423 break;
3424 }
3425
3426 rounding_bumps_exp = (zSig + increment >= 0x01000000);
3427
3428 if (zExp > maxexp || (zExp == maxexp && rounding_bumps_exp)) {
3429 if (ieee) {
3430 float_raise(float_flag_overflow | float_flag_inexact, status);
3431 return packFloat16(zSign, 0x1f, 0);
3432 } else {
3433 float_raise(float_flag_invalid, status);
3434 return packFloat16(zSign, 0x1f, 0x3ff);
3435 }
3436 }
3437
3438 if (zExp < 0) {
3439
3440 is_tiny =
3441 (status->float_detect_tininess == float_tininess_before_rounding)
3442 || (zExp < -1)
3443 || (!rounding_bumps_exp);
3444 }
3445 if (zSig & mask) {
3446 float_raise(float_flag_inexact, status);
3447 if (is_tiny) {
3448 float_raise(float_flag_underflow, status);
3449 }
3450 }
3451
3452 zSig += increment;
3453 if (rounding_bumps_exp) {
3454 zSig >>= 1;
3455 zExp++;
3456 }
3457
3458 if (zExp < -10) {
3459 return packFloat16(zSign, 0, 0);
3460 }
3461 if (zExp < 0) {
3462 zSig >>= -zExp;
3463 zExp = 0;
3464 }
3465 return packFloat16(zSign, zExp, zSig >> 13);
3466}
3467
3468static void normalizeFloat16Subnormal(uint32_t aSig, int *zExpPtr,
3469 uint32_t *zSigPtr)
3470{
3471 int8_t shiftCount = countLeadingZeros32(aSig) - 21;
3472 *zSigPtr = aSig << shiftCount;
3473 *zExpPtr = 1 - shiftCount;
3474}
3475
3476
3477
3478
3479float32 float16_to_float32(float16 a, flag ieee, float_status *status)
3480{
3481 flag aSign;
3482 int aExp;
3483 uint32_t aSig;
3484
3485 aSign = extractFloat16Sign(a);
3486 aExp = extractFloat16Exp(a);
3487 aSig = extractFloat16Frac(a);
3488
3489 if (aExp == 0x1f && ieee) {
3490 if (aSig) {
3491 return commonNaNToFloat32(float16ToCommonNaN(a, status), status);
3492 }
3493 return packFloat32(aSign, 0xff, 0);
3494 }
3495 if (aExp == 0) {
3496 if (aSig == 0) {
3497 return packFloat32(aSign, 0, 0);
3498 }
3499
3500 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3501 aExp--;
3502 }
3503 return packFloat32( aSign, aExp + 0x70, aSig << 13);
3504}
3505
3506float16 float32_to_float16(float32 a, flag ieee, float_status *status)
3507{
3508 flag aSign;
3509 int aExp;
3510 uint32_t aSig;
3511
3512 a = float32_squash_input_denormal(a, status);
3513
3514 aSig = extractFloat32Frac( a );
3515 aExp = extractFloat32Exp( a );
3516 aSign = extractFloat32Sign( a );
3517 if ( aExp == 0xFF ) {
3518 if (aSig) {
3519
3520 if (!ieee) {
3521 float_raise(float_flag_invalid, status);
3522 return packFloat16(aSign, 0, 0);
3523 }
3524 return commonNaNToFloat16(
3525 float32ToCommonNaN(a, status), status);
3526 }
3527
3528 if (!ieee) {
3529 float_raise(float_flag_invalid, status);
3530 return packFloat16(aSign, 0x1f, 0x3ff);
3531 }
3532 return packFloat16(aSign, 0x1f, 0);
3533 }
3534 if (aExp == 0 && aSig == 0) {
3535 return packFloat16(aSign, 0, 0);
3536 }
3537
3538
3539
3540
3541
3542
3543
3544 aSig |= 0x00800000;
3545 aExp -= 0x71;
3546
3547 return roundAndPackFloat16(aSign, aExp, aSig, ieee, status);
3548}
3549
3550float64 float16_to_float64(float16 a, flag ieee, float_status *status)
3551{
3552 flag aSign;
3553 int aExp;
3554 uint32_t aSig;
3555
3556 aSign = extractFloat16Sign(a);
3557 aExp = extractFloat16Exp(a);
3558 aSig = extractFloat16Frac(a);
3559
3560 if (aExp == 0x1f && ieee) {
3561 if (aSig) {
3562 return commonNaNToFloat64(
3563 float16ToCommonNaN(a, status), status);
3564 }
3565 return packFloat64(aSign, 0x7ff, 0);
3566 }
3567 if (aExp == 0) {
3568 if (aSig == 0) {
3569 return packFloat64(aSign, 0, 0);
3570 }
3571
3572 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3573 aExp--;
3574 }
3575 return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42);
3576}
3577
3578float16 float64_to_float16(float64 a, flag ieee, float_status *status)
3579{
3580 flag aSign;
3581 int aExp;
3582 uint64_t aSig;
3583 uint32_t zSig;
3584
3585 a = float64_squash_input_denormal(a, status);
3586
3587 aSig = extractFloat64Frac(a);
3588 aExp = extractFloat64Exp(a);
3589 aSign = extractFloat64Sign(a);
3590 if (aExp == 0x7FF) {
3591 if (aSig) {
3592
3593 if (!ieee) {
3594 float_raise(float_flag_invalid, status);
3595 return packFloat16(aSign, 0, 0);
3596 }
3597 return commonNaNToFloat16(
3598 float64ToCommonNaN(a, status), status);
3599 }
3600
3601 if (!ieee) {
3602 float_raise(float_flag_invalid, status);
3603 return packFloat16(aSign, 0x1f, 0x3ff);
3604 }
3605 return packFloat16(aSign, 0x1f, 0);
3606 }
3607 shift64RightJamming(aSig, 29, &aSig);
3608 zSig = aSig;
3609 if (aExp == 0 && zSig == 0) {
3610 return packFloat16(aSign, 0, 0);
3611 }
3612
3613
3614
3615
3616
3617
3618
3619 zSig |= 0x00800000;
3620 aExp -= 0x3F1;
3621
3622 return roundAndPackFloat16(aSign, aExp, zSig, ieee, status);
3623}
3624
3625
3626
3627
3628
3629
3630
3631
3632floatx80 float64_to_floatx80(float64 a, float_status *status)
3633{
3634 flag aSign;
3635 int aExp;
3636 uint64_t aSig;
3637
3638 a = float64_squash_input_denormal(a, status);
3639 aSig = extractFloat64Frac( a );
3640 aExp = extractFloat64Exp( a );
3641 aSign = extractFloat64Sign( a );
3642 if ( aExp == 0x7FF ) {
3643 if (aSig) {
3644 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
3645 }
3646 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3647 }
3648 if ( aExp == 0 ) {
3649 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3650 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3651 }
3652 return
3653 packFloatx80(
3654 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
3655
3656}
3657
3658
3659
3660
3661
3662
3663
3664
3665float128 float64_to_float128(float64 a, float_status *status)
3666{
3667 flag aSign;
3668 int aExp;
3669 uint64_t aSig, zSig0, zSig1;
3670
3671 a = float64_squash_input_denormal(a, status);
3672 aSig = extractFloat64Frac( a );
3673 aExp = extractFloat64Exp( a );
3674 aSign = extractFloat64Sign( a );
3675 if ( aExp == 0x7FF ) {
3676 if (aSig) {
3677 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
3678 }
3679 return packFloat128( aSign, 0x7FFF, 0, 0 );
3680 }
3681 if ( aExp == 0 ) {
3682 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3683 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3684 --aExp;
3685 }
3686 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
3687 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
3688
3689}
3690
3691
3692
3693
3694
3695
3696
3697
3698float64 float64_round_to_int(float64 a, float_status *status)
3699{
3700 flag aSign;
3701 int aExp;
3702 uint64_t lastBitMask, roundBitsMask;
3703 uint64_t z;
3704 a = float64_squash_input_denormal(a, status);
3705
3706 aExp = extractFloat64Exp( a );
3707 if ( 0x433 <= aExp ) {
3708 if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
3709 return propagateFloat64NaN(a, a, status);
3710 }
3711 return a;
3712 }
3713 if ( aExp < 0x3FF ) {
3714 if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a;
3715 status->float_exception_flags |= float_flag_inexact;
3716 aSign = extractFloat64Sign( a );
3717 switch (status->float_rounding_mode) {
3718 case float_round_nearest_even:
3719 if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
3720 return packFloat64( aSign, 0x3FF, 0 );
3721 }
3722 break;
3723 case float_round_ties_away:
3724 if (aExp == 0x3FE) {
3725 return packFloat64(aSign, 0x3ff, 0);
3726 }
3727 break;
3728 case float_round_down:
3729 return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
3730 case float_round_up:
3731 return make_float64(
3732 aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
3733 }
3734 return packFloat64( aSign, 0, 0 );
3735 }
3736 lastBitMask = 1;
3737 lastBitMask <<= 0x433 - aExp;
3738 roundBitsMask = lastBitMask - 1;
3739 z = float64_val(a);
3740 switch (status->float_rounding_mode) {
3741 case float_round_nearest_even:
3742 z += lastBitMask >> 1;
3743 if ((z & roundBitsMask) == 0) {
3744 z &= ~lastBitMask;
3745 }
3746 break;
3747 case float_round_ties_away:
3748 z += lastBitMask >> 1;
3749 break;
3750 case float_round_to_zero:
3751 break;
3752 case float_round_up:
3753 if (!extractFloat64Sign(make_float64(z))) {
3754 z += roundBitsMask;
3755 }
3756 break;
3757 case float_round_down:
3758 if (extractFloat64Sign(make_float64(z))) {
3759 z += roundBitsMask;
3760 }
3761 break;
3762 default:
3763 abort();
3764 }
3765 z &= ~ roundBitsMask;
3766 if (z != float64_val(a)) {
3767 status->float_exception_flags |= float_flag_inexact;
3768 }
3769 return make_float64(z);
3770
3771}
3772
3773float64 float64_trunc_to_int(float64 a, float_status *status)
3774{
3775 int oldmode;
3776 float64 res;
3777 oldmode = status->float_rounding_mode;
3778 status->float_rounding_mode = float_round_to_zero;
3779 res = float64_round_to_int(a, status);
3780 status->float_rounding_mode = oldmode;
3781 return res;
3782}
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792static float64 addFloat64Sigs(float64 a, float64 b, flag zSign,
3793 float_status *status)
3794{
3795 int aExp, bExp, zExp;
3796 uint64_t aSig, bSig, zSig;
3797 int expDiff;
3798
3799 aSig = extractFloat64Frac( a );
3800 aExp = extractFloat64Exp( a );
3801 bSig = extractFloat64Frac( b );
3802 bExp = extractFloat64Exp( b );
3803 expDiff = aExp - bExp;
3804 aSig <<= 9;
3805 bSig <<= 9;
3806 if ( 0 < expDiff ) {
3807 if ( aExp == 0x7FF ) {
3808 if (aSig) {
3809 return propagateFloat64NaN(a, b, status);
3810 }
3811 return a;
3812 }
3813 if ( bExp == 0 ) {
3814 --expDiff;
3815 }
3816 else {
3817 bSig |= LIT64( 0x2000000000000000 );
3818 }
3819 shift64RightJamming( bSig, expDiff, &bSig );
3820 zExp = aExp;
3821 }
3822 else if ( expDiff < 0 ) {
3823 if ( bExp == 0x7FF ) {
3824 if (bSig) {
3825 return propagateFloat64NaN(a, b, status);
3826 }
3827 return packFloat64( zSign, 0x7FF, 0 );
3828 }
3829 if ( aExp == 0 ) {
3830 ++expDiff;
3831 }
3832 else {
3833 aSig |= LIT64( 0x2000000000000000 );
3834 }
3835 shift64RightJamming( aSig, - expDiff, &aSig );
3836 zExp = bExp;
3837 }
3838 else {
3839 if ( aExp == 0x7FF ) {
3840 if (aSig | bSig) {
3841 return propagateFloat64NaN(a, b, status);
3842 }
3843 return a;
3844 }
3845 if ( aExp == 0 ) {
3846 if (status->flush_to_zero) {
3847 if (aSig | bSig) {
3848 float_raise(float_flag_output_denormal, status);
3849 }
3850 return packFloat64(zSign, 0, 0);
3851 }
3852 return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
3853 }
3854 zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
3855 zExp = aExp;
3856 goto roundAndPack;
3857 }
3858 aSig |= LIT64( 0x2000000000000000 );
3859 zSig = ( aSig + bSig )<<1;
3860 --zExp;
3861 if ( (int64_t) zSig < 0 ) {
3862 zSig = aSig + bSig;
3863 ++zExp;
3864 }
3865 roundAndPack:
3866 return roundAndPackFloat64(zSign, zExp, zSig, status);
3867
3868}
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878static float64 subFloat64Sigs(float64 a, float64 b, flag zSign,
3879 float_status *status)
3880{
3881 int aExp, bExp, zExp;
3882 uint64_t aSig, bSig, zSig;
3883 int expDiff;
3884
3885 aSig = extractFloat64Frac( a );
3886 aExp = extractFloat64Exp( a );
3887 bSig = extractFloat64Frac( b );
3888 bExp = extractFloat64Exp( b );
3889 expDiff = aExp - bExp;
3890 aSig <<= 10;
3891 bSig <<= 10;
3892 if ( 0 < expDiff ) goto aExpBigger;
3893 if ( expDiff < 0 ) goto bExpBigger;
3894 if ( aExp == 0x7FF ) {
3895 if (aSig | bSig) {
3896 return propagateFloat64NaN(a, b, status);
3897 }
3898 float_raise(float_flag_invalid, status);
3899 return float64_default_nan;
3900 }
3901 if ( aExp == 0 ) {
3902 aExp = 1;
3903 bExp = 1;
3904 }
3905 if ( bSig < aSig ) goto aBigger;
3906 if ( aSig < bSig ) goto bBigger;
3907 return packFloat64(status->float_rounding_mode == float_round_down, 0, 0);
3908 bExpBigger:
3909 if ( bExp == 0x7FF ) {
3910 if (bSig) {
3911 return propagateFloat64NaN(a, b, status);
3912 }
3913 return packFloat64( zSign ^ 1, 0x7FF, 0 );
3914 }
3915 if ( aExp == 0 ) {
3916 ++expDiff;
3917 }
3918 else {
3919 aSig |= LIT64( 0x4000000000000000 );
3920 }
3921 shift64RightJamming( aSig, - expDiff, &aSig );
3922 bSig |= LIT64( 0x4000000000000000 );
3923 bBigger:
3924 zSig = bSig - aSig;
3925 zExp = bExp;
3926 zSign ^= 1;
3927 goto normalizeRoundAndPack;
3928 aExpBigger:
3929 if ( aExp == 0x7FF ) {
3930 if (aSig) {
3931 return propagateFloat64NaN(a, b, status);
3932 }
3933 return a;
3934 }
3935 if ( bExp == 0 ) {
3936 --expDiff;
3937 }
3938 else {
3939 bSig |= LIT64( 0x4000000000000000 );
3940 }
3941 shift64RightJamming( bSig, expDiff, &bSig );
3942 aSig |= LIT64( 0x4000000000000000 );
3943 aBigger:
3944 zSig = aSig - bSig;
3945 zExp = aExp;
3946 normalizeRoundAndPack:
3947 --zExp;
3948 return normalizeRoundAndPackFloat64(zSign, zExp, zSig, status);
3949
3950}
3951
3952
3953
3954
3955
3956
3957
3958float64 float64_add(float64 a, float64 b, float_status *status)
3959{
3960 flag aSign, bSign;
3961 a = float64_squash_input_denormal(a, status);
3962 b = float64_squash_input_denormal(b, status);
3963
3964 aSign = extractFloat64Sign( a );
3965 bSign = extractFloat64Sign( b );
3966 if ( aSign == bSign ) {
3967 return addFloat64Sigs(a, b, aSign, status);
3968 }
3969 else {
3970 return subFloat64Sigs(a, b, aSign, status);
3971 }
3972
3973}
3974
3975
3976
3977
3978
3979
3980
3981float64 float64_sub(float64 a, float64 b, float_status *status)
3982{
3983 flag aSign, bSign;
3984 a = float64_squash_input_denormal(a, status);
3985 b = float64_squash_input_denormal(b, status);
3986
3987 aSign = extractFloat64Sign( a );
3988 bSign = extractFloat64Sign( b );
3989 if ( aSign == bSign ) {
3990 return subFloat64Sigs(a, b, aSign, status);
3991 }
3992 else {
3993 return addFloat64Sigs(a, b, aSign, status);
3994 }
3995
3996}
3997
3998
3999
4000
4001
4002
4003
4004float64 float64_mul(float64 a, float64 b, float_status *status)
4005{
4006 flag aSign, bSign, zSign;
4007 int aExp, bExp, zExp;
4008 uint64_t aSig, bSig, zSig0, zSig1;
4009
4010 a = float64_squash_input_denormal(a, status);
4011 b = float64_squash_input_denormal(b, status);
4012
4013 aSig = extractFloat64Frac( a );
4014 aExp = extractFloat64Exp( a );
4015 aSign = extractFloat64Sign( a );
4016 bSig = extractFloat64Frac( b );
4017 bExp = extractFloat64Exp( b );
4018 bSign = extractFloat64Sign( b );
4019 zSign = aSign ^ bSign;
4020 if ( aExp == 0x7FF ) {
4021 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
4022 return propagateFloat64NaN(a, b, status);
4023 }
4024 if ( ( bExp | bSig ) == 0 ) {
4025 float_raise(float_flag_invalid, status);
4026 return float64_default_nan;
4027 }
4028 return packFloat64( zSign, 0x7FF, 0 );
4029 }
4030 if ( bExp == 0x7FF ) {
4031 if (bSig) {
4032 return propagateFloat64NaN(a, b, status);
4033 }
4034 if ( ( aExp | aSig ) == 0 ) {
4035 float_raise(float_flag_invalid, status);
4036 return float64_default_nan;
4037 }
4038 return packFloat64( zSign, 0x7FF, 0 );
4039 }
4040 if ( aExp == 0 ) {
4041 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
4042 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4043 }
4044 if ( bExp == 0 ) {
4045 if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
4046 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4047 }
4048 zExp = aExp + bExp - 0x3FF;
4049 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
4050 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4051 mul64To128( aSig, bSig, &zSig0, &zSig1 );
4052 zSig0 |= ( zSig1 != 0 );
4053 if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
4054 zSig0 <<= 1;
4055 --zExp;
4056 }
4057 return roundAndPackFloat64(zSign, zExp, zSig0, status);
4058
4059}
4060
4061
4062
4063
4064
4065
4066
4067float64 float64_div(float64 a, float64 b, float_status *status)
4068{
4069 flag aSign, bSign, zSign;
4070 int aExp, bExp, zExp;
4071 uint64_t aSig, bSig, zSig;
4072 uint64_t rem0, rem1;
4073 uint64_t term0, term1;
4074 a = float64_squash_input_denormal(a, status);
4075 b = float64_squash_input_denormal(b, status);
4076
4077 aSig = extractFloat64Frac( a );
4078 aExp = extractFloat64Exp( a );
4079 aSign = extractFloat64Sign( a );
4080 bSig = extractFloat64Frac( b );
4081 bExp = extractFloat64Exp( b );
4082 bSign = extractFloat64Sign( b );
4083 zSign = aSign ^ bSign;
4084 if ( aExp == 0x7FF ) {
4085 if (aSig) {
4086 return propagateFloat64NaN(a, b, status);
4087 }
4088 if ( bExp == 0x7FF ) {
4089 if (bSig) {
4090 return propagateFloat64NaN(a, b, status);
4091 }
4092 float_raise(float_flag_invalid, status);
4093 return float64_default_nan;
4094 }
4095 return packFloat64( zSign, 0x7FF, 0 );
4096 }
4097 if ( bExp == 0x7FF ) {
4098 if (bSig) {
4099 return propagateFloat64NaN(a, b, status);
4100 }
4101 return packFloat64( zSign, 0, 0 );
4102 }
4103 if ( bExp == 0 ) {
4104 if ( bSig == 0 ) {
4105 if ( ( aExp | aSig ) == 0 ) {
4106 float_raise(float_flag_invalid, status);
4107 return float64_default_nan;
4108 }
4109 float_raise(float_flag_divbyzero, status);
4110 return packFloat64( zSign, 0x7FF, 0 );
4111 }
4112 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4113 }
4114 if ( aExp == 0 ) {
4115 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
4116 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4117 }
4118 zExp = aExp - bExp + 0x3FD;
4119 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
4120 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4121 if ( bSig <= ( aSig + aSig ) ) {
4122 aSig >>= 1;
4123 ++zExp;
4124 }
4125 zSig = estimateDiv128To64( aSig, 0, bSig );
4126 if ( ( zSig & 0x1FF ) <= 2 ) {
4127 mul64To128( bSig, zSig, &term0, &term1 );
4128 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
4129 while ( (int64_t) rem0 < 0 ) {
4130 --zSig;
4131 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4132 }
4133 zSig |= ( rem1 != 0 );
4134 }
4135 return roundAndPackFloat64(zSign, zExp, zSig, status);
4136
4137}
4138
4139
4140
4141
4142
4143
4144
4145float64 float64_rem(float64 a, float64 b, float_status *status)
4146{
4147 flag aSign, zSign;
4148 int aExp, bExp, expDiff;
4149 uint64_t aSig, bSig;
4150 uint64_t q, alternateASig;
4151 int64_t sigMean;
4152
4153 a = float64_squash_input_denormal(a, status);
4154 b = float64_squash_input_denormal(b, status);
4155 aSig = extractFloat64Frac( a );
4156 aExp = extractFloat64Exp( a );
4157 aSign = extractFloat64Sign( a );
4158 bSig = extractFloat64Frac( b );
4159 bExp = extractFloat64Exp( b );
4160 if ( aExp == 0x7FF ) {
4161 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
4162 return propagateFloat64NaN(a, b, status);
4163 }
4164 float_raise(float_flag_invalid, status);
4165 return float64_default_nan;
4166 }
4167 if ( bExp == 0x7FF ) {
4168 if (bSig) {
4169 return propagateFloat64NaN(a, b, status);
4170 }
4171 return a;
4172 }
4173 if ( bExp == 0 ) {
4174 if ( bSig == 0 ) {
4175 float_raise(float_flag_invalid, status);
4176 return float64_default_nan;
4177 }
4178 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4179 }
4180 if ( aExp == 0 ) {
4181 if ( aSig == 0 ) return a;
4182 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4183 }
4184 expDiff = aExp - bExp;
4185 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
4186 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4187 if ( expDiff < 0 ) {
4188 if ( expDiff < -1 ) return a;
4189 aSig >>= 1;
4190 }
4191 q = ( bSig <= aSig );
4192 if ( q ) aSig -= bSig;
4193 expDiff -= 64;
4194 while ( 0 < expDiff ) {
4195 q = estimateDiv128To64( aSig, 0, bSig );
4196 q = ( 2 < q ) ? q - 2 : 0;
4197 aSig = - ( ( bSig>>2 ) * q );
4198 expDiff -= 62;
4199 }
4200 expDiff += 64;
4201 if ( 0 < expDiff ) {
4202 q = estimateDiv128To64( aSig, 0, bSig );
4203 q = ( 2 < q ) ? q - 2 : 0;
4204 q >>= 64 - expDiff;
4205 bSig >>= 2;
4206 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4207 }
4208 else {
4209 aSig >>= 2;
4210 bSig >>= 2;
4211 }
4212 do {
4213 alternateASig = aSig;
4214 ++q;
4215 aSig -= bSig;
4216 } while ( 0 <= (int64_t) aSig );
4217 sigMean = aSig + alternateASig;
4218 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4219 aSig = alternateASig;
4220 }
4221 zSign = ( (int64_t) aSig < 0 );
4222 if ( zSign ) aSig = - aSig;
4223 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
4224
4225}
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238float64 float64_muladd(float64 a, float64 b, float64 c, int flags,
4239 float_status *status)
4240{
4241 flag aSign, bSign, cSign, zSign;
4242 int aExp, bExp, cExp, pExp, zExp, expDiff;
4243 uint64_t aSig, bSig, cSig;
4244 flag pInf, pZero, pSign;
4245 uint64_t pSig0, pSig1, cSig0, cSig1, zSig0, zSig1;
4246 int shiftcount;
4247 flag signflip, infzero;
4248
4249 a = float64_squash_input_denormal(a, status);
4250 b = float64_squash_input_denormal(b, status);
4251 c = float64_squash_input_denormal(c, status);
4252 aSig = extractFloat64Frac(a);
4253 aExp = extractFloat64Exp(a);
4254 aSign = extractFloat64Sign(a);
4255 bSig = extractFloat64Frac(b);
4256 bExp = extractFloat64Exp(b);
4257 bSign = extractFloat64Sign(b);
4258 cSig = extractFloat64Frac(c);
4259 cExp = extractFloat64Exp(c);
4260 cSign = extractFloat64Sign(c);
4261
4262 infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) ||
4263 (aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0));
4264
4265
4266
4267
4268
4269
4270 if (((aExp == 0x7ff) && aSig) ||
4271 ((bExp == 0x7ff) && bSig) ||
4272 ((cExp == 0x7ff) && cSig)) {
4273 return propagateFloat64MulAddNaN(a, b, c, infzero, status);
4274 }
4275
4276 if (infzero) {
4277 float_raise(float_flag_invalid, status);
4278 return float64_default_nan;
4279 }
4280
4281 if (flags & float_muladd_negate_c) {
4282 cSign ^= 1;
4283 }
4284
4285 signflip = (flags & float_muladd_negate_result) ? 1 : 0;
4286
4287
4288 pSign = aSign ^ bSign;
4289 if (flags & float_muladd_negate_product) {
4290 pSign ^= 1;
4291 }
4292 pInf = (aExp == 0x7ff) || (bExp == 0x7ff);
4293 pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
4294
4295 if (cExp == 0x7ff) {
4296 if (pInf && (pSign ^ cSign)) {
4297
4298 float_raise(float_flag_invalid, status);
4299 return float64_default_nan;
4300 }
4301
4302 return packFloat64(cSign ^ signflip, 0x7ff, 0);
4303 }
4304
4305 if (pInf) {
4306 return packFloat64(pSign ^ signflip, 0x7ff, 0);
4307 }
4308
4309 if (pZero) {
4310 if (cExp == 0) {
4311 if (cSig == 0) {
4312
4313 if (pSign == cSign) {
4314 zSign = pSign;
4315 } else if (status->float_rounding_mode == float_round_down) {
4316 zSign = 1;
4317 } else {
4318 zSign = 0;
4319 }
4320 return packFloat64(zSign ^ signflip, 0, 0);
4321 }
4322
4323 if (status->flush_to_zero) {
4324 float_raise(float_flag_output_denormal, status);
4325 return packFloat64(cSign ^ signflip, 0, 0);
4326 }
4327 }
4328
4329 if (flags & float_muladd_halve_result) {
4330 if (cExp == 0) {
4331 normalizeFloat64Subnormal(cSig, &cExp, &cSig);
4332 }
4333
4334
4335
4336 cExp -= 2;
4337 cSig = (cSig | 0x0010000000000000ULL) << 10;
4338 return roundAndPackFloat64(cSign ^ signflip, cExp, cSig, status);
4339 }
4340 return packFloat64(cSign ^ signflip, cExp, cSig);
4341 }
4342
4343 if (aExp == 0) {
4344 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
4345 }
4346 if (bExp == 0) {
4347 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
4348 }
4349
4350
4351
4352
4353
4354
4355
4356
4357 pExp = aExp + bExp - 0x3fe;
4358 aSig = (aSig | LIT64(0x0010000000000000))<<10;
4359 bSig = (bSig | LIT64(0x0010000000000000))<<11;
4360 mul64To128(aSig, bSig, &pSig0, &pSig1);
4361 if ((int64_t)(pSig0 << 1) >= 0) {
4362 shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
4363 pExp--;
4364 }
4365
4366 zSign = pSign ^ signflip;
4367
4368
4369
4370
4371 if (cExp == 0) {
4372 if (!cSig) {
4373
4374 shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
4375 if (flags & float_muladd_halve_result) {
4376 pExp--;
4377 }
4378 return roundAndPackFloat64(zSign, pExp - 1,
4379 pSig1, status);
4380 }
4381 normalizeFloat64Subnormal(cSig, &cExp, &cSig);
4382 }
4383
4384
4385
4386
4387 cSig0 = cSig << (126 - 64 - 52);
4388 cSig1 = 0;
4389 cSig0 |= LIT64(0x4000000000000000);
4390 expDiff = pExp - cExp;
4391
4392 if (pSign == cSign) {
4393
4394 if (expDiff > 0) {
4395
4396 shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
4397 zExp = pExp;
4398 } else if (expDiff < 0) {
4399
4400 shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
4401 zExp = cExp;
4402 } else {
4403
4404 zExp = cExp;
4405 }
4406
4407 add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4408 if ((int64_t)zSig0 < 0) {
4409 shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
4410 } else {
4411 zExp--;
4412 }
4413 shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
4414 if (flags & float_muladd_halve_result) {
4415 zExp--;
4416 }
4417 return roundAndPackFloat64(zSign, zExp, zSig1, status);
4418 } else {
4419
4420 if (expDiff > 0) {
4421 shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
4422 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4423 zExp = pExp;
4424 } else if (expDiff < 0) {
4425 shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
4426 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
4427 zExp = cExp;
4428 zSign ^= 1;
4429 } else {
4430 zExp = pExp;
4431 if (lt128(cSig0, cSig1, pSig0, pSig1)) {
4432 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4433 } else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
4434 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
4435 zSign ^= 1;
4436 } else {
4437
4438 zSign = signflip;
4439 if (status->float_rounding_mode == float_round_down) {
4440 zSign ^= 1;
4441 }
4442 return packFloat64(zSign, 0, 0);
4443 }
4444 }
4445 --zExp;
4446
4447
4448
4449 if (zSig0) {
4450 shiftcount = countLeadingZeros64(zSig0) - 1;
4451 shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1);
4452 if (zSig1) {
4453 zSig0 |= 1;
4454 }
4455 zExp -= shiftcount;
4456 } else {
4457 shiftcount = countLeadingZeros64(zSig1);
4458 if (shiftcount == 0) {
4459 zSig0 = (zSig1 >> 1) | (zSig1 & 1);
4460 zExp -= 63;
4461 } else {
4462 shiftcount--;
4463 zSig0 = zSig1 << shiftcount;
4464 zExp -= (shiftcount + 64);
4465 }
4466 }
4467 if (flags & float_muladd_halve_result) {
4468 zExp--;
4469 }
4470 return roundAndPackFloat64(zSign, zExp, zSig0, status);
4471 }
4472}
4473
4474
4475
4476
4477
4478
4479
4480float64 float64_sqrt(float64 a, float_status *status)
4481{
4482 flag aSign;
4483 int aExp, zExp;
4484 uint64_t aSig, zSig, doubleZSig;
4485 uint64_t rem0, rem1, term0, term1;
4486 a = float64_squash_input_denormal(a, status);
4487
4488 aSig = extractFloat64Frac( a );
4489 aExp = extractFloat64Exp( a );
4490 aSign = extractFloat64Sign( a );
4491 if ( aExp == 0x7FF ) {
4492 if (aSig) {
4493 return propagateFloat64NaN(a, a, status);
4494 }
4495 if ( ! aSign ) return a;
4496 float_raise(float_flag_invalid, status);
4497 return float64_default_nan;
4498 }
4499 if ( aSign ) {
4500 if ( ( aExp | aSig ) == 0 ) return a;
4501 float_raise(float_flag_invalid, status);
4502 return float64_default_nan;
4503 }
4504 if ( aExp == 0 ) {
4505 if ( aSig == 0 ) return float64_zero;
4506 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4507 }
4508 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
4509 aSig |= LIT64( 0x0010000000000000 );
4510 zSig = estimateSqrt32( aExp, aSig>>21 );
4511 aSig <<= 9 - ( aExp & 1 );
4512 zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
4513 if ( ( zSig & 0x1FF ) <= 5 ) {
4514 doubleZSig = zSig<<1;
4515 mul64To128( zSig, zSig, &term0, &term1 );
4516 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
4517 while ( (int64_t) rem0 < 0 ) {
4518 --zSig;
4519 doubleZSig -= 2;
4520 add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
4521 }
4522 zSig |= ( ( rem0 | rem1 ) != 0 );
4523 }
4524 return roundAndPackFloat64(0, zExp, zSig, status);
4525
4526}
4527
4528
4529
4530
4531
4532
4533float64 float64_log2(float64 a, float_status *status)
4534{
4535 flag aSign, zSign;
4536 int aExp;
4537 uint64_t aSig, aSig0, aSig1, zSig, i;
4538 a = float64_squash_input_denormal(a, status);
4539
4540 aSig = extractFloat64Frac( a );
4541 aExp = extractFloat64Exp( a );
4542 aSign = extractFloat64Sign( a );
4543
4544 if ( aExp == 0 ) {
4545 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
4546 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4547 }
4548 if ( aSign ) {
4549 float_raise(float_flag_invalid, status);
4550 return float64_default_nan;
4551 }
4552 if ( aExp == 0x7FF ) {
4553 if (aSig) {
4554 return propagateFloat64NaN(a, float64_zero, status);
4555 }
4556 return a;
4557 }
4558
4559 aExp -= 0x3FF;
4560 aSig |= LIT64( 0x0010000000000000 );
4561 zSign = aExp < 0;
4562 zSig = (uint64_t)aExp << 52;
4563 for (i = 1LL << 51; i > 0; i >>= 1) {
4564 mul64To128( aSig, aSig, &aSig0, &aSig1 );
4565 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
4566 if ( aSig & LIT64( 0x0020000000000000 ) ) {
4567 aSig >>= 1;
4568 zSig |= i;
4569 }
4570 }
4571
4572 if ( zSign )
4573 zSig = -zSig;
4574 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
4575}
4576
4577
4578
4579
4580
4581
4582
4583
4584int float64_eq(float64 a, float64 b, float_status *status)
4585{
4586 uint64_t av, bv;
4587 a = float64_squash_input_denormal(a, status);
4588 b = float64_squash_input_denormal(b, status);
4589
4590 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4591 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4592 ) {
4593 float_raise(float_flag_invalid, status);
4594 return 0;
4595 }
4596 av = float64_val(a);
4597 bv = float64_val(b);
4598 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4599
4600}
4601
4602
4603
4604
4605
4606
4607
4608
4609int float64_le(float64 a, float64 b, float_status *status)
4610{
4611 flag aSign, bSign;
4612 uint64_t av, bv;
4613 a = float64_squash_input_denormal(a, status);
4614 b = float64_squash_input_denormal(b, status);
4615
4616 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4617 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4618 ) {
4619 float_raise(float_flag_invalid, status);
4620 return 0;
4621 }
4622 aSign = extractFloat64Sign( a );
4623 bSign = extractFloat64Sign( b );
4624 av = float64_val(a);
4625 bv = float64_val(b);
4626 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4627 return ( av == bv ) || ( aSign ^ ( av < bv ) );
4628
4629}
4630
4631
4632
4633
4634
4635
4636
4637
4638int float64_lt(float64 a, float64 b, float_status *status)
4639{
4640 flag aSign, bSign;
4641 uint64_t av, bv;
4642
4643 a = float64_squash_input_denormal(a, status);
4644 b = float64_squash_input_denormal(b, status);
4645 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4646 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4647 ) {
4648 float_raise(float_flag_invalid, status);
4649 return 0;
4650 }
4651 aSign = extractFloat64Sign( a );
4652 bSign = extractFloat64Sign( b );
4653 av = float64_val(a);
4654 bv = float64_val(b);
4655 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
4656 return ( av != bv ) && ( aSign ^ ( av < bv ) );
4657
4658}
4659
4660
4661
4662
4663
4664
4665
4666
4667int float64_unordered(float64 a, float64 b, float_status *status)
4668{
4669 a = float64_squash_input_denormal(a, status);
4670 b = float64_squash_input_denormal(b, status);
4671
4672 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4673 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4674 ) {
4675 float_raise(float_flag_invalid, status);
4676 return 1;
4677 }
4678 return 0;
4679}
4680
4681
4682
4683
4684
4685
4686
4687
4688int float64_eq_quiet(float64 a, float64 b, float_status *status)
4689{
4690 uint64_t av, bv;
4691 a = float64_squash_input_denormal(a, status);
4692 b = float64_squash_input_denormal(b, status);
4693
4694 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4695 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4696 ) {
4697 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4698 float_raise(float_flag_invalid, status);
4699 }
4700 return 0;
4701 }
4702 av = float64_val(a);
4703 bv = float64_val(b);
4704 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4705
4706}
4707
4708
4709
4710
4711
4712
4713
4714
4715int float64_le_quiet(float64 a, float64 b, float_status *status)
4716{
4717 flag aSign, bSign;
4718 uint64_t av, bv;
4719 a = float64_squash_input_denormal(a, status);
4720 b = float64_squash_input_denormal(b, status);
4721
4722 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4723 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4724 ) {
4725 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4726 float_raise(float_flag_invalid, status);
4727 }
4728 return 0;
4729 }
4730 aSign = extractFloat64Sign( a );
4731 bSign = extractFloat64Sign( b );
4732 av = float64_val(a);
4733 bv = float64_val(b);
4734 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4735 return ( av == bv ) || ( aSign ^ ( av < bv ) );
4736
4737}
4738
4739
4740
4741
4742
4743
4744
4745
4746int float64_lt_quiet(float64 a, float64 b, float_status *status)
4747{
4748 flag aSign, bSign;
4749 uint64_t av, bv;
4750 a = float64_squash_input_denormal(a, status);
4751 b = float64_squash_input_denormal(b, status);
4752
4753 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4754 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4755 ) {
4756 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4757 float_raise(float_flag_invalid, status);
4758 }
4759 return 0;
4760 }
4761 aSign = extractFloat64Sign( a );
4762 bSign = extractFloat64Sign( b );
4763 av = float64_val(a);
4764 bv = float64_val(b);
4765 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
4766 return ( av != bv ) && ( aSign ^ ( av < bv ) );
4767
4768}
4769
4770
4771
4772
4773
4774
4775
4776
4777int float64_unordered_quiet(float64 a, float64 b, float_status *status)
4778{
4779 a = float64_squash_input_denormal(a, status);
4780 b = float64_squash_input_denormal(b, status);
4781
4782 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4783 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4784 ) {
4785 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4786 float_raise(float_flag_invalid, status);
4787 }
4788 return 1;
4789 }
4790 return 0;
4791}
4792
4793
4794
4795
4796
4797
4798
4799
4800
4801
4802
4803int32_t floatx80_to_int32(floatx80 a, float_status *status)
4804{
4805 flag aSign;
4806 int32_t aExp, shiftCount;
4807 uint64_t aSig;
4808
4809 aSig = extractFloatx80Frac( a );
4810 aExp = extractFloatx80Exp( a );
4811 aSign = extractFloatx80Sign( a );
4812 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
4813 shiftCount = 0x4037 - aExp;
4814 if ( shiftCount <= 0 ) shiftCount = 1;
4815 shift64RightJamming( aSig, shiftCount, &aSig );
4816 return roundAndPackInt32(aSign, aSig, status);
4817
4818}
4819
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
4831{
4832 flag aSign;
4833 int32_t aExp, shiftCount;
4834 uint64_t aSig, savedASig;
4835 int32_t z;
4836
4837 aSig = extractFloatx80Frac( a );
4838 aExp = extractFloatx80Exp( a );
4839 aSign = extractFloatx80Sign( a );
4840 if ( 0x401E < aExp ) {
4841 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
4842 goto invalid;
4843 }
4844 else if ( aExp < 0x3FFF ) {
4845 if (aExp || aSig) {
4846 status->float_exception_flags |= float_flag_inexact;
4847 }
4848 return 0;
4849 }
4850 shiftCount = 0x403E - aExp;
4851 savedASig = aSig;
4852 aSig >>= shiftCount;
4853 z = aSig;
4854 if ( aSign ) z = - z;
4855 if ( ( z < 0 ) ^ aSign ) {
4856 invalid:
4857 float_raise(float_flag_invalid, status);
4858 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
4859 }
4860 if ( ( aSig<<shiftCount ) != savedASig ) {
4861 status->float_exception_flags |= float_flag_inexact;
4862 }
4863 return z;
4864
4865}
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
4877int64_t floatx80_to_int64(floatx80 a, float_status *status)
4878{
4879 flag aSign;
4880 int32_t aExp, shiftCount;
4881 uint64_t aSig, aSigExtra;
4882
4883 aSig = extractFloatx80Frac( a );
4884 aExp = extractFloatx80Exp( a );
4885 aSign = extractFloatx80Sign( a );
4886 shiftCount = 0x403E - aExp;
4887 if ( shiftCount <= 0 ) {
4888 if ( shiftCount ) {
4889 float_raise(float_flag_invalid, status);
4890 if ( ! aSign
4891 || ( ( aExp == 0x7FFF )
4892 && ( aSig != LIT64( 0x8000000000000000 ) ) )
4893 ) {
4894 return LIT64( 0x7FFFFFFFFFFFFFFF );
4895 }
4896 return (int64_t) LIT64( 0x8000000000000000 );
4897 }
4898 aSigExtra = 0;
4899 }
4900 else {
4901 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4902 }
4903 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
4904
4905}
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915
4916
4917int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
4918{
4919 flag aSign;
4920 int32_t aExp, shiftCount;
4921 uint64_t aSig;
4922 int64_t z;
4923
4924 aSig = extractFloatx80Frac( a );
4925 aExp = extractFloatx80Exp( a );
4926 aSign = extractFloatx80Sign( a );
4927 shiftCount = aExp - 0x403E;
4928 if ( 0 <= shiftCount ) {
4929 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4930 if ( ( a.high != 0xC03E ) || aSig ) {
4931 float_raise(float_flag_invalid, status);
4932 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4933 return LIT64( 0x7FFFFFFFFFFFFFFF );
4934 }
4935 }
4936 return (int64_t) LIT64( 0x8000000000000000 );
4937 }
4938 else if ( aExp < 0x3FFF ) {
4939 if (aExp | aSig) {
4940 status->float_exception_flags |= float_flag_inexact;
4941 }
4942 return 0;
4943 }
4944 z = aSig>>( - shiftCount );
4945 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
4946 status->float_exception_flags |= float_flag_inexact;
4947 }
4948 if ( aSign ) z = - z;
4949 return z;
4950
4951}
4952
4953
4954
4955
4956
4957
4958
4959
4960float32 floatx80_to_float32(floatx80 a, float_status *status)
4961{
4962 flag aSign;
4963 int32_t aExp;
4964 uint64_t aSig;
4965
4966 aSig = extractFloatx80Frac( a );
4967 aExp = extractFloatx80Exp( a );
4968 aSign = extractFloatx80Sign( a );
4969 if ( aExp == 0x7FFF ) {
4970 if ( (uint64_t) ( aSig<<1 ) ) {
4971 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
4972 }
4973 return packFloat32( aSign, 0xFF, 0 );
4974 }
4975 shift64RightJamming( aSig, 33, &aSig );
4976 if ( aExp || aSig ) aExp -= 0x3F81;
4977 return roundAndPackFloat32(aSign, aExp, aSig, status);
4978
4979}
4980
4981
4982
4983
4984
4985
4986
4987
4988float64 floatx80_to_float64(floatx80 a, float_status *status)
4989{
4990 flag aSign;
4991 int32_t aExp;
4992 uint64_t aSig, zSig;
4993
4994 aSig = extractFloatx80Frac( a );
4995 aExp = extractFloatx80Exp( a );
4996 aSign = extractFloatx80Sign( a );
4997 if ( aExp == 0x7FFF ) {
4998 if ( (uint64_t) ( aSig<<1 ) ) {
4999 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
5000 }
5001 return packFloat64( aSign, 0x7FF, 0 );
5002 }
5003 shift64RightJamming( aSig, 1, &zSig );
5004 if ( aExp || aSig ) aExp -= 0x3C01;
5005 return roundAndPackFloat64(aSign, aExp, zSig, status);
5006
5007}
5008
5009
5010
5011
5012
5013
5014
5015
5016float128 floatx80_to_float128(floatx80 a, float_status *status)
5017{
5018 flag aSign;
5019 int aExp;
5020 uint64_t aSig, zSig0, zSig1;
5021
5022 aSig = extractFloatx80Frac( a );
5023 aExp = extractFloatx80Exp( a );
5024 aSign = extractFloatx80Sign( a );
5025 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5026 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
5027 }
5028 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5029 return packFloat128( aSign, aExp, zSig0, zSig1 );
5030
5031}
5032
5033
5034
5035
5036
5037
5038
5039
5040floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5041{
5042 flag aSign;
5043 int32_t aExp;
5044 uint64_t lastBitMask, roundBitsMask;
5045 floatx80 z;
5046
5047 aExp = extractFloatx80Exp( a );
5048 if ( 0x403E <= aExp ) {
5049 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5050 return propagateFloatx80NaN(a, a, status);
5051 }
5052 return a;
5053 }
5054 if ( aExp < 0x3FFF ) {
5055 if ( ( aExp == 0 )
5056 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
5057 return a;
5058 }
5059 status->float_exception_flags |= float_flag_inexact;
5060 aSign = extractFloatx80Sign( a );
5061 switch (status->float_rounding_mode) {
5062 case float_round_nearest_even:
5063 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5064 ) {
5065 return
5066 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
5067 }
5068 break;
5069 case float_round_ties_away:
5070 if (aExp == 0x3FFE) {
5071 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
5072 }
5073 break;
5074 case float_round_down:
5075 return
5076 aSign ?
5077 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
5078 : packFloatx80( 0, 0, 0 );
5079 case float_round_up:
5080 return
5081 aSign ? packFloatx80( 1, 0, 0 )
5082 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
5083 }
5084 return packFloatx80( aSign, 0, 0 );
5085 }
5086 lastBitMask = 1;
5087 lastBitMask <<= 0x403E - aExp;
5088 roundBitsMask = lastBitMask - 1;
5089 z = a;
5090 switch (status->float_rounding_mode) {
5091 case float_round_nearest_even:
5092 z.low += lastBitMask>>1;
5093 if ((z.low & roundBitsMask) == 0) {
5094 z.low &= ~lastBitMask;
5095 }
5096 break;
5097 case float_round_ties_away:
5098 z.low += lastBitMask >> 1;
5099 break;
5100 case float_round_to_zero:
5101 break;
5102 case float_round_up:
5103 if (!extractFloatx80Sign(z)) {
5104 z.low += roundBitsMask;
5105 }
5106 break;
5107 case float_round_down:
5108 if (extractFloatx80Sign(z)) {
5109 z.low += roundBitsMask;
5110 }
5111 break;
5112 default:
5113 abort();
5114 }
5115 z.low &= ~ roundBitsMask;
5116 if ( z.low == 0 ) {
5117 ++z.high;
5118 z.low = LIT64( 0x8000000000000000 );
5119 }
5120 if (z.low != a.low) {
5121 status->float_exception_flags |= float_flag_inexact;
5122 }
5123 return z;
5124
5125}
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5136 float_status *status)
5137{
5138 int32_t aExp, bExp, zExp;
5139 uint64_t aSig, bSig, zSig0, zSig1;
5140 int32_t expDiff;
5141
5142 aSig = extractFloatx80Frac( a );
5143 aExp = extractFloatx80Exp( a );
5144 bSig = extractFloatx80Frac( b );
5145 bExp = extractFloatx80Exp( b );
5146 expDiff = aExp - bExp;
5147 if ( 0 < expDiff ) {
5148 if ( aExp == 0x7FFF ) {
5149 if ((uint64_t)(aSig << 1)) {
5150 return propagateFloatx80NaN(a, b, status);
5151 }
5152 return a;
5153 }
5154 if ( bExp == 0 ) --expDiff;
5155 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5156 zExp = aExp;
5157 }
5158 else if ( expDiff < 0 ) {
5159 if ( bExp == 0x7FFF ) {
5160 if ((uint64_t)(bSig << 1)) {
5161 return propagateFloatx80NaN(a, b, status);
5162 }
5163 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5164 }
5165 if ( aExp == 0 ) ++expDiff;
5166 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5167 zExp = bExp;
5168 }
5169 else {
5170 if ( aExp == 0x7FFF ) {
5171 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
5172 return propagateFloatx80NaN(a, b, status);
5173 }
5174 return a;
5175 }
5176 zSig1 = 0;
5177 zSig0 = aSig + bSig;
5178 if ( aExp == 0 ) {
5179 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5180 goto roundAndPack;
5181 }
5182 zExp = aExp;
5183 goto shiftRight1;
5184 }
5185 zSig0 = aSig + bSig;
5186 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
5187 shiftRight1:
5188 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
5189 zSig0 |= LIT64( 0x8000000000000000 );
5190 ++zExp;
5191 roundAndPack:
5192 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5193 zSign, zExp, zSig0, zSig1, status);
5194}
5195
5196
5197
5198
5199
5200
5201
5202
5203
5204static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5205 float_status *status)
5206{
5207 int32_t aExp, bExp, zExp;
5208 uint64_t aSig, bSig, zSig0, zSig1;
5209 int32_t expDiff;
5210 floatx80 z;
5211
5212 aSig = extractFloatx80Frac( a );
5213 aExp = extractFloatx80Exp( a );
5214 bSig = extractFloatx80Frac( b );
5215 bExp = extractFloatx80Exp( b );
5216 expDiff = aExp - bExp;
5217 if ( 0 < expDiff ) goto aExpBigger;
5218 if ( expDiff < 0 ) goto bExpBigger;
5219 if ( aExp == 0x7FFF ) {
5220 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
5221 return propagateFloatx80NaN(a, b, status);
5222 }
5223 float_raise(float_flag_invalid, status);
5224 z.low = floatx80_default_nan_low;
5225 z.high = floatx80_default_nan_high;
5226 return z;
5227 }
5228 if ( aExp == 0 ) {
5229 aExp = 1;
5230 bExp = 1;
5231 }
5232 zSig1 = 0;
5233 if ( bSig < aSig ) goto aBigger;
5234 if ( aSig < bSig ) goto bBigger;
5235 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
5236 bExpBigger:
5237 if ( bExp == 0x7FFF ) {
5238 if ((uint64_t)(bSig << 1)) {
5239 return propagateFloatx80NaN(a, b, status);
5240 }
5241 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
5242 }
5243 if ( aExp == 0 ) ++expDiff;
5244 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5245 bBigger:
5246 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5247 zExp = bExp;
5248 zSign ^= 1;
5249 goto normalizeRoundAndPack;
5250 aExpBigger:
5251 if ( aExp == 0x7FFF ) {
5252 if ((uint64_t)(aSig << 1)) {
5253 return propagateFloatx80NaN(a, b, status);
5254 }
5255 return a;
5256 }
5257 if ( bExp == 0 ) --expDiff;
5258 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5259 aBigger:
5260 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5261 zExp = aExp;
5262 normalizeRoundAndPack:
5263 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
5264 zSign, zExp, zSig0, zSig1, status);
5265}
5266
5267
5268
5269
5270
5271
5272
5273floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
5274{
5275 flag aSign, bSign;
5276
5277 aSign = extractFloatx80Sign( a );
5278 bSign = extractFloatx80Sign( b );
5279 if ( aSign == bSign ) {
5280 return addFloatx80Sigs(a, b, aSign, status);
5281 }
5282 else {
5283 return subFloatx80Sigs(a, b, aSign, status);
5284 }
5285
5286}
5287
5288
5289
5290
5291
5292
5293
5294floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
5295{
5296 flag aSign, bSign;
5297
5298 aSign = extractFloatx80Sign( a );
5299 bSign = extractFloatx80Sign( b );
5300 if ( aSign == bSign ) {
5301 return subFloatx80Sigs(a, b, aSign, status);
5302 }
5303 else {
5304 return addFloatx80Sigs(a, b, aSign, status);
5305 }
5306
5307}
5308
5309
5310
5311
5312
5313
5314
5315floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
5316{
5317 flag aSign, bSign, zSign;
5318 int32_t aExp, bExp, zExp;
5319 uint64_t aSig, bSig, zSig0, zSig1;
5320 floatx80 z;
5321
5322 aSig = extractFloatx80Frac( a );
5323 aExp = extractFloatx80Exp( a );
5324 aSign = extractFloatx80Sign( a );
5325 bSig = extractFloatx80Frac( b );
5326 bExp = extractFloatx80Exp( b );
5327 bSign = extractFloatx80Sign( b );
5328 zSign = aSign ^ bSign;
5329 if ( aExp == 0x7FFF ) {
5330 if ( (uint64_t) ( aSig<<1 )
5331 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
5332 return propagateFloatx80NaN(a, b, status);
5333 }
5334 if ( ( bExp | bSig ) == 0 ) goto invalid;
5335 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5336 }
5337 if ( bExp == 0x7FFF ) {
5338 if ((uint64_t)(bSig << 1)) {
5339 return propagateFloatx80NaN(a, b, status);
5340 }
5341 if ( ( aExp | aSig ) == 0 ) {
5342 invalid:
5343 float_raise(float_flag_invalid, status);
5344 z.low = floatx80_default_nan_low;
5345 z.high = floatx80_default_nan_high;
5346 return z;
5347 }
5348 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5349 }
5350 if ( aExp == 0 ) {
5351 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5352 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5353 }
5354 if ( bExp == 0 ) {
5355 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5356 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5357 }
5358 zExp = aExp + bExp - 0x3FFE;
5359 mul64To128( aSig, bSig, &zSig0, &zSig1 );
5360 if ( 0 < (int64_t) zSig0 ) {
5361 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5362 --zExp;
5363 }
5364 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5365 zSign, zExp, zSig0, zSig1, status);
5366}
5367
5368
5369
5370
5371
5372
5373
5374floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
5375{
5376 flag aSign, bSign, zSign;
5377 int32_t aExp, bExp, zExp;
5378 uint64_t aSig, bSig, zSig0, zSig1;
5379 uint64_t rem0, rem1, rem2, term0, term1, term2;
5380 floatx80 z;
5381
5382 aSig = extractFloatx80Frac( a );
5383 aExp = extractFloatx80Exp( a );
5384 aSign = extractFloatx80Sign( a );
5385 bSig = extractFloatx80Frac( b );
5386 bExp = extractFloatx80Exp( b );
5387 bSign = extractFloatx80Sign( b );
5388 zSign = aSign ^ bSign;
5389 if ( aExp == 0x7FFF ) {
5390 if ((uint64_t)(aSig << 1)) {
5391 return propagateFloatx80NaN(a, b, status);
5392 }
5393 if ( bExp == 0x7FFF ) {
5394 if ((uint64_t)(bSig << 1)) {
5395 return propagateFloatx80NaN(a, b, status);
5396 }
5397 goto invalid;
5398 }
5399 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5400 }
5401 if ( bExp == 0x7FFF ) {
5402 if ((uint64_t)(bSig << 1)) {
5403 return propagateFloatx80NaN(a, b, status);
5404 }
5405 return packFloatx80( zSign, 0, 0 );
5406 }
5407 if ( bExp == 0 ) {
5408 if ( bSig == 0 ) {
5409 if ( ( aExp | aSig ) == 0 ) {
5410 invalid:
5411 float_raise(float_flag_invalid, status);
5412 z.low = floatx80_default_nan_low;
5413 z.high = floatx80_default_nan_high;
5414 return z;
5415 }
5416 float_raise(float_flag_divbyzero, status);
5417 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5418 }
5419 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5420 }
5421 if ( aExp == 0 ) {
5422 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5423 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5424 }
5425 zExp = aExp - bExp + 0x3FFE;
5426 rem1 = 0;
5427 if ( bSig <= aSig ) {
5428 shift128Right( aSig, 0, 1, &aSig, &rem1 );
5429 ++zExp;
5430 }
5431 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
5432 mul64To128( bSig, zSig0, &term0, &term1 );
5433 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
5434 while ( (int64_t) rem0 < 0 ) {
5435 --zSig0;
5436 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
5437 }
5438 zSig1 = estimateDiv128To64( rem1, 0, bSig );
5439 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
5440 mul64To128( bSig, zSig1, &term1, &term2 );
5441 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5442 while ( (int64_t) rem1 < 0 ) {
5443 --zSig1;
5444 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
5445 }
5446 zSig1 |= ( ( rem1 | rem2 ) != 0 );
5447 }
5448 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5449 zSign, zExp, zSig0, zSig1, status);
5450}
5451
5452
5453
5454
5455
5456
5457
5458floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
5459{
5460 flag aSign, zSign;
5461 int32_t aExp, bExp, expDiff;
5462 uint64_t aSig0, aSig1, bSig;
5463 uint64_t q, term0, term1, alternateASig0, alternateASig1;
5464 floatx80 z;
5465
5466 aSig0 = extractFloatx80Frac( a );
5467 aExp = extractFloatx80Exp( a );
5468 aSign = extractFloatx80Sign( a );
5469 bSig = extractFloatx80Frac( b );
5470 bExp = extractFloatx80Exp( b );
5471 if ( aExp == 0x7FFF ) {
5472 if ( (uint64_t) ( aSig0<<1 )
5473 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
5474 return propagateFloatx80NaN(a, b, status);
5475 }
5476 goto invalid;
5477 }
5478 if ( bExp == 0x7FFF ) {
5479 if ((uint64_t)(bSig << 1)) {
5480 return propagateFloatx80NaN(a, b, status);
5481 }
5482 return a;
5483 }
5484 if ( bExp == 0 ) {
5485 if ( bSig == 0 ) {
5486 invalid:
5487 float_raise(float_flag_invalid, status);
5488 z.low = floatx80_default_nan_low;
5489 z.high = floatx80_default_nan_high;
5490 return z;
5491 }
5492 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5493 }
5494 if ( aExp == 0 ) {
5495 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
5496 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5497 }
5498 bSig |= LIT64( 0x8000000000000000 );
5499 zSign = aSign;
5500 expDiff = aExp - bExp;
5501 aSig1 = 0;
5502 if ( expDiff < 0 ) {
5503 if ( expDiff < -1 ) return a;
5504 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5505 expDiff = 0;
5506 }
5507 q = ( bSig <= aSig0 );
5508 if ( q ) aSig0 -= bSig;
5509 expDiff -= 64;
5510 while ( 0 < expDiff ) {
5511 q = estimateDiv128To64( aSig0, aSig1, bSig );
5512 q = ( 2 < q ) ? q - 2 : 0;
5513 mul64To128( bSig, q, &term0, &term1 );
5514 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5515 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5516 expDiff -= 62;
5517 }
5518 expDiff += 64;
5519 if ( 0 < expDiff ) {
5520 q = estimateDiv128To64( aSig0, aSig1, bSig );
5521 q = ( 2 < q ) ? q - 2 : 0;
5522 q >>= 64 - expDiff;
5523 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5524 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5525 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5526 while ( le128( term0, term1, aSig0, aSig1 ) ) {
5527 ++q;
5528 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5529 }
5530 }
5531 else {
5532 term1 = 0;
5533 term0 = bSig;
5534 }
5535 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
5536 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
5537 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
5538 && ( q & 1 ) )
5539 ) {
5540 aSig0 = alternateASig0;
5541 aSig1 = alternateASig1;
5542 zSign = ! zSign;
5543 }
5544 return
5545 normalizeRoundAndPackFloatx80(
5546 80, zSign, bExp + expDiff, aSig0, aSig1, status);
5547
5548}
5549
5550
5551
5552
5553
5554
5555
5556floatx80 floatx80_sqrt(floatx80 a, float_status *status)
5557{
5558 flag aSign;
5559 int32_t aExp, zExp;
5560 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5561 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5562 floatx80 z;
5563
5564 aSig0 = extractFloatx80Frac( a );
5565 aExp = extractFloatx80Exp( a );
5566 aSign = extractFloatx80Sign( a );
5567 if ( aExp == 0x7FFF ) {
5568 if ((uint64_t)(aSig0 << 1)) {
5569 return propagateFloatx80NaN(a, a, status);
5570 }
5571 if ( ! aSign ) return a;
5572 goto invalid;
5573 }
5574 if ( aSign ) {
5575 if ( ( aExp | aSig0 ) == 0 ) return a;
5576 invalid:
5577 float_raise(float_flag_invalid, status);
5578 z.low = floatx80_default_nan_low;
5579 z.high = floatx80_default_nan_high;
5580 return z;
5581 }
5582 if ( aExp == 0 ) {
5583 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5584 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5585 }
5586 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5587 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5588 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5589 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5590 doubleZSig0 = zSig0<<1;
5591 mul64To128( zSig0, zSig0, &term0, &term1 );
5592 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5593 while ( (int64_t) rem0 < 0 ) {
5594 --zSig0;
5595 doubleZSig0 -= 2;
5596 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5597 }
5598 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5599 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5600 if ( zSig1 == 0 ) zSig1 = 1;
5601 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5602 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5603 mul64To128( zSig1, zSig1, &term2, &term3 );
5604 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5605 while ( (int64_t) rem1 < 0 ) {
5606 --zSig1;
5607 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5608 term3 |= 1;
5609 term2 |= doubleZSig0;
5610 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5611 }
5612 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5613 }
5614 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5615 zSig0 |= doubleZSig0;
5616 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5617 0, zExp, zSig0, zSig1, status);
5618}
5619
5620
5621
5622
5623
5624
5625
5626
5627int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
5628{
5629
5630 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5631 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5632 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5633 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5634 ) {
5635 float_raise(float_flag_invalid, status);
5636 return 0;
5637 }
5638 return
5639 ( a.low == b.low )
5640 && ( ( a.high == b.high )
5641 || ( ( a.low == 0 )
5642 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5643 );
5644
5645}
5646
5647
5648
5649
5650
5651
5652
5653
5654
5655int floatx80_le(floatx80 a, floatx80 b, float_status *status)
5656{
5657 flag aSign, bSign;
5658
5659 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5660 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5661 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5662 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5663 ) {
5664 float_raise(float_flag_invalid, status);
5665 return 0;
5666 }
5667 aSign = extractFloatx80Sign( a );
5668 bSign = extractFloatx80Sign( b );
5669 if ( aSign != bSign ) {
5670 return
5671 aSign
5672 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5673 == 0 );
5674 }
5675 return
5676 aSign ? le128( b.high, b.low, a.high, a.low )
5677 : le128( a.high, a.low, b.high, b.low );
5678
5679}
5680
5681
5682
5683
5684
5685
5686
5687
5688int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
5689{
5690 flag aSign, bSign;
5691
5692 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5693 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5694 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5695 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5696 ) {
5697 float_raise(float_flag_invalid, status);
5698 return 0;
5699 }
5700 aSign = extractFloatx80Sign( a );
5701 bSign = extractFloatx80Sign( b );
5702 if ( aSign != bSign ) {
5703 return
5704 aSign
5705 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5706 != 0 );
5707 }
5708 return
5709 aSign ? lt128( b.high, b.low, a.high, a.low )
5710 : lt128( a.high, a.low, b.high, b.low );
5711
5712}
5713
5714
5715
5716
5717
5718
5719
5720int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
5721{
5722 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5723 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5724 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5725 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5726 ) {
5727 float_raise(float_flag_invalid, status);
5728 return 1;
5729 }
5730 return 0;
5731}
5732
5733
5734
5735
5736
5737
5738
5739
5740int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
5741{
5742
5743 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5744 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5745 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5746 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5747 ) {
5748 if ( floatx80_is_signaling_nan( a )
5749 || floatx80_is_signaling_nan( b ) ) {
5750 float_raise(float_flag_invalid, status);
5751 }
5752 return 0;
5753 }
5754 return
5755 ( a.low == b.low )
5756 && ( ( a.high == b.high )
5757 || ( ( a.low == 0 )
5758 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5759 );
5760
5761}
5762
5763
5764
5765
5766
5767
5768
5769
5770int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
5771{
5772 flag aSign, bSign;
5773
5774 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5775 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5776 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5777 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5778 ) {
5779 if ( floatx80_is_signaling_nan( a )
5780 || floatx80_is_signaling_nan( b ) ) {
5781 float_raise(float_flag_invalid, status);
5782 }
5783 return 0;
5784 }
5785 aSign = extractFloatx80Sign( a );
5786 bSign = extractFloatx80Sign( b );
5787 if ( aSign != bSign ) {
5788 return
5789 aSign
5790 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5791 == 0 );
5792 }
5793 return
5794 aSign ? le128( b.high, b.low, a.high, a.low )
5795 : le128( a.high, a.low, b.high, b.low );
5796
5797}
5798
5799
5800
5801
5802
5803
5804
5805
5806int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
5807{
5808 flag aSign, bSign;
5809
5810 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5811 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5812 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5813 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5814 ) {
5815 if ( floatx80_is_signaling_nan( a )
5816 || floatx80_is_signaling_nan( b ) ) {
5817 float_raise(float_flag_invalid, status);
5818 }
5819 return 0;
5820 }
5821 aSign = extractFloatx80Sign( a );
5822 bSign = extractFloatx80Sign( b );
5823 if ( aSign != bSign ) {
5824 return
5825 aSign
5826 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5827 != 0 );
5828 }
5829 return
5830 aSign ? lt128( b.high, b.low, a.high, a.low )
5831 : lt128( a.high, a.low, b.high, b.low );
5832
5833}
5834
5835
5836
5837
5838
5839
5840
5841int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
5842{
5843 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5844 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5845 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5846 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5847 ) {
5848 if ( floatx80_is_signaling_nan( a )
5849 || floatx80_is_signaling_nan( b ) ) {
5850 float_raise(float_flag_invalid, status);
5851 }
5852 return 1;
5853 }
5854 return 0;
5855}
5856
5857
5858
5859
5860
5861
5862
5863
5864
5865
5866
5867int32_t float128_to_int32(float128 a, float_status *status)
5868{
5869 flag aSign;
5870 int32_t aExp, shiftCount;
5871 uint64_t aSig0, aSig1;
5872
5873 aSig1 = extractFloat128Frac1( a );
5874 aSig0 = extractFloat128Frac0( a );
5875 aExp = extractFloat128Exp( a );
5876 aSign = extractFloat128Sign( a );
5877 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5878 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5879 aSig0 |= ( aSig1 != 0 );
5880 shiftCount = 0x4028 - aExp;
5881 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
5882 return roundAndPackInt32(aSign, aSig0, status);
5883
5884}
5885
5886
5887
5888
5889
5890
5891
5892
5893
5894
5895
5896int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
5897{
5898 flag aSign;
5899 int32_t aExp, shiftCount;
5900 uint64_t aSig0, aSig1, savedASig;
5901 int32_t z;
5902
5903 aSig1 = extractFloat128Frac1( a );
5904 aSig0 = extractFloat128Frac0( a );
5905 aExp = extractFloat128Exp( a );
5906 aSign = extractFloat128Sign( a );
5907 aSig0 |= ( aSig1 != 0 );
5908 if ( 0x401E < aExp ) {
5909 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5910 goto invalid;
5911 }
5912 else if ( aExp < 0x3FFF ) {
5913 if (aExp || aSig0) {
5914 status->float_exception_flags |= float_flag_inexact;
5915 }
5916 return 0;
5917 }
5918 aSig0 |= LIT64( 0x0001000000000000 );
5919 shiftCount = 0x402F - aExp;
5920 savedASig = aSig0;
5921 aSig0 >>= shiftCount;
5922 z = aSig0;
5923 if ( aSign ) z = - z;
5924 if ( ( z < 0 ) ^ aSign ) {
5925 invalid:
5926 float_raise(float_flag_invalid, status);
5927 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5928 }
5929 if ( ( aSig0<<shiftCount ) != savedASig ) {
5930 status->float_exception_flags |= float_flag_inexact;
5931 }
5932 return z;
5933
5934}
5935
5936
5937
5938
5939
5940
5941
5942
5943
5944
5945
5946int64_t float128_to_int64(float128 a, float_status *status)
5947{
5948 flag aSign;
5949 int32_t aExp, shiftCount;
5950 uint64_t aSig0, aSig1;
5951
5952 aSig1 = extractFloat128Frac1( a );
5953 aSig0 = extractFloat128Frac0( a );
5954 aExp = extractFloat128Exp( a );
5955 aSign = extractFloat128Sign( a );
5956 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5957 shiftCount = 0x402F - aExp;
5958 if ( shiftCount <= 0 ) {
5959 if ( 0x403E < aExp ) {
5960 float_raise(float_flag_invalid, status);
5961 if ( ! aSign
5962 || ( ( aExp == 0x7FFF )
5963 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5964 )
5965 ) {
5966 return LIT64( 0x7FFFFFFFFFFFFFFF );
5967 }
5968 return (int64_t) LIT64( 0x8000000000000000 );
5969 }
5970 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5971 }
5972 else {
5973 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5974 }
5975 return roundAndPackInt64(aSign, aSig0, aSig1, status);
5976
5977}
5978
5979
5980
5981
5982
5983
5984
5985
5986
5987
5988
5989int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
5990{
5991 flag aSign;
5992 int32_t aExp, shiftCount;
5993 uint64_t aSig0, aSig1;
5994 int64_t z;
5995
5996 aSig1 = extractFloat128Frac1( a );
5997 aSig0 = extractFloat128Frac0( a );
5998 aExp = extractFloat128Exp( a );
5999 aSign = extractFloat128Sign( a );
6000 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
6001 shiftCount = aExp - 0x402F;
6002 if ( 0 < shiftCount ) {
6003 if ( 0x403E <= aExp ) {
6004 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
6005 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
6006 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
6007 if (aSig1) {
6008 status->float_exception_flags |= float_flag_inexact;
6009 }
6010 }
6011 else {
6012 float_raise(float_flag_invalid, status);
6013 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
6014 return LIT64( 0x7FFFFFFFFFFFFFFF );
6015 }
6016 }
6017 return (int64_t) LIT64( 0x8000000000000000 );
6018 }
6019 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
6020 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
6021 status->float_exception_flags |= float_flag_inexact;
6022 }
6023 }
6024 else {
6025 if ( aExp < 0x3FFF ) {
6026 if ( aExp | aSig0 | aSig1 ) {
6027 status->float_exception_flags |= float_flag_inexact;
6028 }
6029 return 0;
6030 }
6031 z = aSig0>>( - shiftCount );
6032 if ( aSig1
6033 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
6034 status->float_exception_flags |= float_flag_inexact;
6035 }
6036 }
6037 if ( aSign ) z = - z;
6038 return z;
6039
6040}
6041
6042
6043
6044
6045
6046
6047
6048
6049float32 float128_to_float32(float128 a, float_status *status)
6050{
6051 flag aSign;
6052 int32_t aExp;
6053 uint64_t aSig0, aSig1;
6054 uint32_t zSig;
6055
6056 aSig1 = extractFloat128Frac1( a );
6057 aSig0 = extractFloat128Frac0( a );
6058 aExp = extractFloat128Exp( a );
6059 aSign = extractFloat128Sign( a );
6060 if ( aExp == 0x7FFF ) {
6061 if ( aSig0 | aSig1 ) {
6062 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
6063 }
6064 return packFloat32( aSign, 0xFF, 0 );
6065 }
6066 aSig0 |= ( aSig1 != 0 );
6067 shift64RightJamming( aSig0, 18, &aSig0 );
6068 zSig = aSig0;
6069 if ( aExp || zSig ) {
6070 zSig |= 0x40000000;
6071 aExp -= 0x3F81;
6072 }
6073 return roundAndPackFloat32(aSign, aExp, zSig, status);
6074
6075}
6076
6077
6078
6079
6080
6081
6082
6083
6084float64 float128_to_float64(float128 a, float_status *status)
6085{
6086 flag aSign;
6087 int32_t aExp;
6088 uint64_t aSig0, aSig1;
6089
6090 aSig1 = extractFloat128Frac1( a );
6091 aSig0 = extractFloat128Frac0( a );
6092 aExp = extractFloat128Exp( a );
6093 aSign = extractFloat128Sign( a );
6094 if ( aExp == 0x7FFF ) {
6095 if ( aSig0 | aSig1 ) {
6096 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
6097 }
6098 return packFloat64( aSign, 0x7FF, 0 );
6099 }
6100 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6101 aSig0 |= ( aSig1 != 0 );
6102 if ( aExp || aSig0 ) {
6103 aSig0 |= LIT64( 0x4000000000000000 );
6104 aExp -= 0x3C01;
6105 }
6106 return roundAndPackFloat64(aSign, aExp, aSig0, status);
6107
6108}
6109
6110
6111
6112
6113
6114
6115
6116
6117floatx80 float128_to_floatx80(float128 a, float_status *status)
6118{
6119 flag aSign;
6120 int32_t aExp;
6121 uint64_t aSig0, aSig1;
6122
6123 aSig1 = extractFloat128Frac1( a );
6124 aSig0 = extractFloat128Frac0( a );
6125 aExp = extractFloat128Exp( a );
6126 aSign = extractFloat128Sign( a );
6127 if ( aExp == 0x7FFF ) {
6128 if ( aSig0 | aSig1 ) {
6129 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
6130 }
6131 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
6132 }
6133 if ( aExp == 0 ) {
6134 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6135 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6136 }
6137 else {
6138 aSig0 |= LIT64( 0x0001000000000000 );
6139 }
6140 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6141 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6142
6143}
6144
6145
6146
6147
6148
6149
6150
6151
6152float128 float128_round_to_int(float128 a, float_status *status)
6153{
6154 flag aSign;
6155 int32_t aExp;
6156 uint64_t lastBitMask, roundBitsMask;
6157 float128 z;
6158
6159 aExp = extractFloat128Exp( a );
6160 if ( 0x402F <= aExp ) {
6161 if ( 0x406F <= aExp ) {
6162 if ( ( aExp == 0x7FFF )
6163 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6164 ) {
6165 return propagateFloat128NaN(a, a, status);
6166 }
6167 return a;
6168 }
6169 lastBitMask = 1;
6170 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6171 roundBitsMask = lastBitMask - 1;
6172 z = a;
6173 switch (status->float_rounding_mode) {
6174 case float_round_nearest_even:
6175 if ( lastBitMask ) {
6176 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6177 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6178 }
6179 else {
6180 if ( (int64_t) z.low < 0 ) {
6181 ++z.high;
6182 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
6183 }
6184 }
6185 break;
6186 case float_round_ties_away:
6187 if (lastBitMask) {
6188 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6189 } else {
6190 if ((int64_t) z.low < 0) {
6191 ++z.high;
6192 }
6193 }
6194 break;
6195 case float_round_to_zero:
6196 break;
6197 case float_round_up:
6198 if (!extractFloat128Sign(z)) {
6199 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6200 }
6201 break;
6202 case float_round_down:
6203 if (extractFloat128Sign(z)) {
6204 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6205 }
6206 break;
6207 default:
6208 abort();
6209 }
6210 z.low &= ~ roundBitsMask;
6211 }
6212 else {
6213 if ( aExp < 0x3FFF ) {
6214 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
6215 status->float_exception_flags |= float_flag_inexact;
6216 aSign = extractFloat128Sign( a );
6217 switch (status->float_rounding_mode) {
6218 case float_round_nearest_even:
6219 if ( ( aExp == 0x3FFE )
6220 && ( extractFloat128Frac0( a )
6221 | extractFloat128Frac1( a ) )
6222 ) {
6223 return packFloat128( aSign, 0x3FFF, 0, 0 );
6224 }
6225 break;
6226 case float_round_ties_away:
6227 if (aExp == 0x3FFE) {
6228 return packFloat128(aSign, 0x3FFF, 0, 0);
6229 }
6230 break;
6231 case float_round_down:
6232 return
6233 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6234 : packFloat128( 0, 0, 0, 0 );
6235 case float_round_up:
6236 return
6237 aSign ? packFloat128( 1, 0, 0, 0 )
6238 : packFloat128( 0, 0x3FFF, 0, 0 );
6239 }
6240 return packFloat128( aSign, 0, 0, 0 );
6241 }
6242 lastBitMask = 1;
6243 lastBitMask <<= 0x402F - aExp;
6244 roundBitsMask = lastBitMask - 1;
6245 z.low = 0;
6246 z.high = a.high;
6247 switch (status->float_rounding_mode) {
6248 case float_round_nearest_even:
6249 z.high += lastBitMask>>1;
6250 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6251 z.high &= ~ lastBitMask;
6252 }
6253 break;
6254 case float_round_ties_away:
6255 z.high += lastBitMask>>1;
6256 break;
6257 case float_round_to_zero:
6258 break;
6259 case float_round_up:
6260 if (!extractFloat128Sign(z)) {
6261 z.high |= ( a.low != 0 );
6262 z.high += roundBitsMask;
6263 }
6264 break;
6265 case float_round_down:
6266 if (extractFloat128Sign(z)) {
6267 z.high |= (a.low != 0);
6268 z.high += roundBitsMask;
6269 }
6270 break;
6271 default:
6272 abort();
6273 }
6274 z.high &= ~ roundBitsMask;
6275 }
6276 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
6277 status->float_exception_flags |= float_flag_inexact;
6278 }
6279 return z;
6280
6281}
6282
6283
6284
6285
6286
6287
6288
6289
6290
6291static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
6292 float_status *status)
6293{
6294 int32_t aExp, bExp, zExp;
6295 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6296 int32_t expDiff;
6297
6298 aSig1 = extractFloat128Frac1( a );
6299 aSig0 = extractFloat128Frac0( a );
6300 aExp = extractFloat128Exp( a );
6301 bSig1 = extractFloat128Frac1( b );
6302 bSig0 = extractFloat128Frac0( b );
6303 bExp = extractFloat128Exp( b );
6304 expDiff = aExp - bExp;
6305 if ( 0 < expDiff ) {
6306 if ( aExp == 0x7FFF ) {
6307 if (aSig0 | aSig1) {
6308 return propagateFloat128NaN(a, b, status);
6309 }
6310 return a;
6311 }
6312 if ( bExp == 0 ) {
6313 --expDiff;
6314 }
6315 else {
6316 bSig0 |= LIT64( 0x0001000000000000 );
6317 }
6318 shift128ExtraRightJamming(
6319 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6320 zExp = aExp;
6321 }
6322 else if ( expDiff < 0 ) {
6323 if ( bExp == 0x7FFF ) {
6324 if (bSig0 | bSig1) {
6325 return propagateFloat128NaN(a, b, status);
6326 }
6327 return packFloat128( zSign, 0x7FFF, 0, 0 );
6328 }
6329 if ( aExp == 0 ) {
6330 ++expDiff;
6331 }
6332 else {
6333 aSig0 |= LIT64( 0x0001000000000000 );
6334 }
6335 shift128ExtraRightJamming(
6336 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6337 zExp = bExp;
6338 }
6339 else {
6340 if ( aExp == 0x7FFF ) {
6341 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
6342 return propagateFloat128NaN(a, b, status);
6343 }
6344 return a;
6345 }
6346 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6347 if ( aExp == 0 ) {
6348 if (status->flush_to_zero) {
6349 if (zSig0 | zSig1) {
6350 float_raise(float_flag_output_denormal, status);
6351 }
6352 return packFloat128(zSign, 0, 0, 0);
6353 }
6354 return packFloat128( zSign, 0, zSig0, zSig1 );
6355 }
6356 zSig2 = 0;
6357 zSig0 |= LIT64( 0x0002000000000000 );
6358 zExp = aExp;
6359 goto shiftRight1;
6360 }
6361 aSig0 |= LIT64( 0x0001000000000000 );
6362 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6363 --zExp;
6364 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
6365 ++zExp;
6366 shiftRight1:
6367 shift128ExtraRightJamming(
6368 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6369 roundAndPack:
6370 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6371
6372}
6373
6374
6375
6376
6377
6378
6379
6380
6381
6382static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
6383 float_status *status)
6384{
6385 int32_t aExp, bExp, zExp;
6386 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
6387 int32_t expDiff;
6388 float128 z;
6389
6390 aSig1 = extractFloat128Frac1( a );
6391 aSig0 = extractFloat128Frac0( a );
6392 aExp = extractFloat128Exp( a );
6393 bSig1 = extractFloat128Frac1( b );
6394 bSig0 = extractFloat128Frac0( b );
6395 bExp = extractFloat128Exp( b );
6396 expDiff = aExp - bExp;
6397 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6398 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
6399 if ( 0 < expDiff ) goto aExpBigger;
6400 if ( expDiff < 0 ) goto bExpBigger;
6401 if ( aExp == 0x7FFF ) {
6402 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
6403 return propagateFloat128NaN(a, b, status);
6404 }
6405 float_raise(float_flag_invalid, status);
6406 z.low = float128_default_nan_low;
6407 z.high = float128_default_nan_high;
6408 return z;
6409 }
6410 if ( aExp == 0 ) {
6411 aExp = 1;
6412 bExp = 1;
6413 }
6414 if ( bSig0 < aSig0 ) goto aBigger;
6415 if ( aSig0 < bSig0 ) goto bBigger;
6416 if ( bSig1 < aSig1 ) goto aBigger;
6417 if ( aSig1 < bSig1 ) goto bBigger;
6418 return packFloat128(status->float_rounding_mode == float_round_down,
6419 0, 0, 0);
6420 bExpBigger:
6421 if ( bExp == 0x7FFF ) {
6422 if (bSig0 | bSig1) {
6423 return propagateFloat128NaN(a, b, status);
6424 }
6425 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
6426 }
6427 if ( aExp == 0 ) {
6428 ++expDiff;
6429 }
6430 else {
6431 aSig0 |= LIT64( 0x4000000000000000 );
6432 }
6433 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6434 bSig0 |= LIT64( 0x4000000000000000 );
6435 bBigger:
6436 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
6437 zExp = bExp;
6438 zSign ^= 1;
6439 goto normalizeRoundAndPack;
6440 aExpBigger:
6441 if ( aExp == 0x7FFF ) {
6442 if (aSig0 | aSig1) {
6443 return propagateFloat128NaN(a, b, status);
6444 }
6445 return a;
6446 }
6447 if ( bExp == 0 ) {
6448 --expDiff;
6449 }
6450 else {
6451 bSig0 |= LIT64( 0x4000000000000000 );
6452 }
6453 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
6454 aSig0 |= LIT64( 0x4000000000000000 );
6455 aBigger:
6456 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6457 zExp = aExp;
6458 normalizeRoundAndPack:
6459 --zExp;
6460 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
6461 status);
6462
6463}
6464
6465
6466
6467
6468
6469
6470
6471float128 float128_add(float128 a, float128 b, float_status *status)
6472{
6473 flag aSign, bSign;
6474
6475 aSign = extractFloat128Sign( a );
6476 bSign = extractFloat128Sign( b );
6477 if ( aSign == bSign ) {
6478 return addFloat128Sigs(a, b, aSign, status);
6479 }
6480 else {
6481 return subFloat128Sigs(a, b, aSign, status);
6482 }
6483
6484}
6485
6486
6487
6488
6489
6490
6491
6492float128 float128_sub(float128 a, float128 b, float_status *status)
6493{
6494 flag aSign, bSign;
6495
6496 aSign = extractFloat128Sign( a );
6497 bSign = extractFloat128Sign( b );
6498 if ( aSign == bSign ) {
6499 return subFloat128Sigs(a, b, aSign, status);
6500 }
6501 else {
6502 return addFloat128Sigs(a, b, aSign, status);
6503 }
6504
6505}
6506
6507
6508
6509
6510
6511
6512
6513float128 float128_mul(float128 a, float128 b, float_status *status)
6514{
6515 flag aSign, bSign, zSign;
6516 int32_t aExp, bExp, zExp;
6517 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
6518 float128 z;
6519
6520 aSig1 = extractFloat128Frac1( a );
6521 aSig0 = extractFloat128Frac0( a );
6522 aExp = extractFloat128Exp( a );
6523 aSign = extractFloat128Sign( a );
6524 bSig1 = extractFloat128Frac1( b );
6525 bSig0 = extractFloat128Frac0( b );
6526 bExp = extractFloat128Exp( b );
6527 bSign = extractFloat128Sign( b );
6528 zSign = aSign ^ bSign;
6529 if ( aExp == 0x7FFF ) {
6530 if ( ( aSig0 | aSig1 )
6531 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6532 return propagateFloat128NaN(a, b, status);
6533 }
6534 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6535 return packFloat128( zSign, 0x7FFF, 0, 0 );
6536 }
6537 if ( bExp == 0x7FFF ) {
6538 if (bSig0 | bSig1) {
6539 return propagateFloat128NaN(a, b, status);
6540 }
6541 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6542 invalid:
6543 float_raise(float_flag_invalid, status);
6544 z.low = float128_default_nan_low;
6545 z.high = float128_default_nan_high;
6546 return z;
6547 }
6548 return packFloat128( zSign, 0x7FFF, 0, 0 );
6549 }
6550 if ( aExp == 0 ) {
6551 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6552 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6553 }
6554 if ( bExp == 0 ) {
6555 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6556 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6557 }
6558 zExp = aExp + bExp - 0x4000;
6559 aSig0 |= LIT64( 0x0001000000000000 );
6560 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6561 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6562 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6563 zSig2 |= ( zSig3 != 0 );
6564 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6565 shift128ExtraRightJamming(
6566 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6567 ++zExp;
6568 }
6569 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6570
6571}
6572
6573
6574
6575
6576
6577
6578
6579float128 float128_div(float128 a, float128 b, float_status *status)
6580{
6581 flag aSign, bSign, zSign;
6582 int32_t aExp, bExp, zExp;
6583 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6584 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6585 float128 z;
6586
6587 aSig1 = extractFloat128Frac1( a );
6588 aSig0 = extractFloat128Frac0( a );
6589 aExp = extractFloat128Exp( a );
6590 aSign = extractFloat128Sign( a );
6591 bSig1 = extractFloat128Frac1( b );
6592 bSig0 = extractFloat128Frac0( b );
6593 bExp = extractFloat128Exp( b );
6594 bSign = extractFloat128Sign( b );
6595 zSign = aSign ^ bSign;
6596 if ( aExp == 0x7FFF ) {
6597 if (aSig0 | aSig1) {
6598 return propagateFloat128NaN(a, b, status);
6599 }
6600 if ( bExp == 0x7FFF ) {
6601 if (bSig0 | bSig1) {
6602 return propagateFloat128NaN(a, b, status);
6603 }
6604 goto invalid;
6605 }
6606 return packFloat128( zSign, 0x7FFF, 0, 0 );
6607 }
6608 if ( bExp == 0x7FFF ) {
6609 if (bSig0 | bSig1) {
6610 return propagateFloat128NaN(a, b, status);
6611 }
6612 return packFloat128( zSign, 0, 0, 0 );
6613 }
6614 if ( bExp == 0 ) {
6615 if ( ( bSig0 | bSig1 ) == 0 ) {
6616 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6617 invalid:
6618 float_raise(float_flag_invalid, status);
6619 z.low = float128_default_nan_low;
6620 z.high = float128_default_nan_high;
6621 return z;
6622 }
6623 float_raise(float_flag_divbyzero, status);
6624 return packFloat128( zSign, 0x7FFF, 0, 0 );
6625 }
6626 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6627 }
6628 if ( aExp == 0 ) {
6629 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6630 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6631 }
6632 zExp = aExp - bExp + 0x3FFD;
6633 shortShift128Left(
6634 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6635 shortShift128Left(
6636 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6637 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6638 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6639 ++zExp;
6640 }
6641 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6642 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6643 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
6644 while ( (int64_t) rem0 < 0 ) {
6645 --zSig0;
6646 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6647 }
6648 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6649 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6650 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6651 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
6652 while ( (int64_t) rem1 < 0 ) {
6653 --zSig1;
6654 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6655 }
6656 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6657 }
6658 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
6659 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6660
6661}
6662
6663
6664
6665
6666
6667
6668
6669float128 float128_rem(float128 a, float128 b, float_status *status)
6670{
6671 flag aSign, zSign;
6672 int32_t aExp, bExp, expDiff;
6673 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6674 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6675 int64_t sigMean0;
6676 float128 z;
6677
6678 aSig1 = extractFloat128Frac1( a );
6679 aSig0 = extractFloat128Frac0( a );
6680 aExp = extractFloat128Exp( a );
6681 aSign = extractFloat128Sign( a );
6682 bSig1 = extractFloat128Frac1( b );
6683 bSig0 = extractFloat128Frac0( b );
6684 bExp = extractFloat128Exp( b );
6685 if ( aExp == 0x7FFF ) {
6686 if ( ( aSig0 | aSig1 )
6687 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6688 return propagateFloat128NaN(a, b, status);
6689 }
6690 goto invalid;
6691 }
6692 if ( bExp == 0x7FFF ) {
6693 if (bSig0 | bSig1) {
6694 return propagateFloat128NaN(a, b, status);
6695 }
6696 return a;
6697 }
6698 if ( bExp == 0 ) {
6699 if ( ( bSig0 | bSig1 ) == 0 ) {
6700 invalid:
6701 float_raise(float_flag_invalid, status);
6702 z.low = float128_default_nan_low;
6703 z.high = float128_default_nan_high;
6704 return z;
6705 }
6706 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6707 }
6708 if ( aExp == 0 ) {
6709 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6710 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6711 }
6712 expDiff = aExp - bExp;
6713 if ( expDiff < -1 ) return a;
6714 shortShift128Left(
6715 aSig0 | LIT64( 0x0001000000000000 ),
6716 aSig1,
6717 15 - ( expDiff < 0 ),
6718 &aSig0,
6719 &aSig1
6720 );
6721 shortShift128Left(
6722 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6723 q = le128( bSig0, bSig1, aSig0, aSig1 );
6724 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6725 expDiff -= 64;
6726 while ( 0 < expDiff ) {
6727 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6728 q = ( 4 < q ) ? q - 4 : 0;
6729 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6730 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6731 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6732 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6733 expDiff -= 61;
6734 }
6735 if ( -64 < expDiff ) {
6736 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6737 q = ( 4 < q ) ? q - 4 : 0;
6738 q >>= - expDiff;
6739 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6740 expDiff += 52;
6741 if ( expDiff < 0 ) {
6742 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6743 }
6744 else {
6745 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6746 }
6747 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6748 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6749 }
6750 else {
6751 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6752 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6753 }
6754 do {
6755 alternateASig0 = aSig0;
6756 alternateASig1 = aSig1;
6757 ++q;
6758 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6759 } while ( 0 <= (int64_t) aSig0 );
6760 add128(
6761 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
6762 if ( ( sigMean0 < 0 )
6763 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6764 aSig0 = alternateASig0;
6765 aSig1 = alternateASig1;
6766 }
6767 zSign = ( (int64_t) aSig0 < 0 );
6768 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
6769 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6770 status);
6771}
6772
6773
6774
6775
6776
6777
6778
6779float128 float128_sqrt(float128 a, float_status *status)
6780{
6781 flag aSign;
6782 int32_t aExp, zExp;
6783 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6784 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6785 float128 z;
6786
6787 aSig1 = extractFloat128Frac1( a );
6788 aSig0 = extractFloat128Frac0( a );
6789 aExp = extractFloat128Exp( a );
6790 aSign = extractFloat128Sign( a );
6791 if ( aExp == 0x7FFF ) {
6792 if (aSig0 | aSig1) {
6793 return propagateFloat128NaN(a, a, status);
6794 }
6795 if ( ! aSign ) return a;
6796 goto invalid;
6797 }
6798 if ( aSign ) {
6799 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6800 invalid:
6801 float_raise(float_flag_invalid, status);
6802 z.low = float128_default_nan_low;
6803 z.high = float128_default_nan_high;
6804 return z;
6805 }
6806 if ( aExp == 0 ) {
6807 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6808 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6809 }
6810 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6811 aSig0 |= LIT64( 0x0001000000000000 );
6812 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6813 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6814 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6815 doubleZSig0 = zSig0<<1;
6816 mul64To128( zSig0, zSig0, &term0, &term1 );
6817 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6818 while ( (int64_t) rem0 < 0 ) {
6819 --zSig0;
6820 doubleZSig0 -= 2;
6821 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6822 }
6823 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6824 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6825 if ( zSig1 == 0 ) zSig1 = 1;
6826 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6827 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6828 mul64To128( zSig1, zSig1, &term2, &term3 );
6829 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
6830 while ( (int64_t) rem1 < 0 ) {
6831 --zSig1;
6832 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6833 term3 |= 1;
6834 term2 |= doubleZSig0;
6835 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6836 }
6837 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6838 }
6839 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
6840 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
6841
6842}
6843
6844
6845
6846
6847
6848
6849
6850
6851int float128_eq(float128 a, float128 b, float_status *status)
6852{
6853
6854 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6855 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6856 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6857 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6858 ) {
6859 float_raise(float_flag_invalid, status);
6860 return 0;
6861 }
6862 return
6863 ( a.low == b.low )
6864 && ( ( a.high == b.high )
6865 || ( ( a.low == 0 )
6866 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
6867 );
6868
6869}
6870
6871
6872
6873
6874
6875
6876
6877
6878int float128_le(float128 a, float128 b, float_status *status)
6879{
6880 flag aSign, bSign;
6881
6882 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6883 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6884 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6885 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6886 ) {
6887 float_raise(float_flag_invalid, status);
6888 return 0;
6889 }
6890 aSign = extractFloat128Sign( a );
6891 bSign = extractFloat128Sign( b );
6892 if ( aSign != bSign ) {
6893 return
6894 aSign
6895 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6896 == 0 );
6897 }
6898 return
6899 aSign ? le128( b.high, b.low, a.high, a.low )
6900 : le128( a.high, a.low, b.high, b.low );
6901
6902}
6903
6904
6905
6906
6907
6908
6909
6910
6911int float128_lt(float128 a, float128 b, float_status *status)
6912{
6913 flag aSign, bSign;
6914
6915 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6916 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6917 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6918 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6919 ) {
6920 float_raise(float_flag_invalid, status);
6921 return 0;
6922 }
6923 aSign = extractFloat128Sign( a );
6924 bSign = extractFloat128Sign( b );
6925 if ( aSign != bSign ) {
6926 return
6927 aSign
6928 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
6929 != 0 );
6930 }
6931 return
6932 aSign ? lt128( b.high, b.low, a.high, a.low )
6933 : lt128( a.high, a.low, b.high, b.low );
6934
6935}
6936
6937
6938
6939
6940
6941
6942
6943
6944int float128_unordered(float128 a, float128 b, float_status *status)
6945{
6946 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6947 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6948 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6949 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6950 ) {
6951 float_raise(float_flag_invalid, status);
6952 return 1;
6953 }
6954 return 0;
6955}
6956
6957
6958
6959
6960
6961
6962
6963
6964int float128_eq_quiet(float128 a, float128 b, float_status *status)
6965{
6966
6967 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6968 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6969 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6970 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6971 ) {
6972 if ( float128_is_signaling_nan( a )
6973 || float128_is_signaling_nan( b ) ) {
6974 float_raise(float_flag_invalid, status);
6975 }
6976 return 0;
6977 }
6978 return
6979 ( a.low == b.low )
6980 && ( ( a.high == b.high )
6981 || ( ( a.low == 0 )
6982 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
6983 );
6984
6985}
6986
6987
6988
6989
6990
6991
6992
6993
6994int float128_le_quiet(float128 a, float128 b, float_status *status)
6995{
6996 flag aSign, bSign;
6997
6998 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6999 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7000 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7001 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7002 ) {
7003 if ( float128_is_signaling_nan( a )
7004 || float128_is_signaling_nan( b ) ) {
7005 float_raise(float_flag_invalid, status);
7006 }
7007 return 0;
7008 }
7009 aSign = extractFloat128Sign( a );
7010 bSign = extractFloat128Sign( b );
7011 if ( aSign != bSign ) {
7012 return
7013 aSign
7014 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
7015 == 0 );
7016 }
7017 return
7018 aSign ? le128( b.high, b.low, a.high, a.low )
7019 : le128( a.high, a.low, b.high, b.low );
7020
7021}
7022
7023
7024
7025
7026
7027
7028
7029
7030int float128_lt_quiet(float128 a, float128 b, float_status *status)
7031{
7032 flag aSign, bSign;
7033
7034 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7035 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7036 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7037 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7038 ) {
7039 if ( float128_is_signaling_nan( a )
7040 || float128_is_signaling_nan( b ) ) {
7041 float_raise(float_flag_invalid, status);
7042 }
7043 return 0;
7044 }
7045 aSign = extractFloat128Sign( a );
7046 bSign = extractFloat128Sign( b );
7047 if ( aSign != bSign ) {
7048 return
7049 aSign
7050 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
7051 != 0 );
7052 }
7053 return
7054 aSign ? lt128( b.high, b.low, a.high, a.low )
7055 : lt128( a.high, a.low, b.high, b.low );
7056
7057}
7058
7059
7060
7061
7062
7063
7064
7065
7066int float128_unordered_quiet(float128 a, float128 b, float_status *status)
7067{
7068 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7069 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7070 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7071 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7072 ) {
7073 if ( float128_is_signaling_nan( a )
7074 || float128_is_signaling_nan( b ) ) {
7075 float_raise(float_flag_invalid, status);
7076 }
7077 return 1;
7078 }
7079 return 0;
7080}
7081
7082
7083float32 uint32_to_float32(uint32_t a, float_status *status)
7084{
7085 return int64_to_float32(a, status);
7086}
7087
7088float64 uint32_to_float64(uint32_t a, float_status *status)
7089{
7090 return int64_to_float64(a, status);
7091}
7092
7093uint32_t float32_to_uint32(float32 a, float_status *status)
7094{
7095 int64_t v;
7096 uint32_t res;
7097 int old_exc_flags = get_float_exception_flags(status);
7098
7099 v = float32_to_int64(a, status);
7100 if (v < 0) {
7101 res = 0;
7102 } else if (v > 0xffffffff) {
7103 res = 0xffffffff;
7104 } else {
7105 return v;
7106 }
7107 set_float_exception_flags(old_exc_flags, status);
7108 float_raise(float_flag_invalid, status);
7109 return res;
7110}
7111
7112uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *status)
7113{
7114 int64_t v;
7115 uint32_t res;
7116 int old_exc_flags = get_float_exception_flags(status);
7117
7118 v = float32_to_int64_round_to_zero(a, status);
7119 if (v < 0) {
7120 res = 0;
7121 } else if (v > 0xffffffff) {
7122 res = 0xffffffff;
7123 } else {
7124 return v;
7125 }
7126 set_float_exception_flags(old_exc_flags, status);
7127 float_raise(float_flag_invalid, status);
7128 return res;
7129}
7130
7131int16_t float32_to_int16(float32 a, float_status *status)
7132{
7133 int32_t v;
7134 int16_t res;
7135 int old_exc_flags = get_float_exception_flags(status);
7136
7137 v = float32_to_int32(a, status);
7138 if (v < -0x8000) {
7139 res = -0x8000;
7140 } else if (v > 0x7fff) {
7141 res = 0x7fff;
7142 } else {
7143 return v;
7144 }
7145
7146 set_float_exception_flags(old_exc_flags, status);
7147 float_raise(float_flag_invalid, status);
7148 return res;
7149}
7150
7151uint16_t float32_to_uint16(float32 a, float_status *status)
7152{
7153 int32_t v;
7154 uint16_t res;
7155 int old_exc_flags = get_float_exception_flags(status);
7156
7157 v = float32_to_int32(a, status);
7158 if (v < 0) {
7159 res = 0;
7160 } else if (v > 0xffff) {
7161 res = 0xffff;
7162 } else {
7163 return v;
7164 }
7165
7166 set_float_exception_flags(old_exc_flags, status);
7167 float_raise(float_flag_invalid, status);
7168 return res;
7169}
7170
7171uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *status)
7172{
7173 int64_t v;
7174 uint16_t res;
7175 int old_exc_flags = get_float_exception_flags(status);
7176
7177 v = float32_to_int64_round_to_zero(a, status);
7178 if (v < 0) {
7179 res = 0;
7180 } else if (v > 0xffff) {
7181 res = 0xffff;
7182 } else {
7183 return v;
7184 }
7185 set_float_exception_flags(old_exc_flags, status);
7186 float_raise(float_flag_invalid, status);
7187 return res;
7188}
7189
7190uint32_t float64_to_uint32(float64 a, float_status *status)
7191{
7192 uint64_t v;
7193 uint32_t res;
7194 int old_exc_flags = get_float_exception_flags(status);
7195
7196 v = float64_to_uint64(a, status);
7197 if (v > 0xffffffff) {
7198 res = 0xffffffff;
7199 } else {
7200 return v;
7201 }
7202 set_float_exception_flags(old_exc_flags, status);
7203 float_raise(float_flag_invalid, status);
7204 return res;
7205}
7206
7207uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *status)
7208{
7209 uint64_t v;
7210 uint32_t res;
7211 int old_exc_flags = get_float_exception_flags(status);
7212
7213 v = float64_to_uint64_round_to_zero(a, status);
7214 if (v > 0xffffffff) {
7215 res = 0xffffffff;
7216 } else {
7217 return v;
7218 }
7219 set_float_exception_flags(old_exc_flags, status);
7220 float_raise(float_flag_invalid, status);
7221 return res;
7222}
7223
7224int16_t float64_to_int16(float64 a, float_status *status)
7225{
7226 int64_t v;
7227 int16_t res;
7228 int old_exc_flags = get_float_exception_flags(status);
7229
7230 v = float64_to_int32(a, status);
7231 if (v < -0x8000) {
7232 res = -0x8000;
7233 } else if (v > 0x7fff) {
7234 res = 0x7fff;
7235 } else {
7236 return v;
7237 }
7238
7239 set_float_exception_flags(old_exc_flags, status);
7240 float_raise(float_flag_invalid, status);
7241 return res;
7242}
7243
7244uint16_t float64_to_uint16(float64 a, float_status *status)
7245{
7246 int64_t v;
7247 uint16_t res;
7248 int old_exc_flags = get_float_exception_flags(status);
7249
7250 v = float64_to_int32(a, status);
7251 if (v < 0) {
7252 res = 0;
7253 } else if (v > 0xffff) {
7254 res = 0xffff;
7255 } else {
7256 return v;
7257 }
7258
7259 set_float_exception_flags(old_exc_flags, status);
7260 float_raise(float_flag_invalid, status);
7261 return res;
7262}
7263
7264uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *status)
7265{
7266 int64_t v;
7267 uint16_t res;
7268 int old_exc_flags = get_float_exception_flags(status);
7269
7270 v = float64_to_int64_round_to_zero(a, status);
7271 if (v < 0) {
7272 res = 0;
7273 } else if (v > 0xffff) {
7274 res = 0xffff;
7275 } else {
7276 return v;
7277 }
7278 set_float_exception_flags(old_exc_flags, status);
7279 float_raise(float_flag_invalid, status);
7280 return res;
7281}
7282
7283
7284
7285
7286
7287
7288
7289
7290
7291
7292
7293
7294
7295uint64_t float64_to_uint64(float64 a, float_status *status)
7296{
7297 flag aSign;
7298 int aExp;
7299 int shiftCount;
7300 uint64_t aSig, aSigExtra;
7301 a = float64_squash_input_denormal(a, status);
7302
7303 aSig = extractFloat64Frac(a);
7304 aExp = extractFloat64Exp(a);
7305 aSign = extractFloat64Sign(a);
7306 if (aSign && (aExp > 1022)) {
7307 float_raise(float_flag_invalid, status);
7308 if (float64_is_any_nan(a)) {
7309 return LIT64(0xFFFFFFFFFFFFFFFF);
7310 } else {
7311 return 0;
7312 }
7313 }
7314 if (aExp) {
7315 aSig |= LIT64(0x0010000000000000);
7316 }
7317 shiftCount = 0x433 - aExp;
7318 if (shiftCount <= 0) {
7319 if (0x43E < aExp) {
7320 float_raise(float_flag_invalid, status);
7321 return LIT64(0xFFFFFFFFFFFFFFFF);
7322 }
7323 aSigExtra = 0;
7324 aSig <<= -shiftCount;
7325 } else {
7326 shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra);
7327 }
7328 return roundAndPackUint64(aSign, aSig, aSigExtra, status);
7329}
7330
7331uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *status)
7332{
7333 signed char current_rounding_mode = status->float_rounding_mode;
7334 set_float_rounding_mode(float_round_to_zero, status);
7335 int64_t v = float64_to_uint64(a, status);
7336 set_float_rounding_mode(current_rounding_mode, status);
7337 return v;
7338}
7339
7340#define COMPARE(s, nan_exp) \
7341static inline int float ## s ## _compare_internal(float ## s a, float ## s b,\
7342 int is_quiet, float_status *status) \
7343{ \
7344 flag aSign, bSign; \
7345 uint ## s ## _t av, bv; \
7346 a = float ## s ## _squash_input_denormal(a, status); \
7347 b = float ## s ## _squash_input_denormal(b, status); \
7348 \
7349 if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
7350 extractFloat ## s ## Frac( a ) ) || \
7351 ( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
7352 extractFloat ## s ## Frac( b ) )) { \
7353 if (!is_quiet || \
7354 float ## s ## _is_signaling_nan( a ) || \
7355 float ## s ## _is_signaling_nan( b ) ) { \
7356 float_raise(float_flag_invalid, status); \
7357 } \
7358 return float_relation_unordered; \
7359 } \
7360 aSign = extractFloat ## s ## Sign( a ); \
7361 bSign = extractFloat ## s ## Sign( b ); \
7362 av = float ## s ## _val(a); \
7363 bv = float ## s ## _val(b); \
7364 if ( aSign != bSign ) { \
7365 if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) { \
7366 \
7367 return float_relation_equal; \
7368 } else { \
7369 return 1 - (2 * aSign); \
7370 } \
7371 } else { \
7372 if (av == bv) { \
7373 return float_relation_equal; \
7374 } else { \
7375 return 1 - 2 * (aSign ^ ( av < bv )); \
7376 } \
7377 } \
7378} \
7379 \
7380int float ## s ## _compare(float ## s a, float ## s b, float_status *status) \
7381{ \
7382 return float ## s ## _compare_internal(a, b, 0, status); \
7383} \
7384 \
7385int float ## s ## _compare_quiet(float ## s a, float ## s b, \
7386 float_status *status) \
7387{ \
7388 return float ## s ## _compare_internal(a, b, 1, status); \
7389}
7390
7391COMPARE(32, 0xff)
7392COMPARE(64, 0x7ff)
7393
7394static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
7395 int is_quiet, float_status *status)
7396{
7397 flag aSign, bSign;
7398
7399 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7400 ( extractFloatx80Frac( a )<<1 ) ) ||
7401 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7402 ( extractFloatx80Frac( b )<<1 ) )) {
7403 if (!is_quiet ||
7404 floatx80_is_signaling_nan( a ) ||
7405 floatx80_is_signaling_nan( b ) ) {
7406 float_raise(float_flag_invalid, status);
7407 }
7408 return float_relation_unordered;
7409 }
7410 aSign = extractFloatx80Sign( a );
7411 bSign = extractFloatx80Sign( b );
7412 if ( aSign != bSign ) {
7413
7414 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7415 ( ( a.low | b.low ) == 0 ) ) {
7416
7417 return float_relation_equal;
7418 } else {
7419 return 1 - (2 * aSign);
7420 }
7421 } else {
7422 if (a.low == b.low && a.high == b.high) {
7423 return float_relation_equal;
7424 } else {
7425 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7426 }
7427 }
7428}
7429
7430int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
7431{
7432 return floatx80_compare_internal(a, b, 0, status);
7433}
7434
7435int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
7436{
7437 return floatx80_compare_internal(a, b, 1, status);
7438}
7439
7440static inline int float128_compare_internal(float128 a, float128 b,
7441 int is_quiet, float_status *status)
7442{
7443 flag aSign, bSign;
7444
7445 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7446 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7447 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7448 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7449 if (!is_quiet ||
7450 float128_is_signaling_nan( a ) ||
7451 float128_is_signaling_nan( b ) ) {
7452 float_raise(float_flag_invalid, status);
7453 }
7454 return float_relation_unordered;
7455 }
7456 aSign = extractFloat128Sign( a );
7457 bSign = extractFloat128Sign( b );
7458 if ( aSign != bSign ) {
7459 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7460
7461 return float_relation_equal;
7462 } else {
7463 return 1 - (2 * aSign);
7464 }
7465 } else {
7466 if (a.low == b.low && a.high == b.high) {
7467 return float_relation_equal;
7468 } else {
7469 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7470 }
7471 }
7472}
7473
7474int float128_compare(float128 a, float128 b, float_status *status)
7475{
7476 return float128_compare_internal(a, b, 0, status);
7477}
7478
7479int float128_compare_quiet(float128 a, float128 b, float_status *status)
7480{
7481 return float128_compare_internal(a, b, 1, status);
7482}
7483
7484
7485
7486
7487
7488
7489
7490
7491
7492
7493
7494
7495
7496
7497
7498#define MINMAX(s) \
7499static inline float ## s float ## s ## _minmax(float ## s a, float ## s b, \
7500 int ismin, int isieee, \
7501 int ismag, \
7502 float_status *status) \
7503{ \
7504 flag aSign, bSign; \
7505 uint ## s ## _t av, bv, aav, abv; \
7506 a = float ## s ## _squash_input_denormal(a, status); \
7507 b = float ## s ## _squash_input_denormal(b, status); \
7508 if (float ## s ## _is_any_nan(a) || \
7509 float ## s ## _is_any_nan(b)) { \
7510 if (isieee) { \
7511 if (float ## s ## _is_quiet_nan(a) && \
7512 !float ## s ##_is_any_nan(b)) { \
7513 return b; \
7514 } else if (float ## s ## _is_quiet_nan(b) && \
7515 !float ## s ## _is_any_nan(a)) { \
7516 return a; \
7517 } \
7518 } \
7519 return propagateFloat ## s ## NaN(a, b, status); \
7520 } \
7521 aSign = extractFloat ## s ## Sign(a); \
7522 bSign = extractFloat ## s ## Sign(b); \
7523 av = float ## s ## _val(a); \
7524 bv = float ## s ## _val(b); \
7525 if (ismag) { \
7526 aav = float ## s ## _abs(av); \
7527 abv = float ## s ## _abs(bv); \
7528 if (aav != abv) { \
7529 if (ismin) { \
7530 return (aav < abv) ? a : b; \
7531 } else { \
7532 return (aav < abv) ? b : a; \
7533 } \
7534 } \
7535 } \
7536 if (aSign != bSign) { \
7537 if (ismin) { \
7538 return aSign ? a : b; \
7539 } else { \
7540 return aSign ? b : a; \
7541 } \
7542 } else { \
7543 if (ismin) { \
7544 return (aSign ^ (av < bv)) ? a : b; \
7545 } else { \
7546 return (aSign ^ (av < bv)) ? b : a; \
7547 } \
7548 } \
7549} \
7550 \
7551float ## s float ## s ## _min(float ## s a, float ## s b, \
7552 float_status *status) \
7553{ \
7554 return float ## s ## _minmax(a, b, 1, 0, 0, status); \
7555} \
7556 \
7557float ## s float ## s ## _max(float ## s a, float ## s b, \
7558 float_status *status) \
7559{ \
7560 return float ## s ## _minmax(a, b, 0, 0, 0, status); \
7561} \
7562 \
7563float ## s float ## s ## _minnum(float ## s a, float ## s b, \
7564 float_status *status) \
7565{ \
7566 return float ## s ## _minmax(a, b, 1, 1, 0, status); \
7567} \
7568 \
7569float ## s float ## s ## _maxnum(float ## s a, float ## s b, \
7570 float_status *status) \
7571{ \
7572 return float ## s ## _minmax(a, b, 0, 1, 0, status); \
7573} \
7574 \
7575float ## s float ## s ## _minnummag(float ## s a, float ## s b, \
7576 float_status *status) \
7577{ \
7578 return float ## s ## _minmax(a, b, 1, 1, 1, status); \
7579} \
7580 \
7581float ## s float ## s ## _maxnummag(float ## s a, float ## s b, \
7582 float_status *status) \
7583{ \
7584 return float ## s ## _minmax(a, b, 0, 1, 1, status); \
7585}
7586
7587MINMAX(32)
7588MINMAX(64)
7589
7590
7591
7592float32 float32_scalbn(float32 a, int n, float_status *status)
7593{
7594 flag aSign;
7595 int16_t aExp;
7596 uint32_t aSig;
7597
7598 a = float32_squash_input_denormal(a, status);
7599 aSig = extractFloat32Frac( a );
7600 aExp = extractFloat32Exp( a );
7601 aSign = extractFloat32Sign( a );
7602
7603 if ( aExp == 0xFF ) {
7604 if ( aSig ) {
7605 return propagateFloat32NaN(a, a, status);
7606 }
7607 return a;
7608 }
7609 if (aExp != 0) {
7610 aSig |= 0x00800000;
7611 } else if (aSig == 0) {
7612 return a;
7613 } else {
7614 aExp++;
7615 }
7616
7617 if (n > 0x200) {
7618 n = 0x200;
7619 } else if (n < -0x200) {
7620 n = -0x200;
7621 }
7622
7623 aExp += n - 1;
7624 aSig <<= 7;
7625 return normalizeRoundAndPackFloat32(aSign, aExp, aSig, status);
7626}
7627
7628float64 float64_scalbn(float64 a, int n, float_status *status)
7629{
7630 flag aSign;
7631 int16_t aExp;
7632 uint64_t aSig;
7633
7634 a = float64_squash_input_denormal(a, status);
7635 aSig = extractFloat64Frac( a );
7636 aExp = extractFloat64Exp( a );
7637 aSign = extractFloat64Sign( a );
7638
7639 if ( aExp == 0x7FF ) {
7640 if ( aSig ) {
7641 return propagateFloat64NaN(a, a, status);
7642 }
7643 return a;
7644 }
7645 if (aExp != 0) {
7646 aSig |= LIT64( 0x0010000000000000 );
7647 } else if (aSig == 0) {
7648 return a;
7649 } else {
7650 aExp++;
7651 }
7652
7653 if (n > 0x1000) {
7654 n = 0x1000;
7655 } else if (n < -0x1000) {
7656 n = -0x1000;
7657 }
7658
7659 aExp += n - 1;
7660 aSig <<= 10;
7661 return normalizeRoundAndPackFloat64(aSign, aExp, aSig, status);
7662}
7663
7664floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
7665{
7666 flag aSign;
7667 int32_t aExp;
7668 uint64_t aSig;
7669
7670 aSig = extractFloatx80Frac( a );
7671 aExp = extractFloatx80Exp( a );
7672 aSign = extractFloatx80Sign( a );
7673
7674 if ( aExp == 0x7FFF ) {
7675 if ( aSig<<1 ) {
7676 return propagateFloatx80NaN(a, a, status);
7677 }
7678 return a;
7679 }
7680
7681 if (aExp == 0) {
7682 if (aSig == 0) {
7683 return a;
7684 }
7685 aExp++;
7686 }
7687
7688 if (n > 0x10000) {
7689 n = 0x10000;
7690 } else if (n < -0x10000) {
7691 n = -0x10000;
7692 }
7693
7694 aExp += n;
7695 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7696 aSign, aExp, aSig, 0, status);
7697}
7698
7699float128 float128_scalbn(float128 a, int n, float_status *status)
7700{
7701 flag aSign;
7702 int32_t aExp;
7703 uint64_t aSig0, aSig1;
7704
7705 aSig1 = extractFloat128Frac1( a );
7706 aSig0 = extractFloat128Frac0( a );
7707 aExp = extractFloat128Exp( a );
7708 aSign = extractFloat128Sign( a );
7709 if ( aExp == 0x7FFF ) {
7710 if ( aSig0 | aSig1 ) {
7711 return propagateFloat128NaN(a, a, status);
7712 }
7713 return a;
7714 }
7715 if (aExp != 0) {
7716 aSig0 |= LIT64( 0x0001000000000000 );
7717 } else if (aSig0 == 0 && aSig1 == 0) {
7718 return a;
7719 } else {
7720 aExp++;
7721 }
7722
7723 if (n > 0x10000) {
7724 n = 0x10000;
7725 } else if (n < -0x10000) {
7726 n = -0x10000;
7727 }
7728
7729 aExp += n - 1;
7730 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
7731 , status);
7732
7733}
7734