1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35#include "softfloat.h"
36
37
38
39
40
41
42#include "softfloat-macros.h"
43
44
45
46
47
48
49
50
51
52#include "softfloat-specialize.h"
53
54void set_float_rounding_mode(int val STATUS_PARAM)
55{
56 STATUS(float_rounding_mode) = val;
57}
58
59void set_float_exception_flags(int val STATUS_PARAM)
60{
61 STATUS(float_exception_flags) = val;
62}
63
64#ifdef FLOATX80
65void set_floatx80_rounding_precision(int val STATUS_PARAM)
66{
67 STATUS(floatx80_rounding_precision) = val;
68}
69#endif
70
71
72
73
74
75
76
77
78
79
80
81
82static int32 roundAndPackInt32( flag zSign, bits64 absZ STATUS_PARAM)
83{
84 int8 roundingMode;
85 flag roundNearestEven;
86 int8 roundIncrement, roundBits;
87 int32 z;
88
89 roundingMode = STATUS(float_rounding_mode);
90 roundNearestEven = ( roundingMode == float_round_nearest_even );
91 roundIncrement = 0x40;
92 if ( ! roundNearestEven ) {
93 if ( roundingMode == float_round_to_zero ) {
94 roundIncrement = 0;
95 }
96 else {
97 roundIncrement = 0x7F;
98 if ( zSign ) {
99 if ( roundingMode == float_round_up ) roundIncrement = 0;
100 }
101 else {
102 if ( roundingMode == float_round_down ) roundIncrement = 0;
103 }
104 }
105 }
106 roundBits = absZ & 0x7F;
107 absZ = ( absZ + roundIncrement )>>7;
108 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
109 z = absZ;
110 if ( zSign ) z = - z;
111 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
112 float_raise( float_flag_invalid STATUS_VAR);
113 return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
114 }
115 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
116 return z;
117
118}
119
120
121
122
123
124
125
126
127
128
129
130
131
132static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 STATUS_PARAM)
133{
134 int8 roundingMode;
135 flag roundNearestEven, increment;
136 int64 z;
137
138 roundingMode = STATUS(float_rounding_mode);
139 roundNearestEven = ( roundingMode == float_round_nearest_even );
140 increment = ( (sbits64) absZ1 < 0 );
141 if ( ! roundNearestEven ) {
142 if ( roundingMode == float_round_to_zero ) {
143 increment = 0;
144 }
145 else {
146 if ( zSign ) {
147 increment = ( roundingMode == float_round_down ) && absZ1;
148 }
149 else {
150 increment = ( roundingMode == float_round_up ) && absZ1;
151 }
152 }
153 }
154 if ( increment ) {
155 ++absZ0;
156 if ( absZ0 == 0 ) goto overflow;
157 absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158 }
159 z = absZ0;
160 if ( zSign ) z = - z;
161 if ( z && ( ( z < 0 ) ^ zSign ) ) {
162 overflow:
163 float_raise( float_flag_invalid STATUS_VAR);
164 return
165 zSign ? (sbits64) LIT64( 0x8000000000000000 )
166 : LIT64( 0x7FFFFFFFFFFFFFFF );
167 }
168 if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
169 return z;
170
171}
172
173
174
175
176
177INLINE bits32 extractFloat32Frac( float32 a )
178{
179
180 return float32_val(a) & 0x007FFFFF;
181
182}
183
184
185
186
187
188INLINE int16 extractFloat32Exp( float32 a )
189{
190
191 return ( float32_val(a)>>23 ) & 0xFF;
192
193}
194
195
196
197
198
199INLINE flag extractFloat32Sign( float32 a )
200{
201
202 return float32_val(a)>>31;
203
204}
205
206
207
208
209
210
211
212
213static void
214 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
215{
216 int8 shiftCount;
217
218 shiftCount = countLeadingZeros32( aSig ) - 8;
219 *zSigPtr = aSig<<shiftCount;
220 *zExpPtr = 1 - shiftCount;
221
222}
223
224
225
226
227
228
229
230
231
232
233
234
235INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
236{
237
238 return make_float32(
239 ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig);
240
241}
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
266{
267 int8 roundingMode;
268 flag roundNearestEven;
269 int8 roundIncrement, roundBits;
270 flag isTiny;
271
272 roundingMode = STATUS(float_rounding_mode);
273 roundNearestEven = ( roundingMode == float_round_nearest_even );
274 roundIncrement = 0x40;
275 if ( ! roundNearestEven ) {
276 if ( roundingMode == float_round_to_zero ) {
277 roundIncrement = 0;
278 }
279 else {
280 roundIncrement = 0x7F;
281 if ( zSign ) {
282 if ( roundingMode == float_round_up ) roundIncrement = 0;
283 }
284 else {
285 if ( roundingMode == float_round_down ) roundIncrement = 0;
286 }
287 }
288 }
289 roundBits = zSig & 0x7F;
290 if ( 0xFD <= (bits16) zExp ) {
291 if ( ( 0xFD < zExp )
292 || ( ( zExp == 0xFD )
293 && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
294 ) {
295 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
296 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
297 }
298 if ( zExp < 0 ) {
299 if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
300 isTiny =
301 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
302 || ( zExp < -1 )
303 || ( zSig + roundIncrement < 0x80000000 );
304 shift32RightJamming( zSig, - zExp, &zSig );
305 zExp = 0;
306 roundBits = zSig & 0x7F;
307 if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
308 }
309 }
310 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
311 zSig = ( zSig + roundIncrement )>>7;
312 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
313 if ( zSig == 0 ) zExp = 0;
314 return packFloat32( zSign, zExp, zSig );
315
316}
317
318
319
320
321
322
323
324
325
326
327static float32
328 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
329{
330 int8 shiftCount;
331
332 shiftCount = countLeadingZeros32( zSig ) - 1;
333 return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
334
335}
336
337
338
339
340
341INLINE bits64 extractFloat64Frac( float64 a )
342{
343
344 return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
345
346}
347
348
349
350
351
352INLINE int16 extractFloat64Exp( float64 a )
353{
354
355 return ( float64_val(a)>>52 ) & 0x7FF;
356
357}
358
359
360
361
362
363INLINE flag extractFloat64Sign( float64 a )
364{
365
366 return float64_val(a)>>63;
367
368}
369
370
371
372
373
374
375
376
377static void
378 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
379{
380 int8 shiftCount;
381
382 shiftCount = countLeadingZeros64( aSig ) - 11;
383 *zSigPtr = aSig<<shiftCount;
384 *zExpPtr = 1 - shiftCount;
385
386}
387
388
389
390
391
392
393
394
395
396
397
398
399INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
400{
401
402 return make_float64(
403 ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig);
404
405}
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
430{
431 int8 roundingMode;
432 flag roundNearestEven;
433 int16 roundIncrement, roundBits;
434 flag isTiny;
435
436 roundingMode = STATUS(float_rounding_mode);
437 roundNearestEven = ( roundingMode == float_round_nearest_even );
438 roundIncrement = 0x200;
439 if ( ! roundNearestEven ) {
440 if ( roundingMode == float_round_to_zero ) {
441 roundIncrement = 0;
442 }
443 else {
444 roundIncrement = 0x3FF;
445 if ( zSign ) {
446 if ( roundingMode == float_round_up ) roundIncrement = 0;
447 }
448 else {
449 if ( roundingMode == float_round_down ) roundIncrement = 0;
450 }
451 }
452 }
453 roundBits = zSig & 0x3FF;
454 if ( 0x7FD <= (bits16) zExp ) {
455 if ( ( 0x7FD < zExp )
456 || ( ( zExp == 0x7FD )
457 && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
458 ) {
459 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
460 return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
461 }
462 if ( zExp < 0 ) {
463 if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
464 isTiny =
465 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
466 || ( zExp < -1 )
467 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
468 shift64RightJamming( zSig, - zExp, &zSig );
469 zExp = 0;
470 roundBits = zSig & 0x3FF;
471 if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
472 }
473 }
474 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
475 zSig = ( zSig + roundIncrement )>>10;
476 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
477 if ( zSig == 0 ) zExp = 0;
478 return packFloat64( zSign, zExp, zSig );
479
480}
481
482
483
484
485
486
487
488
489
490
491static float64
492 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
493{
494 int8 shiftCount;
495
496 shiftCount = countLeadingZeros64( zSig ) - 1;
497 return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
498
499}
500
501#ifdef FLOATX80
502
503
504
505
506
507
508INLINE bits64 extractFloatx80Frac( floatx80 a )
509{
510
511 return a.low;
512
513}
514
515
516
517
518
519
520INLINE int32 extractFloatx80Exp( floatx80 a )
521{
522
523 return a.high & 0x7FFF;
524
525}
526
527
528
529
530
531
532INLINE flag extractFloatx80Sign( floatx80 a )
533{
534
535 return a.high>>15;
536
537}
538
539
540
541
542
543
544
545
546static void
547 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
548{
549 int8 shiftCount;
550
551 shiftCount = countLeadingZeros64( aSig );
552 *zSigPtr = aSig<<shiftCount;
553 *zExpPtr = 1 - shiftCount;
554
555}
556
557
558
559
560
561
562INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
563{
564 floatx80 z;
565
566 z.low = zSig;
567 z.high = ( ( (bits16) zSign )<<15 ) + zExp;
568 return z;
569
570}
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596static floatx80
597 roundAndPackFloatx80(
598 int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
599 STATUS_PARAM)
600{
601 int8 roundingMode;
602 flag roundNearestEven, increment, isTiny;
603 int64 roundIncrement, roundMask, roundBits;
604
605 roundingMode = STATUS(float_rounding_mode);
606 roundNearestEven = ( roundingMode == float_round_nearest_even );
607 if ( roundingPrecision == 80 ) goto precision80;
608 if ( roundingPrecision == 64 ) {
609 roundIncrement = LIT64( 0x0000000000000400 );
610 roundMask = LIT64( 0x00000000000007FF );
611 }
612 else if ( roundingPrecision == 32 ) {
613 roundIncrement = LIT64( 0x0000008000000000 );
614 roundMask = LIT64( 0x000000FFFFFFFFFF );
615 }
616 else {
617 goto precision80;
618 }
619 zSig0 |= ( zSig1 != 0 );
620 if ( ! roundNearestEven ) {
621 if ( roundingMode == float_round_to_zero ) {
622 roundIncrement = 0;
623 }
624 else {
625 roundIncrement = roundMask;
626 if ( zSign ) {
627 if ( roundingMode == float_round_up ) roundIncrement = 0;
628 }
629 else {
630 if ( roundingMode == float_round_down ) roundIncrement = 0;
631 }
632 }
633 }
634 roundBits = zSig0 & roundMask;
635 if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
636 if ( ( 0x7FFE < zExp )
637 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
638 ) {
639 goto overflow;
640 }
641 if ( zExp <= 0 ) {
642 if ( STATUS(flush_to_zero) ) return packFloatx80( zSign, 0, 0 );
643 isTiny =
644 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
645 || ( zExp < 0 )
646 || ( zSig0 <= zSig0 + roundIncrement );
647 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
648 zExp = 0;
649 roundBits = zSig0 & roundMask;
650 if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
651 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
652 zSig0 += roundIncrement;
653 if ( (sbits64) zSig0 < 0 ) zExp = 1;
654 roundIncrement = roundMask + 1;
655 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
656 roundMask |= roundIncrement;
657 }
658 zSig0 &= ~ roundMask;
659 return packFloatx80( zSign, zExp, zSig0 );
660 }
661 }
662 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
663 zSig0 += roundIncrement;
664 if ( zSig0 < roundIncrement ) {
665 ++zExp;
666 zSig0 = LIT64( 0x8000000000000000 );
667 }
668 roundIncrement = roundMask + 1;
669 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
670 roundMask |= roundIncrement;
671 }
672 zSig0 &= ~ roundMask;
673 if ( zSig0 == 0 ) zExp = 0;
674 return packFloatx80( zSign, zExp, zSig0 );
675 precision80:
676 increment = ( (sbits64) zSig1 < 0 );
677 if ( ! roundNearestEven ) {
678 if ( roundingMode == float_round_to_zero ) {
679 increment = 0;
680 }
681 else {
682 if ( zSign ) {
683 increment = ( roundingMode == float_round_down ) && zSig1;
684 }
685 else {
686 increment = ( roundingMode == float_round_up ) && zSig1;
687 }
688 }
689 }
690 if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
691 if ( ( 0x7FFE < zExp )
692 || ( ( zExp == 0x7FFE )
693 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
694 && increment
695 )
696 ) {
697 roundMask = 0;
698 overflow:
699 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
700 if ( ( roundingMode == float_round_to_zero )
701 || ( zSign && ( roundingMode == float_round_up ) )
702 || ( ! zSign && ( roundingMode == float_round_down ) )
703 ) {
704 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
705 }
706 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
707 }
708 if ( zExp <= 0 ) {
709 isTiny =
710 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
711 || ( zExp < 0 )
712 || ! increment
713 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
714 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
715 zExp = 0;
716 if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
717 if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
718 if ( roundNearestEven ) {
719 increment = ( (sbits64) zSig1 < 0 );
720 }
721 else {
722 if ( zSign ) {
723 increment = ( roundingMode == float_round_down ) && zSig1;
724 }
725 else {
726 increment = ( roundingMode == float_round_up ) && zSig1;
727 }
728 }
729 if ( increment ) {
730 ++zSig0;
731 zSig0 &=
732 ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
733 if ( (sbits64) zSig0 < 0 ) zExp = 1;
734 }
735 return packFloatx80( zSign, zExp, zSig0 );
736 }
737 }
738 if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
739 if ( increment ) {
740 ++zSig0;
741 if ( zSig0 == 0 ) {
742 ++zExp;
743 zSig0 = LIT64( 0x8000000000000000 );
744 }
745 else {
746 zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
747 }
748 }
749 else {
750 if ( zSig0 == 0 ) zExp = 0;
751 }
752 return packFloatx80( zSign, zExp, zSig0 );
753
754}
755
756
757
758
759
760
761
762
763
764
765static floatx80
766 normalizeRoundAndPackFloatx80(
767 int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
768 STATUS_PARAM)
769{
770 int8 shiftCount;
771
772 if ( zSig0 == 0 ) {
773 zSig0 = zSig1;
774 zSig1 = 0;
775 zExp -= 64;
776 }
777 shiftCount = countLeadingZeros64( zSig0 );
778 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
779 zExp -= shiftCount;
780 return
781 roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
782
783}
784
785#endif
786
787#ifdef FLOAT128
788
789
790
791
792
793
794INLINE bits64 extractFloat128Frac1( float128 a )
795{
796
797 return a.low;
798
799}
800
801
802
803
804
805
806INLINE bits64 extractFloat128Frac0( float128 a )
807{
808
809 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
810
811}
812
813
814
815
816
817
818INLINE int32 extractFloat128Exp( float128 a )
819{
820
821 return ( a.high>>48 ) & 0x7FFF;
822
823}
824
825
826
827
828
829INLINE flag extractFloat128Sign( float128 a )
830{
831
832 return a.high>>63;
833
834}
835
836
837
838
839
840
841
842
843
844
845
846static void
847 normalizeFloat128Subnormal(
848 bits64 aSig0,
849 bits64 aSig1,
850 int32 *zExpPtr,
851 bits64 *zSig0Ptr,
852 bits64 *zSig1Ptr
853 )
854{
855 int8 shiftCount;
856
857 if ( aSig0 == 0 ) {
858 shiftCount = countLeadingZeros64( aSig1 ) - 15;
859 if ( shiftCount < 0 ) {
860 *zSig0Ptr = aSig1>>( - shiftCount );
861 *zSig1Ptr = aSig1<<( shiftCount & 63 );
862 }
863 else {
864 *zSig0Ptr = aSig1<<shiftCount;
865 *zSig1Ptr = 0;
866 }
867 *zExpPtr = - shiftCount - 63;
868 }
869 else {
870 shiftCount = countLeadingZeros64( aSig0 ) - 15;
871 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
872 *zExpPtr = 1 - shiftCount;
873 }
874
875}
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890INLINE float128
891 packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
892{
893 float128 z;
894
895 z.low = zSig1;
896 z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
897 return z;
898
899}
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922static float128
923 roundAndPackFloat128(
924 flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 STATUS_PARAM)
925{
926 int8 roundingMode;
927 flag roundNearestEven, increment, isTiny;
928
929 roundingMode = STATUS(float_rounding_mode);
930 roundNearestEven = ( roundingMode == float_round_nearest_even );
931 increment = ( (sbits64) zSig2 < 0 );
932 if ( ! roundNearestEven ) {
933 if ( roundingMode == float_round_to_zero ) {
934 increment = 0;
935 }
936 else {
937 if ( zSign ) {
938 increment = ( roundingMode == float_round_down ) && zSig2;
939 }
940 else {
941 increment = ( roundingMode == float_round_up ) && zSig2;
942 }
943 }
944 }
945 if ( 0x7FFD <= (bits32) zExp ) {
946 if ( ( 0x7FFD < zExp )
947 || ( ( zExp == 0x7FFD )
948 && eq128(
949 LIT64( 0x0001FFFFFFFFFFFF ),
950 LIT64( 0xFFFFFFFFFFFFFFFF ),
951 zSig0,
952 zSig1
953 )
954 && increment
955 )
956 ) {
957 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
958 if ( ( roundingMode == float_round_to_zero )
959 || ( zSign && ( roundingMode == float_round_up ) )
960 || ( ! zSign && ( roundingMode == float_round_down ) )
961 ) {
962 return
963 packFloat128(
964 zSign,
965 0x7FFE,
966 LIT64( 0x0000FFFFFFFFFFFF ),
967 LIT64( 0xFFFFFFFFFFFFFFFF )
968 );
969 }
970 return packFloat128( zSign, 0x7FFF, 0, 0 );
971 }
972 if ( zExp < 0 ) {
973 if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
974 isTiny =
975 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
976 || ( zExp < -1 )
977 || ! increment
978 || lt128(
979 zSig0,
980 zSig1,
981 LIT64( 0x0001FFFFFFFFFFFF ),
982 LIT64( 0xFFFFFFFFFFFFFFFF )
983 );
984 shift128ExtraRightJamming(
985 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
986 zExp = 0;
987 if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
988 if ( roundNearestEven ) {
989 increment = ( (sbits64) zSig2 < 0 );
990 }
991 else {
992 if ( zSign ) {
993 increment = ( roundingMode == float_round_down ) && zSig2;
994 }
995 else {
996 increment = ( roundingMode == float_round_up ) && zSig2;
997 }
998 }
999 }
1000 }
1001 if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact;
1002 if ( increment ) {
1003 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1004 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1005 }
1006 else {
1007 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1008 }
1009 return packFloat128( zSign, zExp, zSig0, zSig1 );
1010
1011}
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023static float128
1024 normalizeRoundAndPackFloat128(
1025 flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 STATUS_PARAM)
1026{
1027 int8 shiftCount;
1028 bits64 zSig2;
1029
1030 if ( zSig0 == 0 ) {
1031 zSig0 = zSig1;
1032 zSig1 = 0;
1033 zExp -= 64;
1034 }
1035 shiftCount = countLeadingZeros64( zSig0 ) - 15;
1036 if ( 0 <= shiftCount ) {
1037 zSig2 = 0;
1038 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1039 }
1040 else {
1041 shift128ExtraRightJamming(
1042 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1043 }
1044 zExp -= shiftCount;
1045 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
1046
1047}
1048
1049#endif
1050
1051
1052
1053
1054
1055
1056
1057float32 int32_to_float32( int32 a STATUS_PARAM )
1058{
1059 flag zSign;
1060
1061 if ( a == 0 ) return float32_zero;
1062 if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1063 zSign = ( a < 0 );
1064 return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
1065
1066}
1067
1068
1069
1070
1071
1072
1073
1074float64 int32_to_float64( int32 a STATUS_PARAM )
1075{
1076 flag zSign;
1077 uint32 absA;
1078 int8 shiftCount;
1079 bits64 zSig;
1080
1081 if ( a == 0 ) return float64_zero;
1082 zSign = ( a < 0 );
1083 absA = zSign ? - a : a;
1084 shiftCount = countLeadingZeros32( absA ) + 21;
1085 zSig = absA;
1086 return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1087
1088}
1089
1090#ifdef FLOATX80
1091
1092
1093
1094
1095
1096
1097
1098
1099floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
1100{
1101 flag zSign;
1102 uint32 absA;
1103 int8 shiftCount;
1104 bits64 zSig;
1105
1106 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1107 zSign = ( a < 0 );
1108 absA = zSign ? - a : a;
1109 shiftCount = countLeadingZeros32( absA ) + 32;
1110 zSig = absA;
1111 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1112
1113}
1114
1115#endif
1116
1117#ifdef FLOAT128
1118
1119
1120
1121
1122
1123
1124
1125float128 int32_to_float128( int32 a STATUS_PARAM )
1126{
1127 flag zSign;
1128 uint32 absA;
1129 int8 shiftCount;
1130 bits64 zSig0;
1131
1132 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1133 zSign = ( a < 0 );
1134 absA = zSign ? - a : a;
1135 shiftCount = countLeadingZeros32( absA ) + 17;
1136 zSig0 = absA;
1137 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1138
1139}
1140
1141#endif
1142
1143
1144
1145
1146
1147
1148
1149float32 int64_to_float32( int64 a STATUS_PARAM )
1150{
1151 flag zSign;
1152 uint64 absA;
1153 int8 shiftCount;
1154
1155 if ( a == 0 ) return float32_zero;
1156 zSign = ( a < 0 );
1157 absA = zSign ? - a : a;
1158 shiftCount = countLeadingZeros64( absA ) - 40;
1159 if ( 0 <= shiftCount ) {
1160 return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1161 }
1162 else {
1163 shiftCount += 7;
1164 if ( shiftCount < 0 ) {
1165 shift64RightJamming( absA, - shiftCount, &absA );
1166 }
1167 else {
1168 absA <<= shiftCount;
1169 }
1170 return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
1171 }
1172
1173}
1174
1175float32 uint64_to_float32( uint64 a STATUS_PARAM )
1176{
1177 int8 shiftCount;
1178
1179 if ( a == 0 ) return float32_zero;
1180 shiftCount = countLeadingZeros64( a ) - 40;
1181 if ( 0 <= shiftCount ) {
1182 return packFloat32( 1 > 0, 0x95 - shiftCount, a<<shiftCount );
1183 }
1184 else {
1185 shiftCount += 7;
1186 if ( shiftCount < 0 ) {
1187 shift64RightJamming( a, - shiftCount, &a );
1188 }
1189 else {
1190 a <<= shiftCount;
1191 }
1192 return roundAndPackFloat32( 1 > 0, 0x9C - shiftCount, a STATUS_VAR );
1193 }
1194}
1195
1196
1197
1198
1199
1200
1201
1202float64 int64_to_float64( int64 a STATUS_PARAM )
1203{
1204 flag zSign;
1205
1206 if ( a == 0 ) return float64_zero;
1207 if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
1208 return packFloat64( 1, 0x43E, 0 );
1209 }
1210 zSign = ( a < 0 );
1211 return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
1212
1213}
1214
1215float64 uint64_to_float64( uint64 a STATUS_PARAM )
1216{
1217 if ( a == 0 ) return float64_zero;
1218 return normalizeRoundAndPackFloat64( 0, 0x43C, a STATUS_VAR );
1219
1220}
1221
1222#ifdef FLOATX80
1223
1224
1225
1226
1227
1228
1229
1230
1231floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
1232{
1233 flag zSign;
1234 uint64 absA;
1235 int8 shiftCount;
1236
1237 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1238 zSign = ( a < 0 );
1239 absA = zSign ? - a : a;
1240 shiftCount = countLeadingZeros64( absA );
1241 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1242
1243}
1244
1245#endif
1246
1247#ifdef FLOAT128
1248
1249
1250
1251
1252
1253
1254
1255float128 int64_to_float128( int64 a STATUS_PARAM )
1256{
1257 flag zSign;
1258 uint64 absA;
1259 int8 shiftCount;
1260 int32 zExp;
1261 bits64 zSig0, zSig1;
1262
1263 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1264 zSign = ( a < 0 );
1265 absA = zSign ? - a : a;
1266 shiftCount = countLeadingZeros64( absA ) + 49;
1267 zExp = 0x406E - shiftCount;
1268 if ( 64 <= shiftCount ) {
1269 zSig1 = 0;
1270 zSig0 = absA;
1271 shiftCount -= 64;
1272 }
1273 else {
1274 zSig1 = absA;
1275 zSig0 = 0;
1276 }
1277 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1278 return packFloat128( zSign, zExp, zSig0, zSig1 );
1279
1280}
1281
1282#endif
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294int32 float32_to_int32( float32 a STATUS_PARAM )
1295{
1296 flag aSign;
1297 int16 aExp, shiftCount;
1298 bits32 aSig;
1299 bits64 aSig64;
1300
1301 aSig = extractFloat32Frac( a );
1302 aExp = extractFloat32Exp( a );
1303 aSign = extractFloat32Sign( a );
1304 if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1305 if ( aExp ) aSig |= 0x00800000;
1306 shiftCount = 0xAF - aExp;
1307 aSig64 = aSig;
1308 aSig64 <<= 32;
1309 if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1310 return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
1311
1312}
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
1325{
1326 flag aSign;
1327 int16 aExp, shiftCount;
1328 bits32 aSig;
1329 int32 z;
1330
1331 aSig = extractFloat32Frac( a );
1332 aExp = extractFloat32Exp( a );
1333 aSign = extractFloat32Sign( a );
1334 shiftCount = aExp - 0x9E;
1335 if ( 0 <= shiftCount ) {
1336 if ( float32_val(a) != 0xCF000000 ) {
1337 float_raise( float_flag_invalid STATUS_VAR);
1338 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1339 }
1340 return (sbits32) 0x80000000;
1341 }
1342 else if ( aExp <= 0x7E ) {
1343 if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
1344 return 0;
1345 }
1346 aSig = ( aSig | 0x00800000 )<<8;
1347 z = aSig>>( - shiftCount );
1348 if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
1349 STATUS(float_exception_flags) |= float_flag_inexact;
1350 }
1351 if ( aSign ) z = - z;
1352 return z;
1353
1354}
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366int64 float32_to_int64( float32 a STATUS_PARAM )
1367{
1368 flag aSign;
1369 int16 aExp, shiftCount;
1370 bits32 aSig;
1371 bits64 aSig64, aSigExtra;
1372
1373 aSig = extractFloat32Frac( a );
1374 aExp = extractFloat32Exp( a );
1375 aSign = extractFloat32Sign( a );
1376 shiftCount = 0xBE - aExp;
1377 if ( shiftCount < 0 ) {
1378 float_raise( float_flag_invalid STATUS_VAR);
1379 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1380 return LIT64( 0x7FFFFFFFFFFFFFFF );
1381 }
1382 return (sbits64) LIT64( 0x8000000000000000 );
1383 }
1384 if ( aExp ) aSig |= 0x00800000;
1385 aSig64 = aSig;
1386 aSig64 <<= 40;
1387 shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1388 return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
1389
1390}
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
1403{
1404 flag aSign;
1405 int16 aExp, shiftCount;
1406 bits32 aSig;
1407 bits64 aSig64;
1408 int64 z;
1409
1410 aSig = extractFloat32Frac( a );
1411 aExp = extractFloat32Exp( a );
1412 aSign = extractFloat32Sign( a );
1413 shiftCount = aExp - 0xBE;
1414 if ( 0 <= shiftCount ) {
1415 if ( float32_val(a) != 0xDF000000 ) {
1416 float_raise( float_flag_invalid STATUS_VAR);
1417 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1418 return LIT64( 0x7FFFFFFFFFFFFFFF );
1419 }
1420 }
1421 return (sbits64) LIT64( 0x8000000000000000 );
1422 }
1423 else if ( aExp <= 0x7E ) {
1424 if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
1425 return 0;
1426 }
1427 aSig64 = aSig | 0x00800000;
1428 aSig64 <<= 40;
1429 z = aSig64>>( - shiftCount );
1430 if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
1431 STATUS(float_exception_flags) |= float_flag_inexact;
1432 }
1433 if ( aSign ) z = - z;
1434 return z;
1435
1436}
1437
1438
1439
1440
1441
1442
1443
1444
1445float64 float32_to_float64( float32 a STATUS_PARAM )
1446{
1447 flag aSign;
1448 int16 aExp;
1449 bits32 aSig;
1450
1451 aSig = extractFloat32Frac( a );
1452 aExp = extractFloat32Exp( a );
1453 aSign = extractFloat32Sign( a );
1454 if ( aExp == 0xFF ) {
1455 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ));
1456 return packFloat64( aSign, 0x7FF, 0 );
1457 }
1458 if ( aExp == 0 ) {
1459 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1460 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1461 --aExp;
1462 }
1463 return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
1464
1465}
1466
1467#ifdef FLOATX80
1468
1469
1470
1471
1472
1473
1474
1475
1476floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
1477{
1478 flag aSign;
1479 int16 aExp;
1480 bits32 aSig;
1481
1482 aSig = extractFloat32Frac( a );
1483 aExp = extractFloat32Exp( a );
1484 aSign = extractFloat32Sign( a );
1485 if ( aExp == 0xFF ) {
1486 if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) );
1487 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1488 }
1489 if ( aExp == 0 ) {
1490 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1491 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1492 }
1493 aSig |= 0x00800000;
1494 return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
1495
1496}
1497
1498#endif
1499
1500#ifdef FLOAT128
1501
1502
1503
1504
1505
1506
1507
1508
1509float128 float32_to_float128( float32 a STATUS_PARAM )
1510{
1511 flag aSign;
1512 int16 aExp;
1513 bits32 aSig;
1514
1515 aSig = extractFloat32Frac( a );
1516 aExp = extractFloat32Exp( a );
1517 aSign = extractFloat32Sign( a );
1518 if ( aExp == 0xFF ) {
1519 if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) );
1520 return packFloat128( aSign, 0x7FFF, 0, 0 );
1521 }
1522 if ( aExp == 0 ) {
1523 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1524 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1525 --aExp;
1526 }
1527 return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
1528
1529}
1530
1531#endif
1532
1533
1534
1535
1536
1537
1538
1539
1540float32 float32_round_to_int( float32 a STATUS_PARAM)
1541{
1542 flag aSign;
1543 int16 aExp;
1544 bits32 lastBitMask, roundBitsMask;
1545 int8 roundingMode;
1546 bits32 z;
1547
1548 aExp = extractFloat32Exp( a );
1549 if ( 0x96 <= aExp ) {
1550 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1551 return propagateFloat32NaN( a, a STATUS_VAR );
1552 }
1553 return a;
1554 }
1555 if ( aExp <= 0x7E ) {
1556 if ( (bits32) ( float32_val(a)<<1 ) == 0 ) return a;
1557 STATUS(float_exception_flags) |= float_flag_inexact;
1558 aSign = extractFloat32Sign( a );
1559 switch ( STATUS(float_rounding_mode) ) {
1560 case float_round_nearest_even:
1561 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1562 return packFloat32( aSign, 0x7F, 0 );
1563 }
1564 break;
1565 case float_round_down:
1566 return make_float32(aSign ? 0xBF800000 : 0);
1567 case float_round_up:
1568 return make_float32(aSign ? 0x80000000 : 0x3F800000);
1569 }
1570 return packFloat32( aSign, 0, 0 );
1571 }
1572 lastBitMask = 1;
1573 lastBitMask <<= 0x96 - aExp;
1574 roundBitsMask = lastBitMask - 1;
1575 z = float32_val(a);
1576 roundingMode = STATUS(float_rounding_mode);
1577 if ( roundingMode == float_round_nearest_even ) {
1578 z += lastBitMask>>1;
1579 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1580 }
1581 else if ( roundingMode != float_round_to_zero ) {
1582 if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
1583 z += roundBitsMask;
1584 }
1585 }
1586 z &= ~ roundBitsMask;
1587 if ( z != float32_val(a) ) STATUS(float_exception_flags) |= float_flag_inexact;
1588 return make_float32(z);
1589
1590}
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1601{
1602 int16 aExp, bExp, zExp;
1603 bits32 aSig, bSig, zSig;
1604 int16 expDiff;
1605
1606 aSig = extractFloat32Frac( a );
1607 aExp = extractFloat32Exp( a );
1608 bSig = extractFloat32Frac( b );
1609 bExp = extractFloat32Exp( b );
1610 expDiff = aExp - bExp;
1611 aSig <<= 6;
1612 bSig <<= 6;
1613 if ( 0 < expDiff ) {
1614 if ( aExp == 0xFF ) {
1615 if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1616 return a;
1617 }
1618 if ( bExp == 0 ) {
1619 --expDiff;
1620 }
1621 else {
1622 bSig |= 0x20000000;
1623 }
1624 shift32RightJamming( bSig, expDiff, &bSig );
1625 zExp = aExp;
1626 }
1627 else if ( expDiff < 0 ) {
1628 if ( bExp == 0xFF ) {
1629 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1630 return packFloat32( zSign, 0xFF, 0 );
1631 }
1632 if ( aExp == 0 ) {
1633 ++expDiff;
1634 }
1635 else {
1636 aSig |= 0x20000000;
1637 }
1638 shift32RightJamming( aSig, - expDiff, &aSig );
1639 zExp = bExp;
1640 }
1641 else {
1642 if ( aExp == 0xFF ) {
1643 if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1644 return a;
1645 }
1646 if ( aExp == 0 ) {
1647 if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
1648 return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1649 }
1650 zSig = 0x40000000 + aSig + bSig;
1651 zExp = aExp;
1652 goto roundAndPack;
1653 }
1654 aSig |= 0x20000000;
1655 zSig = ( aSig + bSig )<<1;
1656 --zExp;
1657 if ( (sbits32) zSig < 0 ) {
1658 zSig = aSig + bSig;
1659 ++zExp;
1660 }
1661 roundAndPack:
1662 return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1663
1664}
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1675{
1676 int16 aExp, bExp, zExp;
1677 bits32 aSig, bSig, zSig;
1678 int16 expDiff;
1679
1680 aSig = extractFloat32Frac( a );
1681 aExp = extractFloat32Exp( a );
1682 bSig = extractFloat32Frac( b );
1683 bExp = extractFloat32Exp( b );
1684 expDiff = aExp - bExp;
1685 aSig <<= 7;
1686 bSig <<= 7;
1687 if ( 0 < expDiff ) goto aExpBigger;
1688 if ( expDiff < 0 ) goto bExpBigger;
1689 if ( aExp == 0xFF ) {
1690 if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1691 float_raise( float_flag_invalid STATUS_VAR);
1692 return float32_default_nan;
1693 }
1694 if ( aExp == 0 ) {
1695 aExp = 1;
1696 bExp = 1;
1697 }
1698 if ( bSig < aSig ) goto aBigger;
1699 if ( aSig < bSig ) goto bBigger;
1700 return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
1701 bExpBigger:
1702 if ( bExp == 0xFF ) {
1703 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1704 return packFloat32( zSign ^ 1, 0xFF, 0 );
1705 }
1706 if ( aExp == 0 ) {
1707 ++expDiff;
1708 }
1709 else {
1710 aSig |= 0x40000000;
1711 }
1712 shift32RightJamming( aSig, - expDiff, &aSig );
1713 bSig |= 0x40000000;
1714 bBigger:
1715 zSig = bSig - aSig;
1716 zExp = bExp;
1717 zSign ^= 1;
1718 goto normalizeRoundAndPack;
1719 aExpBigger:
1720 if ( aExp == 0xFF ) {
1721 if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1722 return a;
1723 }
1724 if ( bExp == 0 ) {
1725 --expDiff;
1726 }
1727 else {
1728 bSig |= 0x40000000;
1729 }
1730 shift32RightJamming( bSig, expDiff, &bSig );
1731 aSig |= 0x40000000;
1732 aBigger:
1733 zSig = aSig - bSig;
1734 zExp = aExp;
1735 normalizeRoundAndPack:
1736 --zExp;
1737 return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1738
1739}
1740
1741
1742
1743
1744
1745
1746
1747float32 float32_add( float32 a, float32 b STATUS_PARAM )
1748{
1749 flag aSign, bSign;
1750
1751 aSign = extractFloat32Sign( a );
1752 bSign = extractFloat32Sign( b );
1753 if ( aSign == bSign ) {
1754 return addFloat32Sigs( a, b, aSign STATUS_VAR);
1755 }
1756 else {
1757 return subFloat32Sigs( a, b, aSign STATUS_VAR );
1758 }
1759
1760}
1761
1762
1763
1764
1765
1766
1767
1768float32 float32_sub( float32 a, float32 b STATUS_PARAM )
1769{
1770 flag aSign, bSign;
1771
1772 aSign = extractFloat32Sign( a );
1773 bSign = extractFloat32Sign( b );
1774 if ( aSign == bSign ) {
1775 return subFloat32Sigs( a, b, aSign STATUS_VAR );
1776 }
1777 else {
1778 return addFloat32Sigs( a, b, aSign STATUS_VAR );
1779 }
1780
1781}
1782
1783
1784
1785
1786
1787
1788
1789float32 float32_mul( float32 a, float32 b STATUS_PARAM )
1790{
1791 flag aSign, bSign, zSign;
1792 int16 aExp, bExp, zExp;
1793 bits32 aSig, bSig;
1794 bits64 zSig64;
1795 bits32 zSig;
1796
1797 aSig = extractFloat32Frac( a );
1798 aExp = extractFloat32Exp( a );
1799 aSign = extractFloat32Sign( a );
1800 bSig = extractFloat32Frac( b );
1801 bExp = extractFloat32Exp( b );
1802 bSign = extractFloat32Sign( b );
1803 zSign = aSign ^ bSign;
1804 if ( aExp == 0xFF ) {
1805 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1806 return propagateFloat32NaN( a, b STATUS_VAR );
1807 }
1808 if ( ( bExp | bSig ) == 0 ) {
1809 float_raise( float_flag_invalid STATUS_VAR);
1810 return float32_default_nan;
1811 }
1812 return packFloat32( zSign, 0xFF, 0 );
1813 }
1814 if ( bExp == 0xFF ) {
1815 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1816 if ( ( aExp | aSig ) == 0 ) {
1817 float_raise( float_flag_invalid STATUS_VAR);
1818 return float32_default_nan;
1819 }
1820 return packFloat32( zSign, 0xFF, 0 );
1821 }
1822 if ( aExp == 0 ) {
1823 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1824 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1825 }
1826 if ( bExp == 0 ) {
1827 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1828 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1829 }
1830 zExp = aExp + bExp - 0x7F;
1831 aSig = ( aSig | 0x00800000 )<<7;
1832 bSig = ( bSig | 0x00800000 )<<8;
1833 shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1834 zSig = zSig64;
1835 if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1836 zSig <<= 1;
1837 --zExp;
1838 }
1839 return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1840
1841}
1842
1843
1844
1845
1846
1847
1848
1849float32 float32_div( float32 a, float32 b STATUS_PARAM )
1850{
1851 flag aSign, bSign, zSign;
1852 int16 aExp, bExp, zExp;
1853 bits32 aSig, bSig, zSig;
1854
1855 aSig = extractFloat32Frac( a );
1856 aExp = extractFloat32Exp( a );
1857 aSign = extractFloat32Sign( a );
1858 bSig = extractFloat32Frac( b );
1859 bExp = extractFloat32Exp( b );
1860 bSign = extractFloat32Sign( b );
1861 zSign = aSign ^ bSign;
1862 if ( aExp == 0xFF ) {
1863 if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1864 if ( bExp == 0xFF ) {
1865 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1866 float_raise( float_flag_invalid STATUS_VAR);
1867 return float32_default_nan;
1868 }
1869 return packFloat32( zSign, 0xFF, 0 );
1870 }
1871 if ( bExp == 0xFF ) {
1872 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1873 return packFloat32( zSign, 0, 0 );
1874 }
1875 if ( bExp == 0 ) {
1876 if ( bSig == 0 ) {
1877 if ( ( aExp | aSig ) == 0 ) {
1878 float_raise( float_flag_invalid STATUS_VAR);
1879 return float32_default_nan;
1880 }
1881 float_raise( float_flag_divbyzero STATUS_VAR);
1882 return packFloat32( zSign, 0xFF, 0 );
1883 }
1884 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1885 }
1886 if ( aExp == 0 ) {
1887 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1888 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1889 }
1890 zExp = aExp - bExp + 0x7D;
1891 aSig = ( aSig | 0x00800000 )<<7;
1892 bSig = ( bSig | 0x00800000 )<<8;
1893 if ( bSig <= ( aSig + aSig ) ) {
1894 aSig >>= 1;
1895 ++zExp;
1896 }
1897 zSig = ( ( (bits64) aSig )<<32 ) / bSig;
1898 if ( ( zSig & 0x3F ) == 0 ) {
1899 zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
1900 }
1901 return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1902
1903}
1904
1905
1906
1907
1908
1909
1910
1911float32 float32_rem( float32 a, float32 b STATUS_PARAM )
1912{
1913 flag aSign, bSign, zSign;
1914 int16 aExp, bExp, expDiff;
1915 bits32 aSig, bSig;
1916 bits32 q;
1917 bits64 aSig64, bSig64, q64;
1918 bits32 alternateASig;
1919 sbits32 sigMean;
1920
1921 aSig = extractFloat32Frac( a );
1922 aExp = extractFloat32Exp( a );
1923 aSign = extractFloat32Sign( a );
1924 bSig = extractFloat32Frac( b );
1925 bExp = extractFloat32Exp( b );
1926 bSign = extractFloat32Sign( b );
1927 if ( aExp == 0xFF ) {
1928 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1929 return propagateFloat32NaN( a, b STATUS_VAR );
1930 }
1931 float_raise( float_flag_invalid STATUS_VAR);
1932 return float32_default_nan;
1933 }
1934 if ( bExp == 0xFF ) {
1935 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1936 return a;
1937 }
1938 if ( bExp == 0 ) {
1939 if ( bSig == 0 ) {
1940 float_raise( float_flag_invalid STATUS_VAR);
1941 return float32_default_nan;
1942 }
1943 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1944 }
1945 if ( aExp == 0 ) {
1946 if ( aSig == 0 ) return a;
1947 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1948 }
1949 expDiff = aExp - bExp;
1950 aSig |= 0x00800000;
1951 bSig |= 0x00800000;
1952 if ( expDiff < 32 ) {
1953 aSig <<= 8;
1954 bSig <<= 8;
1955 if ( expDiff < 0 ) {
1956 if ( expDiff < -1 ) return a;
1957 aSig >>= 1;
1958 }
1959 q = ( bSig <= aSig );
1960 if ( q ) aSig -= bSig;
1961 if ( 0 < expDiff ) {
1962 q = ( ( (bits64) aSig )<<32 ) / bSig;
1963 q >>= 32 - expDiff;
1964 bSig >>= 2;
1965 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1966 }
1967 else {
1968 aSig >>= 2;
1969 bSig >>= 2;
1970 }
1971 }
1972 else {
1973 if ( bSig <= aSig ) aSig -= bSig;
1974 aSig64 = ( (bits64) aSig )<<40;
1975 bSig64 = ( (bits64) bSig )<<40;
1976 expDiff -= 64;
1977 while ( 0 < expDiff ) {
1978 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1979 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1980 aSig64 = - ( ( bSig * q64 )<<38 );
1981 expDiff -= 62;
1982 }
1983 expDiff += 64;
1984 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
1985 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
1986 q = q64>>( 64 - expDiff );
1987 bSig <<= 6;
1988 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
1989 }
1990 do {
1991 alternateASig = aSig;
1992 ++q;
1993 aSig -= bSig;
1994 } while ( 0 <= (sbits32) aSig );
1995 sigMean = aSig + alternateASig;
1996 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1997 aSig = alternateASig;
1998 }
1999 zSign = ( (sbits32) aSig < 0 );
2000 if ( zSign ) aSig = - aSig;
2001 return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
2002
2003}
2004
2005
2006
2007
2008
2009
2010
2011float32 float32_sqrt( float32 a STATUS_PARAM )
2012{
2013 flag aSign;
2014 int16 aExp, zExp;
2015 bits32 aSig, zSig;
2016 bits64 rem, term;
2017
2018 aSig = extractFloat32Frac( a );
2019 aExp = extractFloat32Exp( a );
2020 aSign = extractFloat32Sign( a );
2021 if ( aExp == 0xFF ) {
2022 if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2023 if ( ! aSign ) return a;
2024 float_raise( float_flag_invalid STATUS_VAR);
2025 return float32_default_nan;
2026 }
2027 if ( aSign ) {
2028 if ( ( aExp | aSig ) == 0 ) return a;
2029 float_raise( float_flag_invalid STATUS_VAR);
2030 return float32_default_nan;
2031 }
2032 if ( aExp == 0 ) {
2033 if ( aSig == 0 ) return float32_zero;
2034 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2035 }
2036 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2037 aSig = ( aSig | 0x00800000 )<<8;
2038 zSig = estimateSqrt32( aExp, aSig ) + 2;
2039 if ( ( zSig & 0x7F ) <= 5 ) {
2040 if ( zSig < 2 ) {
2041 zSig = 0x7FFFFFFF;
2042 goto roundAndPack;
2043 }
2044 aSig >>= aExp & 1;
2045 term = ( (bits64) zSig ) * zSig;
2046 rem = ( ( (bits64) aSig )<<32 ) - term;
2047 while ( (sbits64) rem < 0 ) {
2048 --zSig;
2049 rem += ( ( (bits64) zSig )<<1 ) | 1;
2050 }
2051 zSig |= ( rem != 0 );
2052 }
2053 shift32RightJamming( zSig, 1, &zSig );
2054 roundAndPack:
2055 return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
2056
2057}
2058
2059
2060
2061
2062
2063
2064float32 float32_log2( float32 a STATUS_PARAM )
2065{
2066 flag aSign, zSign;
2067 int16 aExp;
2068 bits32 aSig, zSig, i;
2069
2070 aSig = extractFloat32Frac( a );
2071 aExp = extractFloat32Exp( a );
2072 aSign = extractFloat32Sign( a );
2073
2074 if ( aExp == 0 ) {
2075 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
2076 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2077 }
2078 if ( aSign ) {
2079 float_raise( float_flag_invalid STATUS_VAR);
2080 return float32_default_nan;
2081 }
2082 if ( aExp == 0xFF ) {
2083 if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2084 return a;
2085 }
2086
2087 aExp -= 0x7F;
2088 aSig |= 0x00800000;
2089 zSign = aExp < 0;
2090 zSig = aExp << 23;
2091
2092 for (i = 1 << 22; i > 0; i >>= 1) {
2093 aSig = ( (bits64)aSig * aSig ) >> 23;
2094 if ( aSig & 0x01000000 ) {
2095 aSig >>= 1;
2096 zSig |= i;
2097 }
2098 }
2099
2100 if ( zSign )
2101 zSig = -zSig;
2102
2103 return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
2104}
2105
2106
2107
2108
2109
2110
2111
2112int float32_eq( float32 a, float32 b STATUS_PARAM )
2113{
2114
2115 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2116 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2117 ) {
2118 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2119 float_raise( float_flag_invalid STATUS_VAR);
2120 }
2121 return 0;
2122 }
2123 return ( float32_val(a) == float32_val(b) ) ||
2124 ( (bits32) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
2125
2126}
2127
2128
2129
2130
2131
2132
2133
2134
2135int float32_le( float32 a, float32 b STATUS_PARAM )
2136{
2137 flag aSign, bSign;
2138 bits32 av, bv;
2139
2140 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2141 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2142 ) {
2143 float_raise( float_flag_invalid STATUS_VAR);
2144 return 0;
2145 }
2146 aSign = extractFloat32Sign( a );
2147 bSign = extractFloat32Sign( b );
2148 av = float32_val(a);
2149 bv = float32_val(b);
2150 if ( aSign != bSign ) return aSign || ( (bits32) ( ( av | bv )<<1 ) == 0 );
2151 return ( av == bv ) || ( aSign ^ ( av < bv ) );
2152
2153}
2154
2155
2156
2157
2158
2159
2160
2161int float32_lt( float32 a, float32 b STATUS_PARAM )
2162{
2163 flag aSign, bSign;
2164 bits32 av, bv;
2165
2166 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2167 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2168 ) {
2169 float_raise( float_flag_invalid STATUS_VAR);
2170 return 0;
2171 }
2172 aSign = extractFloat32Sign( a );
2173 bSign = extractFloat32Sign( b );
2174 av = float32_val(a);
2175 bv = float32_val(b);
2176 if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 );
2177 return ( av != bv ) && ( aSign ^ ( av < bv ) );
2178
2179}
2180
2181
2182
2183
2184
2185
2186
2187
2188int float32_eq_signaling( float32 a, float32 b STATUS_PARAM )
2189{
2190 bits32 av, bv;
2191
2192 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2193 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2194 ) {
2195 float_raise( float_flag_invalid STATUS_VAR);
2196 return 0;
2197 }
2198 av = float32_val(a);
2199 bv = float32_val(b);
2200 return ( av == bv ) || ( (bits32) ( ( av | bv )<<1 ) == 0 );
2201
2202}
2203
2204
2205
2206
2207
2208
2209
2210
2211int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
2212{
2213 flag aSign, bSign;
2214 bits32 av, bv;
2215
2216 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2217 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2218 ) {
2219 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2220 float_raise( float_flag_invalid STATUS_VAR);
2221 }
2222 return 0;
2223 }
2224 aSign = extractFloat32Sign( a );
2225 bSign = extractFloat32Sign( b );
2226 av = float32_val(a);
2227 bv = float32_val(b);
2228 if ( aSign != bSign ) return aSign || ( (bits32) ( ( av | bv )<<1 ) == 0 );
2229 return ( av == bv ) || ( aSign ^ ( av < bv ) );
2230
2231}
2232
2233
2234
2235
2236
2237
2238
2239
2240int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
2241{
2242 flag aSign, bSign;
2243 bits32 av, bv;
2244
2245 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2246 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2247 ) {
2248 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2249 float_raise( float_flag_invalid STATUS_VAR);
2250 }
2251 return 0;
2252 }
2253 aSign = extractFloat32Sign( a );
2254 bSign = extractFloat32Sign( b );
2255 av = float32_val(a);
2256 bv = float32_val(b);
2257 if ( aSign != bSign ) return aSign && ( (bits32) ( ( av | bv )<<1 ) != 0 );
2258 return ( av != bv ) && ( aSign ^ ( av < bv ) );
2259
2260}
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272int32 float64_to_int32( float64 a STATUS_PARAM )
2273{
2274 flag aSign;
2275 int16 aExp, shiftCount;
2276 bits64 aSig;
2277
2278 aSig = extractFloat64Frac( a );
2279 aExp = extractFloat64Exp( a );
2280 aSign = extractFloat64Sign( a );
2281 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2282 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2283 shiftCount = 0x42C - aExp;
2284 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2285 return roundAndPackInt32( aSign, aSig STATUS_VAR );
2286
2287}
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
2300{
2301 flag aSign;
2302 int16 aExp, shiftCount;
2303 bits64 aSig, savedASig;
2304 int32 z;
2305
2306 aSig = extractFloat64Frac( a );
2307 aExp = extractFloat64Exp( a );
2308 aSign = extractFloat64Sign( a );
2309 if ( 0x41E < aExp ) {
2310 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2311 goto invalid;
2312 }
2313 else if ( aExp < 0x3FF ) {
2314 if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2315 return 0;
2316 }
2317 aSig |= LIT64( 0x0010000000000000 );
2318 shiftCount = 0x433 - aExp;
2319 savedASig = aSig;
2320 aSig >>= shiftCount;
2321 z = aSig;
2322 if ( aSign ) z = - z;
2323 if ( ( z < 0 ) ^ aSign ) {
2324 invalid:
2325 float_raise( float_flag_invalid STATUS_VAR);
2326 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
2327 }
2328 if ( ( aSig<<shiftCount ) != savedASig ) {
2329 STATUS(float_exception_flags) |= float_flag_inexact;
2330 }
2331 return z;
2332
2333}
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345int64 float64_to_int64( float64 a STATUS_PARAM )
2346{
2347 flag aSign;
2348 int16 aExp, shiftCount;
2349 bits64 aSig, aSigExtra;
2350
2351 aSig = extractFloat64Frac( a );
2352 aExp = extractFloat64Exp( a );
2353 aSign = extractFloat64Sign( a );
2354 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2355 shiftCount = 0x433 - aExp;
2356 if ( shiftCount <= 0 ) {
2357 if ( 0x43E < aExp ) {
2358 float_raise( float_flag_invalid STATUS_VAR);
2359 if ( ! aSign
2360 || ( ( aExp == 0x7FF )
2361 && ( aSig != LIT64( 0x0010000000000000 ) ) )
2362 ) {
2363 return LIT64( 0x7FFFFFFFFFFFFFFF );
2364 }
2365 return (sbits64) LIT64( 0x8000000000000000 );
2366 }
2367 aSigExtra = 0;
2368 aSig <<= - shiftCount;
2369 }
2370 else {
2371 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2372 }
2373 return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
2374
2375}
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
2388{
2389 flag aSign;
2390 int16 aExp, shiftCount;
2391 bits64 aSig;
2392 int64 z;
2393
2394 aSig = extractFloat64Frac( a );
2395 aExp = extractFloat64Exp( a );
2396 aSign = extractFloat64Sign( a );
2397 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2398 shiftCount = aExp - 0x433;
2399 if ( 0 <= shiftCount ) {
2400 if ( 0x43E <= aExp ) {
2401 if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
2402 float_raise( float_flag_invalid STATUS_VAR);
2403 if ( ! aSign
2404 || ( ( aExp == 0x7FF )
2405 && ( aSig != LIT64( 0x0010000000000000 ) ) )
2406 ) {
2407 return LIT64( 0x7FFFFFFFFFFFFFFF );
2408 }
2409 }
2410 return (sbits64) LIT64( 0x8000000000000000 );
2411 }
2412 z = aSig<<shiftCount;
2413 }
2414 else {
2415 if ( aExp < 0x3FE ) {
2416 if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2417 return 0;
2418 }
2419 z = aSig>>( - shiftCount );
2420 if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
2421 STATUS(float_exception_flags) |= float_flag_inexact;
2422 }
2423 }
2424 if ( aSign ) z = - z;
2425 return z;
2426
2427}
2428
2429
2430
2431
2432
2433
2434
2435
2436float32 float64_to_float32( float64 a STATUS_PARAM )
2437{
2438 flag aSign;
2439 int16 aExp;
2440 bits64 aSig;
2441 bits32 zSig;
2442
2443 aSig = extractFloat64Frac( a );
2444 aExp = extractFloat64Exp( a );
2445 aSign = extractFloat64Sign( a );
2446 if ( aExp == 0x7FF ) {
2447 if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) );
2448 return packFloat32( aSign, 0xFF, 0 );
2449 }
2450 shift64RightJamming( aSig, 22, &aSig );
2451 zSig = aSig;
2452 if ( aExp || zSig ) {
2453 zSig |= 0x40000000;
2454 aExp -= 0x381;
2455 }
2456 return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
2457
2458}
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471static bits16 packFloat16(flag zSign, int16 zExp, bits16 zSig)
2472{
2473 return (((bits32)zSign) << 15) + (((bits32)zExp) << 10) + zSig;
2474}
2475
2476
2477
2478
2479float32 float16_to_float32( bits16 a, flag ieee STATUS_PARAM )
2480{
2481 flag aSign;
2482 int16 aExp;
2483 bits32 aSig;
2484
2485 aSign = a >> 15;
2486 aExp = (a >> 10) & 0x1f;
2487 aSig = a & 0x3ff;
2488
2489 if (aExp == 0x1f && ieee) {
2490 if (aSig) {
2491
2492 float32ToCommonNaN(a STATUS_VAR);
2493 aSig |= 0x200;
2494 }
2495 return packFloat32(aSign, 0xff, aSig << 13);
2496 }
2497 if (aExp == 0) {
2498 int8 shiftCount;
2499
2500 if (aSig == 0) {
2501 return packFloat32(aSign, 0, 0);
2502 }
2503
2504 shiftCount = countLeadingZeros32( aSig ) - 21;
2505 aSig = aSig << shiftCount;
2506 aExp = -shiftCount;
2507 }
2508 return packFloat32( aSign, aExp + 0x70, aSig << 13);
2509}
2510
2511bits16 float32_to_float16( float32 a, flag ieee STATUS_PARAM)
2512{
2513 flag aSign;
2514 int16 aExp;
2515 bits32 aSig;
2516 bits32 mask;
2517 bits32 increment;
2518 int8 roundingMode;
2519
2520 aSig = extractFloat32Frac( a );
2521 aExp = extractFloat32Exp( a );
2522 aSign = extractFloat32Sign( a );
2523 if ( aExp == 0xFF ) {
2524 if (aSig) {
2525
2526 float32ToCommonNaN(a STATUS_VAR);
2527 aSig |= 0x00400000;
2528 }
2529 return packFloat16(aSign, 0x1f, aSig >> 13);
2530 }
2531 if (aExp == 0 && aSign == 0) {
2532 return packFloat16(aSign, 0, 0);
2533 }
2534
2535 aSig |= 0x00800000;
2536 aExp -= 0x7f;
2537 if (aExp < -14) {
2538 mask = 0x007fffff;
2539 if (aExp < -24) {
2540 aExp = -25;
2541 } else {
2542 mask >>= 24 + aExp;
2543 }
2544 } else {
2545 mask = 0x00001fff;
2546 }
2547 if (aSig & mask) {
2548 float_raise( float_flag_underflow STATUS_VAR );
2549 roundingMode = STATUS(float_rounding_mode);
2550 switch (roundingMode) {
2551 case float_round_nearest_even:
2552 increment = (mask + 1) >> 1;
2553 if ((aSig & mask) == increment) {
2554 increment = aSig & (increment << 1);
2555 }
2556 break;
2557 case float_round_up:
2558 increment = aSign ? 0 : mask;
2559 break;
2560 case float_round_down:
2561 increment = aSign ? mask : 0;
2562 break;
2563 default:
2564 increment = 0;
2565 break;
2566 }
2567 aSig += increment;
2568 if (aSig >= 0x01000000) {
2569 aSig >>= 1;
2570 aExp++;
2571 }
2572 } else if (aExp < -14
2573 && STATUS(float_detect_tininess) == float_tininess_before_rounding) {
2574 float_raise( float_flag_underflow STATUS_VAR);
2575 }
2576
2577 if (ieee) {
2578 if (aExp > 15) {
2579 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
2580 return packFloat16(aSign, 0x1f, 0);
2581 }
2582 } else {
2583 if (aExp > 16) {
2584 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
2585 return packFloat16(aSign, 0x1f, 0x3ff);
2586 }
2587 }
2588 if (aExp < -24) {
2589 return packFloat16(aSign, 0, 0);
2590 }
2591 if (aExp < -14) {
2592 aSig >>= -14 - aExp;
2593 aExp = -14;
2594 }
2595 return packFloat16(aSign, aExp + 14, aSig >> 13);
2596}
2597
2598#ifdef FLOATX80
2599
2600
2601
2602
2603
2604
2605
2606
2607floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
2608{
2609 flag aSign;
2610 int16 aExp;
2611 bits64 aSig;
2612
2613 aSig = extractFloat64Frac( a );
2614 aExp = extractFloat64Exp( a );
2615 aSign = extractFloat64Sign( a );
2616 if ( aExp == 0x7FF ) {
2617 if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) );
2618 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2619 }
2620 if ( aExp == 0 ) {
2621 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
2622 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2623 }
2624 return
2625 packFloatx80(
2626 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
2627
2628}
2629
2630#endif
2631
2632#ifdef FLOAT128
2633
2634
2635
2636
2637
2638
2639
2640
2641float128 float64_to_float128( float64 a STATUS_PARAM )
2642{
2643 flag aSign;
2644 int16 aExp;
2645 bits64 aSig, zSig0, zSig1;
2646
2647 aSig = extractFloat64Frac( a );
2648 aExp = extractFloat64Exp( a );
2649 aSign = extractFloat64Sign( a );
2650 if ( aExp == 0x7FF ) {
2651 if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) );
2652 return packFloat128( aSign, 0x7FFF, 0, 0 );
2653 }
2654 if ( aExp == 0 ) {
2655 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
2656 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2657 --aExp;
2658 }
2659 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
2660 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
2661
2662}
2663
2664#endif
2665
2666
2667
2668
2669
2670
2671
2672
2673float64 float64_round_to_int( float64 a STATUS_PARAM )
2674{
2675 flag aSign;
2676 int16 aExp;
2677 bits64 lastBitMask, roundBitsMask;
2678 int8 roundingMode;
2679 bits64 z;
2680
2681 aExp = extractFloat64Exp( a );
2682 if ( 0x433 <= aExp ) {
2683 if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
2684 return propagateFloat64NaN( a, a STATUS_VAR );
2685 }
2686 return a;
2687 }
2688 if ( aExp < 0x3FF ) {
2689 if ( (bits64) ( float64_val(a)<<1 ) == 0 ) return a;
2690 STATUS(float_exception_flags) |= float_flag_inexact;
2691 aSign = extractFloat64Sign( a );
2692 switch ( STATUS(float_rounding_mode) ) {
2693 case float_round_nearest_even:
2694 if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
2695 return packFloat64( aSign, 0x3FF, 0 );
2696 }
2697 break;
2698 case float_round_down:
2699 return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
2700 case float_round_up:
2701 return make_float64(
2702 aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
2703 }
2704 return packFloat64( aSign, 0, 0 );
2705 }
2706 lastBitMask = 1;
2707 lastBitMask <<= 0x433 - aExp;
2708 roundBitsMask = lastBitMask - 1;
2709 z = float64_val(a);
2710 roundingMode = STATUS(float_rounding_mode);
2711 if ( roundingMode == float_round_nearest_even ) {
2712 z += lastBitMask>>1;
2713 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
2714 }
2715 else if ( roundingMode != float_round_to_zero ) {
2716 if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
2717 z += roundBitsMask;
2718 }
2719 }
2720 z &= ~ roundBitsMask;
2721 if ( z != float64_val(a) )
2722 STATUS(float_exception_flags) |= float_flag_inexact;
2723 return make_float64(z);
2724
2725}
2726
2727float64 float64_trunc_to_int( float64 a STATUS_PARAM)
2728{
2729 int oldmode;
2730 float64 res;
2731 oldmode = STATUS(float_rounding_mode);
2732 STATUS(float_rounding_mode) = float_round_to_zero;
2733 res = float64_round_to_int(a STATUS_VAR);
2734 STATUS(float_rounding_mode) = oldmode;
2735 return res;
2736}
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
2747{
2748 int16 aExp, bExp, zExp;
2749 bits64 aSig, bSig, zSig;
2750 int16 expDiff;
2751
2752 aSig = extractFloat64Frac( a );
2753 aExp = extractFloat64Exp( a );
2754 bSig = extractFloat64Frac( b );
2755 bExp = extractFloat64Exp( b );
2756 expDiff = aExp - bExp;
2757 aSig <<= 9;
2758 bSig <<= 9;
2759 if ( 0 < expDiff ) {
2760 if ( aExp == 0x7FF ) {
2761 if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2762 return a;
2763 }
2764 if ( bExp == 0 ) {
2765 --expDiff;
2766 }
2767 else {
2768 bSig |= LIT64( 0x2000000000000000 );
2769 }
2770 shift64RightJamming( bSig, expDiff, &bSig );
2771 zExp = aExp;
2772 }
2773 else if ( expDiff < 0 ) {
2774 if ( bExp == 0x7FF ) {
2775 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2776 return packFloat64( zSign, 0x7FF, 0 );
2777 }
2778 if ( aExp == 0 ) {
2779 ++expDiff;
2780 }
2781 else {
2782 aSig |= LIT64( 0x2000000000000000 );
2783 }
2784 shift64RightJamming( aSig, - expDiff, &aSig );
2785 zExp = bExp;
2786 }
2787 else {
2788 if ( aExp == 0x7FF ) {
2789 if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2790 return a;
2791 }
2792 if ( aExp == 0 ) {
2793 if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
2794 return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
2795 }
2796 zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
2797 zExp = aExp;
2798 goto roundAndPack;
2799 }
2800 aSig |= LIT64( 0x2000000000000000 );
2801 zSig = ( aSig + bSig )<<1;
2802 --zExp;
2803 if ( (sbits64) zSig < 0 ) {
2804 zSig = aSig + bSig;
2805 ++zExp;
2806 }
2807 roundAndPack:
2808 return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
2809
2810}
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
2821{
2822 int16 aExp, bExp, zExp;
2823 bits64 aSig, bSig, zSig;
2824 int16 expDiff;
2825
2826 aSig = extractFloat64Frac( a );
2827 aExp = extractFloat64Exp( a );
2828 bSig = extractFloat64Frac( b );
2829 bExp = extractFloat64Exp( b );
2830 expDiff = aExp - bExp;
2831 aSig <<= 10;
2832 bSig <<= 10;
2833 if ( 0 < expDiff ) goto aExpBigger;
2834 if ( expDiff < 0 ) goto bExpBigger;
2835 if ( aExp == 0x7FF ) {
2836 if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2837 float_raise( float_flag_invalid STATUS_VAR);
2838 return float64_default_nan;
2839 }
2840 if ( aExp == 0 ) {
2841 aExp = 1;
2842 bExp = 1;
2843 }
2844 if ( bSig < aSig ) goto aBigger;
2845 if ( aSig < bSig ) goto bBigger;
2846 return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
2847 bExpBigger:
2848 if ( bExp == 0x7FF ) {
2849 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2850 return packFloat64( zSign ^ 1, 0x7FF, 0 );
2851 }
2852 if ( aExp == 0 ) {
2853 ++expDiff;
2854 }
2855 else {
2856 aSig |= LIT64( 0x4000000000000000 );
2857 }
2858 shift64RightJamming( aSig, - expDiff, &aSig );
2859 bSig |= LIT64( 0x4000000000000000 );
2860 bBigger:
2861 zSig = bSig - aSig;
2862 zExp = bExp;
2863 zSign ^= 1;
2864 goto normalizeRoundAndPack;
2865 aExpBigger:
2866 if ( aExp == 0x7FF ) {
2867 if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2868 return a;
2869 }
2870 if ( bExp == 0 ) {
2871 --expDiff;
2872 }
2873 else {
2874 bSig |= LIT64( 0x4000000000000000 );
2875 }
2876 shift64RightJamming( bSig, expDiff, &bSig );
2877 aSig |= LIT64( 0x4000000000000000 );
2878 aBigger:
2879 zSig = aSig - bSig;
2880 zExp = aExp;
2881 normalizeRoundAndPack:
2882 --zExp;
2883 return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
2884
2885}
2886
2887
2888
2889
2890
2891
2892
2893float64 float64_add( float64 a, float64 b STATUS_PARAM )
2894{
2895 flag aSign, bSign;
2896
2897 aSign = extractFloat64Sign( a );
2898 bSign = extractFloat64Sign( b );
2899 if ( aSign == bSign ) {
2900 return addFloat64Sigs( a, b, aSign STATUS_VAR );
2901 }
2902 else {
2903 return subFloat64Sigs( a, b, aSign STATUS_VAR );
2904 }
2905
2906}
2907
2908
2909
2910
2911
2912
2913
2914float64 float64_sub( float64 a, float64 b STATUS_PARAM )
2915{
2916 flag aSign, bSign;
2917
2918 aSign = extractFloat64Sign( a );
2919 bSign = extractFloat64Sign( b );
2920 if ( aSign == bSign ) {
2921 return subFloat64Sigs( a, b, aSign STATUS_VAR );
2922 }
2923 else {
2924 return addFloat64Sigs( a, b, aSign STATUS_VAR );
2925 }
2926
2927}
2928
2929
2930
2931
2932
2933
2934
2935float64 float64_mul( float64 a, float64 b STATUS_PARAM )
2936{
2937 flag aSign, bSign, zSign;
2938 int16 aExp, bExp, zExp;
2939 bits64 aSig, bSig, zSig0, zSig1;
2940
2941 aSig = extractFloat64Frac( a );
2942 aExp = extractFloat64Exp( a );
2943 aSign = extractFloat64Sign( a );
2944 bSig = extractFloat64Frac( b );
2945 bExp = extractFloat64Exp( b );
2946 bSign = extractFloat64Sign( b );
2947 zSign = aSign ^ bSign;
2948 if ( aExp == 0x7FF ) {
2949 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2950 return propagateFloat64NaN( a, b STATUS_VAR );
2951 }
2952 if ( ( bExp | bSig ) == 0 ) {
2953 float_raise( float_flag_invalid STATUS_VAR);
2954 return float64_default_nan;
2955 }
2956 return packFloat64( zSign, 0x7FF, 0 );
2957 }
2958 if ( bExp == 0x7FF ) {
2959 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
2960 if ( ( aExp | aSig ) == 0 ) {
2961 float_raise( float_flag_invalid STATUS_VAR);
2962 return float64_default_nan;
2963 }
2964 return packFloat64( zSign, 0x7FF, 0 );
2965 }
2966 if ( aExp == 0 ) {
2967 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2968 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2969 }
2970 if ( bExp == 0 ) {
2971 if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
2972 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2973 }
2974 zExp = aExp + bExp - 0x3FF;
2975 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2976 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2977 mul64To128( aSig, bSig, &zSig0, &zSig1 );
2978 zSig0 |= ( zSig1 != 0 );
2979 if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
2980 zSig0 <<= 1;
2981 --zExp;
2982 }
2983 return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
2984
2985}
2986
2987
2988
2989
2990
2991
2992
2993float64 float64_div( float64 a, float64 b STATUS_PARAM )
2994{
2995 flag aSign, bSign, zSign;
2996 int16 aExp, bExp, zExp;
2997 bits64 aSig, bSig, zSig;
2998 bits64 rem0, rem1;
2999 bits64 term0, term1;
3000
3001 aSig = extractFloat64Frac( a );
3002 aExp = extractFloat64Exp( a );
3003 aSign = extractFloat64Sign( a );
3004 bSig = extractFloat64Frac( b );
3005 bExp = extractFloat64Exp( b );
3006 bSign = extractFloat64Sign( b );
3007 zSign = aSign ^ bSign;
3008 if ( aExp == 0x7FF ) {
3009 if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3010 if ( bExp == 0x7FF ) {
3011 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3012 float_raise( float_flag_invalid STATUS_VAR);
3013 return float64_default_nan;
3014 }
3015 return packFloat64( zSign, 0x7FF, 0 );
3016 }
3017 if ( bExp == 0x7FF ) {
3018 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3019 return packFloat64( zSign, 0, 0 );
3020 }
3021 if ( bExp == 0 ) {
3022 if ( bSig == 0 ) {
3023 if ( ( aExp | aSig ) == 0 ) {
3024 float_raise( float_flag_invalid STATUS_VAR);
3025 return float64_default_nan;
3026 }
3027 float_raise( float_flag_divbyzero STATUS_VAR);
3028 return packFloat64( zSign, 0x7FF, 0 );
3029 }
3030 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3031 }
3032 if ( aExp == 0 ) {
3033 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3034 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3035 }
3036 zExp = aExp - bExp + 0x3FD;
3037 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
3038 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3039 if ( bSig <= ( aSig + aSig ) ) {
3040 aSig >>= 1;
3041 ++zExp;
3042 }
3043 zSig = estimateDiv128To64( aSig, 0, bSig );
3044 if ( ( zSig & 0x1FF ) <= 2 ) {
3045 mul64To128( bSig, zSig, &term0, &term1 );
3046 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3047 while ( (sbits64) rem0 < 0 ) {
3048 --zSig;
3049 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3050 }
3051 zSig |= ( rem1 != 0 );
3052 }
3053 return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3054
3055}
3056
3057
3058
3059
3060
3061
3062
3063float64 float64_rem( float64 a, float64 b STATUS_PARAM )
3064{
3065 flag aSign, bSign, zSign;
3066 int16 aExp, bExp, expDiff;
3067 bits64 aSig, bSig;
3068 bits64 q, alternateASig;
3069 sbits64 sigMean;
3070
3071 aSig = extractFloat64Frac( a );
3072 aExp = extractFloat64Exp( a );
3073 aSign = extractFloat64Sign( a );
3074 bSig = extractFloat64Frac( b );
3075 bExp = extractFloat64Exp( b );
3076 bSign = extractFloat64Sign( b );
3077 if ( aExp == 0x7FF ) {
3078 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3079 return propagateFloat64NaN( a, b STATUS_VAR );
3080 }
3081 float_raise( float_flag_invalid STATUS_VAR);
3082 return float64_default_nan;
3083 }
3084 if ( bExp == 0x7FF ) {
3085 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3086 return a;
3087 }
3088 if ( bExp == 0 ) {
3089 if ( bSig == 0 ) {
3090 float_raise( float_flag_invalid STATUS_VAR);
3091 return float64_default_nan;
3092 }
3093 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3094 }
3095 if ( aExp == 0 ) {
3096 if ( aSig == 0 ) return a;
3097 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3098 }
3099 expDiff = aExp - bExp;
3100 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3101 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3102 if ( expDiff < 0 ) {
3103 if ( expDiff < -1 ) return a;
3104 aSig >>= 1;
3105 }
3106 q = ( bSig <= aSig );
3107 if ( q ) aSig -= bSig;
3108 expDiff -= 64;
3109 while ( 0 < expDiff ) {
3110 q = estimateDiv128To64( aSig, 0, bSig );
3111 q = ( 2 < q ) ? q - 2 : 0;
3112 aSig = - ( ( bSig>>2 ) * q );
3113 expDiff -= 62;
3114 }
3115 expDiff += 64;
3116 if ( 0 < expDiff ) {
3117 q = estimateDiv128To64( aSig, 0, bSig );
3118 q = ( 2 < q ) ? q - 2 : 0;
3119 q >>= 64 - expDiff;
3120 bSig >>= 2;
3121 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3122 }
3123 else {
3124 aSig >>= 2;
3125 bSig >>= 2;
3126 }
3127 do {
3128 alternateASig = aSig;
3129 ++q;
3130 aSig -= bSig;
3131 } while ( 0 <= (sbits64) aSig );
3132 sigMean = aSig + alternateASig;
3133 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3134 aSig = alternateASig;
3135 }
3136 zSign = ( (sbits64) aSig < 0 );
3137 if ( zSign ) aSig = - aSig;
3138 return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
3139
3140}
3141
3142
3143
3144
3145
3146
3147
3148float64 float64_sqrt( float64 a STATUS_PARAM )
3149{
3150 flag aSign;
3151 int16 aExp, zExp;
3152 bits64 aSig, zSig, doubleZSig;
3153 bits64 rem0, rem1, term0, term1;
3154
3155 aSig = extractFloat64Frac( a );
3156 aExp = extractFloat64Exp( a );
3157 aSign = extractFloat64Sign( a );
3158 if ( aExp == 0x7FF ) {
3159 if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
3160 if ( ! aSign ) return a;
3161 float_raise( float_flag_invalid STATUS_VAR);
3162 return float64_default_nan;
3163 }
3164 if ( aSign ) {
3165 if ( ( aExp | aSig ) == 0 ) return a;
3166 float_raise( float_flag_invalid STATUS_VAR);
3167 return float64_default_nan;
3168 }
3169 if ( aExp == 0 ) {
3170 if ( aSig == 0 ) return float64_zero;
3171 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3172 }
3173 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3174 aSig |= LIT64( 0x0010000000000000 );
3175 zSig = estimateSqrt32( aExp, aSig>>21 );
3176 aSig <<= 9 - ( aExp & 1 );
3177 zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
3178 if ( ( zSig & 0x1FF ) <= 5 ) {
3179 doubleZSig = zSig<<1;
3180 mul64To128( zSig, zSig, &term0, &term1 );
3181 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3182 while ( (sbits64) rem0 < 0 ) {
3183 --zSig;
3184 doubleZSig -= 2;
3185 add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
3186 }
3187 zSig |= ( ( rem0 | rem1 ) != 0 );
3188 }
3189 return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
3190
3191}
3192
3193
3194
3195
3196
3197
3198float64 float64_log2( float64 a STATUS_PARAM )
3199{
3200 flag aSign, zSign;
3201 int16 aExp;
3202 bits64 aSig, aSig0, aSig1, zSig, i;
3203
3204 aSig = extractFloat64Frac( a );
3205 aExp = extractFloat64Exp( a );
3206 aSign = extractFloat64Sign( a );
3207
3208 if ( aExp == 0 ) {
3209 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
3210 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3211 }
3212 if ( aSign ) {
3213 float_raise( float_flag_invalid STATUS_VAR);
3214 return float64_default_nan;
3215 }
3216 if ( aExp == 0x7FF ) {
3217 if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
3218 return a;
3219 }
3220
3221 aExp -= 0x3FF;
3222 aSig |= LIT64( 0x0010000000000000 );
3223 zSign = aExp < 0;
3224 zSig = (bits64)aExp << 52;
3225 for (i = 1LL << 51; i > 0; i >>= 1) {
3226 mul64To128( aSig, aSig, &aSig0, &aSig1 );
3227 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
3228 if ( aSig & LIT64( 0x0020000000000000 ) ) {
3229 aSig >>= 1;
3230 zSig |= i;
3231 }
3232 }
3233
3234 if ( zSign )
3235 zSig = -zSig;
3236 return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
3237}
3238
3239
3240
3241
3242
3243
3244
3245int float64_eq( float64 a, float64 b STATUS_PARAM )
3246{
3247 bits64 av, bv;
3248
3249 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3250 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3251 ) {
3252 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3253 float_raise( float_flag_invalid STATUS_VAR);
3254 }
3255 return 0;
3256 }
3257 av = float64_val(a);
3258 bv = float64_val(b);
3259 return ( av == bv ) || ( (bits64) ( ( av | bv )<<1 ) == 0 );
3260
3261}
3262
3263
3264
3265
3266
3267
3268
3269
3270int float64_le( float64 a, float64 b STATUS_PARAM )
3271{
3272 flag aSign, bSign;
3273 bits64 av, bv;
3274
3275 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3276 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3277 ) {
3278 float_raise( float_flag_invalid STATUS_VAR);
3279 return 0;
3280 }
3281 aSign = extractFloat64Sign( a );
3282 bSign = extractFloat64Sign( b );
3283 av = float64_val(a);
3284 bv = float64_val(b);
3285 if ( aSign != bSign ) return aSign || ( (bits64) ( ( av | bv )<<1 ) == 0 );
3286 return ( av == bv ) || ( aSign ^ ( av < bv ) );
3287
3288}
3289
3290
3291
3292
3293
3294
3295
3296int float64_lt( float64 a, float64 b STATUS_PARAM )
3297{
3298 flag aSign, bSign;
3299 bits64 av, bv;
3300
3301 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3302 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3303 ) {
3304 float_raise( float_flag_invalid STATUS_VAR);
3305 return 0;
3306 }
3307 aSign = extractFloat64Sign( a );
3308 bSign = extractFloat64Sign( b );
3309 av = float64_val(a);
3310 bv = float64_val(b);
3311 if ( aSign != bSign ) return aSign && ( (bits64) ( ( av | bv )<<1 ) != 0 );
3312 return ( av != bv ) && ( aSign ^ ( av < bv ) );
3313
3314}
3315
3316
3317
3318
3319
3320
3321
3322
3323int float64_eq_signaling( float64 a, float64 b STATUS_PARAM )
3324{
3325 bits64 av, bv;
3326
3327 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3328 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3329 ) {
3330 float_raise( float_flag_invalid STATUS_VAR);
3331 return 0;
3332 }
3333 av = float64_val(a);
3334 bv = float64_val(b);
3335 return ( av == bv ) || ( (bits64) ( ( av | bv )<<1 ) == 0 );
3336
3337}
3338
3339
3340
3341
3342
3343
3344
3345
3346int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
3347{
3348 flag aSign, bSign;
3349 bits64 av, bv;
3350
3351 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3352 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3353 ) {
3354 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3355 float_raise( float_flag_invalid STATUS_VAR);
3356 }
3357 return 0;
3358 }
3359 aSign = extractFloat64Sign( a );
3360 bSign = extractFloat64Sign( b );
3361 av = float64_val(a);
3362 bv = float64_val(b);
3363 if ( aSign != bSign ) return aSign || ( (bits64) ( ( av | bv )<<1 ) == 0 );
3364 return ( av == bv ) || ( aSign ^ ( av < bv ) );
3365
3366}
3367
3368
3369
3370
3371
3372
3373
3374
3375int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
3376{
3377 flag aSign, bSign;
3378 bits64 av, bv;
3379
3380 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3381 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3382 ) {
3383 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3384 float_raise( float_flag_invalid STATUS_VAR);
3385 }
3386 return 0;
3387 }
3388 aSign = extractFloat64Sign( a );
3389 bSign = extractFloat64Sign( b );
3390 av = float64_val(a);
3391 bv = float64_val(b);
3392 if ( aSign != bSign ) return aSign && ( (bits64) ( ( av | bv )<<1 ) != 0 );
3393 return ( av != bv ) && ( aSign ^ ( av < bv ) );
3394
3395}
3396
3397#ifdef FLOATX80
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
3410{
3411 flag aSign;
3412 int32 aExp, shiftCount;
3413 bits64 aSig;
3414
3415 aSig = extractFloatx80Frac( a );
3416 aExp = extractFloatx80Exp( a );
3417 aSign = extractFloatx80Sign( a );
3418 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3419 shiftCount = 0x4037 - aExp;
3420 if ( shiftCount <= 0 ) shiftCount = 1;
3421 shift64RightJamming( aSig, shiftCount, &aSig );
3422 return roundAndPackInt32( aSign, aSig STATUS_VAR );
3423
3424}
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
3437{
3438 flag aSign;
3439 int32 aExp, shiftCount;
3440 bits64 aSig, savedASig;
3441 int32 z;
3442
3443 aSig = extractFloatx80Frac( a );
3444 aExp = extractFloatx80Exp( a );
3445 aSign = extractFloatx80Sign( a );
3446 if ( 0x401E < aExp ) {
3447 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3448 goto invalid;
3449 }
3450 else if ( aExp < 0x3FFF ) {
3451 if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
3452 return 0;
3453 }
3454 shiftCount = 0x403E - aExp;
3455 savedASig = aSig;
3456 aSig >>= shiftCount;
3457 z = aSig;
3458 if ( aSign ) z = - z;
3459 if ( ( z < 0 ) ^ aSign ) {
3460 invalid:
3461 float_raise( float_flag_invalid STATUS_VAR);
3462 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
3463 }
3464 if ( ( aSig<<shiftCount ) != savedASig ) {
3465 STATUS(float_exception_flags) |= float_flag_inexact;
3466 }
3467 return z;
3468
3469}
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
3482{
3483 flag aSign;
3484 int32 aExp, shiftCount;
3485 bits64 aSig, aSigExtra;
3486
3487 aSig = extractFloatx80Frac( a );
3488 aExp = extractFloatx80Exp( a );
3489 aSign = extractFloatx80Sign( a );
3490 shiftCount = 0x403E - aExp;
3491 if ( shiftCount <= 0 ) {
3492 if ( shiftCount ) {
3493 float_raise( float_flag_invalid STATUS_VAR);
3494 if ( ! aSign
3495 || ( ( aExp == 0x7FFF )
3496 && ( aSig != LIT64( 0x8000000000000000 ) ) )
3497 ) {
3498 return LIT64( 0x7FFFFFFFFFFFFFFF );
3499 }
3500 return (sbits64) LIT64( 0x8000000000000000 );
3501 }
3502 aSigExtra = 0;
3503 }
3504 else {
3505 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3506 }
3507 return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
3508
3509}
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
3522{
3523 flag aSign;
3524 int32 aExp, shiftCount;
3525 bits64 aSig;
3526 int64 z;
3527
3528 aSig = extractFloatx80Frac( a );
3529 aExp = extractFloatx80Exp( a );
3530 aSign = extractFloatx80Sign( a );
3531 shiftCount = aExp - 0x403E;
3532 if ( 0 <= shiftCount ) {
3533 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
3534 if ( ( a.high != 0xC03E ) || aSig ) {
3535 float_raise( float_flag_invalid STATUS_VAR);
3536 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
3537 return LIT64( 0x7FFFFFFFFFFFFFFF );
3538 }
3539 }
3540 return (sbits64) LIT64( 0x8000000000000000 );
3541 }
3542 else if ( aExp < 0x3FFF ) {
3543 if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
3544 return 0;
3545 }
3546 z = aSig>>( - shiftCount );
3547 if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
3548 STATUS(float_exception_flags) |= float_flag_inexact;
3549 }
3550 if ( aSign ) z = - z;
3551 return z;
3552
3553}
3554
3555
3556
3557
3558
3559
3560
3561
3562float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
3563{
3564 flag aSign;
3565 int32 aExp;
3566 bits64 aSig;
3567
3568 aSig = extractFloatx80Frac( a );
3569 aExp = extractFloatx80Exp( a );
3570 aSign = extractFloatx80Sign( a );
3571 if ( aExp == 0x7FFF ) {
3572 if ( (bits64) ( aSig<<1 ) ) {
3573 return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) );
3574 }
3575 return packFloat32( aSign, 0xFF, 0 );
3576 }
3577 shift64RightJamming( aSig, 33, &aSig );
3578 if ( aExp || aSig ) aExp -= 0x3F81;
3579 return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
3580
3581}
3582
3583
3584
3585
3586
3587
3588
3589
3590float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
3591{
3592 flag aSign;
3593 int32 aExp;
3594 bits64 aSig, zSig;
3595
3596 aSig = extractFloatx80Frac( a );
3597 aExp = extractFloatx80Exp( a );
3598 aSign = extractFloatx80Sign( a );
3599 if ( aExp == 0x7FFF ) {
3600 if ( (bits64) ( aSig<<1 ) ) {
3601 return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) );
3602 }
3603 return packFloat64( aSign, 0x7FF, 0 );
3604 }
3605 shift64RightJamming( aSig, 1, &zSig );
3606 if ( aExp || aSig ) aExp -= 0x3C01;
3607 return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
3608
3609}
3610
3611#ifdef FLOAT128
3612
3613
3614
3615
3616
3617
3618
3619
3620float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
3621{
3622 flag aSign;
3623 int16 aExp;
3624 bits64 aSig, zSig0, zSig1;
3625
3626 aSig = extractFloatx80Frac( a );
3627 aExp = extractFloatx80Exp( a );
3628 aSign = extractFloatx80Sign( a );
3629 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
3630 return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) );
3631 }
3632 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
3633 return packFloat128( aSign, aExp, zSig0, zSig1 );
3634
3635}
3636
3637#endif
3638
3639
3640
3641
3642
3643
3644
3645
3646floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
3647{
3648 flag aSign;
3649 int32 aExp;
3650 bits64 lastBitMask, roundBitsMask;
3651 int8 roundingMode;
3652 floatx80 z;
3653
3654 aExp = extractFloatx80Exp( a );
3655 if ( 0x403E <= aExp ) {
3656 if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
3657 return propagateFloatx80NaN( a, a STATUS_VAR );
3658 }
3659 return a;
3660 }
3661 if ( aExp < 0x3FFF ) {
3662 if ( ( aExp == 0 )
3663 && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
3664 return a;
3665 }
3666 STATUS(float_exception_flags) |= float_flag_inexact;
3667 aSign = extractFloatx80Sign( a );
3668 switch ( STATUS(float_rounding_mode) ) {
3669 case float_round_nearest_even:
3670 if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
3671 ) {
3672 return
3673 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3674 }
3675 break;
3676 case float_round_down:
3677 return
3678 aSign ?
3679 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
3680 : packFloatx80( 0, 0, 0 );
3681 case float_round_up:
3682 return
3683 aSign ? packFloatx80( 1, 0, 0 )
3684 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
3685 }
3686 return packFloatx80( aSign, 0, 0 );
3687 }
3688 lastBitMask = 1;
3689 lastBitMask <<= 0x403E - aExp;
3690 roundBitsMask = lastBitMask - 1;
3691 z = a;
3692 roundingMode = STATUS(float_rounding_mode);
3693 if ( roundingMode == float_round_nearest_even ) {
3694 z.low += lastBitMask>>1;
3695 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
3696 }
3697 else if ( roundingMode != float_round_to_zero ) {
3698 if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
3699 z.low += roundBitsMask;
3700 }
3701 }
3702 z.low &= ~ roundBitsMask;
3703 if ( z.low == 0 ) {
3704 ++z.high;
3705 z.low = LIT64( 0x8000000000000000 );
3706 }
3707 if ( z.low != a.low ) STATUS(float_exception_flags) |= float_flag_inexact;
3708 return z;
3709
3710}
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
3721{
3722 int32 aExp, bExp, zExp;
3723 bits64 aSig, bSig, zSig0, zSig1;
3724 int32 expDiff;
3725
3726 aSig = extractFloatx80Frac( a );
3727 aExp = extractFloatx80Exp( a );
3728 bSig = extractFloatx80Frac( b );
3729 bExp = extractFloatx80Exp( b );
3730 expDiff = aExp - bExp;
3731 if ( 0 < expDiff ) {
3732 if ( aExp == 0x7FFF ) {
3733 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3734 return a;
3735 }
3736 if ( bExp == 0 ) --expDiff;
3737 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3738 zExp = aExp;
3739 }
3740 else if ( expDiff < 0 ) {
3741 if ( bExp == 0x7FFF ) {
3742 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3743 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3744 }
3745 if ( aExp == 0 ) ++expDiff;
3746 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3747 zExp = bExp;
3748 }
3749 else {
3750 if ( aExp == 0x7FFF ) {
3751 if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3752 return propagateFloatx80NaN( a, b STATUS_VAR );
3753 }
3754 return a;
3755 }
3756 zSig1 = 0;
3757 zSig0 = aSig + bSig;
3758 if ( aExp == 0 ) {
3759 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
3760 goto roundAndPack;
3761 }
3762 zExp = aExp;
3763 goto shiftRight1;
3764 }
3765 zSig0 = aSig + bSig;
3766 if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
3767 shiftRight1:
3768 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
3769 zSig0 |= LIT64( 0x8000000000000000 );
3770 ++zExp;
3771 roundAndPack:
3772 return
3773 roundAndPackFloatx80(
3774 STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
3775
3776}
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
3787{
3788 int32 aExp, bExp, zExp;
3789 bits64 aSig, bSig, zSig0, zSig1;
3790 int32 expDiff;
3791 floatx80 z;
3792
3793 aSig = extractFloatx80Frac( a );
3794 aExp = extractFloatx80Exp( a );
3795 bSig = extractFloatx80Frac( b );
3796 bExp = extractFloatx80Exp( b );
3797 expDiff = aExp - bExp;
3798 if ( 0 < expDiff ) goto aExpBigger;
3799 if ( expDiff < 0 ) goto bExpBigger;
3800 if ( aExp == 0x7FFF ) {
3801 if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3802 return propagateFloatx80NaN( a, b STATUS_VAR );
3803 }
3804 float_raise( float_flag_invalid STATUS_VAR);
3805 z.low = floatx80_default_nan_low;
3806 z.high = floatx80_default_nan_high;
3807 return z;
3808 }
3809 if ( aExp == 0 ) {
3810 aExp = 1;
3811 bExp = 1;
3812 }
3813 zSig1 = 0;
3814 if ( bSig < aSig ) goto aBigger;
3815 if ( aSig < bSig ) goto bBigger;
3816 return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
3817 bExpBigger:
3818 if ( bExp == 0x7FFF ) {
3819 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3820 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
3821 }
3822 if ( aExp == 0 ) ++expDiff;
3823 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3824 bBigger:
3825 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
3826 zExp = bExp;
3827 zSign ^= 1;
3828 goto normalizeRoundAndPack;
3829 aExpBigger:
3830 if ( aExp == 0x7FFF ) {
3831 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3832 return a;
3833 }
3834 if ( bExp == 0 ) --expDiff;
3835 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3836 aBigger:
3837 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
3838 zExp = aExp;
3839 normalizeRoundAndPack:
3840 return
3841 normalizeRoundAndPackFloatx80(
3842 STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
3843
3844}
3845
3846
3847
3848
3849
3850
3851
3852floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
3853{
3854 flag aSign, bSign;
3855
3856 aSign = extractFloatx80Sign( a );
3857 bSign = extractFloatx80Sign( b );
3858 if ( aSign == bSign ) {
3859 return addFloatx80Sigs( a, b, aSign STATUS_VAR );
3860 }
3861 else {
3862 return subFloatx80Sigs( a, b, aSign STATUS_VAR );
3863 }
3864
3865}
3866
3867
3868
3869
3870
3871
3872
3873floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
3874{
3875 flag aSign, bSign;
3876
3877 aSign = extractFloatx80Sign( a );
3878 bSign = extractFloatx80Sign( b );
3879 if ( aSign == bSign ) {
3880 return subFloatx80Sigs( a, b, aSign STATUS_VAR );
3881 }
3882 else {
3883 return addFloatx80Sigs( a, b, aSign STATUS_VAR );
3884 }
3885
3886}
3887
3888
3889
3890
3891
3892
3893
3894floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
3895{
3896 flag aSign, bSign, zSign;
3897 int32 aExp, bExp, zExp;
3898 bits64 aSig, bSig, zSig0, zSig1;
3899 floatx80 z;
3900
3901 aSig = extractFloatx80Frac( a );
3902 aExp = extractFloatx80Exp( a );
3903 aSign = extractFloatx80Sign( a );
3904 bSig = extractFloatx80Frac( b );
3905 bExp = extractFloatx80Exp( b );
3906 bSign = extractFloatx80Sign( b );
3907 zSign = aSign ^ bSign;
3908 if ( aExp == 0x7FFF ) {
3909 if ( (bits64) ( aSig<<1 )
3910 || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3911 return propagateFloatx80NaN( a, b STATUS_VAR );
3912 }
3913 if ( ( bExp | bSig ) == 0 ) goto invalid;
3914 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3915 }
3916 if ( bExp == 0x7FFF ) {
3917 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3918 if ( ( aExp | aSig ) == 0 ) {
3919 invalid:
3920 float_raise( float_flag_invalid STATUS_VAR);
3921 z.low = floatx80_default_nan_low;
3922 z.high = floatx80_default_nan_high;
3923 return z;
3924 }
3925 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3926 }
3927 if ( aExp == 0 ) {
3928 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3929 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3930 }
3931 if ( bExp == 0 ) {
3932 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
3933 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3934 }
3935 zExp = aExp + bExp - 0x3FFE;
3936 mul64To128( aSig, bSig, &zSig0, &zSig1 );
3937 if ( 0 < (sbits64) zSig0 ) {
3938 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
3939 --zExp;
3940 }
3941 return
3942 roundAndPackFloatx80(
3943 STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
3944
3945}
3946
3947
3948
3949
3950
3951
3952
3953floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
3954{
3955 flag aSign, bSign, zSign;
3956 int32 aExp, bExp, zExp;
3957 bits64 aSig, bSig, zSig0, zSig1;
3958 bits64 rem0, rem1, rem2, term0, term1, term2;
3959 floatx80 z;
3960
3961 aSig = extractFloatx80Frac( a );
3962 aExp = extractFloatx80Exp( a );
3963 aSign = extractFloatx80Sign( a );
3964 bSig = extractFloatx80Frac( b );
3965 bExp = extractFloatx80Exp( b );
3966 bSign = extractFloatx80Sign( b );
3967 zSign = aSign ^ bSign;
3968 if ( aExp == 0x7FFF ) {
3969 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3970 if ( bExp == 0x7FFF ) {
3971 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3972 goto invalid;
3973 }
3974 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3975 }
3976 if ( bExp == 0x7FFF ) {
3977 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
3978 return packFloatx80( zSign, 0, 0 );
3979 }
3980 if ( bExp == 0 ) {
3981 if ( bSig == 0 ) {
3982 if ( ( aExp | aSig ) == 0 ) {
3983 invalid:
3984 float_raise( float_flag_invalid STATUS_VAR);
3985 z.low = floatx80_default_nan_low;
3986 z.high = floatx80_default_nan_high;
3987 return z;
3988 }
3989 float_raise( float_flag_divbyzero STATUS_VAR);
3990 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3991 }
3992 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3993 }
3994 if ( aExp == 0 ) {
3995 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3996 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3997 }
3998 zExp = aExp - bExp + 0x3FFE;
3999 rem1 = 0;
4000 if ( bSig <= aSig ) {
4001 shift128Right( aSig, 0, 1, &aSig, &rem1 );
4002 ++zExp;
4003 }
4004 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
4005 mul64To128( bSig, zSig0, &term0, &term1 );
4006 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
4007 while ( (sbits64) rem0 < 0 ) {
4008 --zSig0;
4009 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4010 }
4011 zSig1 = estimateDiv128To64( rem1, 0, bSig );
4012 if ( (bits64) ( zSig1<<1 ) <= 8 ) {
4013 mul64To128( bSig, zSig1, &term1, &term2 );
4014 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4015 while ( (sbits64) rem1 < 0 ) {
4016 --zSig1;
4017 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
4018 }
4019 zSig1 |= ( ( rem1 | rem2 ) != 0 );
4020 }
4021 return
4022 roundAndPackFloatx80(
4023 STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4024
4025}
4026
4027
4028
4029
4030
4031
4032
4033floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
4034{
4035 flag aSign, bSign, zSign;
4036 int32 aExp, bExp, expDiff;
4037 bits64 aSig0, aSig1, bSig;
4038 bits64 q, term0, term1, alternateASig0, alternateASig1;
4039 floatx80 z;
4040
4041 aSig0 = extractFloatx80Frac( a );
4042 aExp = extractFloatx80Exp( a );
4043 aSign = extractFloatx80Sign( a );
4044 bSig = extractFloatx80Frac( b );
4045 bExp = extractFloatx80Exp( b );
4046 bSign = extractFloatx80Sign( b );
4047 if ( aExp == 0x7FFF ) {
4048 if ( (bits64) ( aSig0<<1 )
4049 || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
4050 return propagateFloatx80NaN( a, b STATUS_VAR );
4051 }
4052 goto invalid;
4053 }
4054 if ( bExp == 0x7FFF ) {
4055 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
4056 return a;
4057 }
4058 if ( bExp == 0 ) {
4059 if ( bSig == 0 ) {
4060 invalid:
4061 float_raise( float_flag_invalid STATUS_VAR);
4062 z.low = floatx80_default_nan_low;
4063 z.high = floatx80_default_nan_high;
4064 return z;
4065 }
4066 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4067 }
4068 if ( aExp == 0 ) {
4069 if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
4070 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4071 }
4072 bSig |= LIT64( 0x8000000000000000 );
4073 zSign = aSign;
4074 expDiff = aExp - bExp;
4075 aSig1 = 0;
4076 if ( expDiff < 0 ) {
4077 if ( expDiff < -1 ) return a;
4078 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
4079 expDiff = 0;
4080 }
4081 q = ( bSig <= aSig0 );
4082 if ( q ) aSig0 -= bSig;
4083 expDiff -= 64;
4084 while ( 0 < expDiff ) {
4085 q = estimateDiv128To64( aSig0, aSig1, bSig );
4086 q = ( 2 < q ) ? q - 2 : 0;
4087 mul64To128( bSig, q, &term0, &term1 );
4088 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4089 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
4090 expDiff -= 62;
4091 }
4092 expDiff += 64;
4093 if ( 0 < expDiff ) {
4094 q = estimateDiv128To64( aSig0, aSig1, bSig );
4095 q = ( 2 < q ) ? q - 2 : 0;
4096 q >>= 64 - expDiff;
4097 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
4098 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4099 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
4100 while ( le128( term0, term1, aSig0, aSig1 ) ) {
4101 ++q;
4102 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4103 }
4104 }
4105 else {
4106 term1 = 0;
4107 term0 = bSig;
4108 }
4109 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
4110 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
4111 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
4112 && ( q & 1 ) )
4113 ) {
4114 aSig0 = alternateASig0;
4115 aSig1 = alternateASig1;
4116 zSign = ! zSign;
4117 }
4118 return
4119 normalizeRoundAndPackFloatx80(
4120 80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
4121
4122}
4123
4124
4125
4126
4127
4128
4129
4130floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
4131{
4132 flag aSign;
4133 int32 aExp, zExp;
4134 bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
4135 bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4136 floatx80 z;
4137
4138 aSig0 = extractFloatx80Frac( a );
4139 aExp = extractFloatx80Exp( a );
4140 aSign = extractFloatx80Sign( a );
4141 if ( aExp == 0x7FFF ) {
4142 if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
4143 if ( ! aSign ) return a;
4144 goto invalid;
4145 }
4146 if ( aSign ) {
4147 if ( ( aExp | aSig0 ) == 0 ) return a;
4148 invalid:
4149 float_raise( float_flag_invalid STATUS_VAR);
4150 z.low = floatx80_default_nan_low;
4151 z.high = floatx80_default_nan_high;
4152 return z;
4153 }
4154 if ( aExp == 0 ) {
4155 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4156 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4157 }
4158 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
4159 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
4160 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4161 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4162 doubleZSig0 = zSig0<<1;
4163 mul64To128( zSig0, zSig0, &term0, &term1 );
4164 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4165 while ( (sbits64) rem0 < 0 ) {
4166 --zSig0;
4167 doubleZSig0 -= 2;
4168 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4169 }
4170 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4171 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4172 if ( zSig1 == 0 ) zSig1 = 1;
4173 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4174 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4175 mul64To128( zSig1, zSig1, &term2, &term3 );
4176 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
4177 while ( (sbits64) rem1 < 0 ) {
4178 --zSig1;
4179 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4180 term3 |= 1;
4181 term2 |= doubleZSig0;
4182 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4183 }
4184 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4185 }
4186 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4187 zSig0 |= doubleZSig0;
4188 return
4189 roundAndPackFloatx80(
4190 STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
4191
4192}
4193
4194
4195
4196
4197
4198
4199
4200
4201int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
4202{
4203
4204 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4205 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4206 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4207 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4208 ) {
4209 if ( floatx80_is_signaling_nan( a )
4210 || floatx80_is_signaling_nan( b ) ) {
4211 float_raise( float_flag_invalid STATUS_VAR);
4212 }
4213 return 0;
4214 }
4215 return
4216 ( a.low == b.low )
4217 && ( ( a.high == b.high )
4218 || ( ( a.low == 0 )
4219 && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4220 );
4221
4222}
4223
4224
4225
4226
4227
4228
4229
4230
4231int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
4232{
4233 flag aSign, bSign;
4234
4235 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4236 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4237 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4238 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4239 ) {
4240 float_raise( float_flag_invalid STATUS_VAR);
4241 return 0;
4242 }
4243 aSign = extractFloatx80Sign( a );
4244 bSign = extractFloatx80Sign( b );
4245 if ( aSign != bSign ) {
4246 return
4247 aSign
4248 || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4249 == 0 );
4250 }
4251 return
4252 aSign ? le128( b.high, b.low, a.high, a.low )
4253 : le128( a.high, a.low, b.high, b.low );
4254
4255}
4256
4257
4258
4259
4260
4261
4262
4263
4264int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
4265{
4266 flag aSign, bSign;
4267
4268 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4269 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4270 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4271 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4272 ) {
4273 float_raise( float_flag_invalid STATUS_VAR);
4274 return 0;
4275 }
4276 aSign = extractFloatx80Sign( a );
4277 bSign = extractFloatx80Sign( b );
4278 if ( aSign != bSign ) {
4279 return
4280 aSign
4281 && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4282 != 0 );
4283 }
4284 return
4285 aSign ? lt128( b.high, b.low, a.high, a.low )
4286 : lt128( a.high, a.low, b.high, b.low );
4287
4288}
4289
4290
4291
4292
4293
4294
4295
4296
4297int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM )
4298{
4299
4300 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4301 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4302 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4303 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4304 ) {
4305 float_raise( float_flag_invalid STATUS_VAR);
4306 return 0;
4307 }
4308 return
4309 ( a.low == b.low )
4310 && ( ( a.high == b.high )
4311 || ( ( a.low == 0 )
4312 && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4313 );
4314
4315}
4316
4317
4318
4319
4320
4321
4322
4323
4324int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4325{
4326 flag aSign, bSign;
4327
4328 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4329 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4330 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4331 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4332 ) {
4333 if ( floatx80_is_signaling_nan( a )
4334 || floatx80_is_signaling_nan( b ) ) {
4335 float_raise( float_flag_invalid STATUS_VAR);
4336 }
4337 return 0;
4338 }
4339 aSign = extractFloatx80Sign( a );
4340 bSign = extractFloatx80Sign( b );
4341 if ( aSign != bSign ) {
4342 return
4343 aSign
4344 || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4345 == 0 );
4346 }
4347 return
4348 aSign ? le128( b.high, b.low, a.high, a.low )
4349 : le128( a.high, a.low, b.high, b.low );
4350
4351}
4352
4353
4354
4355
4356
4357
4358
4359
4360int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
4361{
4362 flag aSign, bSign;
4363
4364 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4365 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4366 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4367 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4368 ) {
4369 if ( floatx80_is_signaling_nan( a )
4370 || floatx80_is_signaling_nan( b ) ) {
4371 float_raise( float_flag_invalid STATUS_VAR);
4372 }
4373 return 0;
4374 }
4375 aSign = extractFloatx80Sign( a );
4376 bSign = extractFloatx80Sign( b );
4377 if ( aSign != bSign ) {
4378 return
4379 aSign
4380 && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4381 != 0 );
4382 }
4383 return
4384 aSign ? lt128( b.high, b.low, a.high, a.low )
4385 : lt128( a.high, a.low, b.high, b.low );
4386
4387}
4388
4389#endif
4390
4391#ifdef FLOAT128
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403int32 float128_to_int32( float128 a STATUS_PARAM )
4404{
4405 flag aSign;
4406 int32 aExp, shiftCount;
4407 bits64 aSig0, aSig1;
4408
4409 aSig1 = extractFloat128Frac1( a );
4410 aSig0 = extractFloat128Frac0( a );
4411 aExp = extractFloat128Exp( a );
4412 aSign = extractFloat128Sign( a );
4413 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
4414 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4415 aSig0 |= ( aSig1 != 0 );
4416 shiftCount = 0x4028 - aExp;
4417 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4418 return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
4419
4420}
4421
4422
4423
4424
4425
4426
4427
4428
4429
4430
4431
4432int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
4433{
4434 flag aSign;
4435 int32 aExp, shiftCount;
4436 bits64 aSig0, aSig1, savedASig;
4437 int32 z;
4438
4439 aSig1 = extractFloat128Frac1( a );
4440 aSig0 = extractFloat128Frac0( a );
4441 aExp = extractFloat128Exp( a );
4442 aSign = extractFloat128Sign( a );
4443 aSig0 |= ( aSig1 != 0 );
4444 if ( 0x401E < aExp ) {
4445 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4446 goto invalid;
4447 }
4448 else if ( aExp < 0x3FFF ) {
4449 if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact;
4450 return 0;
4451 }
4452 aSig0 |= LIT64( 0x0001000000000000 );
4453 shiftCount = 0x402F - aExp;
4454 savedASig = aSig0;
4455 aSig0 >>= shiftCount;
4456 z = aSig0;
4457 if ( aSign ) z = - z;
4458 if ( ( z < 0 ) ^ aSign ) {
4459 invalid:
4460 float_raise( float_flag_invalid STATUS_VAR);
4461 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
4462 }
4463 if ( ( aSig0<<shiftCount ) != savedASig ) {
4464 STATUS(float_exception_flags) |= float_flag_inexact;
4465 }
4466 return z;
4467
4468}
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480int64 float128_to_int64( float128 a STATUS_PARAM )
4481{
4482 flag aSign;
4483 int32 aExp, shiftCount;
4484 bits64 aSig0, aSig1;
4485
4486 aSig1 = extractFloat128Frac1( a );
4487 aSig0 = extractFloat128Frac0( a );
4488 aExp = extractFloat128Exp( a );
4489 aSign = extractFloat128Sign( a );
4490 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4491 shiftCount = 0x402F - aExp;
4492 if ( shiftCount <= 0 ) {
4493 if ( 0x403E < aExp ) {
4494 float_raise( float_flag_invalid STATUS_VAR);
4495 if ( ! aSign
4496 || ( ( aExp == 0x7FFF )
4497 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4498 )
4499 ) {
4500 return LIT64( 0x7FFFFFFFFFFFFFFF );
4501 }
4502 return (sbits64) LIT64( 0x8000000000000000 );
4503 }
4504 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4505 }
4506 else {
4507 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4508 }
4509 return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
4510
4511}
4512
4513
4514
4515
4516
4517
4518
4519
4520
4521
4522
4523int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
4524{
4525 flag aSign;
4526 int32 aExp, shiftCount;
4527 bits64 aSig0, aSig1;
4528 int64 z;
4529
4530 aSig1 = extractFloat128Frac1( a );
4531 aSig0 = extractFloat128Frac0( a );
4532 aExp = extractFloat128Exp( a );
4533 aSign = extractFloat128Sign( a );
4534 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4535 shiftCount = aExp - 0x402F;
4536 if ( 0 < shiftCount ) {
4537 if ( 0x403E <= aExp ) {
4538 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4539 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
4540 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4541 if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
4542 }
4543 else {
4544 float_raise( float_flag_invalid STATUS_VAR);
4545 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
4546 return LIT64( 0x7FFFFFFFFFFFFFFF );
4547 }
4548 }
4549 return (sbits64) LIT64( 0x8000000000000000 );
4550 }
4551 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4552 if ( (bits64) ( aSig1<<shiftCount ) ) {
4553 STATUS(float_exception_flags) |= float_flag_inexact;
4554 }
4555 }
4556 else {
4557 if ( aExp < 0x3FFF ) {
4558 if ( aExp | aSig0 | aSig1 ) {
4559 STATUS(float_exception_flags) |= float_flag_inexact;
4560 }
4561 return 0;
4562 }
4563 z = aSig0>>( - shiftCount );
4564 if ( aSig1
4565 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4566 STATUS(float_exception_flags) |= float_flag_inexact;
4567 }
4568 }
4569 if ( aSign ) z = - z;
4570 return z;
4571
4572}
4573
4574
4575
4576
4577
4578
4579
4580
4581float32 float128_to_float32( float128 a STATUS_PARAM )
4582{
4583 flag aSign;
4584 int32 aExp;
4585 bits64 aSig0, aSig1;
4586 bits32 zSig;
4587
4588 aSig1 = extractFloat128Frac1( a );
4589 aSig0 = extractFloat128Frac0( a );
4590 aExp = extractFloat128Exp( a );
4591 aSign = extractFloat128Sign( a );
4592 if ( aExp == 0x7FFF ) {
4593 if ( aSig0 | aSig1 ) {
4594 return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) );
4595 }
4596 return packFloat32( aSign, 0xFF, 0 );
4597 }
4598 aSig0 |= ( aSig1 != 0 );
4599 shift64RightJamming( aSig0, 18, &aSig0 );
4600 zSig = aSig0;
4601 if ( aExp || zSig ) {
4602 zSig |= 0x40000000;
4603 aExp -= 0x3F81;
4604 }
4605 return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
4606
4607}
4608
4609
4610
4611
4612
4613
4614
4615
4616float64 float128_to_float64( float128 a STATUS_PARAM )
4617{
4618 flag aSign;
4619 int32 aExp;
4620 bits64 aSig0, aSig1;
4621
4622 aSig1 = extractFloat128Frac1( a );
4623 aSig0 = extractFloat128Frac0( a );
4624 aExp = extractFloat128Exp( a );
4625 aSign = extractFloat128Sign( a );
4626 if ( aExp == 0x7FFF ) {
4627 if ( aSig0 | aSig1 ) {
4628 return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) );
4629 }
4630 return packFloat64( aSign, 0x7FF, 0 );
4631 }
4632 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4633 aSig0 |= ( aSig1 != 0 );
4634 if ( aExp || aSig0 ) {
4635 aSig0 |= LIT64( 0x4000000000000000 );
4636 aExp -= 0x3C01;
4637 }
4638 return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
4639
4640}
4641
4642#ifdef FLOATX80
4643
4644
4645
4646
4647
4648
4649
4650
4651floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
4652{
4653 flag aSign;
4654 int32 aExp;
4655 bits64 aSig0, aSig1;
4656
4657 aSig1 = extractFloat128Frac1( a );
4658 aSig0 = extractFloat128Frac0( a );
4659 aExp = extractFloat128Exp( a );
4660 aSign = extractFloat128Sign( a );
4661 if ( aExp == 0x7FFF ) {
4662 if ( aSig0 | aSig1 ) {
4663 return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) );
4664 }
4665 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4666 }
4667 if ( aExp == 0 ) {
4668 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
4669 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4670 }
4671 else {
4672 aSig0 |= LIT64( 0x0001000000000000 );
4673 }
4674 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
4675 return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
4676
4677}
4678
4679#endif
4680
4681
4682
4683
4684
4685
4686
4687
4688float128 float128_round_to_int( float128 a STATUS_PARAM )
4689{
4690 flag aSign;
4691 int32 aExp;
4692 bits64 lastBitMask, roundBitsMask;
4693 int8 roundingMode;
4694 float128 z;
4695
4696 aExp = extractFloat128Exp( a );
4697 if ( 0x402F <= aExp ) {
4698 if ( 0x406F <= aExp ) {
4699 if ( ( aExp == 0x7FFF )
4700 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
4701 ) {
4702 return propagateFloat128NaN( a, a STATUS_VAR );
4703 }
4704 return a;
4705 }
4706 lastBitMask = 1;
4707 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
4708 roundBitsMask = lastBitMask - 1;
4709 z = a;
4710 roundingMode = STATUS(float_rounding_mode);
4711 if ( roundingMode == float_round_nearest_even ) {
4712 if ( lastBitMask ) {
4713 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
4714 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4715 }
4716 else {
4717 if ( (sbits64) z.low < 0 ) {
4718 ++z.high;
4719 if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
4720 }
4721 }
4722 }
4723 else if ( roundingMode != float_round_to_zero ) {
4724 if ( extractFloat128Sign( z )
4725 ^ ( roundingMode == float_round_up ) ) {
4726 add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
4727 }
4728 }
4729 z.low &= ~ roundBitsMask;
4730 }
4731 else {
4732 if ( aExp < 0x3FFF ) {
4733 if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
4734 STATUS(float_exception_flags) |= float_flag_inexact;
4735 aSign = extractFloat128Sign( a );
4736 switch ( STATUS(float_rounding_mode) ) {
4737 case float_round_nearest_even:
4738 if ( ( aExp == 0x3FFE )
4739 && ( extractFloat128Frac0( a )
4740 | extractFloat128Frac1( a ) )
4741 ) {
4742 return packFloat128( aSign, 0x3FFF, 0, 0 );
4743 }
4744 break;
4745 case float_round_down:
4746 return
4747 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
4748 : packFloat128( 0, 0, 0, 0 );
4749 case float_round_up:
4750 return
4751 aSign ? packFloat128( 1, 0, 0, 0 )
4752 : packFloat128( 0, 0x3FFF, 0, 0 );
4753 }
4754 return packFloat128( aSign, 0, 0, 0 );
4755 }
4756 lastBitMask = 1;
4757 lastBitMask <<= 0x402F - aExp;
4758 roundBitsMask = lastBitMask - 1;
4759 z.low = 0;
4760 z.high = a.high;
4761 roundingMode = STATUS(float_rounding_mode);
4762 if ( roundingMode == float_round_nearest_even ) {
4763 z.high += lastBitMask>>1;
4764 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
4765 z.high &= ~ lastBitMask;
4766 }
4767 }
4768 else if ( roundingMode != float_round_to_zero ) {
4769 if ( extractFloat128Sign( z )
4770 ^ ( roundingMode == float_round_up ) ) {
4771 z.high |= ( a.low != 0 );
4772 z.high += roundBitsMask;
4773 }
4774 }
4775 z.high &= ~ roundBitsMask;
4776 }
4777 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
4778 STATUS(float_exception_flags) |= float_flag_inexact;
4779 }
4780 return z;
4781
4782}
4783
4784
4785
4786
4787
4788
4789
4790
4791
4792static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
4793{
4794 int32 aExp, bExp, zExp;
4795 bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4796 int32 expDiff;
4797
4798 aSig1 = extractFloat128Frac1( a );
4799 aSig0 = extractFloat128Frac0( a );
4800 aExp = extractFloat128Exp( a );
4801 bSig1 = extractFloat128Frac1( b );
4802 bSig0 = extractFloat128Frac0( b );
4803 bExp = extractFloat128Exp( b );
4804 expDiff = aExp - bExp;
4805 if ( 0 < expDiff ) {
4806 if ( aExp == 0x7FFF ) {
4807 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4808 return a;
4809 }
4810 if ( bExp == 0 ) {
4811 --expDiff;
4812 }
4813 else {
4814 bSig0 |= LIT64( 0x0001000000000000 );
4815 }
4816 shift128ExtraRightJamming(
4817 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
4818 zExp = aExp;
4819 }
4820 else if ( expDiff < 0 ) {
4821 if ( bExp == 0x7FFF ) {
4822 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4823 return packFloat128( zSign, 0x7FFF, 0, 0 );
4824 }
4825 if ( aExp == 0 ) {
4826 ++expDiff;
4827 }
4828 else {
4829 aSig0 |= LIT64( 0x0001000000000000 );
4830 }
4831 shift128ExtraRightJamming(
4832 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
4833 zExp = bExp;
4834 }
4835 else {
4836 if ( aExp == 0x7FFF ) {
4837 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4838 return propagateFloat128NaN( a, b STATUS_VAR );
4839 }
4840 return a;
4841 }
4842 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4843 if ( aExp == 0 ) {
4844 if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
4845 return packFloat128( zSign, 0, zSig0, zSig1 );
4846 }
4847 zSig2 = 0;
4848 zSig0 |= LIT64( 0x0002000000000000 );
4849 zExp = aExp;
4850 goto shiftRight1;
4851 }
4852 aSig0 |= LIT64( 0x0001000000000000 );
4853 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4854 --zExp;
4855 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
4856 ++zExp;
4857 shiftRight1:
4858 shift128ExtraRightJamming(
4859 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4860 roundAndPack:
4861 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
4862
4863}
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
4874{
4875 int32 aExp, bExp, zExp;
4876 bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
4877 int32 expDiff;
4878 float128 z;
4879
4880 aSig1 = extractFloat128Frac1( a );
4881 aSig0 = extractFloat128Frac0( a );
4882 aExp = extractFloat128Exp( a );
4883 bSig1 = extractFloat128Frac1( b );
4884 bSig0 = extractFloat128Frac0( b );
4885 bExp = extractFloat128Exp( b );
4886 expDiff = aExp - bExp;
4887 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4888 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
4889 if ( 0 < expDiff ) goto aExpBigger;
4890 if ( expDiff < 0 ) goto bExpBigger;
4891 if ( aExp == 0x7FFF ) {
4892 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4893 return propagateFloat128NaN( a, b STATUS_VAR );
4894 }
4895 float_raise( float_flag_invalid STATUS_VAR);
4896 z.low = float128_default_nan_low;
4897 z.high = float128_default_nan_high;
4898 return z;
4899 }
4900 if ( aExp == 0 ) {
4901 aExp = 1;
4902 bExp = 1;
4903 }
4904 if ( bSig0 < aSig0 ) goto aBigger;
4905 if ( aSig0 < bSig0 ) goto bBigger;
4906 if ( bSig1 < aSig1 ) goto aBigger;
4907 if ( aSig1 < bSig1 ) goto bBigger;
4908 return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
4909 bExpBigger:
4910 if ( bExp == 0x7FFF ) {
4911 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4912 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
4913 }
4914 if ( aExp == 0 ) {
4915 ++expDiff;
4916 }
4917 else {
4918 aSig0 |= LIT64( 0x4000000000000000 );
4919 }
4920 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
4921 bSig0 |= LIT64( 0x4000000000000000 );
4922 bBigger:
4923 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
4924 zExp = bExp;
4925 zSign ^= 1;
4926 goto normalizeRoundAndPack;
4927 aExpBigger:
4928 if ( aExp == 0x7FFF ) {
4929 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
4930 return a;
4931 }
4932 if ( bExp == 0 ) {
4933 --expDiff;
4934 }
4935 else {
4936 bSig0 |= LIT64( 0x4000000000000000 );
4937 }
4938 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
4939 aSig0 |= LIT64( 0x4000000000000000 );
4940 aBigger:
4941 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4942 zExp = aExp;
4943 normalizeRoundAndPack:
4944 --zExp;
4945 return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
4946
4947}
4948
4949
4950
4951
4952
4953
4954
4955float128 float128_add( float128 a, float128 b STATUS_PARAM )
4956{
4957 flag aSign, bSign;
4958
4959 aSign = extractFloat128Sign( a );
4960 bSign = extractFloat128Sign( b );
4961 if ( aSign == bSign ) {
4962 return addFloat128Sigs( a, b, aSign STATUS_VAR );
4963 }
4964 else {
4965 return subFloat128Sigs( a, b, aSign STATUS_VAR );
4966 }
4967
4968}
4969
4970
4971
4972
4973
4974
4975
4976float128 float128_sub( float128 a, float128 b STATUS_PARAM )
4977{
4978 flag aSign, bSign;
4979
4980 aSign = extractFloat128Sign( a );
4981 bSign = extractFloat128Sign( b );
4982 if ( aSign == bSign ) {
4983 return subFloat128Sigs( a, b, aSign STATUS_VAR );
4984 }
4985 else {
4986 return addFloat128Sigs( a, b, aSign STATUS_VAR );
4987 }
4988
4989}
4990
4991
4992
4993
4994
4995
4996
4997float128 float128_mul( float128 a, float128 b STATUS_PARAM )
4998{
4999 flag aSign, bSign, zSign;
5000 int32 aExp, bExp, zExp;
5001 bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
5002 float128 z;
5003
5004 aSig1 = extractFloat128Frac1( a );
5005 aSig0 = extractFloat128Frac0( a );
5006 aExp = extractFloat128Exp( a );
5007 aSign = extractFloat128Sign( a );
5008 bSig1 = extractFloat128Frac1( b );
5009 bSig0 = extractFloat128Frac0( b );
5010 bExp = extractFloat128Exp( b );
5011 bSign = extractFloat128Sign( b );
5012 zSign = aSign ^ bSign;
5013 if ( aExp == 0x7FFF ) {
5014 if ( ( aSig0 | aSig1 )
5015 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5016 return propagateFloat128NaN( a, b STATUS_VAR );
5017 }
5018 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
5019 return packFloat128( zSign, 0x7FFF, 0, 0 );
5020 }
5021 if ( bExp == 0x7FFF ) {
5022 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5023 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5024 invalid:
5025 float_raise( float_flag_invalid STATUS_VAR);
5026 z.low = float128_default_nan_low;
5027 z.high = float128_default_nan_high;
5028 return z;
5029 }
5030 return packFloat128( zSign, 0x7FFF, 0, 0 );
5031 }
5032 if ( aExp == 0 ) {
5033 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5034 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5035 }
5036 if ( bExp == 0 ) {
5037 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5038 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5039 }
5040 zExp = aExp + bExp - 0x4000;
5041 aSig0 |= LIT64( 0x0001000000000000 );
5042 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
5043 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
5044 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
5045 zSig2 |= ( zSig3 != 0 );
5046 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
5047 shift128ExtraRightJamming(
5048 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5049 ++zExp;
5050 }
5051 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5052
5053}
5054
5055
5056
5057
5058
5059
5060
5061float128 float128_div( float128 a, float128 b STATUS_PARAM )
5062{
5063 flag aSign, bSign, zSign;
5064 int32 aExp, bExp, zExp;
5065 bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5066 bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5067 float128 z;
5068
5069 aSig1 = extractFloat128Frac1( a );
5070 aSig0 = extractFloat128Frac0( a );
5071 aExp = extractFloat128Exp( a );
5072 aSign = extractFloat128Sign( a );
5073 bSig1 = extractFloat128Frac1( b );
5074 bSig0 = extractFloat128Frac0( b );
5075 bExp = extractFloat128Exp( b );
5076 bSign = extractFloat128Sign( b );
5077 zSign = aSign ^ bSign;
5078 if ( aExp == 0x7FFF ) {
5079 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5080 if ( bExp == 0x7FFF ) {
5081 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5082 goto invalid;
5083 }
5084 return packFloat128( zSign, 0x7FFF, 0, 0 );
5085 }
5086 if ( bExp == 0x7FFF ) {
5087 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5088 return packFloat128( zSign, 0, 0, 0 );
5089 }
5090 if ( bExp == 0 ) {
5091 if ( ( bSig0 | bSig1 ) == 0 ) {
5092 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5093 invalid:
5094 float_raise( float_flag_invalid STATUS_VAR);
5095 z.low = float128_default_nan_low;
5096 z.high = float128_default_nan_high;
5097 return z;
5098 }
5099 float_raise( float_flag_divbyzero STATUS_VAR);
5100 return packFloat128( zSign, 0x7FFF, 0, 0 );
5101 }
5102 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5103 }
5104 if ( aExp == 0 ) {
5105 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5106 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5107 }
5108 zExp = aExp - bExp + 0x3FFD;
5109 shortShift128Left(
5110 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
5111 shortShift128Left(
5112 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5113 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
5114 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
5115 ++zExp;
5116 }
5117 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
5118 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
5119 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
5120 while ( (sbits64) rem0 < 0 ) {
5121 --zSig0;
5122 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
5123 }
5124 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
5125 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5126 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5127 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
5128 while ( (sbits64) rem1 < 0 ) {
5129 --zSig1;
5130 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5131 }
5132 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5133 }
5134 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5135 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5136
5137}
5138
5139
5140
5141
5142
5143
5144
5145float128 float128_rem( float128 a, float128 b STATUS_PARAM )
5146{
5147 flag aSign, bSign, zSign;
5148 int32 aExp, bExp, expDiff;
5149 bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
5150 bits64 allZero, alternateASig0, alternateASig1, sigMean1;
5151 sbits64 sigMean0;
5152 float128 z;
5153
5154 aSig1 = extractFloat128Frac1( a );
5155 aSig0 = extractFloat128Frac0( a );
5156 aExp = extractFloat128Exp( a );
5157 aSign = extractFloat128Sign( a );
5158 bSig1 = extractFloat128Frac1( b );
5159 bSig0 = extractFloat128Frac0( b );
5160 bExp = extractFloat128Exp( b );
5161 bSign = extractFloat128Sign( b );
5162 if ( aExp == 0x7FFF ) {
5163 if ( ( aSig0 | aSig1 )
5164 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5165 return propagateFloat128NaN( a, b STATUS_VAR );
5166 }
5167 goto invalid;
5168 }
5169 if ( bExp == 0x7FFF ) {
5170 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5171 return a;
5172 }
5173 if ( bExp == 0 ) {
5174 if ( ( bSig0 | bSig1 ) == 0 ) {
5175 invalid:
5176 float_raise( float_flag_invalid STATUS_VAR);
5177 z.low = float128_default_nan_low;
5178 z.high = float128_default_nan_high;
5179 return z;
5180 }
5181 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5182 }
5183 if ( aExp == 0 ) {
5184 if ( ( aSig0 | aSig1 ) == 0 ) return a;
5185 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5186 }
5187 expDiff = aExp - bExp;
5188 if ( expDiff < -1 ) return a;
5189 shortShift128Left(
5190 aSig0 | LIT64( 0x0001000000000000 ),
5191 aSig1,
5192 15 - ( expDiff < 0 ),
5193 &aSig0,
5194 &aSig1
5195 );
5196 shortShift128Left(
5197 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5198 q = le128( bSig0, bSig1, aSig0, aSig1 );
5199 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5200 expDiff -= 64;
5201 while ( 0 < expDiff ) {
5202 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5203 q = ( 4 < q ) ? q - 4 : 0;
5204 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5205 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
5206 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
5207 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
5208 expDiff -= 61;
5209 }
5210 if ( -64 < expDiff ) {
5211 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5212 q = ( 4 < q ) ? q - 4 : 0;
5213 q >>= - expDiff;
5214 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5215 expDiff += 52;
5216 if ( expDiff < 0 ) {
5217 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5218 }
5219 else {
5220 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
5221 }
5222 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5223 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
5224 }
5225 else {
5226 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
5227 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5228 }
5229 do {
5230 alternateASig0 = aSig0;
5231 alternateASig1 = aSig1;
5232 ++q;
5233 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5234 } while ( 0 <= (sbits64) aSig0 );
5235 add128(
5236 aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
5237 if ( ( sigMean0 < 0 )
5238 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
5239 aSig0 = alternateASig0;
5240 aSig1 = alternateASig1;
5241 }
5242 zSign = ( (sbits64) aSig0 < 0 );
5243 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
5244 return
5245 normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
5246
5247}
5248
5249
5250
5251
5252
5253
5254
5255float128 float128_sqrt( float128 a STATUS_PARAM )
5256{
5257 flag aSign;
5258 int32 aExp, zExp;
5259 bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
5260 bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5261 float128 z;
5262
5263 aSig1 = extractFloat128Frac1( a );
5264 aSig0 = extractFloat128Frac0( a );
5265 aExp = extractFloat128Exp( a );
5266 aSign = extractFloat128Sign( a );
5267 if ( aExp == 0x7FFF ) {
5268 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
5269 if ( ! aSign ) return a;
5270 goto invalid;
5271 }
5272 if ( aSign ) {
5273 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
5274 invalid:
5275 float_raise( float_flag_invalid STATUS_VAR);
5276 z.low = float128_default_nan_low;
5277 z.high = float128_default_nan_high;
5278 return z;
5279 }
5280 if ( aExp == 0 ) {
5281 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
5282 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5283 }
5284 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
5285 aSig0 |= LIT64( 0x0001000000000000 );
5286 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
5287 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
5288 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5289 doubleZSig0 = zSig0<<1;
5290 mul64To128( zSig0, zSig0, &term0, &term1 );
5291 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5292 while ( (sbits64) rem0 < 0 ) {
5293 --zSig0;
5294 doubleZSig0 -= 2;
5295 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5296 }
5297 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5298 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
5299 if ( zSig1 == 0 ) zSig1 = 1;
5300 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5301 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5302 mul64To128( zSig1, zSig1, &term2, &term3 );
5303 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5304 while ( (sbits64) rem1 < 0 ) {
5305 --zSig1;
5306 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5307 term3 |= 1;
5308 term2 |= doubleZSig0;
5309 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5310 }
5311 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5312 }
5313 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
5314 return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5315
5316}
5317
5318
5319
5320
5321
5322
5323
5324int float128_eq( float128 a, float128 b STATUS_PARAM )
5325{
5326
5327 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5328 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5329 || ( ( extractFloat128Exp( b ) == 0x7FFF )
5330 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5331 ) {
5332 if ( float128_is_signaling_nan( a )
5333 || float128_is_signaling_nan( b ) ) {
5334 float_raise( float_flag_invalid STATUS_VAR);
5335 }
5336 return 0;
5337 }
5338 return
5339 ( a.low == b.low )
5340 && ( ( a.high == b.high )
5341 || ( ( a.low == 0 )
5342 && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5343 );
5344
5345}
5346
5347
5348
5349
5350
5351
5352
5353
5354int float128_le( float128 a, float128 b STATUS_PARAM )
5355{
5356 flag aSign, bSign;
5357
5358 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5359 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5360 || ( ( extractFloat128Exp( b ) == 0x7FFF )
5361 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5362 ) {
5363 float_raise( float_flag_invalid STATUS_VAR);
5364 return 0;
5365 }
5366 aSign = extractFloat128Sign( a );
5367 bSign = extractFloat128Sign( b );
5368 if ( aSign != bSign ) {
5369 return
5370 aSign
5371 || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5372 == 0 );
5373 }
5374 return
5375 aSign ? le128( b.high, b.low, a.high, a.low )
5376 : le128( a.high, a.low, b.high, b.low );
5377
5378}
5379
5380
5381
5382
5383
5384
5385
5386int float128_lt( float128 a, float128 b STATUS_PARAM )
5387{
5388 flag aSign, bSign;
5389
5390 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5391 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5392 || ( ( extractFloat128Exp( b ) == 0x7FFF )
5393 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5394 ) {
5395 float_raise( float_flag_invalid STATUS_VAR);
5396 return 0;
5397 }
5398 aSign = extractFloat128Sign( a );
5399 bSign = extractFloat128Sign( b );
5400 if ( aSign != bSign ) {
5401 return
5402 aSign
5403 && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5404 != 0 );
5405 }
5406 return
5407 aSign ? lt128( b.high, b.low, a.high, a.low )
5408 : lt128( a.high, a.low, b.high, b.low );
5409
5410}
5411
5412
5413
5414
5415
5416
5417
5418
5419int float128_eq_signaling( float128 a, float128 b STATUS_PARAM )
5420{
5421
5422 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5423 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5424 || ( ( extractFloat128Exp( b ) == 0x7FFF )
5425 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5426 ) {
5427 float_raise( float_flag_invalid STATUS_VAR);
5428 return 0;
5429 }
5430 return
5431 ( a.low == b.low )
5432 && ( ( a.high == b.high )
5433 || ( ( a.low == 0 )
5434 && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5435 );
5436
5437}
5438
5439
5440
5441
5442
5443
5444
5445
5446int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
5447{
5448 flag aSign, bSign;
5449
5450 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5451 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5452 || ( ( extractFloat128Exp( b ) == 0x7FFF )
5453 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5454 ) {
5455 if ( float128_is_signaling_nan( a )
5456 || float128_is_signaling_nan( b ) ) {
5457 float_raise( float_flag_invalid STATUS_VAR);
5458 }
5459 return 0;
5460 }
5461 aSign = extractFloat128Sign( a );
5462 bSign = extractFloat128Sign( b );
5463 if ( aSign != bSign ) {
5464 return
5465 aSign
5466 || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5467 == 0 );
5468 }
5469 return
5470 aSign ? le128( b.high, b.low, a.high, a.low )
5471 : le128( a.high, a.low, b.high, b.low );
5472
5473}
5474
5475
5476
5477
5478
5479
5480
5481
5482int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
5483{
5484 flag aSign, bSign;
5485
5486 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
5487 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5488 || ( ( extractFloat128Exp( b ) == 0x7FFF )
5489 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5490 ) {
5491 if ( float128_is_signaling_nan( a )
5492 || float128_is_signaling_nan( b ) ) {
5493 float_raise( float_flag_invalid STATUS_VAR);
5494 }
5495 return 0;
5496 }
5497 aSign = extractFloat128Sign( a );
5498 bSign = extractFloat128Sign( b );
5499 if ( aSign != bSign ) {
5500 return
5501 aSign
5502 && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5503 != 0 );
5504 }
5505 return
5506 aSign ? lt128( b.high, b.low, a.high, a.low )
5507 : lt128( a.high, a.low, b.high, b.low );
5508
5509}
5510
5511#endif
5512
5513
5514float32 uint32_to_float32( unsigned int a STATUS_PARAM )
5515{
5516 return int64_to_float32(a STATUS_VAR);
5517}
5518
5519float64 uint32_to_float64( unsigned int a STATUS_PARAM )
5520{
5521 return int64_to_float64(a STATUS_VAR);
5522}
5523
5524unsigned int float32_to_uint32( float32 a STATUS_PARAM )
5525{
5526 int64_t v;
5527 unsigned int res;
5528
5529 v = float32_to_int64(a STATUS_VAR);
5530 if (v < 0) {
5531 res = 0;
5532 float_raise( float_flag_invalid STATUS_VAR);
5533 } else if (v > 0xffffffff) {
5534 res = 0xffffffff;
5535 float_raise( float_flag_invalid STATUS_VAR);
5536 } else {
5537 res = v;
5538 }
5539 return res;
5540}
5541
5542unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
5543{
5544 int64_t v;
5545 unsigned int res;
5546
5547 v = float32_to_int64_round_to_zero(a STATUS_VAR);
5548 if (v < 0) {
5549 res = 0;
5550 float_raise( float_flag_invalid STATUS_VAR);
5551 } else if (v > 0xffffffff) {
5552 res = 0xffffffff;
5553 float_raise( float_flag_invalid STATUS_VAR);
5554 } else {
5555 res = v;
5556 }
5557 return res;
5558}
5559
5560unsigned int float64_to_uint32( float64 a STATUS_PARAM )
5561{
5562 int64_t v;
5563 unsigned int res;
5564
5565 v = float64_to_int64(a STATUS_VAR);
5566 if (v < 0) {
5567 res = 0;
5568 float_raise( float_flag_invalid STATUS_VAR);
5569 } else if (v > 0xffffffff) {
5570 res = 0xffffffff;
5571 float_raise( float_flag_invalid STATUS_VAR);
5572 } else {
5573 res = v;
5574 }
5575 return res;
5576}
5577
5578unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
5579{
5580 int64_t v;
5581 unsigned int res;
5582
5583 v = float64_to_int64_round_to_zero(a STATUS_VAR);
5584 if (v < 0) {
5585 res = 0;
5586 float_raise( float_flag_invalid STATUS_VAR);
5587 } else if (v > 0xffffffff) {
5588 res = 0xffffffff;
5589 float_raise( float_flag_invalid STATUS_VAR);
5590 } else {
5591 res = v;
5592 }
5593 return res;
5594}
5595
5596
5597uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
5598{
5599 int64_t v;
5600
5601 v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
5602 v += float64_val(a);
5603 v = float64_to_int64(make_float64(v) STATUS_VAR);
5604
5605 return v - INT64_MIN;
5606}
5607
5608uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
5609{
5610 int64_t v;
5611
5612 v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
5613 v += float64_val(a);
5614 v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
5615
5616 return v - INT64_MIN;
5617}
5618
5619#define COMPARE(s, nan_exp) \
5620INLINE int float ## s ## _compare_internal( float ## s a, float ## s b, \
5621 int is_quiet STATUS_PARAM ) \
5622{ \
5623 flag aSign, bSign; \
5624 bits ## s av, bv; \
5625 \
5626 if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
5627 extractFloat ## s ## Frac( a ) ) || \
5628 ( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
5629 extractFloat ## s ## Frac( b ) )) { \
5630 if (!is_quiet || \
5631 float ## s ## _is_signaling_nan( a ) || \
5632 float ## s ## _is_signaling_nan( b ) ) { \
5633 float_raise( float_flag_invalid STATUS_VAR); \
5634 } \
5635 return float_relation_unordered; \
5636 } \
5637 aSign = extractFloat ## s ## Sign( a ); \
5638 bSign = extractFloat ## s ## Sign( b ); \
5639 av = float ## s ## _val(a); \
5640 bv = float ## s ## _val(b); \
5641 if ( aSign != bSign ) { \
5642 if ( (bits ## s) ( ( av | bv )<<1 ) == 0 ) { \
5643 \
5644 return float_relation_equal; \
5645 } else { \
5646 return 1 - (2 * aSign); \
5647 } \
5648 } else { \
5649 if (av == bv) { \
5650 return float_relation_equal; \
5651 } else { \
5652 return 1 - 2 * (aSign ^ ( av < bv )); \
5653 } \
5654 } \
5655} \
5656 \
5657int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM ) \
5658{ \
5659 return float ## s ## _compare_internal(a, b, 0 STATUS_VAR); \
5660} \
5661 \
5662int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM ) \
5663{ \
5664 return float ## s ## _compare_internal(a, b, 1 STATUS_VAR); \
5665}
5666
5667COMPARE(32, 0xff)
5668COMPARE(64, 0x7ff)
5669
5670INLINE int float128_compare_internal( float128 a, float128 b,
5671 int is_quiet STATUS_PARAM )
5672{
5673 flag aSign, bSign;
5674
5675 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
5676 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
5677 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
5678 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
5679 if (!is_quiet ||
5680 float128_is_signaling_nan( a ) ||
5681 float128_is_signaling_nan( b ) ) {
5682 float_raise( float_flag_invalid STATUS_VAR);
5683 }
5684 return float_relation_unordered;
5685 }
5686 aSign = extractFloat128Sign( a );
5687 bSign = extractFloat128Sign( b );
5688 if ( aSign != bSign ) {
5689 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
5690
5691 return float_relation_equal;
5692 } else {
5693 return 1 - (2 * aSign);
5694 }
5695 } else {
5696 if (a.low == b.low && a.high == b.high) {
5697 return float_relation_equal;
5698 } else {
5699 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
5700 }
5701 }
5702}
5703
5704int float128_compare( float128 a, float128 b STATUS_PARAM )
5705{
5706 return float128_compare_internal(a, b, 0 STATUS_VAR);
5707}
5708
5709int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
5710{
5711 return float128_compare_internal(a, b, 1 STATUS_VAR);
5712}
5713
5714
5715float32 float32_scalbn( float32 a, int n STATUS_PARAM )
5716{
5717 flag aSign;
5718 int16 aExp;
5719 bits32 aSig;
5720
5721 aSig = extractFloat32Frac( a );
5722 aExp = extractFloat32Exp( a );
5723 aSign = extractFloat32Sign( a );
5724
5725 if ( aExp == 0xFF ) {
5726 return a;
5727 }
5728 if ( aExp != 0 )
5729 aSig |= 0x00800000;
5730 else if ( aSig == 0 )
5731 return a;
5732
5733 aExp += n - 1;
5734 aSig <<= 7;
5735 return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
5736}
5737
5738float64 float64_scalbn( float64 a, int n STATUS_PARAM )
5739{
5740 flag aSign;
5741 int16 aExp;
5742 bits64 aSig;
5743
5744 aSig = extractFloat64Frac( a );
5745 aExp = extractFloat64Exp( a );
5746 aSign = extractFloat64Sign( a );
5747
5748 if ( aExp == 0x7FF ) {
5749 return a;
5750 }
5751 if ( aExp != 0 )
5752 aSig |= LIT64( 0x0010000000000000 );
5753 else if ( aSig == 0 )
5754 return a;
5755
5756 aExp += n - 1;
5757 aSig <<= 10;
5758 return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
5759}
5760
5761#ifdef FLOATX80
5762floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
5763{
5764 flag aSign;
5765 int16 aExp;
5766 bits64 aSig;
5767
5768 aSig = extractFloatx80Frac( a );
5769 aExp = extractFloatx80Exp( a );
5770 aSign = extractFloatx80Sign( a );
5771
5772 if ( aExp == 0x7FF ) {
5773 return a;
5774 }
5775 if (aExp == 0 && aSig == 0)
5776 return a;
5777
5778 aExp += n;
5779 return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
5780 aSign, aExp, aSig, 0 STATUS_VAR );
5781}
5782#endif
5783
5784#ifdef FLOAT128
5785float128 float128_scalbn( float128 a, int n STATUS_PARAM )
5786{
5787 flag aSign;
5788 int32 aExp;
5789 bits64 aSig0, aSig1;
5790
5791 aSig1 = extractFloat128Frac1( a );
5792 aSig0 = extractFloat128Frac0( a );
5793 aExp = extractFloat128Exp( a );
5794 aSign = extractFloat128Sign( a );
5795 if ( aExp == 0x7FFF ) {
5796 return a;
5797 }
5798 if ( aExp != 0 )
5799 aSig0 |= LIT64( 0x0001000000000000 );
5800 else if ( aSig0 == 0 && aSig1 == 0 )
5801 return a;
5802
5803 aExp += n - 1;
5804 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
5805 STATUS_VAR );
5806
5807}
5808#endif
5809