1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85#include "qemu/osdep.h"
86
87#include "fpu/softfloat.h"
88
89
90
91
92
93
94
95
96#include "softfloat-macros.h"
97
98
99
100
101
102
103
104
105
106#include "softfloat-specialize.h"
107
108
109
110
111
112static inline uint32_t extractFloat16Frac(float16 a)
113{
114 return float16_val(a) & 0x3ff;
115}
116
117
118
119
120
121static inline int extractFloat16Exp(float16 a)
122{
123 return (float16_val(a) >> 10) & 0x1f;
124}
125
126
127
128
129
130static inline flag extractFloat16Sign(float16 a)
131{
132 return float16_val(a)>>15;
133}
134
135
136
137
138
139
140
141
142
143
144
145
146static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
147{
148 int8_t roundingMode;
149 flag roundNearestEven;
150 int8_t roundIncrement, roundBits;
151 int32_t z;
152
153 roundingMode = status->float_rounding_mode;
154 roundNearestEven = ( roundingMode == float_round_nearest_even );
155 switch (roundingMode) {
156 case float_round_nearest_even:
157 case float_round_ties_away:
158 roundIncrement = 0x40;
159 break;
160 case float_round_to_zero:
161 roundIncrement = 0;
162 break;
163 case float_round_up:
164 roundIncrement = zSign ? 0 : 0x7f;
165 break;
166 case float_round_down:
167 roundIncrement = zSign ? 0x7f : 0;
168 break;
169 default:
170 abort();
171 }
172 roundBits = absZ & 0x7F;
173 absZ = ( absZ + roundIncrement )>>7;
174 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
175 z = absZ;
176 if ( zSign ) z = - z;
177 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
178 float_raise(float_flag_invalid, status);
179 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
180 }
181 if (roundBits) {
182 status->float_exception_flags |= float_flag_inexact;
183 }
184 return z;
185
186}
187
188
189
190
191
192
193
194
195
196
197
198
199
200static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
201 float_status *status)
202{
203 int8_t roundingMode;
204 flag roundNearestEven, increment;
205 int64_t z;
206
207 roundingMode = status->float_rounding_mode;
208 roundNearestEven = ( roundingMode == float_round_nearest_even );
209 switch (roundingMode) {
210 case float_round_nearest_even:
211 case float_round_ties_away:
212 increment = ((int64_t) absZ1 < 0);
213 break;
214 case float_round_to_zero:
215 increment = 0;
216 break;
217 case float_round_up:
218 increment = !zSign && absZ1;
219 break;
220 case float_round_down:
221 increment = zSign && absZ1;
222 break;
223 default:
224 abort();
225 }
226 if ( increment ) {
227 ++absZ0;
228 if ( absZ0 == 0 ) goto overflow;
229 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
230 }
231 z = absZ0;
232 if ( zSign ) z = - z;
233 if ( z && ( ( z < 0 ) ^ zSign ) ) {
234 overflow:
235 float_raise(float_flag_invalid, status);
236 return
237 zSign ? (int64_t) LIT64( 0x8000000000000000 )
238 : LIT64( 0x7FFFFFFFFFFFFFFF );
239 }
240 if (absZ1) {
241 status->float_exception_flags |= float_flag_inexact;
242 }
243 return z;
244
245}
246
247
248
249
250
251
252
253
254
255
256
257static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
258 uint64_t absZ1, float_status *status)
259{
260 int8_t roundingMode;
261 flag roundNearestEven, increment;
262
263 roundingMode = status->float_rounding_mode;
264 roundNearestEven = (roundingMode == float_round_nearest_even);
265 switch (roundingMode) {
266 case float_round_nearest_even:
267 case float_round_ties_away:
268 increment = ((int64_t)absZ1 < 0);
269 break;
270 case float_round_to_zero:
271 increment = 0;
272 break;
273 case float_round_up:
274 increment = !zSign && absZ1;
275 break;
276 case float_round_down:
277 increment = zSign && absZ1;
278 break;
279 default:
280 abort();
281 }
282 if (increment) {
283 ++absZ0;
284 if (absZ0 == 0) {
285 float_raise(float_flag_invalid, status);
286 return LIT64(0xFFFFFFFFFFFFFFFF);
287 }
288 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
289 }
290
291 if (zSign && absZ0) {
292 float_raise(float_flag_invalid, status);
293 return 0;
294 }
295
296 if (absZ1) {
297 status->float_exception_flags |= float_flag_inexact;
298 }
299 return absZ0;
300}
301
302
303
304
305
306static inline uint32_t extractFloat32Frac( float32 a )
307{
308
309 return float32_val(a) & 0x007FFFFF;
310
311}
312
313
314
315
316
317static inline int extractFloat32Exp(float32 a)
318{
319
320 return ( float32_val(a)>>23 ) & 0xFF;
321
322}
323
324
325
326
327
328static inline flag extractFloat32Sign( float32 a )
329{
330
331 return float32_val(a)>>31;
332
333}
334
335
336
337
338
339float32 float32_squash_input_denormal(float32 a, float_status *status)
340{
341 if (status->flush_inputs_to_zero) {
342 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
343 float_raise(float_flag_input_denormal, status);
344 return make_float32(float32_val(a) & 0x80000000);
345 }
346 }
347 return a;
348}
349
350
351
352
353
354
355
356
357static void
358 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
359{
360 int8_t shiftCount;
361
362 shiftCount = countLeadingZeros32( aSig ) - 8;
363 *zSigPtr = aSig<<shiftCount;
364 *zExpPtr = 1 - shiftCount;
365
366}
367
368
369
370
371
372
373
374
375
376
377
378
379static inline float32 packFloat32(flag zSign, int zExp, uint32_t zSig)
380{
381
382 return make_float32(
383 ( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
384
385}
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
410 float_status *status)
411{
412 int8_t roundingMode;
413 flag roundNearestEven;
414 int8_t roundIncrement, roundBits;
415 flag isTiny;
416
417 roundingMode = status->float_rounding_mode;
418 roundNearestEven = ( roundingMode == float_round_nearest_even );
419 switch (roundingMode) {
420 case float_round_nearest_even:
421 case float_round_ties_away:
422 roundIncrement = 0x40;
423 break;
424 case float_round_to_zero:
425 roundIncrement = 0;
426 break;
427 case float_round_up:
428 roundIncrement = zSign ? 0 : 0x7f;
429 break;
430 case float_round_down:
431 roundIncrement = zSign ? 0x7f : 0;
432 break;
433 default:
434 abort();
435 break;
436 }
437 roundBits = zSig & 0x7F;
438 if ( 0xFD <= (uint16_t) zExp ) {
439 if ( ( 0xFD < zExp )
440 || ( ( zExp == 0xFD )
441 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
442 ) {
443 float_raise(float_flag_overflow | float_flag_inexact, status);
444 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
445 }
446 if ( zExp < 0 ) {
447 if (status->flush_to_zero) {
448 float_raise(float_flag_output_denormal, status);
449 return packFloat32(zSign, 0, 0);
450 }
451 isTiny =
452 (status->float_detect_tininess
453 == float_tininess_before_rounding)
454 || ( zExp < -1 )
455 || ( zSig + roundIncrement < 0x80000000 );
456 shift32RightJamming( zSig, - zExp, &zSig );
457 zExp = 0;
458 roundBits = zSig & 0x7F;
459 if (isTiny && roundBits) {
460 float_raise(float_flag_underflow, status);
461 }
462 }
463 }
464 if (roundBits) {
465 status->float_exception_flags |= float_flag_inexact;
466 }
467 zSig = ( zSig + roundIncrement )>>7;
468 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
469 if ( zSig == 0 ) zExp = 0;
470 return packFloat32( zSign, zExp, zSig );
471
472}
473
474
475
476
477
478
479
480
481
482
483static float32
484 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
485 float_status *status)
486{
487 int8_t shiftCount;
488
489 shiftCount = countLeadingZeros32( zSig ) - 1;
490 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
491 status);
492
493}
494
495
496
497
498
499static inline uint64_t extractFloat64Frac( float64 a )
500{
501
502 return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
503
504}
505
506
507
508
509
510static inline int extractFloat64Exp(float64 a)
511{
512
513 return ( float64_val(a)>>52 ) & 0x7FF;
514
515}
516
517
518
519
520
521static inline flag extractFloat64Sign( float64 a )
522{
523
524 return float64_val(a)>>63;
525
526}
527
528
529
530
531
532float64 float64_squash_input_denormal(float64 a, float_status *status)
533{
534 if (status->flush_inputs_to_zero) {
535 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
536 float_raise(float_flag_input_denormal, status);
537 return make_float64(float64_val(a) & (1ULL << 63));
538 }
539 }
540 return a;
541}
542
543
544
545
546
547
548
549
550static void
551 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
552{
553 int8_t shiftCount;
554
555 shiftCount = countLeadingZeros64( aSig ) - 11;
556 *zSigPtr = aSig<<shiftCount;
557 *zExpPtr = 1 - shiftCount;
558
559}
560
561
562
563
564
565
566
567
568
569
570
571
572static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
573{
574
575 return make_float64(
576 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
577
578}
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
603 float_status *status)
604{
605 int8_t roundingMode;
606 flag roundNearestEven;
607 int roundIncrement, roundBits;
608 flag isTiny;
609
610 roundingMode = status->float_rounding_mode;
611 roundNearestEven = ( roundingMode == float_round_nearest_even );
612 switch (roundingMode) {
613 case float_round_nearest_even:
614 case float_round_ties_away:
615 roundIncrement = 0x200;
616 break;
617 case float_round_to_zero:
618 roundIncrement = 0;
619 break;
620 case float_round_up:
621 roundIncrement = zSign ? 0 : 0x3ff;
622 break;
623 case float_round_down:
624 roundIncrement = zSign ? 0x3ff : 0;
625 break;
626 case float_round_to_odd:
627 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
628 break;
629 default:
630 abort();
631 }
632 roundBits = zSig & 0x3FF;
633 if ( 0x7FD <= (uint16_t) zExp ) {
634 if ( ( 0x7FD < zExp )
635 || ( ( zExp == 0x7FD )
636 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
637 ) {
638 bool overflow_to_inf = roundingMode != float_round_to_odd &&
639 roundIncrement != 0;
640 float_raise(float_flag_overflow | float_flag_inexact, status);
641 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
642 }
643 if ( zExp < 0 ) {
644 if (status->flush_to_zero) {
645 float_raise(float_flag_output_denormal, status);
646 return packFloat64(zSign, 0, 0);
647 }
648 isTiny =
649 (status->float_detect_tininess
650 == float_tininess_before_rounding)
651 || ( zExp < -1 )
652 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
653 shift64RightJamming( zSig, - zExp, &zSig );
654 zExp = 0;
655 roundBits = zSig & 0x3FF;
656 if (isTiny && roundBits) {
657 float_raise(float_flag_underflow, status);
658 }
659 if (roundingMode == float_round_to_odd) {
660
661
662
663
664 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
665 }
666 }
667 }
668 if (roundBits) {
669 status->float_exception_flags |= float_flag_inexact;
670 }
671 zSig = ( zSig + roundIncrement )>>10;
672 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
673 if ( zSig == 0 ) zExp = 0;
674 return packFloat64( zSign, zExp, zSig );
675
676}
677
678
679
680
681
682
683
684
685
686
687static float64
688 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
689 float_status *status)
690{
691 int8_t shiftCount;
692
693 shiftCount = countLeadingZeros64( zSig ) - 1;
694 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
695 status);
696
697}
698
699
700
701
702
703
704static inline uint64_t extractFloatx80Frac( floatx80 a )
705{
706
707 return a.low;
708
709}
710
711
712
713
714
715
716static inline int32_t extractFloatx80Exp( floatx80 a )
717{
718
719 return a.high & 0x7FFF;
720
721}
722
723
724
725
726
727
728static inline flag extractFloatx80Sign( floatx80 a )
729{
730
731 return a.high>>15;
732
733}
734
735
736
737
738
739
740
741
742static void
743 normalizeFloatx80Subnormal( uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr )
744{
745 int8_t shiftCount;
746
747 shiftCount = countLeadingZeros64( aSig );
748 *zSigPtr = aSig<<shiftCount;
749 *zExpPtr = 1 - shiftCount;
750
751}
752
753
754
755
756
757
758static inline floatx80 packFloatx80( flag zSign, int32_t zExp, uint64_t zSig )
759{
760 floatx80 z;
761
762 z.low = zSig;
763 z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
764 return z;
765
766}
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792static floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
793 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
794 float_status *status)
795{
796 int8_t roundingMode;
797 flag roundNearestEven, increment, isTiny;
798 int64_t roundIncrement, roundMask, roundBits;
799
800 roundingMode = status->float_rounding_mode;
801 roundNearestEven = ( roundingMode == float_round_nearest_even );
802 if ( roundingPrecision == 80 ) goto precision80;
803 if ( roundingPrecision == 64 ) {
804 roundIncrement = LIT64( 0x0000000000000400 );
805 roundMask = LIT64( 0x00000000000007FF );
806 }
807 else if ( roundingPrecision == 32 ) {
808 roundIncrement = LIT64( 0x0000008000000000 );
809 roundMask = LIT64( 0x000000FFFFFFFFFF );
810 }
811 else {
812 goto precision80;
813 }
814 zSig0 |= ( zSig1 != 0 );
815 switch (roundingMode) {
816 case float_round_nearest_even:
817 case float_round_ties_away:
818 break;
819 case float_round_to_zero:
820 roundIncrement = 0;
821 break;
822 case float_round_up:
823 roundIncrement = zSign ? 0 : roundMask;
824 break;
825 case float_round_down:
826 roundIncrement = zSign ? roundMask : 0;
827 break;
828 default:
829 abort();
830 }
831 roundBits = zSig0 & roundMask;
832 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
833 if ( ( 0x7FFE < zExp )
834 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
835 ) {
836 goto overflow;
837 }
838 if ( zExp <= 0 ) {
839 if (status->flush_to_zero) {
840 float_raise(float_flag_output_denormal, status);
841 return packFloatx80(zSign, 0, 0);
842 }
843 isTiny =
844 (status->float_detect_tininess
845 == float_tininess_before_rounding)
846 || ( zExp < 0 )
847 || ( zSig0 <= zSig0 + roundIncrement );
848 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
849 zExp = 0;
850 roundBits = zSig0 & roundMask;
851 if (isTiny && roundBits) {
852 float_raise(float_flag_underflow, status);
853 }
854 if (roundBits) {
855 status->float_exception_flags |= float_flag_inexact;
856 }
857 zSig0 += roundIncrement;
858 if ( (int64_t) zSig0 < 0 ) zExp = 1;
859 roundIncrement = roundMask + 1;
860 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
861 roundMask |= roundIncrement;
862 }
863 zSig0 &= ~ roundMask;
864 return packFloatx80( zSign, zExp, zSig0 );
865 }
866 }
867 if (roundBits) {
868 status->float_exception_flags |= float_flag_inexact;
869 }
870 zSig0 += roundIncrement;
871 if ( zSig0 < roundIncrement ) {
872 ++zExp;
873 zSig0 = LIT64( 0x8000000000000000 );
874 }
875 roundIncrement = roundMask + 1;
876 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
877 roundMask |= roundIncrement;
878 }
879 zSig0 &= ~ roundMask;
880 if ( zSig0 == 0 ) zExp = 0;
881 return packFloatx80( zSign, zExp, zSig0 );
882 precision80:
883 switch (roundingMode) {
884 case float_round_nearest_even:
885 case float_round_ties_away:
886 increment = ((int64_t)zSig1 < 0);
887 break;
888 case float_round_to_zero:
889 increment = 0;
890 break;
891 case float_round_up:
892 increment = !zSign && zSig1;
893 break;
894 case float_round_down:
895 increment = zSign && zSig1;
896 break;
897 default:
898 abort();
899 }
900 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
901 if ( ( 0x7FFE < zExp )
902 || ( ( zExp == 0x7FFE )
903 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
904 && increment
905 )
906 ) {
907 roundMask = 0;
908 overflow:
909 float_raise(float_flag_overflow | float_flag_inexact, status);
910 if ( ( roundingMode == float_round_to_zero )
911 || ( zSign && ( roundingMode == float_round_up ) )
912 || ( ! zSign && ( roundingMode == float_round_down ) )
913 ) {
914 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
915 }
916 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
917 }
918 if ( zExp <= 0 ) {
919 isTiny =
920 (status->float_detect_tininess
921 == float_tininess_before_rounding)
922 || ( zExp < 0 )
923 || ! increment
924 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
925 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
926 zExp = 0;
927 if (isTiny && zSig1) {
928 float_raise(float_flag_underflow, status);
929 }
930 if (zSig1) {
931 status->float_exception_flags |= float_flag_inexact;
932 }
933 switch (roundingMode) {
934 case float_round_nearest_even:
935 case float_round_ties_away:
936 increment = ((int64_t)zSig1 < 0);
937 break;
938 case float_round_to_zero:
939 increment = 0;
940 break;
941 case float_round_up:
942 increment = !zSign && zSig1;
943 break;
944 case float_round_down:
945 increment = zSign && zSig1;
946 break;
947 default:
948 abort();
949 }
950 if ( increment ) {
951 ++zSig0;
952 zSig0 &=
953 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
954 if ( (int64_t) zSig0 < 0 ) zExp = 1;
955 }
956 return packFloatx80( zSign, zExp, zSig0 );
957 }
958 }
959 if (zSig1) {
960 status->float_exception_flags |= float_flag_inexact;
961 }
962 if ( increment ) {
963 ++zSig0;
964 if ( zSig0 == 0 ) {
965 ++zExp;
966 zSig0 = LIT64( 0x8000000000000000 );
967 }
968 else {
969 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
970 }
971 }
972 else {
973 if ( zSig0 == 0 ) zExp = 0;
974 }
975 return packFloatx80( zSign, zExp, zSig0 );
976
977}
978
979
980
981
982
983
984
985
986
987
988static floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
989 flag zSign, int32_t zExp,
990 uint64_t zSig0, uint64_t zSig1,
991 float_status *status)
992{
993 int8_t shiftCount;
994
995 if ( zSig0 == 0 ) {
996 zSig0 = zSig1;
997 zSig1 = 0;
998 zExp -= 64;
999 }
1000 shiftCount = countLeadingZeros64( zSig0 );
1001 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1002 zExp -= shiftCount;
1003 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
1004 zSig0, zSig1, status);
1005
1006}
1007
1008
1009
1010
1011
1012
1013static inline uint64_t extractFloat128Frac1( float128 a )
1014{
1015
1016 return a.low;
1017
1018}
1019
1020
1021
1022
1023
1024
1025static inline uint64_t extractFloat128Frac0( float128 a )
1026{
1027
1028 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
1029
1030}
1031
1032
1033
1034
1035
1036
1037static inline int32_t extractFloat128Exp( float128 a )
1038{
1039
1040 return ( a.high>>48 ) & 0x7FFF;
1041
1042}
1043
1044
1045
1046
1047
1048static inline flag extractFloat128Sign( float128 a )
1049{
1050
1051 return a.high>>63;
1052
1053}
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065static void
1066 normalizeFloat128Subnormal(
1067 uint64_t aSig0,
1068 uint64_t aSig1,
1069 int32_t *zExpPtr,
1070 uint64_t *zSig0Ptr,
1071 uint64_t *zSig1Ptr
1072 )
1073{
1074 int8_t shiftCount;
1075
1076 if ( aSig0 == 0 ) {
1077 shiftCount = countLeadingZeros64( aSig1 ) - 15;
1078 if ( shiftCount < 0 ) {
1079 *zSig0Ptr = aSig1>>( - shiftCount );
1080 *zSig1Ptr = aSig1<<( shiftCount & 63 );
1081 }
1082 else {
1083 *zSig0Ptr = aSig1<<shiftCount;
1084 *zSig1Ptr = 0;
1085 }
1086 *zExpPtr = - shiftCount - 63;
1087 }
1088 else {
1089 shiftCount = countLeadingZeros64( aSig0 ) - 15;
1090 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
1091 *zExpPtr = 1 - shiftCount;
1092 }
1093
1094}
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109static inline float128
1110 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
1111{
1112 float128 z;
1113
1114 z.low = zSig1;
1115 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
1116 return z;
1117
1118}
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
1142 uint64_t zSig0, uint64_t zSig1,
1143 uint64_t zSig2, float_status *status)
1144{
1145 int8_t roundingMode;
1146 flag roundNearestEven, increment, isTiny;
1147
1148 roundingMode = status->float_rounding_mode;
1149 roundNearestEven = ( roundingMode == float_round_nearest_even );
1150 switch (roundingMode) {
1151 case float_round_nearest_even:
1152 case float_round_ties_away:
1153 increment = ((int64_t)zSig2 < 0);
1154 break;
1155 case float_round_to_zero:
1156 increment = 0;
1157 break;
1158 case float_round_up:
1159 increment = !zSign && zSig2;
1160 break;
1161 case float_round_down:
1162 increment = zSign && zSig2;
1163 break;
1164 case float_round_to_odd:
1165 increment = !(zSig1 & 0x1) && zSig2;
1166 break;
1167 default:
1168 abort();
1169 }
1170 if ( 0x7FFD <= (uint32_t) zExp ) {
1171 if ( ( 0x7FFD < zExp )
1172 || ( ( zExp == 0x7FFD )
1173 && eq128(
1174 LIT64( 0x0001FFFFFFFFFFFF ),
1175 LIT64( 0xFFFFFFFFFFFFFFFF ),
1176 zSig0,
1177 zSig1
1178 )
1179 && increment
1180 )
1181 ) {
1182 float_raise(float_flag_overflow | float_flag_inexact, status);
1183 if ( ( roundingMode == float_round_to_zero )
1184 || ( zSign && ( roundingMode == float_round_up ) )
1185 || ( ! zSign && ( roundingMode == float_round_down ) )
1186 || (roundingMode == float_round_to_odd)
1187 ) {
1188 return
1189 packFloat128(
1190 zSign,
1191 0x7FFE,
1192 LIT64( 0x0000FFFFFFFFFFFF ),
1193 LIT64( 0xFFFFFFFFFFFFFFFF )
1194 );
1195 }
1196 return packFloat128( zSign, 0x7FFF, 0, 0 );
1197 }
1198 if ( zExp < 0 ) {
1199 if (status->flush_to_zero) {
1200 float_raise(float_flag_output_denormal, status);
1201 return packFloat128(zSign, 0, 0, 0);
1202 }
1203 isTiny =
1204 (status->float_detect_tininess
1205 == float_tininess_before_rounding)
1206 || ( zExp < -1 )
1207 || ! increment
1208 || lt128(
1209 zSig0,
1210 zSig1,
1211 LIT64( 0x0001FFFFFFFFFFFF ),
1212 LIT64( 0xFFFFFFFFFFFFFFFF )
1213 );
1214 shift128ExtraRightJamming(
1215 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1216 zExp = 0;
1217 if (isTiny && zSig2) {
1218 float_raise(float_flag_underflow, status);
1219 }
1220 switch (roundingMode) {
1221 case float_round_nearest_even:
1222 case float_round_ties_away:
1223 increment = ((int64_t)zSig2 < 0);
1224 break;
1225 case float_round_to_zero:
1226 increment = 0;
1227 break;
1228 case float_round_up:
1229 increment = !zSign && zSig2;
1230 break;
1231 case float_round_down:
1232 increment = zSign && zSig2;
1233 break;
1234 case float_round_to_odd:
1235 increment = !(zSig1 & 0x1) && zSig2;
1236 break;
1237 default:
1238 abort();
1239 }
1240 }
1241 }
1242 if (zSig2) {
1243 status->float_exception_flags |= float_flag_inexact;
1244 }
1245 if ( increment ) {
1246 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1247 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1248 }
1249 else {
1250 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1251 }
1252 return packFloat128( zSign, zExp, zSig0, zSig1 );
1253
1254}
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
1267 uint64_t zSig0, uint64_t zSig1,
1268 float_status *status)
1269{
1270 int8_t shiftCount;
1271 uint64_t zSig2;
1272
1273 if ( zSig0 == 0 ) {
1274 zSig0 = zSig1;
1275 zSig1 = 0;
1276 zExp -= 64;
1277 }
1278 shiftCount = countLeadingZeros64( zSig0 ) - 15;
1279 if ( 0 <= shiftCount ) {
1280 zSig2 = 0;
1281 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1282 }
1283 else {
1284 shift128ExtraRightJamming(
1285 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1286 }
1287 zExp -= shiftCount;
1288 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
1289
1290}
1291
1292
1293
1294
1295
1296
1297
1298float32 int32_to_float32(int32_t a, float_status *status)
1299{
1300 flag zSign;
1301
1302 if ( a == 0 ) return float32_zero;
1303 if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1304 zSign = ( a < 0 );
1305 return normalizeRoundAndPackFloat32(zSign, 0x9C, zSign ? -a : a, status);
1306}
1307
1308
1309
1310
1311
1312
1313
1314float64 int32_to_float64(int32_t a, float_status *status)
1315{
1316 flag zSign;
1317 uint32_t absA;
1318 int8_t shiftCount;
1319 uint64_t zSig;
1320
1321 if ( a == 0 ) return float64_zero;
1322 zSign = ( a < 0 );
1323 absA = zSign ? - a : a;
1324 shiftCount = countLeadingZeros32( absA ) + 21;
1325 zSig = absA;
1326 return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1327
1328}
1329
1330
1331
1332
1333
1334
1335
1336
1337floatx80 int32_to_floatx80(int32_t a, float_status *status)
1338{
1339 flag zSign;
1340 uint32_t absA;
1341 int8_t shiftCount;
1342 uint64_t zSig;
1343
1344 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1345 zSign = ( a < 0 );
1346 absA = zSign ? - a : a;
1347 shiftCount = countLeadingZeros32( absA ) + 32;
1348 zSig = absA;
1349 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1350
1351}
1352
1353
1354
1355
1356
1357
1358
1359float128 int32_to_float128(int32_t a, float_status *status)
1360{
1361 flag zSign;
1362 uint32_t absA;
1363 int8_t shiftCount;
1364 uint64_t zSig0;
1365
1366 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1367 zSign = ( a < 0 );
1368 absA = zSign ? - a : a;
1369 shiftCount = countLeadingZeros32( absA ) + 17;
1370 zSig0 = absA;
1371 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1372
1373}
1374
1375
1376
1377
1378
1379
1380
1381float32 int64_to_float32(int64_t a, float_status *status)
1382{
1383 flag zSign;
1384 uint64_t absA;
1385 int8_t shiftCount;
1386
1387 if ( a == 0 ) return float32_zero;
1388 zSign = ( a < 0 );
1389 absA = zSign ? - a : a;
1390 shiftCount = countLeadingZeros64( absA ) - 40;
1391 if ( 0 <= shiftCount ) {
1392 return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1393 }
1394 else {
1395 shiftCount += 7;
1396 if ( shiftCount < 0 ) {
1397 shift64RightJamming( absA, - shiftCount, &absA );
1398 }
1399 else {
1400 absA <<= shiftCount;
1401 }
1402 return roundAndPackFloat32(zSign, 0x9C - shiftCount, absA, status);
1403 }
1404
1405}
1406
1407
1408
1409
1410
1411
1412
1413float64 int64_to_float64(int64_t a, float_status *status)
1414{
1415 flag zSign;
1416
1417 if ( a == 0 ) return float64_zero;
1418 if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
1419 return packFloat64( 1, 0x43E, 0 );
1420 }
1421 zSign = ( a < 0 );
1422 return normalizeRoundAndPackFloat64(zSign, 0x43C, zSign ? -a : a, status);
1423}
1424
1425
1426
1427
1428
1429
1430
1431
1432floatx80 int64_to_floatx80(int64_t a, float_status *status)
1433{
1434 flag zSign;
1435 uint64_t absA;
1436 int8_t shiftCount;
1437
1438 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1439 zSign = ( a < 0 );
1440 absA = zSign ? - a : a;
1441 shiftCount = countLeadingZeros64( absA );
1442 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1443
1444}
1445
1446
1447
1448
1449
1450
1451
1452float128 int64_to_float128(int64_t a, float_status *status)
1453{
1454 flag zSign;
1455 uint64_t absA;
1456 int8_t shiftCount;
1457 int32_t zExp;
1458 uint64_t zSig0, zSig1;
1459
1460 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1461 zSign = ( a < 0 );
1462 absA = zSign ? - a : a;
1463 shiftCount = countLeadingZeros64( absA ) + 49;
1464 zExp = 0x406E - shiftCount;
1465 if ( 64 <= shiftCount ) {
1466 zSig1 = 0;
1467 zSig0 = absA;
1468 shiftCount -= 64;
1469 }
1470 else {
1471 zSig1 = absA;
1472 zSig0 = 0;
1473 }
1474 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1475 return packFloat128( zSign, zExp, zSig0, zSig1 );
1476
1477}
1478
1479
1480
1481
1482
1483
1484
1485float32 uint64_to_float32(uint64_t a, float_status *status)
1486{
1487 int shiftcount;
1488
1489 if (a == 0) {
1490 return float32_zero;
1491 }
1492
1493
1494
1495
1496
1497 shiftcount = countLeadingZeros64(a) - 40;
1498 if (shiftcount >= 0) {
1499 return packFloat32(0, 0x95 - shiftcount, a << shiftcount);
1500 }
1501
1502
1503
1504 shiftcount += 7;
1505 if (shiftcount < 0) {
1506 shift64RightJamming(a, -shiftcount, &a);
1507 } else {
1508 a <<= shiftcount;
1509 }
1510
1511 return roundAndPackFloat32(0, 0x9c - shiftcount, a, status);
1512}
1513
1514
1515
1516
1517
1518
1519
1520float64 uint64_to_float64(uint64_t a, float_status *status)
1521{
1522 int exp = 0x43C;
1523 int shiftcount;
1524
1525 if (a == 0) {
1526 return float64_zero;
1527 }
1528
1529 shiftcount = countLeadingZeros64(a) - 1;
1530 if (shiftcount < 0) {
1531 shift64RightJamming(a, -shiftcount, &a);
1532 } else {
1533 a <<= shiftcount;
1534 }
1535 return roundAndPackFloat64(0, exp - shiftcount, a, status);
1536}
1537
1538
1539
1540
1541
1542
1543
1544float128 uint64_to_float128(uint64_t a, float_status *status)
1545{
1546 if (a == 0) {
1547 return float128_zero;
1548 }
1549 return normalizeRoundAndPackFloat128(0, 0x406E, a, 0, status);
1550}
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562int32_t float32_to_int32(float32 a, float_status *status)
1563{
1564 flag aSign;
1565 int aExp;
1566 int shiftCount;
1567 uint32_t aSig;
1568 uint64_t aSig64;
1569
1570 a = float32_squash_input_denormal(a, status);
1571 aSig = extractFloat32Frac( a );
1572 aExp = extractFloat32Exp( a );
1573 aSign = extractFloat32Sign( a );
1574 if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1575 if ( aExp ) aSig |= 0x00800000;
1576 shiftCount = 0xAF - aExp;
1577 aSig64 = aSig;
1578 aSig64 <<= 32;
1579 if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1580 return roundAndPackInt32(aSign, aSig64, status);
1581
1582}
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594int32_t float32_to_int32_round_to_zero(float32 a, float_status *status)
1595{
1596 flag aSign;
1597 int aExp;
1598 int shiftCount;
1599 uint32_t aSig;
1600 int32_t z;
1601 a = float32_squash_input_denormal(a, status);
1602
1603 aSig = extractFloat32Frac( a );
1604 aExp = extractFloat32Exp( a );
1605 aSign = extractFloat32Sign( a );
1606 shiftCount = aExp - 0x9E;
1607 if ( 0 <= shiftCount ) {
1608 if ( float32_val(a) != 0xCF000000 ) {
1609 float_raise(float_flag_invalid, status);
1610 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1611 }
1612 return (int32_t) 0x80000000;
1613 }
1614 else if ( aExp <= 0x7E ) {
1615 if (aExp | aSig) {
1616 status->float_exception_flags |= float_flag_inexact;
1617 }
1618 return 0;
1619 }
1620 aSig = ( aSig | 0x00800000 )<<8;
1621 z = aSig>>( - shiftCount );
1622 if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
1623 status->float_exception_flags |= float_flag_inexact;
1624 }
1625 if ( aSign ) z = - z;
1626 return z;
1627
1628}
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640int16_t float32_to_int16_round_to_zero(float32 a, float_status *status)
1641{
1642 flag aSign;
1643 int aExp;
1644 int shiftCount;
1645 uint32_t aSig;
1646 int32_t z;
1647
1648 aSig = extractFloat32Frac( a );
1649 aExp = extractFloat32Exp( a );
1650 aSign = extractFloat32Sign( a );
1651 shiftCount = aExp - 0x8E;
1652 if ( 0 <= shiftCount ) {
1653 if ( float32_val(a) != 0xC7000000 ) {
1654 float_raise(float_flag_invalid, status);
1655 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1656 return 0x7FFF;
1657 }
1658 }
1659 return (int32_t) 0xffff8000;
1660 }
1661 else if ( aExp <= 0x7E ) {
1662 if ( aExp | aSig ) {
1663 status->float_exception_flags |= float_flag_inexact;
1664 }
1665 return 0;
1666 }
1667 shiftCount -= 0x10;
1668 aSig = ( aSig | 0x00800000 )<<8;
1669 z = aSig>>( - shiftCount );
1670 if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
1671 status->float_exception_flags |= float_flag_inexact;
1672 }
1673 if ( aSign ) {
1674 z = - z;
1675 }
1676 return z;
1677
1678}
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690int64_t float32_to_int64(float32 a, float_status *status)
1691{
1692 flag aSign;
1693 int aExp;
1694 int shiftCount;
1695 uint32_t aSig;
1696 uint64_t aSig64, aSigExtra;
1697 a = float32_squash_input_denormal(a, status);
1698
1699 aSig = extractFloat32Frac( a );
1700 aExp = extractFloat32Exp( a );
1701 aSign = extractFloat32Sign( a );
1702 shiftCount = 0xBE - aExp;
1703 if ( shiftCount < 0 ) {
1704 float_raise(float_flag_invalid, status);
1705 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1706 return LIT64( 0x7FFFFFFFFFFFFFFF );
1707 }
1708 return (int64_t) LIT64( 0x8000000000000000 );
1709 }
1710 if ( aExp ) aSig |= 0x00800000;
1711 aSig64 = aSig;
1712 aSig64 <<= 40;
1713 shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1714 return roundAndPackInt64(aSign, aSig64, aSigExtra, status);
1715
1716}
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730uint64_t float32_to_uint64(float32 a, float_status *status)
1731{
1732 flag aSign;
1733 int aExp;
1734 int shiftCount;
1735 uint32_t aSig;
1736 uint64_t aSig64, aSigExtra;
1737 a = float32_squash_input_denormal(a, status);
1738
1739 aSig = extractFloat32Frac(a);
1740 aExp = extractFloat32Exp(a);
1741 aSign = extractFloat32Sign(a);
1742 if ((aSign) && (aExp > 126)) {
1743 float_raise(float_flag_invalid, status);
1744 if (float32_is_any_nan(a)) {
1745 return LIT64(0xFFFFFFFFFFFFFFFF);
1746 } else {
1747 return 0;
1748 }
1749 }
1750 shiftCount = 0xBE - aExp;
1751 if (aExp) {
1752 aSig |= 0x00800000;
1753 }
1754 if (shiftCount < 0) {
1755 float_raise(float_flag_invalid, status);
1756 return LIT64(0xFFFFFFFFFFFFFFFF);
1757 }
1758
1759 aSig64 = aSig;
1760 aSig64 <<= 40;
1761 shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra);
1762 return roundAndPackUint64(aSign, aSig64, aSigExtra, status);
1763}
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *status)
1777{
1778 signed char current_rounding_mode = status->float_rounding_mode;
1779 set_float_rounding_mode(float_round_to_zero, status);
1780 int64_t v = float32_to_uint64(a, status);
1781 set_float_rounding_mode(current_rounding_mode, status);
1782 return v;
1783}
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795int64_t float32_to_int64_round_to_zero(float32 a, float_status *status)
1796{
1797 flag aSign;
1798 int aExp;
1799 int shiftCount;
1800 uint32_t aSig;
1801 uint64_t aSig64;
1802 int64_t z;
1803 a = float32_squash_input_denormal(a, status);
1804
1805 aSig = extractFloat32Frac( a );
1806 aExp = extractFloat32Exp( a );
1807 aSign = extractFloat32Sign( a );
1808 shiftCount = aExp - 0xBE;
1809 if ( 0 <= shiftCount ) {
1810 if ( float32_val(a) != 0xDF000000 ) {
1811 float_raise(float_flag_invalid, status);
1812 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1813 return LIT64( 0x7FFFFFFFFFFFFFFF );
1814 }
1815 }
1816 return (int64_t) LIT64( 0x8000000000000000 );
1817 }
1818 else if ( aExp <= 0x7E ) {
1819 if (aExp | aSig) {
1820 status->float_exception_flags |= float_flag_inexact;
1821 }
1822 return 0;
1823 }
1824 aSig64 = aSig | 0x00800000;
1825 aSig64 <<= 40;
1826 z = aSig64>>( - shiftCount );
1827 if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) {
1828 status->float_exception_flags |= float_flag_inexact;
1829 }
1830 if ( aSign ) z = - z;
1831 return z;
1832
1833}
1834
1835
1836
1837
1838
1839
1840
1841
1842float64 float32_to_float64(float32 a, float_status *status)
1843{
1844 flag aSign;
1845 int aExp;
1846 uint32_t aSig;
1847 a = float32_squash_input_denormal(a, status);
1848
1849 aSig = extractFloat32Frac( a );
1850 aExp = extractFloat32Exp( a );
1851 aSign = extractFloat32Sign( a );
1852 if ( aExp == 0xFF ) {
1853 if (aSig) {
1854 return commonNaNToFloat64(float32ToCommonNaN(a, status), status);
1855 }
1856 return packFloat64( aSign, 0x7FF, 0 );
1857 }
1858 if ( aExp == 0 ) {
1859 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1860 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1861 --aExp;
1862 }
1863 return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
1864
1865}
1866
1867
1868
1869
1870
1871
1872
1873
1874floatx80 float32_to_floatx80(float32 a, float_status *status)
1875{
1876 flag aSign;
1877 int aExp;
1878 uint32_t aSig;
1879
1880 a = float32_squash_input_denormal(a, status);
1881 aSig = extractFloat32Frac( a );
1882 aExp = extractFloat32Exp( a );
1883 aSign = extractFloat32Sign( a );
1884 if ( aExp == 0xFF ) {
1885 if (aSig) {
1886 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
1887 }
1888 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1889 }
1890 if ( aExp == 0 ) {
1891 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1892 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1893 }
1894 aSig |= 0x00800000;
1895 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
1896
1897}
1898
1899
1900
1901
1902
1903
1904
1905
1906float128 float32_to_float128(float32 a, float_status *status)
1907{
1908 flag aSign;
1909 int aExp;
1910 uint32_t aSig;
1911
1912 a = float32_squash_input_denormal(a, status);
1913 aSig = extractFloat32Frac( a );
1914 aExp = extractFloat32Exp( a );
1915 aSign = extractFloat32Sign( a );
1916 if ( aExp == 0xFF ) {
1917 if (aSig) {
1918 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
1919 }
1920 return packFloat128( aSign, 0x7FFF, 0, 0 );
1921 }
1922 if ( aExp == 0 ) {
1923 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1924 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1925 --aExp;
1926 }
1927 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
1928
1929}
1930
1931
1932
1933
1934
1935
1936
1937
1938float32 float32_round_to_int(float32 a, float_status *status)
1939{
1940 flag aSign;
1941 int aExp;
1942 uint32_t lastBitMask, roundBitsMask;
1943 uint32_t z;
1944 a = float32_squash_input_denormal(a, status);
1945
1946 aExp = extractFloat32Exp( a );
1947 if ( 0x96 <= aExp ) {
1948 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1949 return propagateFloat32NaN(a, a, status);
1950 }
1951 return a;
1952 }
1953 if ( aExp <= 0x7E ) {
1954 if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a;
1955 status->float_exception_flags |= float_flag_inexact;
1956 aSign = extractFloat32Sign( a );
1957 switch (status->float_rounding_mode) {
1958 case float_round_nearest_even:
1959 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1960 return packFloat32( aSign, 0x7F, 0 );
1961 }
1962 break;
1963 case float_round_ties_away:
1964 if (aExp == 0x7E) {
1965 return packFloat32(aSign, 0x7F, 0);
1966 }
1967 break;
1968 case float_round_down:
1969 return make_float32(aSign ? 0xBF800000 : 0);
1970 case float_round_up:
1971 return make_float32(aSign ? 0x80000000 : 0x3F800000);
1972 }
1973 return packFloat32( aSign, 0, 0 );
1974 }
1975 lastBitMask = 1;
1976 lastBitMask <<= 0x96 - aExp;
1977 roundBitsMask = lastBitMask - 1;
1978 z = float32_val(a);
1979 switch (status->float_rounding_mode) {
1980 case float_round_nearest_even:
1981 z += lastBitMask>>1;
1982 if ((z & roundBitsMask) == 0) {
1983 z &= ~lastBitMask;
1984 }
1985 break;
1986 case float_round_ties_away:
1987 z += lastBitMask >> 1;
1988 break;
1989 case float_round_to_zero:
1990 break;
1991 case float_round_up:
1992 if (!extractFloat32Sign(make_float32(z))) {
1993 z += roundBitsMask;
1994 }
1995 break;
1996 case float_round_down:
1997 if (extractFloat32Sign(make_float32(z))) {
1998 z += roundBitsMask;
1999 }
2000 break;
2001 default:
2002 abort();
2003 }
2004 z &= ~ roundBitsMask;
2005 if (z != float32_val(a)) {
2006 status->float_exception_flags |= float_flag_inexact;
2007 }
2008 return make_float32(z);
2009
2010}
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020static float32 addFloat32Sigs(float32 a, float32 b, flag zSign,
2021 float_status *status)
2022{
2023 int aExp, bExp, zExp;
2024 uint32_t aSig, bSig, zSig;
2025 int expDiff;
2026
2027 aSig = extractFloat32Frac( a );
2028 aExp = extractFloat32Exp( a );
2029 bSig = extractFloat32Frac( b );
2030 bExp = extractFloat32Exp( b );
2031 expDiff = aExp - bExp;
2032 aSig <<= 6;
2033 bSig <<= 6;
2034 if ( 0 < expDiff ) {
2035 if ( aExp == 0xFF ) {
2036 if (aSig) {
2037 return propagateFloat32NaN(a, b, status);
2038 }
2039 return a;
2040 }
2041 if ( bExp == 0 ) {
2042 --expDiff;
2043 }
2044 else {
2045 bSig |= 0x20000000;
2046 }
2047 shift32RightJamming( bSig, expDiff, &bSig );
2048 zExp = aExp;
2049 }
2050 else if ( expDiff < 0 ) {
2051 if ( bExp == 0xFF ) {
2052 if (bSig) {
2053 return propagateFloat32NaN(a, b, status);
2054 }
2055 return packFloat32( zSign, 0xFF, 0 );
2056 }
2057 if ( aExp == 0 ) {
2058 ++expDiff;
2059 }
2060 else {
2061 aSig |= 0x20000000;
2062 }
2063 shift32RightJamming( aSig, - expDiff, &aSig );
2064 zExp = bExp;
2065 }
2066 else {
2067 if ( aExp == 0xFF ) {
2068 if (aSig | bSig) {
2069 return propagateFloat32NaN(a, b, status);
2070 }
2071 return a;
2072 }
2073 if ( aExp == 0 ) {
2074 if (status->flush_to_zero) {
2075 if (aSig | bSig) {
2076 float_raise(float_flag_output_denormal, status);
2077 }
2078 return packFloat32(zSign, 0, 0);
2079 }
2080 return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
2081 }
2082 zSig = 0x40000000 + aSig + bSig;
2083 zExp = aExp;
2084 goto roundAndPack;
2085 }
2086 aSig |= 0x20000000;
2087 zSig = ( aSig + bSig )<<1;
2088 --zExp;
2089 if ( (int32_t) zSig < 0 ) {
2090 zSig = aSig + bSig;
2091 ++zExp;
2092 }
2093 roundAndPack:
2094 return roundAndPackFloat32(zSign, zExp, zSig, status);
2095
2096}
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106static float32 subFloat32Sigs(float32 a, float32 b, flag zSign,
2107 float_status *status)
2108{
2109 int aExp, bExp, zExp;
2110 uint32_t aSig, bSig, zSig;
2111 int expDiff;
2112
2113 aSig = extractFloat32Frac( a );
2114 aExp = extractFloat32Exp( a );
2115 bSig = extractFloat32Frac( b );
2116 bExp = extractFloat32Exp( b );
2117 expDiff = aExp - bExp;
2118 aSig <<= 7;
2119 bSig <<= 7;
2120 if ( 0 < expDiff ) goto aExpBigger;
2121 if ( expDiff < 0 ) goto bExpBigger;
2122 if ( aExp == 0xFF ) {
2123 if (aSig | bSig) {
2124 return propagateFloat32NaN(a, b, status);
2125 }
2126 float_raise(float_flag_invalid, status);
2127 return float32_default_nan(status);
2128 }
2129 if ( aExp == 0 ) {
2130 aExp = 1;
2131 bExp = 1;
2132 }
2133 if ( bSig < aSig ) goto aBigger;
2134 if ( aSig < bSig ) goto bBigger;
2135 return packFloat32(status->float_rounding_mode == float_round_down, 0, 0);
2136 bExpBigger:
2137 if ( bExp == 0xFF ) {
2138 if (bSig) {
2139 return propagateFloat32NaN(a, b, status);
2140 }
2141 return packFloat32( zSign ^ 1, 0xFF, 0 );
2142 }
2143 if ( aExp == 0 ) {
2144 ++expDiff;
2145 }
2146 else {
2147 aSig |= 0x40000000;
2148 }
2149 shift32RightJamming( aSig, - expDiff, &aSig );
2150 bSig |= 0x40000000;
2151 bBigger:
2152 zSig = bSig - aSig;
2153 zExp = bExp;
2154 zSign ^= 1;
2155 goto normalizeRoundAndPack;
2156 aExpBigger:
2157 if ( aExp == 0xFF ) {
2158 if (aSig) {
2159 return propagateFloat32NaN(a, b, status);
2160 }
2161 return a;
2162 }
2163 if ( bExp == 0 ) {
2164 --expDiff;
2165 }
2166 else {
2167 bSig |= 0x40000000;
2168 }
2169 shift32RightJamming( bSig, expDiff, &bSig );
2170 aSig |= 0x40000000;
2171 aBigger:
2172 zSig = aSig - bSig;
2173 zExp = aExp;
2174 normalizeRoundAndPack:
2175 --zExp;
2176 return normalizeRoundAndPackFloat32(zSign, zExp, zSig, status);
2177
2178}
2179
2180
2181
2182
2183
2184
2185
2186float32 float32_add(float32 a, float32 b, float_status *status)
2187{
2188 flag aSign, bSign;
2189 a = float32_squash_input_denormal(a, status);
2190 b = float32_squash_input_denormal(b, status);
2191
2192 aSign = extractFloat32Sign( a );
2193 bSign = extractFloat32Sign( b );
2194 if ( aSign == bSign ) {
2195 return addFloat32Sigs(a, b, aSign, status);
2196 }
2197 else {
2198 return subFloat32Sigs(a, b, aSign, status);
2199 }
2200
2201}
2202
2203
2204
2205
2206
2207
2208
2209float32 float32_sub(float32 a, float32 b, float_status *status)
2210{
2211 flag aSign, bSign;
2212 a = float32_squash_input_denormal(a, status);
2213 b = float32_squash_input_denormal(b, status);
2214
2215 aSign = extractFloat32Sign( a );
2216 bSign = extractFloat32Sign( b );
2217 if ( aSign == bSign ) {
2218 return subFloat32Sigs(a, b, aSign, status);
2219 }
2220 else {
2221 return addFloat32Sigs(a, b, aSign, status);
2222 }
2223
2224}
2225
2226
2227
2228
2229
2230
2231
2232float32 float32_mul(float32 a, float32 b, float_status *status)
2233{
2234 flag aSign, bSign, zSign;
2235 int aExp, bExp, zExp;
2236 uint32_t aSig, bSig;
2237 uint64_t zSig64;
2238 uint32_t zSig;
2239
2240 a = float32_squash_input_denormal(a, status);
2241 b = float32_squash_input_denormal(b, status);
2242
2243 aSig = extractFloat32Frac( a );
2244 aExp = extractFloat32Exp( a );
2245 aSign = extractFloat32Sign( a );
2246 bSig = extractFloat32Frac( b );
2247 bExp = extractFloat32Exp( b );
2248 bSign = extractFloat32Sign( b );
2249 zSign = aSign ^ bSign;
2250 if ( aExp == 0xFF ) {
2251 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2252 return propagateFloat32NaN(a, b, status);
2253 }
2254 if ( ( bExp | bSig ) == 0 ) {
2255 float_raise(float_flag_invalid, status);
2256 return float32_default_nan(status);
2257 }
2258 return packFloat32( zSign, 0xFF, 0 );
2259 }
2260 if ( bExp == 0xFF ) {
2261 if (bSig) {
2262 return propagateFloat32NaN(a, b, status);
2263 }
2264 if ( ( aExp | aSig ) == 0 ) {
2265 float_raise(float_flag_invalid, status);
2266 return float32_default_nan(status);
2267 }
2268 return packFloat32( zSign, 0xFF, 0 );
2269 }
2270 if ( aExp == 0 ) {
2271 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2272 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2273 }
2274 if ( bExp == 0 ) {
2275 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
2276 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2277 }
2278 zExp = aExp + bExp - 0x7F;
2279 aSig = ( aSig | 0x00800000 )<<7;
2280 bSig = ( bSig | 0x00800000 )<<8;
2281 shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 );
2282 zSig = zSig64;
2283 if ( 0 <= (int32_t) ( zSig<<1 ) ) {
2284 zSig <<= 1;
2285 --zExp;
2286 }
2287 return roundAndPackFloat32(zSign, zExp, zSig, status);
2288
2289}
2290
2291
2292
2293
2294
2295
2296
2297float32 float32_div(float32 a, float32 b, float_status *status)
2298{
2299 flag aSign, bSign, zSign;
2300 int aExp, bExp, zExp;
2301 uint32_t aSig, bSig, zSig;
2302 a = float32_squash_input_denormal(a, status);
2303 b = float32_squash_input_denormal(b, status);
2304
2305 aSig = extractFloat32Frac( a );
2306 aExp = extractFloat32Exp( a );
2307 aSign = extractFloat32Sign( a );
2308 bSig = extractFloat32Frac( b );
2309 bExp = extractFloat32Exp( b );
2310 bSign = extractFloat32Sign( b );
2311 zSign = aSign ^ bSign;
2312 if ( aExp == 0xFF ) {
2313 if (aSig) {
2314 return propagateFloat32NaN(a, b, status);
2315 }
2316 if ( bExp == 0xFF ) {
2317 if (bSig) {
2318 return propagateFloat32NaN(a, b, status);
2319 }
2320 float_raise(float_flag_invalid, status);
2321 return float32_default_nan(status);
2322 }
2323 return packFloat32( zSign, 0xFF, 0 );
2324 }
2325 if ( bExp == 0xFF ) {
2326 if (bSig) {
2327 return propagateFloat32NaN(a, b, status);
2328 }
2329 return packFloat32( zSign, 0, 0 );
2330 }
2331 if ( bExp == 0 ) {
2332 if ( bSig == 0 ) {
2333 if ( ( aExp | aSig ) == 0 ) {
2334 float_raise(float_flag_invalid, status);
2335 return float32_default_nan(status);
2336 }
2337 float_raise(float_flag_divbyzero, status);
2338 return packFloat32( zSign, 0xFF, 0 );
2339 }
2340 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2341 }
2342 if ( aExp == 0 ) {
2343 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2344 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2345 }
2346 zExp = aExp - bExp + 0x7D;
2347 aSig = ( aSig | 0x00800000 )<<7;
2348 bSig = ( bSig | 0x00800000 )<<8;
2349 if ( bSig <= ( aSig + aSig ) ) {
2350 aSig >>= 1;
2351 ++zExp;
2352 }
2353 zSig = ( ( (uint64_t) aSig )<<32 ) / bSig;
2354 if ( ( zSig & 0x3F ) == 0 ) {
2355 zSig |= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 );
2356 }
2357 return roundAndPackFloat32(zSign, zExp, zSig, status);
2358
2359}
2360
2361
2362
2363
2364
2365
2366
2367float32 float32_rem(float32 a, float32 b, float_status *status)
2368{
2369 flag aSign, zSign;
2370 int aExp, bExp, expDiff;
2371 uint32_t aSig, bSig;
2372 uint32_t q;
2373 uint64_t aSig64, bSig64, q64;
2374 uint32_t alternateASig;
2375 int32_t sigMean;
2376 a = float32_squash_input_denormal(a, status);
2377 b = float32_squash_input_denormal(b, status);
2378
2379 aSig = extractFloat32Frac( a );
2380 aExp = extractFloat32Exp( a );
2381 aSign = extractFloat32Sign( a );
2382 bSig = extractFloat32Frac( b );
2383 bExp = extractFloat32Exp( b );
2384 if ( aExp == 0xFF ) {
2385 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2386 return propagateFloat32NaN(a, b, status);
2387 }
2388 float_raise(float_flag_invalid, status);
2389 return float32_default_nan(status);
2390 }
2391 if ( bExp == 0xFF ) {
2392 if (bSig) {
2393 return propagateFloat32NaN(a, b, status);
2394 }
2395 return a;
2396 }
2397 if ( bExp == 0 ) {
2398 if ( bSig == 0 ) {
2399 float_raise(float_flag_invalid, status);
2400 return float32_default_nan(status);
2401 }
2402 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2403 }
2404 if ( aExp == 0 ) {
2405 if ( aSig == 0 ) return a;
2406 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2407 }
2408 expDiff = aExp - bExp;
2409 aSig |= 0x00800000;
2410 bSig |= 0x00800000;
2411 if ( expDiff < 32 ) {
2412 aSig <<= 8;
2413 bSig <<= 8;
2414 if ( expDiff < 0 ) {
2415 if ( expDiff < -1 ) return a;
2416 aSig >>= 1;
2417 }
2418 q = ( bSig <= aSig );
2419 if ( q ) aSig -= bSig;
2420 if ( 0 < expDiff ) {
2421 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
2422 q >>= 32 - expDiff;
2423 bSig >>= 2;
2424 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2425 }
2426 else {
2427 aSig >>= 2;
2428 bSig >>= 2;
2429 }
2430 }
2431 else {
2432 if ( bSig <= aSig ) aSig -= bSig;
2433 aSig64 = ( (uint64_t) aSig )<<40;
2434 bSig64 = ( (uint64_t) bSig )<<40;
2435 expDiff -= 64;
2436 while ( 0 < expDiff ) {
2437 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2438 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2439 aSig64 = - ( ( bSig * q64 )<<38 );
2440 expDiff -= 62;
2441 }
2442 expDiff += 64;
2443 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2444 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2445 q = q64>>( 64 - expDiff );
2446 bSig <<= 6;
2447 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2448 }
2449 do {
2450 alternateASig = aSig;
2451 ++q;
2452 aSig -= bSig;
2453 } while ( 0 <= (int32_t) aSig );
2454 sigMean = aSig + alternateASig;
2455 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2456 aSig = alternateASig;
2457 }
2458 zSign = ( (int32_t) aSig < 0 );
2459 if ( zSign ) aSig = - aSig;
2460 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
2461}
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474float32 float32_muladd(float32 a, float32 b, float32 c, int flags,
2475 float_status *status)
2476{
2477 flag aSign, bSign, cSign, zSign;
2478 int aExp, bExp, cExp, pExp, zExp, expDiff;
2479 uint32_t aSig, bSig, cSig;
2480 flag pInf, pZero, pSign;
2481 uint64_t pSig64, cSig64, zSig64;
2482 uint32_t pSig;
2483 int shiftcount;
2484 flag signflip, infzero;
2485
2486 a = float32_squash_input_denormal(a, status);
2487 b = float32_squash_input_denormal(b, status);
2488 c = float32_squash_input_denormal(c, status);
2489 aSig = extractFloat32Frac(a);
2490 aExp = extractFloat32Exp(a);
2491 aSign = extractFloat32Sign(a);
2492 bSig = extractFloat32Frac(b);
2493 bExp = extractFloat32Exp(b);
2494 bSign = extractFloat32Sign(b);
2495 cSig = extractFloat32Frac(c);
2496 cExp = extractFloat32Exp(c);
2497 cSign = extractFloat32Sign(c);
2498
2499 infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) ||
2500 (aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));
2501
2502
2503
2504
2505
2506
2507 if (((aExp == 0xff) && aSig) ||
2508 ((bExp == 0xff) && bSig) ||
2509 ((cExp == 0xff) && cSig)) {
2510 return propagateFloat32MulAddNaN(a, b, c, infzero, status);
2511 }
2512
2513 if (infzero) {
2514 float_raise(float_flag_invalid, status);
2515 return float32_default_nan(status);
2516 }
2517
2518 if (flags & float_muladd_negate_c) {
2519 cSign ^= 1;
2520 }
2521
2522 signflip = (flags & float_muladd_negate_result) ? 1 : 0;
2523
2524
2525 pSign = aSign ^ bSign;
2526 if (flags & float_muladd_negate_product) {
2527 pSign ^= 1;
2528 }
2529 pInf = (aExp == 0xff) || (bExp == 0xff);
2530 pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
2531
2532 if (cExp == 0xff) {
2533 if (pInf && (pSign ^ cSign)) {
2534
2535 float_raise(float_flag_invalid, status);
2536 return float32_default_nan(status);
2537 }
2538
2539 return packFloat32(cSign ^ signflip, 0xff, 0);
2540 }
2541
2542 if (pInf) {
2543 return packFloat32(pSign ^ signflip, 0xff, 0);
2544 }
2545
2546 if (pZero) {
2547 if (cExp == 0) {
2548 if (cSig == 0) {
2549
2550 if (pSign == cSign) {
2551 zSign = pSign;
2552 } else if (status->float_rounding_mode == float_round_down) {
2553 zSign = 1;
2554 } else {
2555 zSign = 0;
2556 }
2557 return packFloat32(zSign ^ signflip, 0, 0);
2558 }
2559
2560 if (status->flush_to_zero) {
2561 float_raise(float_flag_output_denormal, status);
2562 return packFloat32(cSign ^ signflip, 0, 0);
2563 }
2564 }
2565
2566 if (flags & float_muladd_halve_result) {
2567 if (cExp == 0) {
2568 normalizeFloat32Subnormal(cSig, &cExp, &cSig);
2569 }
2570
2571
2572
2573 cExp -= 2;
2574 cSig = (cSig | 0x00800000) << 7;
2575 return roundAndPackFloat32(cSign ^ signflip, cExp, cSig, status);
2576 }
2577 return packFloat32(cSign ^ signflip, cExp, cSig);
2578 }
2579
2580 if (aExp == 0) {
2581 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
2582 }
2583 if (bExp == 0) {
2584 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
2585 }
2586
2587
2588
2589
2590
2591
2592
2593
2594 pExp = aExp + bExp - 0x7e;
2595 aSig = (aSig | 0x00800000) << 7;
2596 bSig = (bSig | 0x00800000) << 8;
2597 pSig64 = (uint64_t)aSig * bSig;
2598 if ((int64_t)(pSig64 << 1) >= 0) {
2599 pSig64 <<= 1;
2600 pExp--;
2601 }
2602
2603 zSign = pSign ^ signflip;
2604
2605
2606
2607
2608 if (cExp == 0) {
2609 if (!cSig) {
2610
2611 shift64RightJamming(pSig64, 32, &pSig64);
2612 pSig = pSig64;
2613 if (flags & float_muladd_halve_result) {
2614 pExp--;
2615 }
2616 return roundAndPackFloat32(zSign, pExp - 1,
2617 pSig, status);
2618 }
2619 normalizeFloat32Subnormal(cSig, &cExp, &cSig);
2620 }
2621
2622 cSig64 = (uint64_t)cSig << (62 - 23);
2623 cSig64 |= LIT64(0x4000000000000000);
2624 expDiff = pExp - cExp;
2625
2626 if (pSign == cSign) {
2627
2628 if (expDiff > 0) {
2629
2630 shift64RightJamming(cSig64, expDiff, &cSig64);
2631 zExp = pExp;
2632 } else if (expDiff < 0) {
2633
2634 shift64RightJamming(pSig64, -expDiff, &pSig64);
2635 zExp = cExp;
2636 } else {
2637
2638 zExp = cExp;
2639 }
2640
2641 zSig64 = pSig64 + cSig64;
2642 if ((int64_t)zSig64 < 0) {
2643 shift64RightJamming(zSig64, 1, &zSig64);
2644 } else {
2645 zExp--;
2646 }
2647 } else {
2648
2649 if (expDiff > 0) {
2650 shift64RightJamming(cSig64, expDiff, &cSig64);
2651 zSig64 = pSig64 - cSig64;
2652 zExp = pExp;
2653 } else if (expDiff < 0) {
2654 shift64RightJamming(pSig64, -expDiff, &pSig64);
2655 zSig64 = cSig64 - pSig64;
2656 zExp = cExp;
2657 zSign ^= 1;
2658 } else {
2659 zExp = pExp;
2660 if (cSig64 < pSig64) {
2661 zSig64 = pSig64 - cSig64;
2662 } else if (pSig64 < cSig64) {
2663 zSig64 = cSig64 - pSig64;
2664 zSign ^= 1;
2665 } else {
2666
2667 zSign = signflip;
2668 if (status->float_rounding_mode == float_round_down) {
2669 zSign ^= 1;
2670 }
2671 return packFloat32(zSign, 0, 0);
2672 }
2673 }
2674 --zExp;
2675
2676 shiftcount = countLeadingZeros64(zSig64) - 1;
2677 zSig64 <<= shiftcount;
2678 zExp -= shiftcount;
2679 }
2680 if (flags & float_muladd_halve_result) {
2681 zExp--;
2682 }
2683
2684 shift64RightJamming(zSig64, 32, &zSig64);
2685 return roundAndPackFloat32(zSign, zExp, zSig64, status);
2686}
2687
2688
2689
2690
2691
2692
2693
2694
2695float32 float32_sqrt(float32 a, float_status *status)
2696{
2697 flag aSign;
2698 int aExp, zExp;
2699 uint32_t aSig, zSig;
2700 uint64_t rem, term;
2701 a = float32_squash_input_denormal(a, status);
2702
2703 aSig = extractFloat32Frac( a );
2704 aExp = extractFloat32Exp( a );
2705 aSign = extractFloat32Sign( a );
2706 if ( aExp == 0xFF ) {
2707 if (aSig) {
2708 return propagateFloat32NaN(a, float32_zero, status);
2709 }
2710 if ( ! aSign ) return a;
2711 float_raise(float_flag_invalid, status);
2712 return float32_default_nan(status);
2713 }
2714 if ( aSign ) {
2715 if ( ( aExp | aSig ) == 0 ) return a;
2716 float_raise(float_flag_invalid, status);
2717 return float32_default_nan(status);
2718 }
2719 if ( aExp == 0 ) {
2720 if ( aSig == 0 ) return float32_zero;
2721 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2722 }
2723 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2724 aSig = ( aSig | 0x00800000 )<<8;
2725 zSig = estimateSqrt32( aExp, aSig ) + 2;
2726 if ( ( zSig & 0x7F ) <= 5 ) {
2727 if ( zSig < 2 ) {
2728 zSig = 0x7FFFFFFF;
2729 goto roundAndPack;
2730 }
2731 aSig >>= aExp & 1;
2732 term = ( (uint64_t) zSig ) * zSig;
2733 rem = ( ( (uint64_t) aSig )<<32 ) - term;
2734 while ( (int64_t) rem < 0 ) {
2735 --zSig;
2736 rem += ( ( (uint64_t) zSig )<<1 ) | 1;
2737 }
2738 zSig |= ( rem != 0 );
2739 }
2740 shift32RightJamming( zSig, 1, &zSig );
2741 roundAndPack:
2742 return roundAndPackFloat32(0, zExp, zSig, status);
2743
2744}
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764static const float64 float32_exp2_coefficients[15] =
2765{
2766 const_float64( 0x3ff0000000000000ll ),
2767 const_float64( 0x3fe0000000000000ll ),
2768 const_float64( 0x3fc5555555555555ll ),
2769 const_float64( 0x3fa5555555555555ll ),
2770 const_float64( 0x3f81111111111111ll ),
2771 const_float64( 0x3f56c16c16c16c17ll ),
2772 const_float64( 0x3f2a01a01a01a01all ),
2773 const_float64( 0x3efa01a01a01a01all ),
2774 const_float64( 0x3ec71de3a556c734ll ),
2775 const_float64( 0x3e927e4fb7789f5cll ),
2776 const_float64( 0x3e5ae64567f544e4ll ),
2777 const_float64( 0x3e21eed8eff8d898ll ),
2778 const_float64( 0x3de6124613a86d09ll ),
2779 const_float64( 0x3da93974a8c07c9dll ),
2780 const_float64( 0x3d6ae7f3e733b81fll ),
2781};
2782
2783float32 float32_exp2(float32 a, float_status *status)
2784{
2785 flag aSign;
2786 int aExp;
2787 uint32_t aSig;
2788 float64 r, x, xn;
2789 int i;
2790 a = float32_squash_input_denormal(a, status);
2791
2792 aSig = extractFloat32Frac( a );
2793 aExp = extractFloat32Exp( a );
2794 aSign = extractFloat32Sign( a );
2795
2796 if ( aExp == 0xFF) {
2797 if (aSig) {
2798 return propagateFloat32NaN(a, float32_zero, status);
2799 }
2800 return (aSign) ? float32_zero : a;
2801 }
2802 if (aExp == 0) {
2803 if (aSig == 0) return float32_one;
2804 }
2805
2806 float_raise(float_flag_inexact, status);
2807
2808
2809
2810
2811 x = float32_to_float64(a, status);
2812 x = float64_mul(x, float64_ln2, status);
2813
2814 xn = x;
2815 r = float64_one;
2816 for (i = 0 ; i < 15 ; i++) {
2817 float64 f;
2818
2819 f = float64_mul(xn, float32_exp2_coefficients[i], status);
2820 r = float64_add(r, f, status);
2821
2822 xn = float64_mul(xn, x, status);
2823 }
2824
2825 return float64_to_float32(r, status);
2826}
2827
2828
2829
2830
2831
2832
2833float32 float32_log2(float32 a, float_status *status)
2834{
2835 flag aSign, zSign;
2836 int aExp;
2837 uint32_t aSig, zSig, i;
2838
2839 a = float32_squash_input_denormal(a, status);
2840 aSig = extractFloat32Frac( a );
2841 aExp = extractFloat32Exp( a );
2842 aSign = extractFloat32Sign( a );
2843
2844 if ( aExp == 0 ) {
2845 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
2846 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2847 }
2848 if ( aSign ) {
2849 float_raise(float_flag_invalid, status);
2850 return float32_default_nan(status);
2851 }
2852 if ( aExp == 0xFF ) {
2853 if (aSig) {
2854 return propagateFloat32NaN(a, float32_zero, status);
2855 }
2856 return a;
2857 }
2858
2859 aExp -= 0x7F;
2860 aSig |= 0x00800000;
2861 zSign = aExp < 0;
2862 zSig = aExp << 23;
2863
2864 for (i = 1 << 22; i > 0; i >>= 1) {
2865 aSig = ( (uint64_t)aSig * aSig ) >> 23;
2866 if ( aSig & 0x01000000 ) {
2867 aSig >>= 1;
2868 zSig |= i;
2869 }
2870 }
2871
2872 if ( zSign )
2873 zSig = -zSig;
2874
2875 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
2876}
2877
2878
2879
2880
2881
2882
2883
2884
2885int float32_eq(float32 a, float32 b, float_status *status)
2886{
2887 uint32_t av, bv;
2888 a = float32_squash_input_denormal(a, status);
2889 b = float32_squash_input_denormal(b, status);
2890
2891 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2892 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2893 ) {
2894 float_raise(float_flag_invalid, status);
2895 return 0;
2896 }
2897 av = float32_val(a);
2898 bv = float32_val(b);
2899 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
2900}
2901
2902
2903
2904
2905
2906
2907
2908
2909int float32_le(float32 a, float32 b, float_status *status)
2910{
2911 flag aSign, bSign;
2912 uint32_t av, bv;
2913 a = float32_squash_input_denormal(a, status);
2914 b = float32_squash_input_denormal(b, status);
2915
2916 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2917 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2918 ) {
2919 float_raise(float_flag_invalid, status);
2920 return 0;
2921 }
2922 aSign = extractFloat32Sign( a );
2923 bSign = extractFloat32Sign( b );
2924 av = float32_val(a);
2925 bv = float32_val(b);
2926 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
2927 return ( av == bv ) || ( aSign ^ ( av < bv ) );
2928
2929}
2930
2931
2932
2933
2934
2935
2936
2937
2938int float32_lt(float32 a, float32 b, float_status *status)
2939{
2940 flag aSign, bSign;
2941 uint32_t av, bv;
2942 a = float32_squash_input_denormal(a, status);
2943 b = float32_squash_input_denormal(b, status);
2944
2945 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2946 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2947 ) {
2948 float_raise(float_flag_invalid, status);
2949 return 0;
2950 }
2951 aSign = extractFloat32Sign( a );
2952 bSign = extractFloat32Sign( b );
2953 av = float32_val(a);
2954 bv = float32_val(b);
2955 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
2956 return ( av != bv ) && ( aSign ^ ( av < bv ) );
2957
2958}
2959
2960
2961
2962
2963
2964
2965
2966
2967int float32_unordered(float32 a, float32 b, float_status *status)
2968{
2969 a = float32_squash_input_denormal(a, status);
2970 b = float32_squash_input_denormal(b, status);
2971
2972 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2973 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2974 ) {
2975 float_raise(float_flag_invalid, status);
2976 return 1;
2977 }
2978 return 0;
2979}
2980
2981
2982
2983
2984
2985
2986
2987
2988int float32_eq_quiet(float32 a, float32 b, float_status *status)
2989{
2990 a = float32_squash_input_denormal(a, status);
2991 b = float32_squash_input_denormal(b, status);
2992
2993 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2994 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2995 ) {
2996 if (float32_is_signaling_nan(a, status)
2997 || float32_is_signaling_nan(b, status)) {
2998 float_raise(float_flag_invalid, status);
2999 }
3000 return 0;
3001 }
3002 return ( float32_val(a) == float32_val(b) ) ||
3003 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
3004}
3005
3006
3007
3008
3009
3010
3011
3012
3013int float32_le_quiet(float32 a, float32 b, float_status *status)
3014{
3015 flag aSign, bSign;
3016 uint32_t av, bv;
3017 a = float32_squash_input_denormal(a, status);
3018 b = float32_squash_input_denormal(b, status);
3019
3020 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3021 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3022 ) {
3023 if (float32_is_signaling_nan(a, status)
3024 || float32_is_signaling_nan(b, status)) {
3025 float_raise(float_flag_invalid, status);
3026 }
3027 return 0;
3028 }
3029 aSign = extractFloat32Sign( a );
3030 bSign = extractFloat32Sign( b );
3031 av = float32_val(a);
3032 bv = float32_val(b);
3033 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
3034 return ( av == bv ) || ( aSign ^ ( av < bv ) );
3035
3036}
3037
3038
3039
3040
3041
3042
3043
3044
3045int float32_lt_quiet(float32 a, float32 b, float_status *status)
3046{
3047 flag aSign, bSign;
3048 uint32_t av, bv;
3049 a = float32_squash_input_denormal(a, status);
3050 b = float32_squash_input_denormal(b, status);
3051
3052 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3053 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3054 ) {
3055 if (float32_is_signaling_nan(a, status)
3056 || float32_is_signaling_nan(b, status)) {
3057 float_raise(float_flag_invalid, status);
3058 }
3059 return 0;
3060 }
3061 aSign = extractFloat32Sign( a );
3062 bSign = extractFloat32Sign( b );
3063 av = float32_val(a);
3064 bv = float32_val(b);
3065 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
3066 return ( av != bv ) && ( aSign ^ ( av < bv ) );
3067
3068}
3069
3070
3071
3072
3073
3074
3075
3076
3077int float32_unordered_quiet(float32 a, float32 b, float_status *status)
3078{
3079 a = float32_squash_input_denormal(a, status);
3080 b = float32_squash_input_denormal(b, status);
3081
3082 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3083 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3084 ) {
3085 if (float32_is_signaling_nan(a, status)
3086 || float32_is_signaling_nan(b, status)) {
3087 float_raise(float_flag_invalid, status);
3088 }
3089 return 1;
3090 }
3091 return 0;
3092}
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104int32_t float64_to_int32(float64 a, float_status *status)
3105{
3106 flag aSign;
3107 int aExp;
3108 int shiftCount;
3109 uint64_t aSig;
3110 a = float64_squash_input_denormal(a, status);
3111
3112 aSig = extractFloat64Frac( a );
3113 aExp = extractFloat64Exp( a );
3114 aSign = extractFloat64Sign( a );
3115 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
3116 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3117 shiftCount = 0x42C - aExp;
3118 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
3119 return roundAndPackInt32(aSign, aSig, status);
3120
3121}
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133int32_t float64_to_int32_round_to_zero(float64 a, float_status *status)
3134{
3135 flag aSign;
3136 int aExp;
3137 int shiftCount;
3138 uint64_t aSig, savedASig;
3139 int32_t z;
3140 a = float64_squash_input_denormal(a, status);
3141
3142 aSig = extractFloat64Frac( a );
3143 aExp = extractFloat64Exp( a );
3144 aSign = extractFloat64Sign( a );
3145 if ( 0x41E < aExp ) {
3146 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
3147 goto invalid;
3148 }
3149 else if ( aExp < 0x3FF ) {
3150 if (aExp || aSig) {
3151 status->float_exception_flags |= float_flag_inexact;
3152 }
3153 return 0;
3154 }
3155 aSig |= LIT64( 0x0010000000000000 );
3156 shiftCount = 0x433 - aExp;
3157 savedASig = aSig;
3158 aSig >>= shiftCount;
3159 z = aSig;
3160 if ( aSign ) z = - z;
3161 if ( ( z < 0 ) ^ aSign ) {
3162 invalid:
3163 float_raise(float_flag_invalid, status);
3164 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
3165 }
3166 if ( ( aSig<<shiftCount ) != savedASig ) {
3167 status->float_exception_flags |= float_flag_inexact;
3168 }
3169 return z;
3170
3171}
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183int16_t float64_to_int16_round_to_zero(float64 a, float_status *status)
3184{
3185 flag aSign;
3186 int aExp;
3187 int shiftCount;
3188 uint64_t aSig, savedASig;
3189 int32_t z;
3190
3191 aSig = extractFloat64Frac( a );
3192 aExp = extractFloat64Exp( a );
3193 aSign = extractFloat64Sign( a );
3194 if ( 0x40E < aExp ) {
3195 if ( ( aExp == 0x7FF ) && aSig ) {
3196 aSign = 0;
3197 }
3198 goto invalid;
3199 }
3200 else if ( aExp < 0x3FF ) {
3201 if ( aExp || aSig ) {
3202 status->float_exception_flags |= float_flag_inexact;
3203 }
3204 return 0;
3205 }
3206 aSig |= LIT64( 0x0010000000000000 );
3207 shiftCount = 0x433 - aExp;
3208 savedASig = aSig;
3209 aSig >>= shiftCount;
3210 z = aSig;
3211 if ( aSign ) {
3212 z = - z;
3213 }
3214 if ( ( (int16_t)z < 0 ) ^ aSign ) {
3215 invalid:
3216 float_raise(float_flag_invalid, status);
3217 return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
3218 }
3219 if ( ( aSig<<shiftCount ) != savedASig ) {
3220 status->float_exception_flags |= float_flag_inexact;
3221 }
3222 return z;
3223}
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235int64_t float64_to_int64(float64 a, float_status *status)
3236{
3237 flag aSign;
3238 int aExp;
3239 int shiftCount;
3240 uint64_t aSig, aSigExtra;
3241 a = float64_squash_input_denormal(a, status);
3242
3243 aSig = extractFloat64Frac( a );
3244 aExp = extractFloat64Exp( a );
3245 aSign = extractFloat64Sign( a );
3246 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3247 shiftCount = 0x433 - aExp;
3248 if ( shiftCount <= 0 ) {
3249 if ( 0x43E < aExp ) {
3250 float_raise(float_flag_invalid, status);
3251 if ( ! aSign
3252 || ( ( aExp == 0x7FF )
3253 && ( aSig != LIT64( 0x0010000000000000 ) ) )
3254 ) {
3255 return LIT64( 0x7FFFFFFFFFFFFFFF );
3256 }
3257 return (int64_t) LIT64( 0x8000000000000000 );
3258 }
3259 aSigExtra = 0;
3260 aSig <<= - shiftCount;
3261 }
3262 else {
3263 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3264 }
3265 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
3266
3267}
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279int64_t float64_to_int64_round_to_zero(float64 a, float_status *status)
3280{
3281 flag aSign;
3282 int aExp;
3283 int shiftCount;
3284 uint64_t aSig;
3285 int64_t z;
3286 a = float64_squash_input_denormal(a, status);
3287
3288 aSig = extractFloat64Frac( a );
3289 aExp = extractFloat64Exp( a );
3290 aSign = extractFloat64Sign( a );
3291 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3292 shiftCount = aExp - 0x433;
3293 if ( 0 <= shiftCount ) {
3294 if ( 0x43E <= aExp ) {
3295 if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
3296 float_raise(float_flag_invalid, status);
3297 if ( ! aSign
3298 || ( ( aExp == 0x7FF )
3299 && ( aSig != LIT64( 0x0010000000000000 ) ) )
3300 ) {
3301 return LIT64( 0x7FFFFFFFFFFFFFFF );
3302 }
3303 }
3304 return (int64_t) LIT64( 0x8000000000000000 );
3305 }
3306 z = aSig<<shiftCount;
3307 }
3308 else {
3309 if ( aExp < 0x3FE ) {
3310 if (aExp | aSig) {
3311 status->float_exception_flags |= float_flag_inexact;
3312 }
3313 return 0;
3314 }
3315 z = aSig>>( - shiftCount );
3316 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
3317 status->float_exception_flags |= float_flag_inexact;
3318 }
3319 }
3320 if ( aSign ) z = - z;
3321 return z;
3322
3323}
3324
3325
3326
3327
3328
3329
3330
3331
3332float32 float64_to_float32(float64 a, float_status *status)
3333{
3334 flag aSign;
3335 int aExp;
3336 uint64_t aSig;
3337 uint32_t zSig;
3338 a = float64_squash_input_denormal(a, status);
3339
3340 aSig = extractFloat64Frac( a );
3341 aExp = extractFloat64Exp( a );
3342 aSign = extractFloat64Sign( a );
3343 if ( aExp == 0x7FF ) {
3344 if (aSig) {
3345 return commonNaNToFloat32(float64ToCommonNaN(a, status), status);
3346 }
3347 return packFloat32( aSign, 0xFF, 0 );
3348 }
3349 shift64RightJamming( aSig, 22, &aSig );
3350 zSig = aSig;
3351 if ( aExp || zSig ) {
3352 zSig |= 0x40000000;
3353 aExp -= 0x381;
3354 }
3355 return roundAndPackFloat32(aSign, aExp, zSig, status);
3356
3357}
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370static float16 packFloat16(flag zSign, int zExp, uint16_t zSig)
3371{
3372 return make_float16(
3373 (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
3374}
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404static float16 roundAndPackFloat16(flag zSign, int zExp,
3405 uint32_t zSig, flag ieee,
3406 float_status *status)
3407{
3408 int maxexp = ieee ? 29 : 30;
3409 uint32_t mask;
3410 uint32_t increment;
3411 bool rounding_bumps_exp;
3412 bool is_tiny = false;
3413
3414
3415
3416
3417 if (zExp < 1) {
3418
3419 mask = 0x00ffffff;
3420 if (zExp >= -11) {
3421 mask >>= 11 + zExp;
3422 }
3423 } else {
3424
3425 mask = 0x00001fff;
3426 }
3427
3428 switch (status->float_rounding_mode) {
3429 case float_round_nearest_even:
3430 increment = (mask + 1) >> 1;
3431 if ((zSig & mask) == increment) {
3432 increment = zSig & (increment << 1);
3433 }
3434 break;
3435 case float_round_ties_away:
3436 increment = (mask + 1) >> 1;
3437 break;
3438 case float_round_up:
3439 increment = zSign ? 0 : mask;
3440 break;
3441 case float_round_down:
3442 increment = zSign ? mask : 0;
3443 break;
3444 default:
3445 increment = 0;
3446 break;
3447 }
3448
3449 rounding_bumps_exp = (zSig + increment >= 0x01000000);
3450
3451 if (zExp > maxexp || (zExp == maxexp && rounding_bumps_exp)) {
3452 if (ieee) {
3453 float_raise(float_flag_overflow | float_flag_inexact, status);
3454 return packFloat16(zSign, 0x1f, 0);
3455 } else {
3456 float_raise(float_flag_invalid, status);
3457 return packFloat16(zSign, 0x1f, 0x3ff);
3458 }
3459 }
3460
3461 if (zExp < 0) {
3462
3463 is_tiny =
3464 (status->float_detect_tininess == float_tininess_before_rounding)
3465 || (zExp < -1)
3466 || (!rounding_bumps_exp);
3467 }
3468 if (zSig & mask) {
3469 float_raise(float_flag_inexact, status);
3470 if (is_tiny) {
3471 float_raise(float_flag_underflow, status);
3472 }
3473 }
3474
3475 zSig += increment;
3476 if (rounding_bumps_exp) {
3477 zSig >>= 1;
3478 zExp++;
3479 }
3480
3481 if (zExp < -10) {
3482 return packFloat16(zSign, 0, 0);
3483 }
3484 if (zExp < 0) {
3485 zSig >>= -zExp;
3486 zExp = 0;
3487 }
3488 return packFloat16(zSign, zExp, zSig >> 13);
3489}
3490
3491static void normalizeFloat16Subnormal(uint32_t aSig, int *zExpPtr,
3492 uint32_t *zSigPtr)
3493{
3494 int8_t shiftCount = countLeadingZeros32(aSig) - 21;
3495 *zSigPtr = aSig << shiftCount;
3496 *zExpPtr = 1 - shiftCount;
3497}
3498
3499
3500
3501
3502float32 float16_to_float32(float16 a, flag ieee, float_status *status)
3503{
3504 flag aSign;
3505 int aExp;
3506 uint32_t aSig;
3507
3508 aSign = extractFloat16Sign(a);
3509 aExp = extractFloat16Exp(a);
3510 aSig = extractFloat16Frac(a);
3511
3512 if (aExp == 0x1f && ieee) {
3513 if (aSig) {
3514 return commonNaNToFloat32(float16ToCommonNaN(a, status), status);
3515 }
3516 return packFloat32(aSign, 0xff, 0);
3517 }
3518 if (aExp == 0) {
3519 if (aSig == 0) {
3520 return packFloat32(aSign, 0, 0);
3521 }
3522
3523 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3524 aExp--;
3525 }
3526 return packFloat32( aSign, aExp + 0x70, aSig << 13);
3527}
3528
3529float16 float32_to_float16(float32 a, flag ieee, float_status *status)
3530{
3531 flag aSign;
3532 int aExp;
3533 uint32_t aSig;
3534
3535 a = float32_squash_input_denormal(a, status);
3536
3537 aSig = extractFloat32Frac( a );
3538 aExp = extractFloat32Exp( a );
3539 aSign = extractFloat32Sign( a );
3540 if ( aExp == 0xFF ) {
3541 if (aSig) {
3542
3543 if (!ieee) {
3544 float_raise(float_flag_invalid, status);
3545 return packFloat16(aSign, 0, 0);
3546 }
3547 return commonNaNToFloat16(
3548 float32ToCommonNaN(a, status), status);
3549 }
3550
3551 if (!ieee) {
3552 float_raise(float_flag_invalid, status);
3553 return packFloat16(aSign, 0x1f, 0x3ff);
3554 }
3555 return packFloat16(aSign, 0x1f, 0);
3556 }
3557 if (aExp == 0 && aSig == 0) {
3558 return packFloat16(aSign, 0, 0);
3559 }
3560
3561
3562
3563
3564
3565
3566
3567 aSig |= 0x00800000;
3568 aExp -= 0x71;
3569
3570 return roundAndPackFloat16(aSign, aExp, aSig, ieee, status);
3571}
3572
3573float64 float16_to_float64(float16 a, flag ieee, float_status *status)
3574{
3575 flag aSign;
3576 int aExp;
3577 uint32_t aSig;
3578
3579 aSign = extractFloat16Sign(a);
3580 aExp = extractFloat16Exp(a);
3581 aSig = extractFloat16Frac(a);
3582
3583 if (aExp == 0x1f && ieee) {
3584 if (aSig) {
3585 return commonNaNToFloat64(
3586 float16ToCommonNaN(a, status), status);
3587 }
3588 return packFloat64(aSign, 0x7ff, 0);
3589 }
3590 if (aExp == 0) {
3591 if (aSig == 0) {
3592 return packFloat64(aSign, 0, 0);
3593 }
3594
3595 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3596 aExp--;
3597 }
3598 return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42);
3599}
3600
3601float16 float64_to_float16(float64 a, flag ieee, float_status *status)
3602{
3603 flag aSign;
3604 int aExp;
3605 uint64_t aSig;
3606 uint32_t zSig;
3607
3608 a = float64_squash_input_denormal(a, status);
3609
3610 aSig = extractFloat64Frac(a);
3611 aExp = extractFloat64Exp(a);
3612 aSign = extractFloat64Sign(a);
3613 if (aExp == 0x7FF) {
3614 if (aSig) {
3615
3616 if (!ieee) {
3617 float_raise(float_flag_invalid, status);
3618 return packFloat16(aSign, 0, 0);
3619 }
3620 return commonNaNToFloat16(
3621 float64ToCommonNaN(a, status), status);
3622 }
3623
3624 if (!ieee) {
3625 float_raise(float_flag_invalid, status);
3626 return packFloat16(aSign, 0x1f, 0x3ff);
3627 }
3628 return packFloat16(aSign, 0x1f, 0);
3629 }
3630 shift64RightJamming(aSig, 29, &aSig);
3631 zSig = aSig;
3632 if (aExp == 0 && zSig == 0) {
3633 return packFloat16(aSign, 0, 0);
3634 }
3635
3636
3637
3638
3639
3640
3641
3642 zSig |= 0x00800000;
3643 aExp -= 0x3F1;
3644
3645 return roundAndPackFloat16(aSign, aExp, zSig, ieee, status);
3646}
3647
3648
3649
3650
3651
3652
3653
3654
3655floatx80 float64_to_floatx80(float64 a, float_status *status)
3656{
3657 flag aSign;
3658 int aExp;
3659 uint64_t aSig;
3660
3661 a = float64_squash_input_denormal(a, status);
3662 aSig = extractFloat64Frac( a );
3663 aExp = extractFloat64Exp( a );
3664 aSign = extractFloat64Sign( a );
3665 if ( aExp == 0x7FF ) {
3666 if (aSig) {
3667 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
3668 }
3669 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3670 }
3671 if ( aExp == 0 ) {
3672 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3673 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3674 }
3675 return
3676 packFloatx80(
3677 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
3678
3679}
3680
3681
3682
3683
3684
3685
3686
3687
3688float128 float64_to_float128(float64 a, float_status *status)
3689{
3690 flag aSign;
3691 int aExp;
3692 uint64_t aSig, zSig0, zSig1;
3693
3694 a = float64_squash_input_denormal(a, status);
3695 aSig = extractFloat64Frac( a );
3696 aExp = extractFloat64Exp( a );
3697 aSign = extractFloat64Sign( a );
3698 if ( aExp == 0x7FF ) {
3699 if (aSig) {
3700 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
3701 }
3702 return packFloat128( aSign, 0x7FFF, 0, 0 );
3703 }
3704 if ( aExp == 0 ) {
3705 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3706 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3707 --aExp;
3708 }
3709 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
3710 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
3711
3712}
3713
3714
3715
3716
3717
3718
3719
3720
3721float64 float64_round_to_int(float64 a, float_status *status)
3722{
3723 flag aSign;
3724 int aExp;
3725 uint64_t lastBitMask, roundBitsMask;
3726 uint64_t z;
3727 a = float64_squash_input_denormal(a, status);
3728
3729 aExp = extractFloat64Exp( a );
3730 if ( 0x433 <= aExp ) {
3731 if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
3732 return propagateFloat64NaN(a, a, status);
3733 }
3734 return a;
3735 }
3736 if ( aExp < 0x3FF ) {
3737 if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a;
3738 status->float_exception_flags |= float_flag_inexact;
3739 aSign = extractFloat64Sign( a );
3740 switch (status->float_rounding_mode) {
3741 case float_round_nearest_even:
3742 if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
3743 return packFloat64( aSign, 0x3FF, 0 );
3744 }
3745 break;
3746 case float_round_ties_away:
3747 if (aExp == 0x3FE) {
3748 return packFloat64(aSign, 0x3ff, 0);
3749 }
3750 break;
3751 case float_round_down:
3752 return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
3753 case float_round_up:
3754 return make_float64(
3755 aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
3756 }
3757 return packFloat64( aSign, 0, 0 );
3758 }
3759 lastBitMask = 1;
3760 lastBitMask <<= 0x433 - aExp;
3761 roundBitsMask = lastBitMask - 1;
3762 z = float64_val(a);
3763 switch (status->float_rounding_mode) {
3764 case float_round_nearest_even:
3765 z += lastBitMask >> 1;
3766 if ((z & roundBitsMask) == 0) {
3767 z &= ~lastBitMask;
3768 }
3769 break;
3770 case float_round_ties_away:
3771 z += lastBitMask >> 1;
3772 break;
3773 case float_round_to_zero:
3774 break;
3775 case float_round_up:
3776 if (!extractFloat64Sign(make_float64(z))) {
3777 z += roundBitsMask;
3778 }
3779 break;
3780 case float_round_down:
3781 if (extractFloat64Sign(make_float64(z))) {
3782 z += roundBitsMask;
3783 }
3784 break;
3785 default:
3786 abort();
3787 }
3788 z &= ~ roundBitsMask;
3789 if (z != float64_val(a)) {
3790 status->float_exception_flags |= float_flag_inexact;
3791 }
3792 return make_float64(z);
3793
3794}
3795
3796float64 float64_trunc_to_int(float64 a, float_status *status)
3797{
3798 int oldmode;
3799 float64 res;
3800 oldmode = status->float_rounding_mode;
3801 status->float_rounding_mode = float_round_to_zero;
3802 res = float64_round_to_int(a, status);
3803 status->float_rounding_mode = oldmode;
3804 return res;
3805}
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815static float64 addFloat64Sigs(float64 a, float64 b, flag zSign,
3816 float_status *status)
3817{
3818 int aExp, bExp, zExp;
3819 uint64_t aSig, bSig, zSig;
3820 int expDiff;
3821
3822 aSig = extractFloat64Frac( a );
3823 aExp = extractFloat64Exp( a );
3824 bSig = extractFloat64Frac( b );
3825 bExp = extractFloat64Exp( b );
3826 expDiff = aExp - bExp;
3827 aSig <<= 9;
3828 bSig <<= 9;
3829 if ( 0 < expDiff ) {
3830 if ( aExp == 0x7FF ) {
3831 if (aSig) {
3832 return propagateFloat64NaN(a, b, status);
3833 }
3834 return a;
3835 }
3836 if ( bExp == 0 ) {
3837 --expDiff;
3838 }
3839 else {
3840 bSig |= LIT64( 0x2000000000000000 );
3841 }
3842 shift64RightJamming( bSig, expDiff, &bSig );
3843 zExp = aExp;
3844 }
3845 else if ( expDiff < 0 ) {
3846 if ( bExp == 0x7FF ) {
3847 if (bSig) {
3848 return propagateFloat64NaN(a, b, status);
3849 }
3850 return packFloat64( zSign, 0x7FF, 0 );
3851 }
3852 if ( aExp == 0 ) {
3853 ++expDiff;
3854 }
3855 else {
3856 aSig |= LIT64( 0x2000000000000000 );
3857 }
3858 shift64RightJamming( aSig, - expDiff, &aSig );
3859 zExp = bExp;
3860 }
3861 else {
3862 if ( aExp == 0x7FF ) {
3863 if (aSig | bSig) {
3864 return propagateFloat64NaN(a, b, status);
3865 }
3866 return a;
3867 }
3868 if ( aExp == 0 ) {
3869 if (status->flush_to_zero) {
3870 if (aSig | bSig) {
3871 float_raise(float_flag_output_denormal, status);
3872 }
3873 return packFloat64(zSign, 0, 0);
3874 }
3875 return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
3876 }
3877 zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
3878 zExp = aExp;
3879 goto roundAndPack;
3880 }
3881 aSig |= LIT64( 0x2000000000000000 );
3882 zSig = ( aSig + bSig )<<1;
3883 --zExp;
3884 if ( (int64_t) zSig < 0 ) {
3885 zSig = aSig + bSig;
3886 ++zExp;
3887 }
3888 roundAndPack:
3889 return roundAndPackFloat64(zSign, zExp, zSig, status);
3890
3891}
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901static float64 subFloat64Sigs(float64 a, float64 b, flag zSign,
3902 float_status *status)
3903{
3904 int aExp, bExp, zExp;
3905 uint64_t aSig, bSig, zSig;
3906 int expDiff;
3907
3908 aSig = extractFloat64Frac( a );
3909 aExp = extractFloat64Exp( a );
3910 bSig = extractFloat64Frac( b );
3911 bExp = extractFloat64Exp( b );
3912 expDiff = aExp - bExp;
3913 aSig <<= 10;
3914 bSig <<= 10;
3915 if ( 0 < expDiff ) goto aExpBigger;
3916 if ( expDiff < 0 ) goto bExpBigger;
3917 if ( aExp == 0x7FF ) {
3918 if (aSig | bSig) {
3919 return propagateFloat64NaN(a, b, status);
3920 }
3921 float_raise(float_flag_invalid, status);
3922 return float64_default_nan(status);
3923 }
3924 if ( aExp == 0 ) {
3925 aExp = 1;
3926 bExp = 1;
3927 }
3928 if ( bSig < aSig ) goto aBigger;
3929 if ( aSig < bSig ) goto bBigger;
3930 return packFloat64(status->float_rounding_mode == float_round_down, 0, 0);
3931 bExpBigger:
3932 if ( bExp == 0x7FF ) {
3933 if (bSig) {
3934 return propagateFloat64NaN(a, b, status);
3935 }
3936 return packFloat64( zSign ^ 1, 0x7FF, 0 );
3937 }
3938 if ( aExp == 0 ) {
3939 ++expDiff;
3940 }
3941 else {
3942 aSig |= LIT64( 0x4000000000000000 );
3943 }
3944 shift64RightJamming( aSig, - expDiff, &aSig );
3945 bSig |= LIT64( 0x4000000000000000 );
3946 bBigger:
3947 zSig = bSig - aSig;
3948 zExp = bExp;
3949 zSign ^= 1;
3950 goto normalizeRoundAndPack;
3951 aExpBigger:
3952 if ( aExp == 0x7FF ) {
3953 if (aSig) {
3954 return propagateFloat64NaN(a, b, status);
3955 }
3956 return a;
3957 }
3958 if ( bExp == 0 ) {
3959 --expDiff;
3960 }
3961 else {
3962 bSig |= LIT64( 0x4000000000000000 );
3963 }
3964 shift64RightJamming( bSig, expDiff, &bSig );
3965 aSig |= LIT64( 0x4000000000000000 );
3966 aBigger:
3967 zSig = aSig - bSig;
3968 zExp = aExp;
3969 normalizeRoundAndPack:
3970 --zExp;
3971 return normalizeRoundAndPackFloat64(zSign, zExp, zSig, status);
3972
3973}
3974
3975
3976
3977
3978
3979
3980
3981float64 float64_add(float64 a, float64 b, float_status *status)
3982{
3983 flag aSign, bSign;
3984 a = float64_squash_input_denormal(a, status);
3985 b = float64_squash_input_denormal(b, status);
3986
3987 aSign = extractFloat64Sign( a );
3988 bSign = extractFloat64Sign( b );
3989 if ( aSign == bSign ) {
3990 return addFloat64Sigs(a, b, aSign, status);
3991 }
3992 else {
3993 return subFloat64Sigs(a, b, aSign, status);
3994 }
3995
3996}
3997
3998
3999
4000
4001
4002
4003
4004float64 float64_sub(float64 a, float64 b, float_status *status)
4005{
4006 flag aSign, bSign;
4007 a = float64_squash_input_denormal(a, status);
4008 b = float64_squash_input_denormal(b, status);
4009
4010 aSign = extractFloat64Sign( a );
4011 bSign = extractFloat64Sign( b );
4012 if ( aSign == bSign ) {
4013 return subFloat64Sigs(a, b, aSign, status);
4014 }
4015 else {
4016 return addFloat64Sigs(a, b, aSign, status);
4017 }
4018
4019}
4020
4021
4022
4023
4024
4025
4026
4027float64 float64_mul(float64 a, float64 b, float_status *status)
4028{
4029 flag aSign, bSign, zSign;
4030 int aExp, bExp, zExp;
4031 uint64_t aSig, bSig, zSig0, zSig1;
4032
4033 a = float64_squash_input_denormal(a, status);
4034 b = float64_squash_input_denormal(b, status);
4035
4036 aSig = extractFloat64Frac( a );
4037 aExp = extractFloat64Exp( a );
4038 aSign = extractFloat64Sign( a );
4039 bSig = extractFloat64Frac( b );
4040 bExp = extractFloat64Exp( b );
4041 bSign = extractFloat64Sign( b );
4042 zSign = aSign ^ bSign;
4043 if ( aExp == 0x7FF ) {
4044 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
4045 return propagateFloat64NaN(a, b, status);
4046 }
4047 if ( ( bExp | bSig ) == 0 ) {
4048 float_raise(float_flag_invalid, status);
4049 return float64_default_nan(status);
4050 }
4051 return packFloat64( zSign, 0x7FF, 0 );
4052 }
4053 if ( bExp == 0x7FF ) {
4054 if (bSig) {
4055 return propagateFloat64NaN(a, b, status);
4056 }
4057 if ( ( aExp | aSig ) == 0 ) {
4058 float_raise(float_flag_invalid, status);
4059 return float64_default_nan(status);
4060 }
4061 return packFloat64( zSign, 0x7FF, 0 );
4062 }
4063 if ( aExp == 0 ) {
4064 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
4065 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4066 }
4067 if ( bExp == 0 ) {
4068 if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
4069 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4070 }
4071 zExp = aExp + bExp - 0x3FF;
4072 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
4073 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4074 mul64To128( aSig, bSig, &zSig0, &zSig1 );
4075 zSig0 |= ( zSig1 != 0 );
4076 if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
4077 zSig0 <<= 1;
4078 --zExp;
4079 }
4080 return roundAndPackFloat64(zSign, zExp, zSig0, status);
4081
4082}
4083
4084
4085
4086
4087
4088
4089
4090float64 float64_div(float64 a, float64 b, float_status *status)
4091{
4092 flag aSign, bSign, zSign;
4093 int aExp, bExp, zExp;
4094 uint64_t aSig, bSig, zSig;
4095 uint64_t rem0, rem1;
4096 uint64_t term0, term1;
4097 a = float64_squash_input_denormal(a, status);
4098 b = float64_squash_input_denormal(b, status);
4099
4100 aSig = extractFloat64Frac( a );
4101 aExp = extractFloat64Exp( a );
4102 aSign = extractFloat64Sign( a );
4103 bSig = extractFloat64Frac( b );
4104 bExp = extractFloat64Exp( b );
4105 bSign = extractFloat64Sign( b );
4106 zSign = aSign ^ bSign;
4107 if ( aExp == 0x7FF ) {
4108 if (aSig) {
4109 return propagateFloat64NaN(a, b, status);
4110 }
4111 if ( bExp == 0x7FF ) {
4112 if (bSig) {
4113 return propagateFloat64NaN(a, b, status);
4114 }
4115 float_raise(float_flag_invalid, status);
4116 return float64_default_nan(status);
4117 }
4118 return packFloat64( zSign, 0x7FF, 0 );
4119 }
4120 if ( bExp == 0x7FF ) {
4121 if (bSig) {
4122 return propagateFloat64NaN(a, b, status);
4123 }
4124 return packFloat64( zSign, 0, 0 );
4125 }
4126 if ( bExp == 0 ) {
4127 if ( bSig == 0 ) {
4128 if ( ( aExp | aSig ) == 0 ) {
4129 float_raise(float_flag_invalid, status);
4130 return float64_default_nan(status);
4131 }
4132 float_raise(float_flag_divbyzero, status);
4133 return packFloat64( zSign, 0x7FF, 0 );
4134 }
4135 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4136 }
4137 if ( aExp == 0 ) {
4138 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
4139 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4140 }
4141 zExp = aExp - bExp + 0x3FD;
4142 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
4143 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4144 if ( bSig <= ( aSig + aSig ) ) {
4145 aSig >>= 1;
4146 ++zExp;
4147 }
4148 zSig = estimateDiv128To64( aSig, 0, bSig );
4149 if ( ( zSig & 0x1FF ) <= 2 ) {
4150 mul64To128( bSig, zSig, &term0, &term1 );
4151 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
4152 while ( (int64_t) rem0 < 0 ) {
4153 --zSig;
4154 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4155 }
4156 zSig |= ( rem1 != 0 );
4157 }
4158 return roundAndPackFloat64(zSign, zExp, zSig, status);
4159
4160}
4161
4162
4163
4164
4165
4166
4167
4168float64 float64_rem(float64 a, float64 b, float_status *status)
4169{
4170 flag aSign, zSign;
4171 int aExp, bExp, expDiff;
4172 uint64_t aSig, bSig;
4173 uint64_t q, alternateASig;
4174 int64_t sigMean;
4175
4176 a = float64_squash_input_denormal(a, status);
4177 b = float64_squash_input_denormal(b, status);
4178 aSig = extractFloat64Frac( a );
4179 aExp = extractFloat64Exp( a );
4180 aSign = extractFloat64Sign( a );
4181 bSig = extractFloat64Frac( b );
4182 bExp = extractFloat64Exp( b );
4183 if ( aExp == 0x7FF ) {
4184 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
4185 return propagateFloat64NaN(a, b, status);
4186 }
4187 float_raise(float_flag_invalid, status);
4188 return float64_default_nan(status);
4189 }
4190 if ( bExp == 0x7FF ) {
4191 if (bSig) {
4192 return propagateFloat64NaN(a, b, status);
4193 }
4194 return a;
4195 }
4196 if ( bExp == 0 ) {
4197 if ( bSig == 0 ) {
4198 float_raise(float_flag_invalid, status);
4199 return float64_default_nan(status);
4200 }
4201 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
4202 }
4203 if ( aExp == 0 ) {
4204 if ( aSig == 0 ) return a;
4205 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4206 }
4207 expDiff = aExp - bExp;
4208 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
4209 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
4210 if ( expDiff < 0 ) {
4211 if ( expDiff < -1 ) return a;
4212 aSig >>= 1;
4213 }
4214 q = ( bSig <= aSig );
4215 if ( q ) aSig -= bSig;
4216 expDiff -= 64;
4217 while ( 0 < expDiff ) {
4218 q = estimateDiv128To64( aSig, 0, bSig );
4219 q = ( 2 < q ) ? q - 2 : 0;
4220 aSig = - ( ( bSig>>2 ) * q );
4221 expDiff -= 62;
4222 }
4223 expDiff += 64;
4224 if ( 0 < expDiff ) {
4225 q = estimateDiv128To64( aSig, 0, bSig );
4226 q = ( 2 < q ) ? q - 2 : 0;
4227 q >>= 64 - expDiff;
4228 bSig >>= 2;
4229 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4230 }
4231 else {
4232 aSig >>= 2;
4233 bSig >>= 2;
4234 }
4235 do {
4236 alternateASig = aSig;
4237 ++q;
4238 aSig -= bSig;
4239 } while ( 0 <= (int64_t) aSig );
4240 sigMean = aSig + alternateASig;
4241 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4242 aSig = alternateASig;
4243 }
4244 zSign = ( (int64_t) aSig < 0 );
4245 if ( zSign ) aSig = - aSig;
4246 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
4247
4248}
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261float64 float64_muladd(float64 a, float64 b, float64 c, int flags,
4262 float_status *status)
4263{
4264 flag aSign, bSign, cSign, zSign;
4265 int aExp, bExp, cExp, pExp, zExp, expDiff;
4266 uint64_t aSig, bSig, cSig;
4267 flag pInf, pZero, pSign;
4268 uint64_t pSig0, pSig1, cSig0, cSig1, zSig0, zSig1;
4269 int shiftcount;
4270 flag signflip, infzero;
4271
4272 a = float64_squash_input_denormal(a, status);
4273 b = float64_squash_input_denormal(b, status);
4274 c = float64_squash_input_denormal(c, status);
4275 aSig = extractFloat64Frac(a);
4276 aExp = extractFloat64Exp(a);
4277 aSign = extractFloat64Sign(a);
4278 bSig = extractFloat64Frac(b);
4279 bExp = extractFloat64Exp(b);
4280 bSign = extractFloat64Sign(b);
4281 cSig = extractFloat64Frac(c);
4282 cExp = extractFloat64Exp(c);
4283 cSign = extractFloat64Sign(c);
4284
4285 infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) ||
4286 (aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0));
4287
4288
4289
4290
4291
4292
4293 if (((aExp == 0x7ff) && aSig) ||
4294 ((bExp == 0x7ff) && bSig) ||
4295 ((cExp == 0x7ff) && cSig)) {
4296 return propagateFloat64MulAddNaN(a, b, c, infzero, status);
4297 }
4298
4299 if (infzero) {
4300 float_raise(float_flag_invalid, status);
4301 return float64_default_nan(status);
4302 }
4303
4304 if (flags & float_muladd_negate_c) {
4305 cSign ^= 1;
4306 }
4307
4308 signflip = (flags & float_muladd_negate_result) ? 1 : 0;
4309
4310
4311 pSign = aSign ^ bSign;
4312 if (flags & float_muladd_negate_product) {
4313 pSign ^= 1;
4314 }
4315 pInf = (aExp == 0x7ff) || (bExp == 0x7ff);
4316 pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
4317
4318 if (cExp == 0x7ff) {
4319 if (pInf && (pSign ^ cSign)) {
4320
4321 float_raise(float_flag_invalid, status);
4322 return float64_default_nan(status);
4323 }
4324
4325 return packFloat64(cSign ^ signflip, 0x7ff, 0);
4326 }
4327
4328 if (pInf) {
4329 return packFloat64(pSign ^ signflip, 0x7ff, 0);
4330 }
4331
4332 if (pZero) {
4333 if (cExp == 0) {
4334 if (cSig == 0) {
4335
4336 if (pSign == cSign) {
4337 zSign = pSign;
4338 } else if (status->float_rounding_mode == float_round_down) {
4339 zSign = 1;
4340 } else {
4341 zSign = 0;
4342 }
4343 return packFloat64(zSign ^ signflip, 0, 0);
4344 }
4345
4346 if (status->flush_to_zero) {
4347 float_raise(float_flag_output_denormal, status);
4348 return packFloat64(cSign ^ signflip, 0, 0);
4349 }
4350 }
4351
4352 if (flags & float_muladd_halve_result) {
4353 if (cExp == 0) {
4354 normalizeFloat64Subnormal(cSig, &cExp, &cSig);
4355 }
4356
4357
4358
4359 cExp -= 2;
4360 cSig = (cSig | 0x0010000000000000ULL) << 10;
4361 return roundAndPackFloat64(cSign ^ signflip, cExp, cSig, status);
4362 }
4363 return packFloat64(cSign ^ signflip, cExp, cSig);
4364 }
4365
4366 if (aExp == 0) {
4367 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
4368 }
4369 if (bExp == 0) {
4370 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
4371 }
4372
4373
4374
4375
4376
4377
4378
4379
4380 pExp = aExp + bExp - 0x3fe;
4381 aSig = (aSig | LIT64(0x0010000000000000))<<10;
4382 bSig = (bSig | LIT64(0x0010000000000000))<<11;
4383 mul64To128(aSig, bSig, &pSig0, &pSig1);
4384 if ((int64_t)(pSig0 << 1) >= 0) {
4385 shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
4386 pExp--;
4387 }
4388
4389 zSign = pSign ^ signflip;
4390
4391
4392
4393
4394 if (cExp == 0) {
4395 if (!cSig) {
4396
4397 shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
4398 if (flags & float_muladd_halve_result) {
4399 pExp--;
4400 }
4401 return roundAndPackFloat64(zSign, pExp - 1,
4402 pSig1, status);
4403 }
4404 normalizeFloat64Subnormal(cSig, &cExp, &cSig);
4405 }
4406
4407
4408
4409
4410 cSig0 = cSig << (126 - 64 - 52);
4411 cSig1 = 0;
4412 cSig0 |= LIT64(0x4000000000000000);
4413 expDiff = pExp - cExp;
4414
4415 if (pSign == cSign) {
4416
4417 if (expDiff > 0) {
4418
4419 shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
4420 zExp = pExp;
4421 } else if (expDiff < 0) {
4422
4423 shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
4424 zExp = cExp;
4425 } else {
4426
4427 zExp = cExp;
4428 }
4429
4430 add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4431 if ((int64_t)zSig0 < 0) {
4432 shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
4433 } else {
4434 zExp--;
4435 }
4436 shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
4437 if (flags & float_muladd_halve_result) {
4438 zExp--;
4439 }
4440 return roundAndPackFloat64(zSign, zExp, zSig1, status);
4441 } else {
4442
4443 if (expDiff > 0) {
4444 shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
4445 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4446 zExp = pExp;
4447 } else if (expDiff < 0) {
4448 shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
4449 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
4450 zExp = cExp;
4451 zSign ^= 1;
4452 } else {
4453 zExp = pExp;
4454 if (lt128(cSig0, cSig1, pSig0, pSig1)) {
4455 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4456 } else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
4457 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
4458 zSign ^= 1;
4459 } else {
4460
4461 zSign = signflip;
4462 if (status->float_rounding_mode == float_round_down) {
4463 zSign ^= 1;
4464 }
4465 return packFloat64(zSign, 0, 0);
4466 }
4467 }
4468 --zExp;
4469
4470
4471
4472 if (zSig0) {
4473 shiftcount = countLeadingZeros64(zSig0) - 1;
4474 shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1);
4475 if (zSig1) {
4476 zSig0 |= 1;
4477 }
4478 zExp -= shiftcount;
4479 } else {
4480 shiftcount = countLeadingZeros64(zSig1);
4481 if (shiftcount == 0) {
4482 zSig0 = (zSig1 >> 1) | (zSig1 & 1);
4483 zExp -= 63;
4484 } else {
4485 shiftcount--;
4486 zSig0 = zSig1 << shiftcount;
4487 zExp -= (shiftcount + 64);
4488 }
4489 }
4490 if (flags & float_muladd_halve_result) {
4491 zExp--;
4492 }
4493 return roundAndPackFloat64(zSign, zExp, zSig0, status);
4494 }
4495}
4496
4497
4498
4499
4500
4501
4502
4503float64 float64_sqrt(float64 a, float_status *status)
4504{
4505 flag aSign;
4506 int aExp, zExp;
4507 uint64_t aSig, zSig, doubleZSig;
4508 uint64_t rem0, rem1, term0, term1;
4509 a = float64_squash_input_denormal(a, status);
4510
4511 aSig = extractFloat64Frac( a );
4512 aExp = extractFloat64Exp( a );
4513 aSign = extractFloat64Sign( a );
4514 if ( aExp == 0x7FF ) {
4515 if (aSig) {
4516 return propagateFloat64NaN(a, a, status);
4517 }
4518 if ( ! aSign ) return a;
4519 float_raise(float_flag_invalid, status);
4520 return float64_default_nan(status);
4521 }
4522 if ( aSign ) {
4523 if ( ( aExp | aSig ) == 0 ) return a;
4524 float_raise(float_flag_invalid, status);
4525 return float64_default_nan(status);
4526 }
4527 if ( aExp == 0 ) {
4528 if ( aSig == 0 ) return float64_zero;
4529 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4530 }
4531 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
4532 aSig |= LIT64( 0x0010000000000000 );
4533 zSig = estimateSqrt32( aExp, aSig>>21 );
4534 aSig <<= 9 - ( aExp & 1 );
4535 zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
4536 if ( ( zSig & 0x1FF ) <= 5 ) {
4537 doubleZSig = zSig<<1;
4538 mul64To128( zSig, zSig, &term0, &term1 );
4539 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
4540 while ( (int64_t) rem0 < 0 ) {
4541 --zSig;
4542 doubleZSig -= 2;
4543 add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
4544 }
4545 zSig |= ( ( rem0 | rem1 ) != 0 );
4546 }
4547 return roundAndPackFloat64(0, zExp, zSig, status);
4548
4549}
4550
4551
4552
4553
4554
4555
4556float64 float64_log2(float64 a, float_status *status)
4557{
4558 flag aSign, zSign;
4559 int aExp;
4560 uint64_t aSig, aSig0, aSig1, zSig, i;
4561 a = float64_squash_input_denormal(a, status);
4562
4563 aSig = extractFloat64Frac( a );
4564 aExp = extractFloat64Exp( a );
4565 aSign = extractFloat64Sign( a );
4566
4567 if ( aExp == 0 ) {
4568 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
4569 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4570 }
4571 if ( aSign ) {
4572 float_raise(float_flag_invalid, status);
4573 return float64_default_nan(status);
4574 }
4575 if ( aExp == 0x7FF ) {
4576 if (aSig) {
4577 return propagateFloat64NaN(a, float64_zero, status);
4578 }
4579 return a;
4580 }
4581
4582 aExp -= 0x3FF;
4583 aSig |= LIT64( 0x0010000000000000 );
4584 zSign = aExp < 0;
4585 zSig = (uint64_t)aExp << 52;
4586 for (i = 1LL << 51; i > 0; i >>= 1) {
4587 mul64To128( aSig, aSig, &aSig0, &aSig1 );
4588 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
4589 if ( aSig & LIT64( 0x0020000000000000 ) ) {
4590 aSig >>= 1;
4591 zSig |= i;
4592 }
4593 }
4594
4595 if ( zSign )
4596 zSig = -zSig;
4597 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
4598}
4599
4600
4601
4602
4603
4604
4605
4606
4607int float64_eq(float64 a, float64 b, float_status *status)
4608{
4609 uint64_t av, bv;
4610 a = float64_squash_input_denormal(a, status);
4611 b = float64_squash_input_denormal(b, status);
4612
4613 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4614 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4615 ) {
4616 float_raise(float_flag_invalid, status);
4617 return 0;
4618 }
4619 av = float64_val(a);
4620 bv = float64_val(b);
4621 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4622
4623}
4624
4625
4626
4627
4628
4629
4630
4631
4632int float64_le(float64 a, float64 b, float_status *status)
4633{
4634 flag aSign, bSign;
4635 uint64_t av, bv;
4636 a = float64_squash_input_denormal(a, status);
4637 b = float64_squash_input_denormal(b, status);
4638
4639 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4640 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4641 ) {
4642 float_raise(float_flag_invalid, status);
4643 return 0;
4644 }
4645 aSign = extractFloat64Sign( a );
4646 bSign = extractFloat64Sign( b );
4647 av = float64_val(a);
4648 bv = float64_val(b);
4649 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4650 return ( av == bv ) || ( aSign ^ ( av < bv ) );
4651
4652}
4653
4654
4655
4656
4657
4658
4659
4660
4661int float64_lt(float64 a, float64 b, float_status *status)
4662{
4663 flag aSign, bSign;
4664 uint64_t av, bv;
4665
4666 a = float64_squash_input_denormal(a, status);
4667 b = float64_squash_input_denormal(b, status);
4668 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4669 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4670 ) {
4671 float_raise(float_flag_invalid, status);
4672 return 0;
4673 }
4674 aSign = extractFloat64Sign( a );
4675 bSign = extractFloat64Sign( b );
4676 av = float64_val(a);
4677 bv = float64_val(b);
4678 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
4679 return ( av != bv ) && ( aSign ^ ( av < bv ) );
4680
4681}
4682
4683
4684
4685
4686
4687
4688
4689
4690int float64_unordered(float64 a, float64 b, float_status *status)
4691{
4692 a = float64_squash_input_denormal(a, status);
4693 b = float64_squash_input_denormal(b, status);
4694
4695 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4696 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4697 ) {
4698 float_raise(float_flag_invalid, status);
4699 return 1;
4700 }
4701 return 0;
4702}
4703
4704
4705
4706
4707
4708
4709
4710
4711int float64_eq_quiet(float64 a, float64 b, float_status *status)
4712{
4713 uint64_t av, bv;
4714 a = float64_squash_input_denormal(a, status);
4715 b = float64_squash_input_denormal(b, status);
4716
4717 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4718 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4719 ) {
4720 if (float64_is_signaling_nan(a, status)
4721 || float64_is_signaling_nan(b, status)) {
4722 float_raise(float_flag_invalid, status);
4723 }
4724 return 0;
4725 }
4726 av = float64_val(a);
4727 bv = float64_val(b);
4728 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4729
4730}
4731
4732
4733
4734
4735
4736
4737
4738
4739int float64_le_quiet(float64 a, float64 b, float_status *status)
4740{
4741 flag aSign, bSign;
4742 uint64_t av, bv;
4743 a = float64_squash_input_denormal(a, status);
4744 b = float64_squash_input_denormal(b, status);
4745
4746 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4747 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4748 ) {
4749 if (float64_is_signaling_nan(a, status)
4750 || float64_is_signaling_nan(b, status)) {
4751 float_raise(float_flag_invalid, status);
4752 }
4753 return 0;
4754 }
4755 aSign = extractFloat64Sign( a );
4756 bSign = extractFloat64Sign( b );
4757 av = float64_val(a);
4758 bv = float64_val(b);
4759 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
4760 return ( av == bv ) || ( aSign ^ ( av < bv ) );
4761
4762}
4763
4764
4765
4766
4767
4768
4769
4770
4771int float64_lt_quiet(float64 a, float64 b, float_status *status)
4772{
4773 flag aSign, bSign;
4774 uint64_t av, bv;
4775 a = float64_squash_input_denormal(a, status);
4776 b = float64_squash_input_denormal(b, status);
4777
4778 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4779 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4780 ) {
4781 if (float64_is_signaling_nan(a, status)
4782 || float64_is_signaling_nan(b, status)) {
4783 float_raise(float_flag_invalid, status);
4784 }
4785 return 0;
4786 }
4787 aSign = extractFloat64Sign( a );
4788 bSign = extractFloat64Sign( b );
4789 av = float64_val(a);
4790 bv = float64_val(b);
4791 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
4792 return ( av != bv ) && ( aSign ^ ( av < bv ) );
4793
4794}
4795
4796
4797
4798
4799
4800
4801
4802
4803int float64_unordered_quiet(float64 a, float64 b, float_status *status)
4804{
4805 a = float64_squash_input_denormal(a, status);
4806 b = float64_squash_input_denormal(b, status);
4807
4808 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4809 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4810 ) {
4811 if (float64_is_signaling_nan(a, status)
4812 || float64_is_signaling_nan(b, status)) {
4813 float_raise(float_flag_invalid, status);
4814 }
4815 return 1;
4816 }
4817 return 0;
4818}
4819
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830int32_t floatx80_to_int32(floatx80 a, float_status *status)
4831{
4832 flag aSign;
4833 int32_t aExp, shiftCount;
4834 uint64_t aSig;
4835
4836 if (floatx80_invalid_encoding(a)) {
4837 float_raise(float_flag_invalid, status);
4838 return 1 << 31;
4839 }
4840 aSig = extractFloatx80Frac( a );
4841 aExp = extractFloatx80Exp( a );
4842 aSign = extractFloatx80Sign( a );
4843 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
4844 shiftCount = 0x4037 - aExp;
4845 if ( shiftCount <= 0 ) shiftCount = 1;
4846 shift64RightJamming( aSig, shiftCount, &aSig );
4847 return roundAndPackInt32(aSign, aSig, status);
4848
4849}
4850
4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
4862{
4863 flag aSign;
4864 int32_t aExp, shiftCount;
4865 uint64_t aSig, savedASig;
4866 int32_t z;
4867
4868 if (floatx80_invalid_encoding(a)) {
4869 float_raise(float_flag_invalid, status);
4870 return 1 << 31;
4871 }
4872 aSig = extractFloatx80Frac( a );
4873 aExp = extractFloatx80Exp( a );
4874 aSign = extractFloatx80Sign( a );
4875 if ( 0x401E < aExp ) {
4876 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
4877 goto invalid;
4878 }
4879 else if ( aExp < 0x3FFF ) {
4880 if (aExp || aSig) {
4881 status->float_exception_flags |= float_flag_inexact;
4882 }
4883 return 0;
4884 }
4885 shiftCount = 0x403E - aExp;
4886 savedASig = aSig;
4887 aSig >>= shiftCount;
4888 z = aSig;
4889 if ( aSign ) z = - z;
4890 if ( ( z < 0 ) ^ aSign ) {
4891 invalid:
4892 float_raise(float_flag_invalid, status);
4893 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
4894 }
4895 if ( ( aSig<<shiftCount ) != savedASig ) {
4896 status->float_exception_flags |= float_flag_inexact;
4897 }
4898 return z;
4899
4900}
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912int64_t floatx80_to_int64(floatx80 a, float_status *status)
4913{
4914 flag aSign;
4915 int32_t aExp, shiftCount;
4916 uint64_t aSig, aSigExtra;
4917
4918 if (floatx80_invalid_encoding(a)) {
4919 float_raise(float_flag_invalid, status);
4920 return 1ULL << 63;
4921 }
4922 aSig = extractFloatx80Frac( a );
4923 aExp = extractFloatx80Exp( a );
4924 aSign = extractFloatx80Sign( a );
4925 shiftCount = 0x403E - aExp;
4926 if ( shiftCount <= 0 ) {
4927 if ( shiftCount ) {
4928 float_raise(float_flag_invalid, status);
4929 if ( ! aSign
4930 || ( ( aExp == 0x7FFF )
4931 && ( aSig != LIT64( 0x8000000000000000 ) ) )
4932 ) {
4933 return LIT64( 0x7FFFFFFFFFFFFFFF );
4934 }
4935 return (int64_t) LIT64( 0x8000000000000000 );
4936 }
4937 aSigExtra = 0;
4938 }
4939 else {
4940 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4941 }
4942 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
4943
4944}
4945
4946
4947
4948
4949
4950
4951
4952
4953
4954
4955
4956int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
4957{
4958 flag aSign;
4959 int32_t aExp, shiftCount;
4960 uint64_t aSig;
4961 int64_t z;
4962
4963 if (floatx80_invalid_encoding(a)) {
4964 float_raise(float_flag_invalid, status);
4965 return 1ULL << 63;
4966 }
4967 aSig = extractFloatx80Frac( a );
4968 aExp = extractFloatx80Exp( a );
4969 aSign = extractFloatx80Sign( a );
4970 shiftCount = aExp - 0x403E;
4971 if ( 0 <= shiftCount ) {
4972 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4973 if ( ( a.high != 0xC03E ) || aSig ) {
4974 float_raise(float_flag_invalid, status);
4975 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4976 return LIT64( 0x7FFFFFFFFFFFFFFF );
4977 }
4978 }
4979 return (int64_t) LIT64( 0x8000000000000000 );
4980 }
4981 else if ( aExp < 0x3FFF ) {
4982 if (aExp | aSig) {
4983 status->float_exception_flags |= float_flag_inexact;
4984 }
4985 return 0;
4986 }
4987 z = aSig>>( - shiftCount );
4988 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
4989 status->float_exception_flags |= float_flag_inexact;
4990 }
4991 if ( aSign ) z = - z;
4992 return z;
4993
4994}
4995
4996
4997
4998
4999
5000
5001
5002
5003float32 floatx80_to_float32(floatx80 a, float_status *status)
5004{
5005 flag aSign;
5006 int32_t aExp;
5007 uint64_t aSig;
5008
5009 if (floatx80_invalid_encoding(a)) {
5010 float_raise(float_flag_invalid, status);
5011 return float32_default_nan(status);
5012 }
5013 aSig = extractFloatx80Frac( a );
5014 aExp = extractFloatx80Exp( a );
5015 aSign = extractFloatx80Sign( a );
5016 if ( aExp == 0x7FFF ) {
5017 if ( (uint64_t) ( aSig<<1 ) ) {
5018 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
5019 }
5020 return packFloat32( aSign, 0xFF, 0 );
5021 }
5022 shift64RightJamming( aSig, 33, &aSig );
5023 if ( aExp || aSig ) aExp -= 0x3F81;
5024 return roundAndPackFloat32(aSign, aExp, aSig, status);
5025
5026}
5027
5028
5029
5030
5031
5032
5033
5034
5035float64 floatx80_to_float64(floatx80 a, float_status *status)
5036{
5037 flag aSign;
5038 int32_t aExp;
5039 uint64_t aSig, zSig;
5040
5041 if (floatx80_invalid_encoding(a)) {
5042 float_raise(float_flag_invalid, status);
5043 return float64_default_nan(status);
5044 }
5045 aSig = extractFloatx80Frac( a );
5046 aExp = extractFloatx80Exp( a );
5047 aSign = extractFloatx80Sign( a );
5048 if ( aExp == 0x7FFF ) {
5049 if ( (uint64_t) ( aSig<<1 ) ) {
5050 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
5051 }
5052 return packFloat64( aSign, 0x7FF, 0 );
5053 }
5054 shift64RightJamming( aSig, 1, &zSig );
5055 if ( aExp || aSig ) aExp -= 0x3C01;
5056 return roundAndPackFloat64(aSign, aExp, zSig, status);
5057
5058}
5059
5060
5061
5062
5063
5064
5065
5066
5067float128 floatx80_to_float128(floatx80 a, float_status *status)
5068{
5069 flag aSign;
5070 int aExp;
5071 uint64_t aSig, zSig0, zSig1;
5072
5073 if (floatx80_invalid_encoding(a)) {
5074 float_raise(float_flag_invalid, status);
5075 return float128_default_nan(status);
5076 }
5077 aSig = extractFloatx80Frac( a );
5078 aExp = extractFloatx80Exp( a );
5079 aSign = extractFloatx80Sign( a );
5080 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5081 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
5082 }
5083 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5084 return packFloat128( aSign, aExp, zSig0, zSig1 );
5085
5086}
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096floatx80 floatx80_round(floatx80 a, float_status *status)
5097{
5098 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5099 extractFloatx80Sign(a),
5100 extractFloatx80Exp(a),
5101 extractFloatx80Frac(a), 0, status);
5102}
5103
5104
5105
5106
5107
5108
5109
5110
5111floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5112{
5113 flag aSign;
5114 int32_t aExp;
5115 uint64_t lastBitMask, roundBitsMask;
5116 floatx80 z;
5117
5118 if (floatx80_invalid_encoding(a)) {
5119 float_raise(float_flag_invalid, status);
5120 return floatx80_default_nan(status);
5121 }
5122 aExp = extractFloatx80Exp( a );
5123 if ( 0x403E <= aExp ) {
5124 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5125 return propagateFloatx80NaN(a, a, status);
5126 }
5127 return a;
5128 }
5129 if ( aExp < 0x3FFF ) {
5130 if ( ( aExp == 0 )
5131 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
5132 return a;
5133 }
5134 status->float_exception_flags |= float_flag_inexact;
5135 aSign = extractFloatx80Sign( a );
5136 switch (status->float_rounding_mode) {
5137 case float_round_nearest_even:
5138 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5139 ) {
5140 return
5141 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
5142 }
5143 break;
5144 case float_round_ties_away:
5145 if (aExp == 0x3FFE) {
5146 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
5147 }
5148 break;
5149 case float_round_down:
5150 return
5151 aSign ?
5152 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
5153 : packFloatx80( 0, 0, 0 );
5154 case float_round_up:
5155 return
5156 aSign ? packFloatx80( 1, 0, 0 )
5157 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
5158 }
5159 return packFloatx80( aSign, 0, 0 );
5160 }
5161 lastBitMask = 1;
5162 lastBitMask <<= 0x403E - aExp;
5163 roundBitsMask = lastBitMask - 1;
5164 z = a;
5165 switch (status->float_rounding_mode) {
5166 case float_round_nearest_even:
5167 z.low += lastBitMask>>1;
5168 if ((z.low & roundBitsMask) == 0) {
5169 z.low &= ~lastBitMask;
5170 }
5171 break;
5172 case float_round_ties_away:
5173 z.low += lastBitMask >> 1;
5174 break;
5175 case float_round_to_zero:
5176 break;
5177 case float_round_up:
5178 if (!extractFloatx80Sign(z)) {
5179 z.low += roundBitsMask;
5180 }
5181 break;
5182 case float_round_down:
5183 if (extractFloatx80Sign(z)) {
5184 z.low += roundBitsMask;
5185 }
5186 break;
5187 default:
5188 abort();
5189 }
5190 z.low &= ~ roundBitsMask;
5191 if ( z.low == 0 ) {
5192 ++z.high;
5193 z.low = LIT64( 0x8000000000000000 );
5194 }
5195 if (z.low != a.low) {
5196 status->float_exception_flags |= float_flag_inexact;
5197 }
5198 return z;
5199
5200}
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5211 float_status *status)
5212{
5213 int32_t aExp, bExp, zExp;
5214 uint64_t aSig, bSig, zSig0, zSig1;
5215 int32_t expDiff;
5216
5217 aSig = extractFloatx80Frac( a );
5218 aExp = extractFloatx80Exp( a );
5219 bSig = extractFloatx80Frac( b );
5220 bExp = extractFloatx80Exp( b );
5221 expDiff = aExp - bExp;
5222 if ( 0 < expDiff ) {
5223 if ( aExp == 0x7FFF ) {
5224 if ((uint64_t)(aSig << 1)) {
5225 return propagateFloatx80NaN(a, b, status);
5226 }
5227 return a;
5228 }
5229 if ( bExp == 0 ) --expDiff;
5230 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5231 zExp = aExp;
5232 }
5233 else if ( expDiff < 0 ) {
5234 if ( bExp == 0x7FFF ) {
5235 if ((uint64_t)(bSig << 1)) {
5236 return propagateFloatx80NaN(a, b, status);
5237 }
5238 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5239 }
5240 if ( aExp == 0 ) ++expDiff;
5241 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5242 zExp = bExp;
5243 }
5244 else {
5245 if ( aExp == 0x7FFF ) {
5246 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
5247 return propagateFloatx80NaN(a, b, status);
5248 }
5249 return a;
5250 }
5251 zSig1 = 0;
5252 zSig0 = aSig + bSig;
5253 if ( aExp == 0 ) {
5254 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5255 goto roundAndPack;
5256 }
5257 zExp = aExp;
5258 goto shiftRight1;
5259 }
5260 zSig0 = aSig + bSig;
5261 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
5262 shiftRight1:
5263 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
5264 zSig0 |= LIT64( 0x8000000000000000 );
5265 ++zExp;
5266 roundAndPack:
5267 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5268 zSign, zExp, zSig0, zSig1, status);
5269}
5270
5271
5272
5273
5274
5275
5276
5277
5278
5279static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5280 float_status *status)
5281{
5282 int32_t aExp, bExp, zExp;
5283 uint64_t aSig, bSig, zSig0, zSig1;
5284 int32_t expDiff;
5285
5286 aSig = extractFloatx80Frac( a );
5287 aExp = extractFloatx80Exp( a );
5288 bSig = extractFloatx80Frac( b );
5289 bExp = extractFloatx80Exp( b );
5290 expDiff = aExp - bExp;
5291 if ( 0 < expDiff ) goto aExpBigger;
5292 if ( expDiff < 0 ) goto bExpBigger;
5293 if ( aExp == 0x7FFF ) {
5294 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
5295 return propagateFloatx80NaN(a, b, status);
5296 }
5297 float_raise(float_flag_invalid, status);
5298 return floatx80_default_nan(status);
5299 }
5300 if ( aExp == 0 ) {
5301 aExp = 1;
5302 bExp = 1;
5303 }
5304 zSig1 = 0;
5305 if ( bSig < aSig ) goto aBigger;
5306 if ( aSig < bSig ) goto bBigger;
5307 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
5308 bExpBigger:
5309 if ( bExp == 0x7FFF ) {
5310 if ((uint64_t)(bSig << 1)) {
5311 return propagateFloatx80NaN(a, b, status);
5312 }
5313 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
5314 }
5315 if ( aExp == 0 ) ++expDiff;
5316 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5317 bBigger:
5318 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5319 zExp = bExp;
5320 zSign ^= 1;
5321 goto normalizeRoundAndPack;
5322 aExpBigger:
5323 if ( aExp == 0x7FFF ) {
5324 if ((uint64_t)(aSig << 1)) {
5325 return propagateFloatx80NaN(a, b, status);
5326 }
5327 return a;
5328 }
5329 if ( bExp == 0 ) --expDiff;
5330 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5331 aBigger:
5332 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5333 zExp = aExp;
5334 normalizeRoundAndPack:
5335 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
5336 zSign, zExp, zSig0, zSig1, status);
5337}
5338
5339
5340
5341
5342
5343
5344
5345floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
5346{
5347 flag aSign, bSign;
5348
5349 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5350 float_raise(float_flag_invalid, status);
5351 return floatx80_default_nan(status);
5352 }
5353 aSign = extractFloatx80Sign( a );
5354 bSign = extractFloatx80Sign( b );
5355 if ( aSign == bSign ) {
5356 return addFloatx80Sigs(a, b, aSign, status);
5357 }
5358 else {
5359 return subFloatx80Sigs(a, b, aSign, status);
5360 }
5361
5362}
5363
5364
5365
5366
5367
5368
5369
5370floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
5371{
5372 flag aSign, bSign;
5373
5374 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5375 float_raise(float_flag_invalid, status);
5376 return floatx80_default_nan(status);
5377 }
5378 aSign = extractFloatx80Sign( a );
5379 bSign = extractFloatx80Sign( b );
5380 if ( aSign == bSign ) {
5381 return subFloatx80Sigs(a, b, aSign, status);
5382 }
5383 else {
5384 return addFloatx80Sigs(a, b, aSign, status);
5385 }
5386
5387}
5388
5389
5390
5391
5392
5393
5394
5395floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
5396{
5397 flag aSign, bSign, zSign;
5398 int32_t aExp, bExp, zExp;
5399 uint64_t aSig, bSig, zSig0, zSig1;
5400
5401 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5402 float_raise(float_flag_invalid, status);
5403 return floatx80_default_nan(status);
5404 }
5405 aSig = extractFloatx80Frac( a );
5406 aExp = extractFloatx80Exp( a );
5407 aSign = extractFloatx80Sign( a );
5408 bSig = extractFloatx80Frac( b );
5409 bExp = extractFloatx80Exp( b );
5410 bSign = extractFloatx80Sign( b );
5411 zSign = aSign ^ bSign;
5412 if ( aExp == 0x7FFF ) {
5413 if ( (uint64_t) ( aSig<<1 )
5414 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
5415 return propagateFloatx80NaN(a, b, status);
5416 }
5417 if ( ( bExp | bSig ) == 0 ) goto invalid;
5418 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5419 }
5420 if ( bExp == 0x7FFF ) {
5421 if ((uint64_t)(bSig << 1)) {
5422 return propagateFloatx80NaN(a, b, status);
5423 }
5424 if ( ( aExp | aSig ) == 0 ) {
5425 invalid:
5426 float_raise(float_flag_invalid, status);
5427 return floatx80_default_nan(status);
5428 }
5429 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5430 }
5431 if ( aExp == 0 ) {
5432 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5433 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5434 }
5435 if ( bExp == 0 ) {
5436 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5437 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5438 }
5439 zExp = aExp + bExp - 0x3FFE;
5440 mul64To128( aSig, bSig, &zSig0, &zSig1 );
5441 if ( 0 < (int64_t) zSig0 ) {
5442 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5443 --zExp;
5444 }
5445 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5446 zSign, zExp, zSig0, zSig1, status);
5447}
5448
5449
5450
5451
5452
5453
5454
5455floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
5456{
5457 flag aSign, bSign, zSign;
5458 int32_t aExp, bExp, zExp;
5459 uint64_t aSig, bSig, zSig0, zSig1;
5460 uint64_t rem0, rem1, rem2, term0, term1, term2;
5461
5462 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5463 float_raise(float_flag_invalid, status);
5464 return floatx80_default_nan(status);
5465 }
5466 aSig = extractFloatx80Frac( a );
5467 aExp = extractFloatx80Exp( a );
5468 aSign = extractFloatx80Sign( a );
5469 bSig = extractFloatx80Frac( b );
5470 bExp = extractFloatx80Exp( b );
5471 bSign = extractFloatx80Sign( b );
5472 zSign = aSign ^ bSign;
5473 if ( aExp == 0x7FFF ) {
5474 if ((uint64_t)(aSig << 1)) {
5475 return propagateFloatx80NaN(a, b, status);
5476 }
5477 if ( bExp == 0x7FFF ) {
5478 if ((uint64_t)(bSig << 1)) {
5479 return propagateFloatx80NaN(a, b, status);
5480 }
5481 goto invalid;
5482 }
5483 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5484 }
5485 if ( bExp == 0x7FFF ) {
5486 if ((uint64_t)(bSig << 1)) {
5487 return propagateFloatx80NaN(a, b, status);
5488 }
5489 return packFloatx80( zSign, 0, 0 );
5490 }
5491 if ( bExp == 0 ) {
5492 if ( bSig == 0 ) {
5493 if ( ( aExp | aSig ) == 0 ) {
5494 invalid:
5495 float_raise(float_flag_invalid, status);
5496 return floatx80_default_nan(status);
5497 }
5498 float_raise(float_flag_divbyzero, status);
5499 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5500 }
5501 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5502 }
5503 if ( aExp == 0 ) {
5504 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5505 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5506 }
5507 zExp = aExp - bExp + 0x3FFE;
5508 rem1 = 0;
5509 if ( bSig <= aSig ) {
5510 shift128Right( aSig, 0, 1, &aSig, &rem1 );
5511 ++zExp;
5512 }
5513 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
5514 mul64To128( bSig, zSig0, &term0, &term1 );
5515 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
5516 while ( (int64_t) rem0 < 0 ) {
5517 --zSig0;
5518 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
5519 }
5520 zSig1 = estimateDiv128To64( rem1, 0, bSig );
5521 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
5522 mul64To128( bSig, zSig1, &term1, &term2 );
5523 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5524 while ( (int64_t) rem1 < 0 ) {
5525 --zSig1;
5526 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
5527 }
5528 zSig1 |= ( ( rem1 | rem2 ) != 0 );
5529 }
5530 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5531 zSign, zExp, zSig0, zSig1, status);
5532}
5533
5534
5535
5536
5537
5538
5539
5540floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
5541{
5542 flag aSign, zSign;
5543 int32_t aExp, bExp, expDiff;
5544 uint64_t aSig0, aSig1, bSig;
5545 uint64_t q, term0, term1, alternateASig0, alternateASig1;
5546
5547 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5548 float_raise(float_flag_invalid, status);
5549 return floatx80_default_nan(status);
5550 }
5551 aSig0 = extractFloatx80Frac( a );
5552 aExp = extractFloatx80Exp( a );
5553 aSign = extractFloatx80Sign( a );
5554 bSig = extractFloatx80Frac( b );
5555 bExp = extractFloatx80Exp( b );
5556 if ( aExp == 0x7FFF ) {
5557 if ( (uint64_t) ( aSig0<<1 )
5558 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
5559 return propagateFloatx80NaN(a, b, status);
5560 }
5561 goto invalid;
5562 }
5563 if ( bExp == 0x7FFF ) {
5564 if ((uint64_t)(bSig << 1)) {
5565 return propagateFloatx80NaN(a, b, status);
5566 }
5567 return a;
5568 }
5569 if ( bExp == 0 ) {
5570 if ( bSig == 0 ) {
5571 invalid:
5572 float_raise(float_flag_invalid, status);
5573 return floatx80_default_nan(status);
5574 }
5575 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5576 }
5577 if ( aExp == 0 ) {
5578 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
5579 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5580 }
5581 bSig |= LIT64( 0x8000000000000000 );
5582 zSign = aSign;
5583 expDiff = aExp - bExp;
5584 aSig1 = 0;
5585 if ( expDiff < 0 ) {
5586 if ( expDiff < -1 ) return a;
5587 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5588 expDiff = 0;
5589 }
5590 q = ( bSig <= aSig0 );
5591 if ( q ) aSig0 -= bSig;
5592 expDiff -= 64;
5593 while ( 0 < expDiff ) {
5594 q = estimateDiv128To64( aSig0, aSig1, bSig );
5595 q = ( 2 < q ) ? q - 2 : 0;
5596 mul64To128( bSig, q, &term0, &term1 );
5597 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5598 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5599 expDiff -= 62;
5600 }
5601 expDiff += 64;
5602 if ( 0 < expDiff ) {
5603 q = estimateDiv128To64( aSig0, aSig1, bSig );
5604 q = ( 2 < q ) ? q - 2 : 0;
5605 q >>= 64 - expDiff;
5606 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5607 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5608 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5609 while ( le128( term0, term1, aSig0, aSig1 ) ) {
5610 ++q;
5611 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5612 }
5613 }
5614 else {
5615 term1 = 0;
5616 term0 = bSig;
5617 }
5618 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
5619 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
5620 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
5621 && ( q & 1 ) )
5622 ) {
5623 aSig0 = alternateASig0;
5624 aSig1 = alternateASig1;
5625 zSign = ! zSign;
5626 }
5627 return
5628 normalizeRoundAndPackFloatx80(
5629 80, zSign, bExp + expDiff, aSig0, aSig1, status);
5630
5631}
5632
5633
5634
5635
5636
5637
5638
5639floatx80 floatx80_sqrt(floatx80 a, float_status *status)
5640{
5641 flag aSign;
5642 int32_t aExp, zExp;
5643 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5644 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5645
5646 if (floatx80_invalid_encoding(a)) {
5647 float_raise(float_flag_invalid, status);
5648 return floatx80_default_nan(status);
5649 }
5650 aSig0 = extractFloatx80Frac( a );
5651 aExp = extractFloatx80Exp( a );
5652 aSign = extractFloatx80Sign( a );
5653 if ( aExp == 0x7FFF ) {
5654 if ((uint64_t)(aSig0 << 1)) {
5655 return propagateFloatx80NaN(a, a, status);
5656 }
5657 if ( ! aSign ) return a;
5658 goto invalid;
5659 }
5660 if ( aSign ) {
5661 if ( ( aExp | aSig0 ) == 0 ) return a;
5662 invalid:
5663 float_raise(float_flag_invalid, status);
5664 return floatx80_default_nan(status);
5665 }
5666 if ( aExp == 0 ) {
5667 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5668 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5669 }
5670 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5671 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5672 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5673 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5674 doubleZSig0 = zSig0<<1;
5675 mul64To128( zSig0, zSig0, &term0, &term1 );
5676 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5677 while ( (int64_t) rem0 < 0 ) {
5678 --zSig0;
5679 doubleZSig0 -= 2;
5680 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5681 }
5682 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5683 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5684 if ( zSig1 == 0 ) zSig1 = 1;
5685 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5686 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5687 mul64To128( zSig1, zSig1, &term2, &term3 );
5688 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5689 while ( (int64_t) rem1 < 0 ) {
5690 --zSig1;
5691 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5692 term3 |= 1;
5693 term2 |= doubleZSig0;
5694 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5695 }
5696 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5697 }
5698 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5699 zSig0 |= doubleZSig0;
5700 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5701 0, zExp, zSig0, zSig1, status);
5702}
5703
5704
5705
5706
5707
5708
5709
5710
5711int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
5712{
5713
5714 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5715 || (extractFloatx80Exp(a) == 0x7FFF
5716 && (uint64_t) (extractFloatx80Frac(a) << 1))
5717 || (extractFloatx80Exp(b) == 0x7FFF
5718 && (uint64_t) (extractFloatx80Frac(b) << 1))
5719 ) {
5720 float_raise(float_flag_invalid, status);
5721 return 0;
5722 }
5723 return
5724 ( a.low == b.low )
5725 && ( ( a.high == b.high )
5726 || ( ( a.low == 0 )
5727 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5728 );
5729
5730}
5731
5732
5733
5734
5735
5736
5737
5738
5739
5740int floatx80_le(floatx80 a, floatx80 b, float_status *status)
5741{
5742 flag aSign, bSign;
5743
5744 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5745 || (extractFloatx80Exp(a) == 0x7FFF
5746 && (uint64_t) (extractFloatx80Frac(a) << 1))
5747 || (extractFloatx80Exp(b) == 0x7FFF
5748 && (uint64_t) (extractFloatx80Frac(b) << 1))
5749 ) {
5750 float_raise(float_flag_invalid, status);
5751 return 0;
5752 }
5753 aSign = extractFloatx80Sign( a );
5754 bSign = extractFloatx80Sign( b );
5755 if ( aSign != bSign ) {
5756 return
5757 aSign
5758 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5759 == 0 );
5760 }
5761 return
5762 aSign ? le128( b.high, b.low, a.high, a.low )
5763 : le128( a.high, a.low, b.high, b.low );
5764
5765}
5766
5767
5768
5769
5770
5771
5772
5773
5774int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
5775{
5776 flag aSign, bSign;
5777
5778 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5779 || (extractFloatx80Exp(a) == 0x7FFF
5780 && (uint64_t) (extractFloatx80Frac(a) << 1))
5781 || (extractFloatx80Exp(b) == 0x7FFF
5782 && (uint64_t) (extractFloatx80Frac(b) << 1))
5783 ) {
5784 float_raise(float_flag_invalid, status);
5785 return 0;
5786 }
5787 aSign = extractFloatx80Sign( a );
5788 bSign = extractFloatx80Sign( b );
5789 if ( aSign != bSign ) {
5790 return
5791 aSign
5792 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5793 != 0 );
5794 }
5795 return
5796 aSign ? lt128( b.high, b.low, a.high, a.low )
5797 : lt128( a.high, a.low, b.high, b.low );
5798
5799}
5800
5801
5802
5803
5804
5805
5806
5807int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
5808{
5809 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5810 || (extractFloatx80Exp(a) == 0x7FFF
5811 && (uint64_t) (extractFloatx80Frac(a) << 1))
5812 || (extractFloatx80Exp(b) == 0x7FFF
5813 && (uint64_t) (extractFloatx80Frac(b) << 1))
5814 ) {
5815 float_raise(float_flag_invalid, status);
5816 return 1;
5817 }
5818 return 0;
5819}
5820
5821
5822
5823
5824
5825
5826
5827
5828int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
5829{
5830
5831 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5832 float_raise(float_flag_invalid, status);
5833 return 0;
5834 }
5835 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5836 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5837 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5838 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5839 ) {
5840 if (floatx80_is_signaling_nan(a, status)
5841 || floatx80_is_signaling_nan(b, status)) {
5842 float_raise(float_flag_invalid, status);
5843 }
5844 return 0;
5845 }
5846 return
5847 ( a.low == b.low )
5848 && ( ( a.high == b.high )
5849 || ( ( a.low == 0 )
5850 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
5851 );
5852
5853}
5854
5855
5856
5857
5858
5859
5860
5861
5862int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
5863{
5864 flag aSign, bSign;
5865
5866 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5867 float_raise(float_flag_invalid, status);
5868 return 0;
5869 }
5870 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5871 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5872 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5873 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5874 ) {
5875 if (floatx80_is_signaling_nan(a, status)
5876 || floatx80_is_signaling_nan(b, status)) {
5877 float_raise(float_flag_invalid, status);
5878 }
5879 return 0;
5880 }
5881 aSign = extractFloatx80Sign( a );
5882 bSign = extractFloatx80Sign( b );
5883 if ( aSign != bSign ) {
5884 return
5885 aSign
5886 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5887 == 0 );
5888 }
5889 return
5890 aSign ? le128( b.high, b.low, a.high, a.low )
5891 : le128( a.high, a.low, b.high, b.low );
5892
5893}
5894
5895
5896
5897
5898
5899
5900
5901
5902int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
5903{
5904 flag aSign, bSign;
5905
5906 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5907 float_raise(float_flag_invalid, status);
5908 return 0;
5909 }
5910 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5911 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5912 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5913 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5914 ) {
5915 if (floatx80_is_signaling_nan(a, status)
5916 || floatx80_is_signaling_nan(b, status)) {
5917 float_raise(float_flag_invalid, status);
5918 }
5919 return 0;
5920 }
5921 aSign = extractFloatx80Sign( a );
5922 bSign = extractFloatx80Sign( b );
5923 if ( aSign != bSign ) {
5924 return
5925 aSign
5926 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5927 != 0 );
5928 }
5929 return
5930 aSign ? lt128( b.high, b.low, a.high, a.low )
5931 : lt128( a.high, a.low, b.high, b.low );
5932
5933}
5934
5935
5936
5937
5938
5939
5940
5941int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
5942{
5943 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5944 float_raise(float_flag_invalid, status);
5945 return 1;
5946 }
5947 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5948 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5949 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5950 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5951 ) {
5952 if (floatx80_is_signaling_nan(a, status)
5953 || floatx80_is_signaling_nan(b, status)) {
5954 float_raise(float_flag_invalid, status);
5955 }
5956 return 1;
5957 }
5958 return 0;
5959}
5960
5961
5962
5963
5964
5965
5966
5967
5968
5969
5970
5971int32_t float128_to_int32(float128 a, float_status *status)
5972{
5973 flag aSign;
5974 int32_t aExp, shiftCount;
5975 uint64_t aSig0, aSig1;
5976
5977 aSig1 = extractFloat128Frac1( a );
5978 aSig0 = extractFloat128Frac0( a );
5979 aExp = extractFloat128Exp( a );
5980 aSign = extractFloat128Sign( a );
5981 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5982 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5983 aSig0 |= ( aSig1 != 0 );
5984 shiftCount = 0x4028 - aExp;
5985 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
5986 return roundAndPackInt32(aSign, aSig0, status);
5987
5988}
5989
5990
5991
5992
5993
5994
5995
5996
5997
5998
5999
6000int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
6001{
6002 flag aSign;
6003 int32_t aExp, shiftCount;
6004 uint64_t aSig0, aSig1, savedASig;
6005 int32_t z;
6006
6007 aSig1 = extractFloat128Frac1( a );
6008 aSig0 = extractFloat128Frac0( a );
6009 aExp = extractFloat128Exp( a );
6010 aSign = extractFloat128Sign( a );
6011 aSig0 |= ( aSig1 != 0 );
6012 if ( 0x401E < aExp ) {
6013 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6014 goto invalid;
6015 }
6016 else if ( aExp < 0x3FFF ) {
6017 if (aExp || aSig0) {
6018 status->float_exception_flags |= float_flag_inexact;
6019 }
6020 return 0;
6021 }
6022 aSig0 |= LIT64( 0x0001000000000000 );
6023 shiftCount = 0x402F - aExp;
6024 savedASig = aSig0;
6025 aSig0 >>= shiftCount;
6026 z = aSig0;
6027 if ( aSign ) z = - z;
6028 if ( ( z < 0 ) ^ aSign ) {
6029 invalid:
6030 float_raise(float_flag_invalid, status);
6031 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
6032 }
6033 if ( ( aSig0<<shiftCount ) != savedASig ) {
6034 status->float_exception_flags |= float_flag_inexact;
6035 }
6036 return z;
6037
6038}
6039
6040
6041
6042
6043
6044
6045
6046
6047
6048
6049
6050int64_t float128_to_int64(float128 a, float_status *status)
6051{
6052 flag aSign;
6053 int32_t aExp, shiftCount;
6054 uint64_t aSig0, aSig1;
6055
6056 aSig1 = extractFloat128Frac1( a );
6057 aSig0 = extractFloat128Frac0( a );
6058 aExp = extractFloat128Exp( a );
6059 aSign = extractFloat128Sign( a );
6060 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
6061 shiftCount = 0x402F - aExp;
6062 if ( shiftCount <= 0 ) {
6063 if ( 0x403E < aExp ) {
6064 float_raise(float_flag_invalid, status);
6065 if ( ! aSign
6066 || ( ( aExp == 0x7FFF )
6067 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
6068 )
6069 ) {
6070 return LIT64( 0x7FFFFFFFFFFFFFFF );
6071 }
6072 return (int64_t) LIT64( 0x8000000000000000 );
6073 }
6074 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6075 }
6076 else {
6077 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6078 }
6079 return roundAndPackInt64(aSign, aSig0, aSig1, status);
6080
6081}
6082
6083
6084
6085
6086
6087
6088
6089
6090
6091
6092
6093int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
6094{
6095 flag aSign;
6096 int32_t aExp, shiftCount;
6097 uint64_t aSig0, aSig1;
6098 int64_t z;
6099
6100 aSig1 = extractFloat128Frac1( a );
6101 aSig0 = extractFloat128Frac0( a );
6102 aExp = extractFloat128Exp( a );
6103 aSign = extractFloat128Sign( a );
6104 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
6105 shiftCount = aExp - 0x402F;
6106 if ( 0 < shiftCount ) {
6107 if ( 0x403E <= aExp ) {
6108 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
6109 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
6110 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
6111 if (aSig1) {
6112 status->float_exception_flags |= float_flag_inexact;
6113 }
6114 }
6115 else {
6116 float_raise(float_flag_invalid, status);
6117 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
6118 return LIT64( 0x7FFFFFFFFFFFFFFF );
6119 }
6120 }
6121 return (int64_t) LIT64( 0x8000000000000000 );
6122 }
6123 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
6124 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
6125 status->float_exception_flags |= float_flag_inexact;
6126 }
6127 }
6128 else {
6129 if ( aExp < 0x3FFF ) {
6130 if ( aExp | aSig0 | aSig1 ) {
6131 status->float_exception_flags |= float_flag_inexact;
6132 }
6133 return 0;
6134 }
6135 z = aSig0>>( - shiftCount );
6136 if ( aSig1
6137 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
6138 status->float_exception_flags |= float_flag_inexact;
6139 }
6140 }
6141 if ( aSign ) z = - z;
6142 return z;
6143
6144}
6145
6146
6147
6148
6149
6150
6151
6152
6153
6154
6155
6156
6157
6158uint64_t float128_to_uint64(float128 a, float_status *status)
6159{
6160 flag aSign;
6161 int aExp;
6162 int shiftCount;
6163 uint64_t aSig0, aSig1;
6164
6165 aSig0 = extractFloat128Frac0(a);
6166 aSig1 = extractFloat128Frac1(a);
6167 aExp = extractFloat128Exp(a);
6168 aSign = extractFloat128Sign(a);
6169 if (aSign && (aExp > 0x3FFE)) {
6170 float_raise(float_flag_invalid, status);
6171 if (float128_is_any_nan(a)) {
6172 return LIT64(0xFFFFFFFFFFFFFFFF);
6173 } else {
6174 return 0;
6175 }
6176 }
6177 if (aExp) {
6178 aSig0 |= LIT64(0x0001000000000000);
6179 }
6180 shiftCount = 0x402F - aExp;
6181 if (shiftCount <= 0) {
6182 if (0x403E < aExp) {
6183 float_raise(float_flag_invalid, status);
6184 return LIT64(0xFFFFFFFFFFFFFFFF);
6185 }
6186 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6187 } else {
6188 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6189 }
6190 return roundAndPackUint64(aSign, aSig0, aSig1, status);
6191}
6192
6193uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6194{
6195 uint64_t v;
6196 signed char current_rounding_mode = status->float_rounding_mode;
6197
6198 set_float_rounding_mode(float_round_to_zero, status);
6199 v = float128_to_uint64(a, status);
6200 set_float_rounding_mode(current_rounding_mode, status);
6201
6202 return v;
6203}
6204
6205
6206
6207
6208
6209
6210
6211
6212
6213
6214
6215
6216uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6217{
6218 uint64_t v;
6219 uint32_t res;
6220 int old_exc_flags = get_float_exception_flags(status);
6221
6222 v = float128_to_uint64_round_to_zero(a, status);
6223 if (v > 0xffffffff) {
6224 res = 0xffffffff;
6225 } else {
6226 return v;
6227 }
6228 set_float_exception_flags(old_exc_flags, status);
6229 float_raise(float_flag_invalid, status);
6230 return res;
6231}
6232
6233
6234
6235
6236
6237
6238
6239
6240float32 float128_to_float32(float128 a, float_status *status)
6241{
6242 flag aSign;
6243 int32_t aExp;
6244 uint64_t aSig0, aSig1;
6245 uint32_t zSig;
6246
6247 aSig1 = extractFloat128Frac1( a );
6248 aSig0 = extractFloat128Frac0( a );
6249 aExp = extractFloat128Exp( a );
6250 aSign = extractFloat128Sign( a );
6251 if ( aExp == 0x7FFF ) {
6252 if ( aSig0 | aSig1 ) {
6253 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
6254 }
6255 return packFloat32( aSign, 0xFF, 0 );
6256 }
6257 aSig0 |= ( aSig1 != 0 );
6258 shift64RightJamming( aSig0, 18, &aSig0 );
6259 zSig = aSig0;
6260 if ( aExp || zSig ) {
6261 zSig |= 0x40000000;
6262 aExp -= 0x3F81;
6263 }
6264 return roundAndPackFloat32(aSign, aExp, zSig, status);
6265
6266}
6267
6268
6269
6270
6271
6272
6273
6274
6275float64 float128_to_float64(float128 a, float_status *status)
6276{
6277 flag aSign;
6278 int32_t aExp;
6279 uint64_t aSig0, aSig1;
6280
6281 aSig1 = extractFloat128Frac1( a );
6282 aSig0 = extractFloat128Frac0( a );
6283 aExp = extractFloat128Exp( a );
6284 aSign = extractFloat128Sign( a );
6285 if ( aExp == 0x7FFF ) {
6286 if ( aSig0 | aSig1 ) {
6287 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
6288 }
6289 return packFloat64( aSign, 0x7FF, 0 );
6290 }
6291 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6292 aSig0 |= ( aSig1 != 0 );
6293 if ( aExp || aSig0 ) {
6294 aSig0 |= LIT64( 0x4000000000000000 );
6295 aExp -= 0x3C01;
6296 }
6297 return roundAndPackFloat64(aSign, aExp, aSig0, status);
6298
6299}
6300
6301
6302
6303
6304
6305
6306
6307
6308floatx80 float128_to_floatx80(float128 a, float_status *status)
6309{
6310 flag aSign;
6311 int32_t aExp;
6312 uint64_t aSig0, aSig1;
6313
6314 aSig1 = extractFloat128Frac1( a );
6315 aSig0 = extractFloat128Frac0( a );
6316 aExp = extractFloat128Exp( a );
6317 aSign = extractFloat128Sign( a );
6318 if ( aExp == 0x7FFF ) {
6319 if ( aSig0 | aSig1 ) {
6320 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
6321 }
6322 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
6323 }
6324 if ( aExp == 0 ) {
6325 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6326 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6327 }
6328 else {
6329 aSig0 |= LIT64( 0x0001000000000000 );
6330 }
6331 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6332 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6333
6334}
6335
6336
6337
6338
6339
6340
6341
6342
6343float128 float128_round_to_int(float128 a, float_status *status)
6344{
6345 flag aSign;
6346 int32_t aExp;
6347 uint64_t lastBitMask, roundBitsMask;
6348 float128 z;
6349
6350 aExp = extractFloat128Exp( a );
6351 if ( 0x402F <= aExp ) {
6352 if ( 0x406F <= aExp ) {
6353 if ( ( aExp == 0x7FFF )
6354 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6355 ) {
6356 return propagateFloat128NaN(a, a, status);
6357 }
6358 return a;
6359 }
6360 lastBitMask = 1;
6361 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6362 roundBitsMask = lastBitMask - 1;
6363 z = a;
6364 switch (status->float_rounding_mode) {
6365 case float_round_nearest_even:
6366 if ( lastBitMask ) {
6367 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6368 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6369 }
6370 else {
6371 if ( (int64_t) z.low < 0 ) {
6372 ++z.high;
6373 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
6374 }
6375 }
6376 break;
6377 case float_round_ties_away:
6378 if (lastBitMask) {
6379 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6380 } else {
6381 if ((int64_t) z.low < 0) {
6382 ++z.high;
6383 }
6384 }
6385 break;
6386 case float_round_to_zero:
6387 break;
6388 case float_round_up:
6389 if (!extractFloat128Sign(z)) {
6390 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6391 }
6392 break;
6393 case float_round_down:
6394 if (extractFloat128Sign(z)) {
6395 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6396 }
6397 break;
6398 default:
6399 abort();
6400 }
6401 z.low &= ~ roundBitsMask;
6402 }
6403 else {
6404 if ( aExp < 0x3FFF ) {
6405 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
6406 status->float_exception_flags |= float_flag_inexact;
6407 aSign = extractFloat128Sign( a );
6408 switch (status->float_rounding_mode) {
6409 case float_round_nearest_even:
6410 if ( ( aExp == 0x3FFE )
6411 && ( extractFloat128Frac0( a )
6412 | extractFloat128Frac1( a ) )
6413 ) {
6414 return packFloat128( aSign, 0x3FFF, 0, 0 );
6415 }
6416 break;
6417 case float_round_ties_away:
6418 if (aExp == 0x3FFE) {
6419 return packFloat128(aSign, 0x3FFF, 0, 0);
6420 }
6421 break;
6422 case float_round_down:
6423 return
6424 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6425 : packFloat128( 0, 0, 0, 0 );
6426 case float_round_up:
6427 return
6428 aSign ? packFloat128( 1, 0, 0, 0 )
6429 : packFloat128( 0, 0x3FFF, 0, 0 );
6430 }
6431 return packFloat128( aSign, 0, 0, 0 );
6432 }
6433 lastBitMask = 1;
6434 lastBitMask <<= 0x402F - aExp;
6435 roundBitsMask = lastBitMask - 1;
6436 z.low = 0;
6437 z.high = a.high;
6438 switch (status->float_rounding_mode) {
6439 case float_round_nearest_even:
6440 z.high += lastBitMask>>1;
6441 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6442 z.high &= ~ lastBitMask;
6443 }
6444 break;
6445 case float_round_ties_away:
6446 z.high += lastBitMask>>1;
6447 break;
6448 case float_round_to_zero:
6449 break;
6450 case float_round_up:
6451 if (!extractFloat128Sign(z)) {
6452 z.high |= ( a.low != 0 );
6453 z.high += roundBitsMask;
6454 }
6455 break;
6456 case float_round_down:
6457 if (extractFloat128Sign(z)) {
6458 z.high |= (a.low != 0);
6459 z.high += roundBitsMask;
6460 }
6461 break;
6462 default:
6463 abort();
6464 }
6465 z.high &= ~ roundBitsMask;
6466 }
6467 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
6468 status->float_exception_flags |= float_flag_inexact;
6469 }
6470 return z;
6471
6472}
6473
6474
6475
6476
6477
6478
6479
6480
6481
6482static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
6483 float_status *status)
6484{
6485 int32_t aExp, bExp, zExp;
6486 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6487 int32_t expDiff;
6488
6489 aSig1 = extractFloat128Frac1( a );
6490 aSig0 = extractFloat128Frac0( a );
6491 aExp = extractFloat128Exp( a );
6492 bSig1 = extractFloat128Frac1( b );
6493 bSig0 = extractFloat128Frac0( b );
6494 bExp = extractFloat128Exp( b );
6495 expDiff = aExp - bExp;
6496 if ( 0 < expDiff ) {
6497 if ( aExp == 0x7FFF ) {
6498 if (aSig0 | aSig1) {
6499 return propagateFloat128NaN(a, b, status);
6500 }
6501 return a;
6502 }
6503 if ( bExp == 0 ) {
6504 --expDiff;
6505 }
6506 else {
6507 bSig0 |= LIT64( 0x0001000000000000 );
6508 }
6509 shift128ExtraRightJamming(
6510 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6511 zExp = aExp;
6512 }
6513 else if ( expDiff < 0 ) {
6514 if ( bExp == 0x7FFF ) {
6515 if (bSig0 | bSig1) {
6516 return propagateFloat128NaN(a, b, status);
6517 }
6518 return packFloat128( zSign, 0x7FFF, 0, 0 );
6519 }
6520 if ( aExp == 0 ) {
6521 ++expDiff;
6522 }
6523 else {
6524 aSig0 |= LIT64( 0x0001000000000000 );
6525 }
6526 shift128ExtraRightJamming(
6527 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6528 zExp = bExp;
6529 }
6530 else {
6531 if ( aExp == 0x7FFF ) {
6532 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
6533 return propagateFloat128NaN(a, b, status);
6534 }
6535 return a;
6536 }
6537 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6538 if ( aExp == 0 ) {
6539 if (status->flush_to_zero) {
6540 if (zSig0 | zSig1) {
6541 float_raise(float_flag_output_denormal, status);
6542 }
6543 return packFloat128(zSign, 0, 0, 0);
6544 }
6545 return packFloat128( zSign, 0, zSig0, zSig1 );
6546 }
6547 zSig2 = 0;
6548 zSig0 |= LIT64( 0x0002000000000000 );
6549 zExp = aExp;
6550 goto shiftRight1;
6551 }
6552 aSig0 |= LIT64( 0x0001000000000000 );
6553 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6554 --zExp;
6555 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
6556 ++zExp;
6557 shiftRight1:
6558 shift128ExtraRightJamming(
6559 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6560 roundAndPack:
6561 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6562
6563}
6564
6565
6566
6567
6568
6569
6570
6571
6572
6573static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
6574 float_status *status)
6575{
6576 int32_t aExp, bExp, zExp;
6577 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
6578 int32_t expDiff;
6579
6580 aSig1 = extractFloat128Frac1( a );
6581 aSig0 = extractFloat128Frac0( a );
6582 aExp = extractFloat128Exp( a );
6583 bSig1 = extractFloat128Frac1( b );
6584 bSig0 = extractFloat128Frac0( b );
6585 bExp = extractFloat128Exp( b );
6586 expDiff = aExp - bExp;
6587 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6588 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
6589 if ( 0 < expDiff ) goto aExpBigger;
6590 if ( expDiff < 0 ) goto bExpBigger;
6591 if ( aExp == 0x7FFF ) {
6592 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
6593 return propagateFloat128NaN(a, b, status);
6594 }
6595 float_raise(float_flag_invalid, status);
6596 return float128_default_nan(status);
6597 }
6598 if ( aExp == 0 ) {
6599 aExp = 1;
6600 bExp = 1;
6601 }
6602 if ( bSig0 < aSig0 ) goto aBigger;
6603 if ( aSig0 < bSig0 ) goto bBigger;
6604 if ( bSig1 < aSig1 ) goto aBigger;
6605 if ( aSig1 < bSig1 ) goto bBigger;
6606 return packFloat128(status->float_rounding_mode == float_round_down,
6607 0, 0, 0);
6608 bExpBigger:
6609 if ( bExp == 0x7FFF ) {
6610 if (bSig0 | bSig1) {
6611 return propagateFloat128NaN(a, b, status);
6612 }
6613 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
6614 }
6615 if ( aExp == 0 ) {
6616 ++expDiff;
6617 }
6618 else {
6619 aSig0 |= LIT64( 0x4000000000000000 );
6620 }
6621 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6622 bSig0 |= LIT64( 0x4000000000000000 );
6623 bBigger:
6624 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
6625 zExp = bExp;
6626 zSign ^= 1;
6627 goto normalizeRoundAndPack;
6628 aExpBigger:
6629 if ( aExp == 0x7FFF ) {
6630 if (aSig0 | aSig1) {
6631 return propagateFloat128NaN(a, b, status);
6632 }
6633 return a;
6634 }
6635 if ( bExp == 0 ) {
6636 --expDiff;
6637 }
6638 else {
6639 bSig0 |= LIT64( 0x4000000000000000 );
6640 }
6641 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
6642 aSig0 |= LIT64( 0x4000000000000000 );
6643 aBigger:
6644 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6645 zExp = aExp;
6646 normalizeRoundAndPack:
6647 --zExp;
6648 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
6649 status);
6650
6651}
6652
6653
6654
6655
6656
6657
6658
6659float128 float128_add(float128 a, float128 b, float_status *status)
6660{
6661 flag aSign, bSign;
6662
6663 aSign = extractFloat128Sign( a );
6664 bSign = extractFloat128Sign( b );
6665 if ( aSign == bSign ) {
6666 return addFloat128Sigs(a, b, aSign, status);
6667 }
6668 else {
6669 return subFloat128Sigs(a, b, aSign, status);
6670 }
6671
6672}
6673
6674
6675
6676
6677
6678
6679
6680float128 float128_sub(float128 a, float128 b, float_status *status)
6681{
6682 flag aSign, bSign;
6683
6684 aSign = extractFloat128Sign( a );
6685 bSign = extractFloat128Sign( b );
6686 if ( aSign == bSign ) {
6687 return subFloat128Sigs(a, b, aSign, status);
6688 }
6689 else {
6690 return addFloat128Sigs(a, b, aSign, status);
6691 }
6692
6693}
6694
6695
6696
6697
6698
6699
6700
6701float128 float128_mul(float128 a, float128 b, float_status *status)
6702{
6703 flag aSign, bSign, zSign;
6704 int32_t aExp, bExp, zExp;
6705 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
6706
6707 aSig1 = extractFloat128Frac1( a );
6708 aSig0 = extractFloat128Frac0( a );
6709 aExp = extractFloat128Exp( a );
6710 aSign = extractFloat128Sign( a );
6711 bSig1 = extractFloat128Frac1( b );
6712 bSig0 = extractFloat128Frac0( b );
6713 bExp = extractFloat128Exp( b );
6714 bSign = extractFloat128Sign( b );
6715 zSign = aSign ^ bSign;
6716 if ( aExp == 0x7FFF ) {
6717 if ( ( aSig0 | aSig1 )
6718 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6719 return propagateFloat128NaN(a, b, status);
6720 }
6721 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6722 return packFloat128( zSign, 0x7FFF, 0, 0 );
6723 }
6724 if ( bExp == 0x7FFF ) {
6725 if (bSig0 | bSig1) {
6726 return propagateFloat128NaN(a, b, status);
6727 }
6728 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6729 invalid:
6730 float_raise(float_flag_invalid, status);
6731 return float128_default_nan(status);
6732 }
6733 return packFloat128( zSign, 0x7FFF, 0, 0 );
6734 }
6735 if ( aExp == 0 ) {
6736 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6737 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6738 }
6739 if ( bExp == 0 ) {
6740 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6741 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6742 }
6743 zExp = aExp + bExp - 0x4000;
6744 aSig0 |= LIT64( 0x0001000000000000 );
6745 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6746 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6747 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6748 zSig2 |= ( zSig3 != 0 );
6749 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6750 shift128ExtraRightJamming(
6751 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6752 ++zExp;
6753 }
6754 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6755
6756}
6757
6758
6759
6760
6761
6762
6763
6764float128 float128_div(float128 a, float128 b, float_status *status)
6765{
6766 flag aSign, bSign, zSign;
6767 int32_t aExp, bExp, zExp;
6768 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6769 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6770
6771 aSig1 = extractFloat128Frac1( a );
6772 aSig0 = extractFloat128Frac0( a );
6773 aExp = extractFloat128Exp( a );
6774 aSign = extractFloat128Sign( a );
6775 bSig1 = extractFloat128Frac1( b );
6776 bSig0 = extractFloat128Frac0( b );
6777 bExp = extractFloat128Exp( b );
6778 bSign = extractFloat128Sign( b );
6779 zSign = aSign ^ bSign;
6780 if ( aExp == 0x7FFF ) {
6781 if (aSig0 | aSig1) {
6782 return propagateFloat128NaN(a, b, status);
6783 }
6784 if ( bExp == 0x7FFF ) {
6785 if (bSig0 | bSig1) {
6786 return propagateFloat128NaN(a, b, status);
6787 }
6788 goto invalid;
6789 }
6790 return packFloat128( zSign, 0x7FFF, 0, 0 );
6791 }
6792 if ( bExp == 0x7FFF ) {
6793 if (bSig0 | bSig1) {
6794 return propagateFloat128NaN(a, b, status);
6795 }
6796 return packFloat128( zSign, 0, 0, 0 );
6797 }
6798 if ( bExp == 0 ) {
6799 if ( ( bSig0 | bSig1 ) == 0 ) {
6800 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6801 invalid:
6802 float_raise(float_flag_invalid, status);
6803 return float128_default_nan(status);
6804 }
6805 float_raise(float_flag_divbyzero, status);
6806 return packFloat128( zSign, 0x7FFF, 0, 0 );
6807 }
6808 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6809 }
6810 if ( aExp == 0 ) {
6811 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6812 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6813 }
6814 zExp = aExp - bExp + 0x3FFD;
6815 shortShift128Left(
6816 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6817 shortShift128Left(
6818 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6819 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6820 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6821 ++zExp;
6822 }
6823 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6824 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6825 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
6826 while ( (int64_t) rem0 < 0 ) {
6827 --zSig0;
6828 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6829 }
6830 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6831 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6832 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6833 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
6834 while ( (int64_t) rem1 < 0 ) {
6835 --zSig1;
6836 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6837 }
6838 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6839 }
6840 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
6841 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
6842
6843}
6844
6845
6846
6847
6848
6849
6850
6851float128 float128_rem(float128 a, float128 b, float_status *status)
6852{
6853 flag aSign, zSign;
6854 int32_t aExp, bExp, expDiff;
6855 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6856 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6857 int64_t sigMean0;
6858
6859 aSig1 = extractFloat128Frac1( a );
6860 aSig0 = extractFloat128Frac0( a );
6861 aExp = extractFloat128Exp( a );
6862 aSign = extractFloat128Sign( a );
6863 bSig1 = extractFloat128Frac1( b );
6864 bSig0 = extractFloat128Frac0( b );
6865 bExp = extractFloat128Exp( b );
6866 if ( aExp == 0x7FFF ) {
6867 if ( ( aSig0 | aSig1 )
6868 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6869 return propagateFloat128NaN(a, b, status);
6870 }
6871 goto invalid;
6872 }
6873 if ( bExp == 0x7FFF ) {
6874 if (bSig0 | bSig1) {
6875 return propagateFloat128NaN(a, b, status);
6876 }
6877 return a;
6878 }
6879 if ( bExp == 0 ) {
6880 if ( ( bSig0 | bSig1 ) == 0 ) {
6881 invalid:
6882 float_raise(float_flag_invalid, status);
6883 return float128_default_nan(status);
6884 }
6885 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6886 }
6887 if ( aExp == 0 ) {
6888 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6889 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6890 }
6891 expDiff = aExp - bExp;
6892 if ( expDiff < -1 ) return a;
6893 shortShift128Left(
6894 aSig0 | LIT64( 0x0001000000000000 ),
6895 aSig1,
6896 15 - ( expDiff < 0 ),
6897 &aSig0,
6898 &aSig1
6899 );
6900 shortShift128Left(
6901 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6902 q = le128( bSig0, bSig1, aSig0, aSig1 );
6903 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6904 expDiff -= 64;
6905 while ( 0 < expDiff ) {
6906 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6907 q = ( 4 < q ) ? q - 4 : 0;
6908 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6909 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6910 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6911 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6912 expDiff -= 61;
6913 }
6914 if ( -64 < expDiff ) {
6915 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6916 q = ( 4 < q ) ? q - 4 : 0;
6917 q >>= - expDiff;
6918 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6919 expDiff += 52;
6920 if ( expDiff < 0 ) {
6921 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6922 }
6923 else {
6924 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6925 }
6926 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6927 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6928 }
6929 else {
6930 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6931 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6932 }
6933 do {
6934 alternateASig0 = aSig0;
6935 alternateASig1 = aSig1;
6936 ++q;
6937 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6938 } while ( 0 <= (int64_t) aSig0 );
6939 add128(
6940 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
6941 if ( ( sigMean0 < 0 )
6942 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6943 aSig0 = alternateASig0;
6944 aSig1 = alternateASig1;
6945 }
6946 zSign = ( (int64_t) aSig0 < 0 );
6947 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
6948 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6949 status);
6950}
6951
6952
6953
6954
6955
6956
6957
6958float128 float128_sqrt(float128 a, float_status *status)
6959{
6960 flag aSign;
6961 int32_t aExp, zExp;
6962 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6963 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6964
6965 aSig1 = extractFloat128Frac1( a );
6966 aSig0 = extractFloat128Frac0( a );
6967 aExp = extractFloat128Exp( a );
6968 aSign = extractFloat128Sign( a );
6969 if ( aExp == 0x7FFF ) {
6970 if (aSig0 | aSig1) {
6971 return propagateFloat128NaN(a, a, status);
6972 }
6973 if ( ! aSign ) return a;
6974 goto invalid;
6975 }
6976 if ( aSign ) {
6977 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6978 invalid:
6979 float_raise(float_flag_invalid, status);
6980 return float128_default_nan(status);
6981 }
6982 if ( aExp == 0 ) {
6983 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6984 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6985 }
6986 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6987 aSig0 |= LIT64( 0x0001000000000000 );
6988 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6989 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6990 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6991 doubleZSig0 = zSig0<<1;
6992 mul64To128( zSig0, zSig0, &term0, &term1 );
6993 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6994 while ( (int64_t) rem0 < 0 ) {
6995 --zSig0;
6996 doubleZSig0 -= 2;
6997 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6998 }
6999 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
7000 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7001 if ( zSig1 == 0 ) zSig1 = 1;
7002 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7003 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7004 mul64To128( zSig1, zSig1, &term2, &term3 );
7005 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
7006 while ( (int64_t) rem1 < 0 ) {
7007 --zSig1;
7008 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7009 term3 |= 1;
7010 term2 |= doubleZSig0;
7011 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7012 }
7013 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7014 }
7015 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
7016 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
7017
7018}
7019
7020
7021
7022
7023
7024
7025
7026
7027int float128_eq(float128 a, float128 b, float_status *status)
7028{
7029
7030 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7031 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7032 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7033 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7034 ) {
7035 float_raise(float_flag_invalid, status);
7036 return 0;
7037 }
7038 return
7039 ( a.low == b.low )
7040 && ( ( a.high == b.high )
7041 || ( ( a.low == 0 )
7042 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
7043 );
7044
7045}
7046
7047
7048
7049
7050
7051
7052
7053
7054int float128_le(float128 a, float128 b, float_status *status)
7055{
7056 flag aSign, bSign;
7057
7058 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7059 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7060 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7061 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7062 ) {
7063 float_raise(float_flag_invalid, status);
7064 return 0;
7065 }
7066 aSign = extractFloat128Sign( a );
7067 bSign = extractFloat128Sign( b );
7068 if ( aSign != bSign ) {
7069 return
7070 aSign
7071 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
7072 == 0 );
7073 }
7074 return
7075 aSign ? le128( b.high, b.low, a.high, a.low )
7076 : le128( a.high, a.low, b.high, b.low );
7077
7078}
7079
7080
7081
7082
7083
7084
7085
7086
7087int float128_lt(float128 a, float128 b, float_status *status)
7088{
7089 flag aSign, bSign;
7090
7091 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7092 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7093 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7094 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7095 ) {
7096 float_raise(float_flag_invalid, status);
7097 return 0;
7098 }
7099 aSign = extractFloat128Sign( a );
7100 bSign = extractFloat128Sign( b );
7101 if ( aSign != bSign ) {
7102 return
7103 aSign
7104 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
7105 != 0 );
7106 }
7107 return
7108 aSign ? lt128( b.high, b.low, a.high, a.low )
7109 : lt128( a.high, a.low, b.high, b.low );
7110
7111}
7112
7113
7114
7115
7116
7117
7118
7119
7120int float128_unordered(float128 a, float128 b, float_status *status)
7121{
7122 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7123 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7124 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7125 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7126 ) {
7127 float_raise(float_flag_invalid, status);
7128 return 1;
7129 }
7130 return 0;
7131}
7132
7133
7134
7135
7136
7137
7138
7139
7140int float128_eq_quiet(float128 a, float128 b, float_status *status)
7141{
7142
7143 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7144 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7145 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7146 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7147 ) {
7148 if (float128_is_signaling_nan(a, status)
7149 || float128_is_signaling_nan(b, status)) {
7150 float_raise(float_flag_invalid, status);
7151 }
7152 return 0;
7153 }
7154 return
7155 ( a.low == b.low )
7156 && ( ( a.high == b.high )
7157 || ( ( a.low == 0 )
7158 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
7159 );
7160
7161}
7162
7163
7164
7165
7166
7167
7168
7169
7170int float128_le_quiet(float128 a, float128 b, float_status *status)
7171{
7172 flag aSign, bSign;
7173
7174 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7175 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7176 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7177 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7178 ) {
7179 if (float128_is_signaling_nan(a, status)
7180 || float128_is_signaling_nan(b, status)) {
7181 float_raise(float_flag_invalid, status);
7182 }
7183 return 0;
7184 }
7185 aSign = extractFloat128Sign( a );
7186 bSign = extractFloat128Sign( b );
7187 if ( aSign != bSign ) {
7188 return
7189 aSign
7190 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
7191 == 0 );
7192 }
7193 return
7194 aSign ? le128( b.high, b.low, a.high, a.low )
7195 : le128( a.high, a.low, b.high, b.low );
7196
7197}
7198
7199
7200
7201
7202
7203
7204
7205
7206int float128_lt_quiet(float128 a, float128 b, float_status *status)
7207{
7208 flag aSign, bSign;
7209
7210 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7211 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7212 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7213 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7214 ) {
7215 if (float128_is_signaling_nan(a, status)
7216 || float128_is_signaling_nan(b, status)) {
7217 float_raise(float_flag_invalid, status);
7218 }
7219 return 0;
7220 }
7221 aSign = extractFloat128Sign( a );
7222 bSign = extractFloat128Sign( b );
7223 if ( aSign != bSign ) {
7224 return
7225 aSign
7226 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
7227 != 0 );
7228 }
7229 return
7230 aSign ? lt128( b.high, b.low, a.high, a.low )
7231 : lt128( a.high, a.low, b.high, b.low );
7232
7233}
7234
7235
7236
7237
7238
7239
7240
7241
7242int float128_unordered_quiet(float128 a, float128 b, float_status *status)
7243{
7244 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
7245 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
7246 || ( ( extractFloat128Exp( b ) == 0x7FFF )
7247 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
7248 ) {
7249 if (float128_is_signaling_nan(a, status)
7250 || float128_is_signaling_nan(b, status)) {
7251 float_raise(float_flag_invalid, status);
7252 }
7253 return 1;
7254 }
7255 return 0;
7256}
7257
7258
7259float32 uint32_to_float32(uint32_t a, float_status *status)
7260{
7261 return int64_to_float32(a, status);
7262}
7263
7264float64 uint32_to_float64(uint32_t a, float_status *status)
7265{
7266 return int64_to_float64(a, status);
7267}
7268
7269uint32_t float32_to_uint32(float32 a, float_status *status)
7270{
7271 int64_t v;
7272 uint32_t res;
7273 int old_exc_flags = get_float_exception_flags(status);
7274
7275 v = float32_to_int64(a, status);
7276 if (v < 0) {
7277 res = 0;
7278 } else if (v > 0xffffffff) {
7279 res = 0xffffffff;
7280 } else {
7281 return v;
7282 }
7283 set_float_exception_flags(old_exc_flags, status);
7284 float_raise(float_flag_invalid, status);
7285 return res;
7286}
7287
7288uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *status)
7289{
7290 int64_t v;
7291 uint32_t res;
7292 int old_exc_flags = get_float_exception_flags(status);
7293
7294 v = float32_to_int64_round_to_zero(a, status);
7295 if (v < 0) {
7296 res = 0;
7297 } else if (v > 0xffffffff) {
7298 res = 0xffffffff;
7299 } else {
7300 return v;
7301 }
7302 set_float_exception_flags(old_exc_flags, status);
7303 float_raise(float_flag_invalid, status);
7304 return res;
7305}
7306
7307int16_t float32_to_int16(float32 a, float_status *status)
7308{
7309 int32_t v;
7310 int16_t res;
7311 int old_exc_flags = get_float_exception_flags(status);
7312
7313 v = float32_to_int32(a, status);
7314 if (v < -0x8000) {
7315 res = -0x8000;
7316 } else if (v > 0x7fff) {
7317 res = 0x7fff;
7318 } else {
7319 return v;
7320 }
7321
7322 set_float_exception_flags(old_exc_flags, status);
7323 float_raise(float_flag_invalid, status);
7324 return res;
7325}
7326
7327uint16_t float32_to_uint16(float32 a, float_status *status)
7328{
7329 int32_t v;
7330 uint16_t res;
7331 int old_exc_flags = get_float_exception_flags(status);
7332
7333 v = float32_to_int32(a, status);
7334 if (v < 0) {
7335 res = 0;
7336 } else if (v > 0xffff) {
7337 res = 0xffff;
7338 } else {
7339 return v;
7340 }
7341
7342 set_float_exception_flags(old_exc_flags, status);
7343 float_raise(float_flag_invalid, status);
7344 return res;
7345}
7346
7347uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *status)
7348{
7349 int64_t v;
7350 uint16_t res;
7351 int old_exc_flags = get_float_exception_flags(status);
7352
7353 v = float32_to_int64_round_to_zero(a, status);
7354 if (v < 0) {
7355 res = 0;
7356 } else if (v > 0xffff) {
7357 res = 0xffff;
7358 } else {
7359 return v;
7360 }
7361 set_float_exception_flags(old_exc_flags, status);
7362 float_raise(float_flag_invalid, status);
7363 return res;
7364}
7365
7366uint32_t float64_to_uint32(float64 a, float_status *status)
7367{
7368 uint64_t v;
7369 uint32_t res;
7370 int old_exc_flags = get_float_exception_flags(status);
7371
7372 v = float64_to_uint64(a, status);
7373 if (v > 0xffffffff) {
7374 res = 0xffffffff;
7375 } else {
7376 return v;
7377 }
7378 set_float_exception_flags(old_exc_flags, status);
7379 float_raise(float_flag_invalid, status);
7380 return res;
7381}
7382
7383uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *status)
7384{
7385 uint64_t v;
7386 uint32_t res;
7387 int old_exc_flags = get_float_exception_flags(status);
7388
7389 v = float64_to_uint64_round_to_zero(a, status);
7390 if (v > 0xffffffff) {
7391 res = 0xffffffff;
7392 } else {
7393 return v;
7394 }
7395 set_float_exception_flags(old_exc_flags, status);
7396 float_raise(float_flag_invalid, status);
7397 return res;
7398}
7399
7400int16_t float64_to_int16(float64 a, float_status *status)
7401{
7402 int64_t v;
7403 int16_t res;
7404 int old_exc_flags = get_float_exception_flags(status);
7405
7406 v = float64_to_int32(a, status);
7407 if (v < -0x8000) {
7408 res = -0x8000;
7409 } else if (v > 0x7fff) {
7410 res = 0x7fff;
7411 } else {
7412 return v;
7413 }
7414
7415 set_float_exception_flags(old_exc_flags, status);
7416 float_raise(float_flag_invalid, status);
7417 return res;
7418}
7419
7420uint16_t float64_to_uint16(float64 a, float_status *status)
7421{
7422 int64_t v;
7423 uint16_t res;
7424 int old_exc_flags = get_float_exception_flags(status);
7425
7426 v = float64_to_int32(a, status);
7427 if (v < 0) {
7428 res = 0;
7429 } else if (v > 0xffff) {
7430 res = 0xffff;
7431 } else {
7432 return v;
7433 }
7434
7435 set_float_exception_flags(old_exc_flags, status);
7436 float_raise(float_flag_invalid, status);
7437 return res;
7438}
7439
7440uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *status)
7441{
7442 int64_t v;
7443 uint16_t res;
7444 int old_exc_flags = get_float_exception_flags(status);
7445
7446 v = float64_to_int64_round_to_zero(a, status);
7447 if (v < 0) {
7448 res = 0;
7449 } else if (v > 0xffff) {
7450 res = 0xffff;
7451 } else {
7452 return v;
7453 }
7454 set_float_exception_flags(old_exc_flags, status);
7455 float_raise(float_flag_invalid, status);
7456 return res;
7457}
7458
7459
7460
7461
7462
7463
7464
7465
7466
7467
7468
7469
7470
7471uint64_t float64_to_uint64(float64 a, float_status *status)
7472{
7473 flag aSign;
7474 int aExp;
7475 int shiftCount;
7476 uint64_t aSig, aSigExtra;
7477 a = float64_squash_input_denormal(a, status);
7478
7479 aSig = extractFloat64Frac(a);
7480 aExp = extractFloat64Exp(a);
7481 aSign = extractFloat64Sign(a);
7482 if (aSign && (aExp > 1022)) {
7483 float_raise(float_flag_invalid, status);
7484 if (float64_is_any_nan(a)) {
7485 return LIT64(0xFFFFFFFFFFFFFFFF);
7486 } else {
7487 return 0;
7488 }
7489 }
7490 if (aExp) {
7491 aSig |= LIT64(0x0010000000000000);
7492 }
7493 shiftCount = 0x433 - aExp;
7494 if (shiftCount <= 0) {
7495 if (0x43E < aExp) {
7496 float_raise(float_flag_invalid, status);
7497 return LIT64(0xFFFFFFFFFFFFFFFF);
7498 }
7499 aSigExtra = 0;
7500 aSig <<= -shiftCount;
7501 } else {
7502 shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra);
7503 }
7504 return roundAndPackUint64(aSign, aSig, aSigExtra, status);
7505}
7506
7507uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *status)
7508{
7509 signed char current_rounding_mode = status->float_rounding_mode;
7510 set_float_rounding_mode(float_round_to_zero, status);
7511 uint64_t v = float64_to_uint64(a, status);
7512 set_float_rounding_mode(current_rounding_mode, status);
7513 return v;
7514}
7515
7516#define COMPARE(s, nan_exp) \
7517static inline int float ## s ## _compare_internal(float ## s a, float ## s b,\
7518 int is_quiet, float_status *status) \
7519{ \
7520 flag aSign, bSign; \
7521 uint ## s ## _t av, bv; \
7522 a = float ## s ## _squash_input_denormal(a, status); \
7523 b = float ## s ## _squash_input_denormal(b, status); \
7524 \
7525 if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
7526 extractFloat ## s ## Frac( a ) ) || \
7527 ( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
7528 extractFloat ## s ## Frac( b ) )) { \
7529 if (!is_quiet || \
7530 float ## s ## _is_signaling_nan(a, status) || \
7531 float ## s ## _is_signaling_nan(b, status)) { \
7532 float_raise(float_flag_invalid, status); \
7533 } \
7534 return float_relation_unordered; \
7535 } \
7536 aSign = extractFloat ## s ## Sign( a ); \
7537 bSign = extractFloat ## s ## Sign( b ); \
7538 av = float ## s ## _val(a); \
7539 bv = float ## s ## _val(b); \
7540 if ( aSign != bSign ) { \
7541 if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) { \
7542 \
7543 return float_relation_equal; \
7544 } else { \
7545 return 1 - (2 * aSign); \
7546 } \
7547 } else { \
7548 if (av == bv) { \
7549 return float_relation_equal; \
7550 } else { \
7551 return 1 - 2 * (aSign ^ ( av < bv )); \
7552 } \
7553 } \
7554} \
7555 \
7556int float ## s ## _compare(float ## s a, float ## s b, float_status *status) \
7557{ \
7558 return float ## s ## _compare_internal(a, b, 0, status); \
7559} \
7560 \
7561int float ## s ## _compare_quiet(float ## s a, float ## s b, \
7562 float_status *status) \
7563{ \
7564 return float ## s ## _compare_internal(a, b, 1, status); \
7565}
7566
7567COMPARE(32, 0xff)
7568COMPARE(64, 0x7ff)
7569
7570static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
7571 int is_quiet, float_status *status)
7572{
7573 flag aSign, bSign;
7574
7575 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7576 float_raise(float_flag_invalid, status);
7577 return float_relation_unordered;
7578 }
7579 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7580 ( extractFloatx80Frac( a )<<1 ) ) ||
7581 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7582 ( extractFloatx80Frac( b )<<1 ) )) {
7583 if (!is_quiet ||
7584 floatx80_is_signaling_nan(a, status) ||
7585 floatx80_is_signaling_nan(b, status)) {
7586 float_raise(float_flag_invalid, status);
7587 }
7588 return float_relation_unordered;
7589 }
7590 aSign = extractFloatx80Sign( a );
7591 bSign = extractFloatx80Sign( b );
7592 if ( aSign != bSign ) {
7593
7594 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7595 ( ( a.low | b.low ) == 0 ) ) {
7596
7597 return float_relation_equal;
7598 } else {
7599 return 1 - (2 * aSign);
7600 }
7601 } else {
7602 if (a.low == b.low && a.high == b.high) {
7603 return float_relation_equal;
7604 } else {
7605 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7606 }
7607 }
7608}
7609
7610int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
7611{
7612 return floatx80_compare_internal(a, b, 0, status);
7613}
7614
7615int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
7616{
7617 return floatx80_compare_internal(a, b, 1, status);
7618}
7619
7620static inline int float128_compare_internal(float128 a, float128 b,
7621 int is_quiet, float_status *status)
7622{
7623 flag aSign, bSign;
7624
7625 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7626 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7627 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7628 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7629 if (!is_quiet ||
7630 float128_is_signaling_nan(a, status) ||
7631 float128_is_signaling_nan(b, status)) {
7632 float_raise(float_flag_invalid, status);
7633 }
7634 return float_relation_unordered;
7635 }
7636 aSign = extractFloat128Sign( a );
7637 bSign = extractFloat128Sign( b );
7638 if ( aSign != bSign ) {
7639 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7640
7641 return float_relation_equal;
7642 } else {
7643 return 1 - (2 * aSign);
7644 }
7645 } else {
7646 if (a.low == b.low && a.high == b.high) {
7647 return float_relation_equal;
7648 } else {
7649 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7650 }
7651 }
7652}
7653
7654int float128_compare(float128 a, float128 b, float_status *status)
7655{
7656 return float128_compare_internal(a, b, 0, status);
7657}
7658
7659int float128_compare_quiet(float128 a, float128 b, float_status *status)
7660{
7661 return float128_compare_internal(a, b, 1, status);
7662}
7663
7664
7665
7666
7667
7668
7669
7670
7671
7672
7673
7674
7675
7676
7677
7678#define MINMAX(s) \
7679static inline float ## s float ## s ## _minmax(float ## s a, float ## s b, \
7680 int ismin, int isieee, \
7681 int ismag, \
7682 float_status *status) \
7683{ \
7684 flag aSign, bSign; \
7685 uint ## s ## _t av, bv, aav, abv; \
7686 a = float ## s ## _squash_input_denormal(a, status); \
7687 b = float ## s ## _squash_input_denormal(b, status); \
7688 if (float ## s ## _is_any_nan(a) || \
7689 float ## s ## _is_any_nan(b)) { \
7690 if (isieee) { \
7691 if (float ## s ## _is_quiet_nan(a, status) && \
7692 !float ## s ##_is_any_nan(b)) { \
7693 return b; \
7694 } else if (float ## s ## _is_quiet_nan(b, status) && \
7695 !float ## s ## _is_any_nan(a)) { \
7696 return a; \
7697 } \
7698 } \
7699 return propagateFloat ## s ## NaN(a, b, status); \
7700 } \
7701 aSign = extractFloat ## s ## Sign(a); \
7702 bSign = extractFloat ## s ## Sign(b); \
7703 av = float ## s ## _val(a); \
7704 bv = float ## s ## _val(b); \
7705 if (ismag) { \
7706 aav = float ## s ## _abs(av); \
7707 abv = float ## s ## _abs(bv); \
7708 if (aav != abv) { \
7709 if (ismin) { \
7710 return (aav < abv) ? a : b; \
7711 } else { \
7712 return (aav < abv) ? b : a; \
7713 } \
7714 } \
7715 } \
7716 if (aSign != bSign) { \
7717 if (ismin) { \
7718 return aSign ? a : b; \
7719 } else { \
7720 return aSign ? b : a; \
7721 } \
7722 } else { \
7723 if (ismin) { \
7724 return (aSign ^ (av < bv)) ? a : b; \
7725 } else { \
7726 return (aSign ^ (av < bv)) ? b : a; \
7727 } \
7728 } \
7729} \
7730 \
7731float ## s float ## s ## _min(float ## s a, float ## s b, \
7732 float_status *status) \
7733{ \
7734 return float ## s ## _minmax(a, b, 1, 0, 0, status); \
7735} \
7736 \
7737float ## s float ## s ## _max(float ## s a, float ## s b, \
7738 float_status *status) \
7739{ \
7740 return float ## s ## _minmax(a, b, 0, 0, 0, status); \
7741} \
7742 \
7743float ## s float ## s ## _minnum(float ## s a, float ## s b, \
7744 float_status *status) \
7745{ \
7746 return float ## s ## _minmax(a, b, 1, 1, 0, status); \
7747} \
7748 \
7749float ## s float ## s ## _maxnum(float ## s a, float ## s b, \
7750 float_status *status) \
7751{ \
7752 return float ## s ## _minmax(a, b, 0, 1, 0, status); \
7753} \
7754 \
7755float ## s float ## s ## _minnummag(float ## s a, float ## s b, \
7756 float_status *status) \
7757{ \
7758 return float ## s ## _minmax(a, b, 1, 1, 1, status); \
7759} \
7760 \
7761float ## s float ## s ## _maxnummag(float ## s a, float ## s b, \
7762 float_status *status) \
7763{ \
7764 return float ## s ## _minmax(a, b, 0, 1, 1, status); \
7765}
7766
7767MINMAX(32)
7768MINMAX(64)
7769
7770
7771
7772float32 float32_scalbn(float32 a, int n, float_status *status)
7773{
7774 flag aSign;
7775 int16_t aExp;
7776 uint32_t aSig;
7777
7778 a = float32_squash_input_denormal(a, status);
7779 aSig = extractFloat32Frac( a );
7780 aExp = extractFloat32Exp( a );
7781 aSign = extractFloat32Sign( a );
7782
7783 if ( aExp == 0xFF ) {
7784 if ( aSig ) {
7785 return propagateFloat32NaN(a, a, status);
7786 }
7787 return a;
7788 }
7789 if (aExp != 0) {
7790 aSig |= 0x00800000;
7791 } else if (aSig == 0) {
7792 return a;
7793 } else {
7794 aExp++;
7795 }
7796
7797 if (n > 0x200) {
7798 n = 0x200;
7799 } else if (n < -0x200) {
7800 n = -0x200;
7801 }
7802
7803 aExp += n - 1;
7804 aSig <<= 7;
7805 return normalizeRoundAndPackFloat32(aSign, aExp, aSig, status);
7806}
7807
7808float64 float64_scalbn(float64 a, int n, float_status *status)
7809{
7810 flag aSign;
7811 int16_t aExp;
7812 uint64_t aSig;
7813
7814 a = float64_squash_input_denormal(a, status);
7815 aSig = extractFloat64Frac( a );
7816 aExp = extractFloat64Exp( a );
7817 aSign = extractFloat64Sign( a );
7818
7819 if ( aExp == 0x7FF ) {
7820 if ( aSig ) {
7821 return propagateFloat64NaN(a, a, status);
7822 }
7823 return a;
7824 }
7825 if (aExp != 0) {
7826 aSig |= LIT64( 0x0010000000000000 );
7827 } else if (aSig == 0) {
7828 return a;
7829 } else {
7830 aExp++;
7831 }
7832
7833 if (n > 0x1000) {
7834 n = 0x1000;
7835 } else if (n < -0x1000) {
7836 n = -0x1000;
7837 }
7838
7839 aExp += n - 1;
7840 aSig <<= 10;
7841 return normalizeRoundAndPackFloat64(aSign, aExp, aSig, status);
7842}
7843
7844floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
7845{
7846 flag aSign;
7847 int32_t aExp;
7848 uint64_t aSig;
7849
7850 if (floatx80_invalid_encoding(a)) {
7851 float_raise(float_flag_invalid, status);
7852 return floatx80_default_nan(status);
7853 }
7854 aSig = extractFloatx80Frac( a );
7855 aExp = extractFloatx80Exp( a );
7856 aSign = extractFloatx80Sign( a );
7857
7858 if ( aExp == 0x7FFF ) {
7859 if ( aSig<<1 ) {
7860 return propagateFloatx80NaN(a, a, status);
7861 }
7862 return a;
7863 }
7864
7865 if (aExp == 0) {
7866 if (aSig == 0) {
7867 return a;
7868 }
7869 aExp++;
7870 }
7871
7872 if (n > 0x10000) {
7873 n = 0x10000;
7874 } else if (n < -0x10000) {
7875 n = -0x10000;
7876 }
7877
7878 aExp += n;
7879 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7880 aSign, aExp, aSig, 0, status);
7881}
7882
7883float128 float128_scalbn(float128 a, int n, float_status *status)
7884{
7885 flag aSign;
7886 int32_t aExp;
7887 uint64_t aSig0, aSig1;
7888
7889 aSig1 = extractFloat128Frac1( a );
7890 aSig0 = extractFloat128Frac0( a );
7891 aExp = extractFloat128Exp( a );
7892 aSign = extractFloat128Sign( a );
7893 if ( aExp == 0x7FFF ) {
7894 if ( aSig0 | aSig1 ) {
7895 return propagateFloat128NaN(a, a, status);
7896 }
7897 return a;
7898 }
7899 if (aExp != 0) {
7900 aSig0 |= LIT64( 0x0001000000000000 );
7901 } else if (aSig0 == 0 && aSig1 == 0) {
7902 return a;
7903 } else {
7904 aExp++;
7905 }
7906
7907 if (n > 0x10000) {
7908 n = 0x10000;
7909 } else if (n < -0x10000) {
7910 n = -0x10000;
7911 }
7912
7913 aExp += n - 1;
7914 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
7915 , status);
7916
7917}
7918