qemu/fpu/softfloat.c
<<
>>
Prefs
   1/*
   2 * QEMU float support
   3 *
   4 * The code in this source file is derived from release 2a of the SoftFloat
   5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6 * some later contributions) are provided under that license, as detailed below.
   7 * It has subsequently been modified by contributors to the QEMU Project,
   8 * so some portions are provided under:
   9 *  the SoftFloat-2a license
  10 *  the BSD license
  11 *  GPL-v2-or-later
  12 *
  13 * Any future contributions to this file after December 1st 2014 will be
  14 * taken to be licensed under the Softfloat-2a license unless specifically
  15 * indicated otherwise.
  16 */
  17
  18/*
  19===============================================================================
  20This C source file is part of the SoftFloat IEC/IEEE Floating-point
  21Arithmetic Package, Release 2a.
  22
  23Written by John R. Hauser.  This work was made possible in part by the
  24International Computer Science Institute, located at Suite 600, 1947 Center
  25Street, Berkeley, California 94704.  Funding was partially provided by the
  26National Science Foundation under grant MIP-9311980.  The original version
  27of this code was written as part of a project to build a fixed-point vector
  28processor in collaboration with the University of California at Berkeley,
  29overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  31arithmetic/SoftFloat.html'.
  32
  33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  35TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  38
  39Derivative works are acceptable, even for commercial purposes, so long as
  40(1) they include prominent notice that the work is derivative, and (2) they
  41include prominent notice akin to these four paragraphs for those parts of
  42this code that are retained.
  43
  44===============================================================================
  45*/
  46
  47/* BSD licensing:
  48 * Copyright (c) 2006, Fabrice Bellard
  49 * All rights reserved.
  50 *
  51 * Redistribution and use in source and binary forms, with or without
  52 * modification, are permitted provided that the following conditions are met:
  53 *
  54 * 1. Redistributions of source code must retain the above copyright notice,
  55 * this list of conditions and the following disclaimer.
  56 *
  57 * 2. Redistributions in binary form must reproduce the above copyright notice,
  58 * this list of conditions and the following disclaimer in the documentation
  59 * and/or other materials provided with the distribution.
  60 *
  61 * 3. Neither the name of the copyright holder nor the names of its contributors
  62 * may be used to endorse or promote products derived from this software without
  63 * specific prior written permission.
  64 *
  65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  75 * THE POSSIBILITY OF SUCH DAMAGE.
  76 */
  77
  78/* Portions of this work are licensed under the terms of the GNU GPL,
  79 * version 2 or later. See the COPYING file in the top-level directory.
  80 */
  81
  82/* softfloat (and in particular the code in softfloat-specialize.h) is
  83 * target-dependent and needs the TARGET_* macros.
  84 */
  85#include "qemu/osdep.h"
  86#include <math.h>
  87#include "qemu/bitops.h"
  88#include "fpu/softfloat.h"
  89
  90/* We only need stdlib for abort() */
  91
  92/*----------------------------------------------------------------------------
  93| Primitive arithmetic functions, including multi-word arithmetic, and
  94| division and square root approximations.  (Can be specialized to target if
  95| desired.)
  96*----------------------------------------------------------------------------*/
  97#include "fpu/softfloat-macros.h"
  98
  99/*
 100 * Hardfloat
 101 *
 102 * Fast emulation of guest FP instructions is challenging for two reasons.
 103 * First, FP instruction semantics are similar but not identical, particularly
 104 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 105 * exception flags is not trivial: reading the host's flags register with a
 106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 107 * and trapping on every FP exception is not fast nor pleasant to work with.
 108 *
 109 * We address these challenges by leveraging the host FPU for a subset of the
 110 * operations. To do this we expand on the idea presented in this paper:
 111 *
 112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 114 *
 115 * The idea is thus to leverage the host FPU to (1) compute FP operations
 116 * and (2) identify whether FP exceptions occurred while avoiding
 117 * expensive exception flag register accesses.
 118 *
 119 * An important optimization shown in the paper is that given that exception
 120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 121 * This is particularly useful for the inexact flag, which is very frequently
 122 * raised in floating-point workloads.
 123 *
 124 * We optimize the code further by deferring to soft-fp whenever FP exception
 125 * detection might get hairy. Two examples: (1) when at least one operand is
 126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 127 * and the result is < the minimum normal.
 128 */
 129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
 130    static inline void name(soft_t *a, float_status *s)                 \
 131    {                                                                   \
 132        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
 133            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
 134                                     soft_t ## _is_neg(*a));            \
 135            s->float_exception_flags |= float_flag_input_denormal;      \
 136        }                                                               \
 137    }
 138
 139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
 140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
 141#undef GEN_INPUT_FLUSH__NOCHECK
 142
 143#define GEN_INPUT_FLUSH1(name, soft_t)                  \
 144    static inline void name(soft_t *a, float_status *s) \
 145    {                                                   \
 146        if (likely(!s->flush_inputs_to_zero)) {         \
 147            return;                                     \
 148        }                                               \
 149        soft_t ## _input_flush__nocheck(a, s);          \
 150    }
 151
 152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
 153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
 154#undef GEN_INPUT_FLUSH1
 155
 156#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
 157    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
 158    {                                                                   \
 159        if (likely(!s->flush_inputs_to_zero)) {                         \
 160            return;                                                     \
 161        }                                                               \
 162        soft_t ## _input_flush__nocheck(a, s);                          \
 163        soft_t ## _input_flush__nocheck(b, s);                          \
 164    }
 165
 166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
 167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
 168#undef GEN_INPUT_FLUSH2
 169
 170#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
 171    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
 172    {                                                                   \
 173        if (likely(!s->flush_inputs_to_zero)) {                         \
 174            return;                                                     \
 175        }                                                               \
 176        soft_t ## _input_flush__nocheck(a, s);                          \
 177        soft_t ## _input_flush__nocheck(b, s);                          \
 178        soft_t ## _input_flush__nocheck(c, s);                          \
 179    }
 180
 181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
 182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
 183#undef GEN_INPUT_FLUSH3
 184
 185/*
 186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 187 * hardfloat functions. Each combination of number of inputs and float size
 188 * gets its own value.
 189 */
 190#if defined(__x86_64__)
 191# define QEMU_HARDFLOAT_1F32_USE_FP 0
 192# define QEMU_HARDFLOAT_1F64_USE_FP 1
 193# define QEMU_HARDFLOAT_2F32_USE_FP 0
 194# define QEMU_HARDFLOAT_2F64_USE_FP 1
 195# define QEMU_HARDFLOAT_3F32_USE_FP 0
 196# define QEMU_HARDFLOAT_3F64_USE_FP 1
 197#else
 198# define QEMU_HARDFLOAT_1F32_USE_FP 0
 199# define QEMU_HARDFLOAT_1F64_USE_FP 0
 200# define QEMU_HARDFLOAT_2F32_USE_FP 0
 201# define QEMU_HARDFLOAT_2F64_USE_FP 0
 202# define QEMU_HARDFLOAT_3F32_USE_FP 0
 203# define QEMU_HARDFLOAT_3F64_USE_FP 0
 204#endif
 205
 206/*
 207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 208 * float{32,64}_is_infinity when !USE_FP.
 209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 211 */
 212#if defined(__x86_64__) || defined(__aarch64__)
 213# define QEMU_HARDFLOAT_USE_ISINF   1
 214#else
 215# define QEMU_HARDFLOAT_USE_ISINF   0
 216#endif
 217
 218/*
 219 * Some targets clear the FP flags before most FP operations. This prevents
 220 * the use of hardfloat, since hardfloat relies on the inexact flag being
 221 * already set.
 222 */
 223#if defined(TARGET_PPC) || defined(__FAST_MATH__)
 224# if defined(__FAST_MATH__)
 225#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
 226    IEEE implementation
 227# endif
 228# define QEMU_NO_HARDFLOAT 1
 229# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
 230#else
 231# define QEMU_NO_HARDFLOAT 0
 232# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
 233#endif
 234
 235static inline bool can_use_fpu(const float_status *s)
 236{
 237    if (QEMU_NO_HARDFLOAT) {
 238        return false;
 239    }
 240    return likely(s->float_exception_flags & float_flag_inexact &&
 241                  s->float_rounding_mode == float_round_nearest_even);
 242}
 243
 244/*
 245 * Hardfloat generation functions. Each operation can have two flavors:
 246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 247 * most condition checks, or native ones (e.g. fpclassify).
 248 *
 249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 250 * compiler to propagate constants and inline everything into the callers.
 251 *
 252 * We only generate functions for operations with two inputs, since only
 253 * these are common enough to justify consolidating them into common code.
 254 */
 255
 256typedef union {
 257    float32 s;
 258    float h;
 259} union_float32;
 260
 261typedef union {
 262    float64 s;
 263    double h;
 264} union_float64;
 265
 266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
 267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
 268
 269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
 270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
 271typedef float   (*hard_f32_op2_fn)(float a, float b);
 272typedef double  (*hard_f64_op2_fn)(double a, double b);
 273
 274/* 2-input is-zero-or-normal */
 275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
 276{
 277    if (QEMU_HARDFLOAT_2F32_USE_FP) {
 278        /*
 279         * Not using a temp variable for consecutive fpclassify calls ends up
 280         * generating faster code.
 281         */
 282        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 283               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 284    }
 285    return float32_is_zero_or_normal(a.s) &&
 286           float32_is_zero_or_normal(b.s);
 287}
 288
 289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
 290{
 291    if (QEMU_HARDFLOAT_2F64_USE_FP) {
 292        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 293               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 294    }
 295    return float64_is_zero_or_normal(a.s) &&
 296           float64_is_zero_or_normal(b.s);
 297}
 298
 299/* 3-input is-zero-or-normal */
 300static inline
 301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
 302{
 303    if (QEMU_HARDFLOAT_3F32_USE_FP) {
 304        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 305               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 306               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 307    }
 308    return float32_is_zero_or_normal(a.s) &&
 309           float32_is_zero_or_normal(b.s) &&
 310           float32_is_zero_or_normal(c.s);
 311}
 312
 313static inline
 314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
 315{
 316    if (QEMU_HARDFLOAT_3F64_USE_FP) {
 317        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 318               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 319               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 320    }
 321    return float64_is_zero_or_normal(a.s) &&
 322           float64_is_zero_or_normal(b.s) &&
 323           float64_is_zero_or_normal(c.s);
 324}
 325
 326static inline bool f32_is_inf(union_float32 a)
 327{
 328    if (QEMU_HARDFLOAT_USE_ISINF) {
 329        return isinf(a.h);
 330    }
 331    return float32_is_infinity(a.s);
 332}
 333
 334static inline bool f64_is_inf(union_float64 a)
 335{
 336    if (QEMU_HARDFLOAT_USE_ISINF) {
 337        return isinf(a.h);
 338    }
 339    return float64_is_infinity(a.s);
 340}
 341
 342static inline float32
 343float32_gen2(float32 xa, float32 xb, float_status *s,
 344             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
 345             f32_check_fn pre, f32_check_fn post)
 346{
 347    union_float32 ua, ub, ur;
 348
 349    ua.s = xa;
 350    ub.s = xb;
 351
 352    if (unlikely(!can_use_fpu(s))) {
 353        goto soft;
 354    }
 355
 356    float32_input_flush2(&ua.s, &ub.s, s);
 357    if (unlikely(!pre(ua, ub))) {
 358        goto soft;
 359    }
 360
 361    ur.h = hard(ua.h, ub.h);
 362    if (unlikely(f32_is_inf(ur))) {
 363        s->float_exception_flags |= float_flag_overflow;
 364    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
 365        goto soft;
 366    }
 367    return ur.s;
 368
 369 soft:
 370    return soft(ua.s, ub.s, s);
 371}
 372
 373static inline float64
 374float64_gen2(float64 xa, float64 xb, float_status *s,
 375             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
 376             f64_check_fn pre, f64_check_fn post)
 377{
 378    union_float64 ua, ub, ur;
 379
 380    ua.s = xa;
 381    ub.s = xb;
 382
 383    if (unlikely(!can_use_fpu(s))) {
 384        goto soft;
 385    }
 386
 387    float64_input_flush2(&ua.s, &ub.s, s);
 388    if (unlikely(!pre(ua, ub))) {
 389        goto soft;
 390    }
 391
 392    ur.h = hard(ua.h, ub.h);
 393    if (unlikely(f64_is_inf(ur))) {
 394        s->float_exception_flags |= float_flag_overflow;
 395    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
 396        goto soft;
 397    }
 398    return ur.s;
 399
 400 soft:
 401    return soft(ua.s, ub.s, s);
 402}
 403
 404/*----------------------------------------------------------------------------
 405| Returns the fraction bits of the single-precision floating-point value `a'.
 406*----------------------------------------------------------------------------*/
 407
 408static inline uint32_t extractFloat32Frac(float32 a)
 409{
 410    return float32_val(a) & 0x007FFFFF;
 411}
 412
 413/*----------------------------------------------------------------------------
 414| Returns the exponent bits of the single-precision floating-point value `a'.
 415*----------------------------------------------------------------------------*/
 416
 417static inline int extractFloat32Exp(float32 a)
 418{
 419    return (float32_val(a) >> 23) & 0xFF;
 420}
 421
 422/*----------------------------------------------------------------------------
 423| Returns the sign bit of the single-precision floating-point value `a'.
 424*----------------------------------------------------------------------------*/
 425
 426static inline bool extractFloat32Sign(float32 a)
 427{
 428    return float32_val(a) >> 31;
 429}
 430
 431/*----------------------------------------------------------------------------
 432| Returns the fraction bits of the double-precision floating-point value `a'.
 433*----------------------------------------------------------------------------*/
 434
 435static inline uint64_t extractFloat64Frac(float64 a)
 436{
 437    return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
 438}
 439
 440/*----------------------------------------------------------------------------
 441| Returns the exponent bits of the double-precision floating-point value `a'.
 442*----------------------------------------------------------------------------*/
 443
 444static inline int extractFloat64Exp(float64 a)
 445{
 446    return (float64_val(a) >> 52) & 0x7FF;
 447}
 448
 449/*----------------------------------------------------------------------------
 450| Returns the sign bit of the double-precision floating-point value `a'.
 451*----------------------------------------------------------------------------*/
 452
 453static inline bool extractFloat64Sign(float64 a)
 454{
 455    return float64_val(a) >> 63;
 456}
 457
 458/*
 459 * Classify a floating point number. Everything above float_class_qnan
 460 * is a NaN so cls >= float_class_qnan is any NaN.
 461 */
 462
 463typedef enum __attribute__ ((__packed__)) {
 464    float_class_unclassified,
 465    float_class_zero,
 466    float_class_normal,
 467    float_class_inf,
 468    float_class_qnan,  /* all NaNs from here */
 469    float_class_snan,
 470} FloatClass;
 471
 472/* Simple helpers for checking if, or what kind of, NaN we have */
 473static inline __attribute__((unused)) bool is_nan(FloatClass c)
 474{
 475    return unlikely(c >= float_class_qnan);
 476}
 477
 478static inline __attribute__((unused)) bool is_snan(FloatClass c)
 479{
 480    return c == float_class_snan;
 481}
 482
 483static inline __attribute__((unused)) bool is_qnan(FloatClass c)
 484{
 485    return c == float_class_qnan;
 486}
 487
 488/*
 489 * Structure holding all of the decomposed parts of a float. The
 490 * exponent is unbiased and the fraction is normalized. All
 491 * calculations are done with a 64 bit fraction and then rounded as
 492 * appropriate for the final format.
 493 *
 494 * Thanks to the packed FloatClass a decent compiler should be able to
 495 * fit the whole structure into registers and avoid using the stack
 496 * for parameter passing.
 497 */
 498
 499typedef struct {
 500    uint64_t frac;
 501    int32_t  exp;
 502    FloatClass cls;
 503    bool sign;
 504} FloatParts;
 505
 506#define DECOMPOSED_BINARY_POINT    (64 - 2)
 507#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
 508#define DECOMPOSED_OVERFLOW_BIT    (DECOMPOSED_IMPLICIT_BIT << 1)
 509
 510/* Structure holding all of the relevant parameters for a format.
 511 *   exp_size: the size of the exponent field
 512 *   exp_bias: the offset applied to the exponent field
 513 *   exp_max: the maximum normalised exponent
 514 *   frac_size: the size of the fraction field
 515 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 516 * The following are computed based the size of fraction
 517 *   frac_lsb: least significant bit of fraction
 518 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 519 *   round_mask/roundeven_mask: masks used for rounding
 520 * The following optional modifiers are available:
 521 *   arm_althp: handle ARM Alternative Half Precision
 522 */
 523typedef struct {
 524    int exp_size;
 525    int exp_bias;
 526    int exp_max;
 527    int frac_size;
 528    int frac_shift;
 529    uint64_t frac_lsb;
 530    uint64_t frac_lsbm1;
 531    uint64_t round_mask;
 532    uint64_t roundeven_mask;
 533    bool arm_althp;
 534} FloatFmt;
 535
 536/* Expand fields based on the size of exponent and fraction */
 537#define FLOAT_PARAMS(E, F)                                           \
 538    .exp_size       = E,                                             \
 539    .exp_bias       = ((1 << E) - 1) >> 1,                           \
 540    .exp_max        = (1 << E) - 1,                                  \
 541    .frac_size      = F,                                             \
 542    .frac_shift     = DECOMPOSED_BINARY_POINT - F,                   \
 543    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - F),         \
 544    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - F) - 1),   \
 545    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - F)) - 1,   \
 546    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - F)) - 1
 547
 548static const FloatFmt float16_params = {
 549    FLOAT_PARAMS(5, 10)
 550};
 551
 552static const FloatFmt float16_params_ahp = {
 553    FLOAT_PARAMS(5, 10),
 554    .arm_althp = true
 555};
 556
 557static const FloatFmt bfloat16_params = {
 558    FLOAT_PARAMS(8, 7)
 559};
 560
 561static const FloatFmt float32_params = {
 562    FLOAT_PARAMS(8, 23)
 563};
 564
 565static const FloatFmt float64_params = {
 566    FLOAT_PARAMS(11, 52)
 567};
 568
 569/* Unpack a float to parts, but do not canonicalize.  */
 570static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
 571{
 572    const int sign_pos = fmt.frac_size + fmt.exp_size;
 573
 574    return (FloatParts) {
 575        .cls = float_class_unclassified,
 576        .sign = extract64(raw, sign_pos, 1),
 577        .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
 578        .frac = extract64(raw, 0, fmt.frac_size),
 579    };
 580}
 581
 582static inline FloatParts float16_unpack_raw(float16 f)
 583{
 584    return unpack_raw(float16_params, f);
 585}
 586
 587static inline FloatParts bfloat16_unpack_raw(bfloat16 f)
 588{
 589    return unpack_raw(bfloat16_params, f);
 590}
 591
 592static inline FloatParts float32_unpack_raw(float32 f)
 593{
 594    return unpack_raw(float32_params, f);
 595}
 596
 597static inline FloatParts float64_unpack_raw(float64 f)
 598{
 599    return unpack_raw(float64_params, f);
 600}
 601
 602/* Pack a float from parts, but do not canonicalize.  */
 603static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
 604{
 605    const int sign_pos = fmt.frac_size + fmt.exp_size;
 606    uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
 607    return deposit64(ret, sign_pos, 1, p.sign);
 608}
 609
 610static inline float16 float16_pack_raw(FloatParts p)
 611{
 612    return make_float16(pack_raw(float16_params, p));
 613}
 614
 615static inline bfloat16 bfloat16_pack_raw(FloatParts p)
 616{
 617    return pack_raw(bfloat16_params, p);
 618}
 619
 620static inline float32 float32_pack_raw(FloatParts p)
 621{
 622    return make_float32(pack_raw(float32_params, p));
 623}
 624
 625static inline float64 float64_pack_raw(FloatParts p)
 626{
 627    return make_float64(pack_raw(float64_params, p));
 628}
 629
 630/*----------------------------------------------------------------------------
 631| Functions and definitions to determine:  (1) whether tininess for underflow
 632| is detected before or after rounding by default, (2) what (if anything)
 633| happens when exceptions are raised, (3) how signaling NaNs are distinguished
 634| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
 635| are propagated from function inputs to output.  These details are target-
 636| specific.
 637*----------------------------------------------------------------------------*/
 638#include "softfloat-specialize.c.inc"
 639
 640/* Canonicalize EXP and FRAC, setting CLS.  */
 641static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
 642                                  float_status *status)
 643{
 644    if (part.exp == parm->exp_max && !parm->arm_althp) {
 645        if (part.frac == 0) {
 646            part.cls = float_class_inf;
 647        } else {
 648            part.frac <<= parm->frac_shift;
 649            part.cls = (parts_is_snan_frac(part.frac, status)
 650                        ? float_class_snan : float_class_qnan);
 651        }
 652    } else if (part.exp == 0) {
 653        if (likely(part.frac == 0)) {
 654            part.cls = float_class_zero;
 655        } else if (status->flush_inputs_to_zero) {
 656            float_raise(float_flag_input_denormal, status);
 657            part.cls = float_class_zero;
 658            part.frac = 0;
 659        } else {
 660            int shift = clz64(part.frac) - 1;
 661            part.cls = float_class_normal;
 662            part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
 663            part.frac <<= shift;
 664        }
 665    } else {
 666        part.cls = float_class_normal;
 667        part.exp -= parm->exp_bias;
 668        part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
 669    }
 670    return part;
 671}
 672
 673/* Round and uncanonicalize a floating-point number by parts. There
 674 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 675 * fraction; these bits will be removed. The exponent will be biased
 676 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
 677 */
 678
 679static FloatParts round_canonical(FloatParts p, float_status *s,
 680                                  const FloatFmt *parm)
 681{
 682    const uint64_t frac_lsb = parm->frac_lsb;
 683    const uint64_t frac_lsbm1 = parm->frac_lsbm1;
 684    const uint64_t round_mask = parm->round_mask;
 685    const uint64_t roundeven_mask = parm->roundeven_mask;
 686    const int exp_max = parm->exp_max;
 687    const int frac_shift = parm->frac_shift;
 688    uint64_t frac, inc;
 689    int exp, flags = 0;
 690    bool overflow_norm;
 691
 692    frac = p.frac;
 693    exp = p.exp;
 694
 695    switch (p.cls) {
 696    case float_class_normal:
 697        switch (s->float_rounding_mode) {
 698        case float_round_nearest_even:
 699            overflow_norm = false;
 700            inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
 701            break;
 702        case float_round_ties_away:
 703            overflow_norm = false;
 704            inc = frac_lsbm1;
 705            break;
 706        case float_round_to_zero:
 707            overflow_norm = true;
 708            inc = 0;
 709            break;
 710        case float_round_up:
 711            inc = p.sign ? 0 : round_mask;
 712            overflow_norm = p.sign;
 713            break;
 714        case float_round_down:
 715            inc = p.sign ? round_mask : 0;
 716            overflow_norm = !p.sign;
 717            break;
 718        case float_round_to_odd:
 719            overflow_norm = true;
 720            inc = frac & frac_lsb ? 0 : round_mask;
 721            break;
 722        default:
 723            g_assert_not_reached();
 724        }
 725
 726        exp += parm->exp_bias;
 727        if (likely(exp > 0)) {
 728            if (frac & round_mask) {
 729                flags |= float_flag_inexact;
 730                frac += inc;
 731                if (frac & DECOMPOSED_OVERFLOW_BIT) {
 732                    frac >>= 1;
 733                    exp++;
 734                }
 735            }
 736            frac >>= frac_shift;
 737
 738            if (parm->arm_althp) {
 739                /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
 740                if (unlikely(exp > exp_max)) {
 741                    /* Overflow.  Return the maximum normal.  */
 742                    flags = float_flag_invalid;
 743                    exp = exp_max;
 744                    frac = -1;
 745                }
 746            } else if (unlikely(exp >= exp_max)) {
 747                flags |= float_flag_overflow | float_flag_inexact;
 748                if (overflow_norm) {
 749                    exp = exp_max - 1;
 750                    frac = -1;
 751                } else {
 752                    p.cls = float_class_inf;
 753                    goto do_inf;
 754                }
 755            }
 756        } else if (s->flush_to_zero) {
 757            flags |= float_flag_output_denormal;
 758            p.cls = float_class_zero;
 759            goto do_zero;
 760        } else {
 761            bool is_tiny = s->tininess_before_rounding
 762                        || (exp < 0)
 763                        || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
 764
 765            shift64RightJamming(frac, 1 - exp, &frac);
 766            if (frac & round_mask) {
 767                /* Need to recompute round-to-even.  */
 768                switch (s->float_rounding_mode) {
 769                case float_round_nearest_even:
 770                    inc = ((frac & roundeven_mask) != frac_lsbm1
 771                           ? frac_lsbm1 : 0);
 772                    break;
 773                case float_round_to_odd:
 774                    inc = frac & frac_lsb ? 0 : round_mask;
 775                    break;
 776                default:
 777                    break;
 778                }
 779                flags |= float_flag_inexact;
 780                frac += inc;
 781            }
 782
 783            exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
 784            frac >>= frac_shift;
 785
 786            if (is_tiny && (flags & float_flag_inexact)) {
 787                flags |= float_flag_underflow;
 788            }
 789            if (exp == 0 && frac == 0) {
 790                p.cls = float_class_zero;
 791            }
 792        }
 793        break;
 794
 795    case float_class_zero:
 796    do_zero:
 797        exp = 0;
 798        frac = 0;
 799        break;
 800
 801    case float_class_inf:
 802    do_inf:
 803        assert(!parm->arm_althp);
 804        exp = exp_max;
 805        frac = 0;
 806        break;
 807
 808    case float_class_qnan:
 809    case float_class_snan:
 810        assert(!parm->arm_althp);
 811        exp = exp_max;
 812        frac >>= parm->frac_shift;
 813        break;
 814
 815    default:
 816        g_assert_not_reached();
 817    }
 818
 819    float_raise(flags, s);
 820    p.exp = exp;
 821    p.frac = frac;
 822    return p;
 823}
 824
 825/* Explicit FloatFmt version */
 826static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
 827                                            const FloatFmt *params)
 828{
 829    return sf_canonicalize(float16_unpack_raw(f), params, s);
 830}
 831
 832static FloatParts float16_unpack_canonical(float16 f, float_status *s)
 833{
 834    return float16a_unpack_canonical(f, s, &float16_params);
 835}
 836
 837static FloatParts bfloat16_unpack_canonical(bfloat16 f, float_status *s)
 838{
 839    return sf_canonicalize(bfloat16_unpack_raw(f), &bfloat16_params, s);
 840}
 841
 842static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
 843                                             const FloatFmt *params)
 844{
 845    return float16_pack_raw(round_canonical(p, s, params));
 846}
 847
 848static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
 849{
 850    return float16a_round_pack_canonical(p, s, &float16_params);
 851}
 852
 853static bfloat16 bfloat16_round_pack_canonical(FloatParts p, float_status *s)
 854{
 855    return bfloat16_pack_raw(round_canonical(p, s, &bfloat16_params));
 856}
 857
 858static FloatParts float32_unpack_canonical(float32 f, float_status *s)
 859{
 860    return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
 861}
 862
 863static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
 864{
 865    return float32_pack_raw(round_canonical(p, s, &float32_params));
 866}
 867
 868static FloatParts float64_unpack_canonical(float64 f, float_status *s)
 869{
 870    return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
 871}
 872
 873static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
 874{
 875    return float64_pack_raw(round_canonical(p, s, &float64_params));
 876}
 877
 878static FloatParts return_nan(FloatParts a, float_status *s)
 879{
 880    switch (a.cls) {
 881    case float_class_snan:
 882        s->float_exception_flags |= float_flag_invalid;
 883        a = parts_silence_nan(a, s);
 884        /* fall through */
 885    case float_class_qnan:
 886        if (s->default_nan_mode) {
 887            return parts_default_nan(s);
 888        }
 889        break;
 890
 891    default:
 892        g_assert_not_reached();
 893    }
 894    return a;
 895}
 896
 897static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
 898{
 899    if (is_snan(a.cls) || is_snan(b.cls)) {
 900        s->float_exception_flags |= float_flag_invalid;
 901    }
 902
 903    if (s->default_nan_mode) {
 904        return parts_default_nan(s);
 905    } else {
 906        if (pickNaN(a.cls, b.cls,
 907                    a.frac > b.frac ||
 908                    (a.frac == b.frac && a.sign < b.sign), s)) {
 909            a = b;
 910        }
 911        if (is_snan(a.cls)) {
 912            return parts_silence_nan(a, s);
 913        }
 914    }
 915    return a;
 916}
 917
 918static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
 919                                  bool inf_zero, float_status *s)
 920{
 921    int which;
 922
 923    if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
 924        s->float_exception_flags |= float_flag_invalid;
 925    }
 926
 927    which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
 928
 929    if (s->default_nan_mode) {
 930        /* Note that this check is after pickNaNMulAdd so that function
 931         * has an opportunity to set the Invalid flag.
 932         */
 933        which = 3;
 934    }
 935
 936    switch (which) {
 937    case 0:
 938        break;
 939    case 1:
 940        a = b;
 941        break;
 942    case 2:
 943        a = c;
 944        break;
 945    case 3:
 946        return parts_default_nan(s);
 947    default:
 948        g_assert_not_reached();
 949    }
 950
 951    if (is_snan(a.cls)) {
 952        return parts_silence_nan(a, s);
 953    }
 954    return a;
 955}
 956
 957/*
 958 * Returns the result of adding or subtracting the values of the
 959 * floating-point values `a' and `b'. The operation is performed
 960 * according to the IEC/IEEE Standard for Binary Floating-Point
 961 * Arithmetic.
 962 */
 963
 964static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
 965                                float_status *s)
 966{
 967    bool a_sign = a.sign;
 968    bool b_sign = b.sign ^ subtract;
 969
 970    if (a_sign != b_sign) {
 971        /* Subtraction */
 972
 973        if (a.cls == float_class_normal && b.cls == float_class_normal) {
 974            if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
 975                shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
 976                a.frac = a.frac - b.frac;
 977            } else {
 978                shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
 979                a.frac = b.frac - a.frac;
 980                a.exp = b.exp;
 981                a_sign ^= 1;
 982            }
 983
 984            if (a.frac == 0) {
 985                a.cls = float_class_zero;
 986                a.sign = s->float_rounding_mode == float_round_down;
 987            } else {
 988                int shift = clz64(a.frac) - 1;
 989                a.frac = a.frac << shift;
 990                a.exp = a.exp - shift;
 991                a.sign = a_sign;
 992            }
 993            return a;
 994        }
 995        if (is_nan(a.cls) || is_nan(b.cls)) {
 996            return pick_nan(a, b, s);
 997        }
 998        if (a.cls == float_class_inf) {
 999            if (b.cls == float_class_inf) {
1000                float_raise(float_flag_invalid, s);
1001                return parts_default_nan(s);
1002            }
1003            return a;
1004        }
1005        if (a.cls == float_class_zero && b.cls == float_class_zero) {
1006            a.sign = s->float_rounding_mode == float_round_down;
1007            return a;
1008        }
1009        if (a.cls == float_class_zero || b.cls == float_class_inf) {
1010            b.sign = a_sign ^ 1;
1011            return b;
1012        }
1013        if (b.cls == float_class_zero) {
1014            return a;
1015        }
1016    } else {
1017        /* Addition */
1018        if (a.cls == float_class_normal && b.cls == float_class_normal) {
1019            if (a.exp > b.exp) {
1020                shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
1021            } else if (a.exp < b.exp) {
1022                shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
1023                a.exp = b.exp;
1024            }
1025            a.frac += b.frac;
1026            if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
1027                shift64RightJamming(a.frac, 1, &a.frac);
1028                a.exp += 1;
1029            }
1030            return a;
1031        }
1032        if (is_nan(a.cls) || is_nan(b.cls)) {
1033            return pick_nan(a, b, s);
1034        }
1035        if (a.cls == float_class_inf || b.cls == float_class_zero) {
1036            return a;
1037        }
1038        if (b.cls == float_class_inf || a.cls == float_class_zero) {
1039            b.sign = b_sign;
1040            return b;
1041        }
1042    }
1043    g_assert_not_reached();
1044}
1045
1046/*
1047 * Returns the result of adding or subtracting the floating-point
1048 * values `a' and `b'. The operation is performed according to the
1049 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1050 */
1051
1052float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
1053{
1054    FloatParts pa = float16_unpack_canonical(a, status);
1055    FloatParts pb = float16_unpack_canonical(b, status);
1056    FloatParts pr = addsub_floats(pa, pb, false, status);
1057
1058    return float16_round_pack_canonical(pr, status);
1059}
1060
1061float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
1062{
1063    FloatParts pa = float16_unpack_canonical(a, status);
1064    FloatParts pb = float16_unpack_canonical(b, status);
1065    FloatParts pr = addsub_floats(pa, pb, true, status);
1066
1067    return float16_round_pack_canonical(pr, status);
1068}
1069
1070static float32 QEMU_SOFTFLOAT_ATTR
1071soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
1072{
1073    FloatParts pa = float32_unpack_canonical(a, status);
1074    FloatParts pb = float32_unpack_canonical(b, status);
1075    FloatParts pr = addsub_floats(pa, pb, subtract, status);
1076
1077    return float32_round_pack_canonical(pr, status);
1078}
1079
1080static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
1081{
1082    return soft_f32_addsub(a, b, false, status);
1083}
1084
1085static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1086{
1087    return soft_f32_addsub(a, b, true, status);
1088}
1089
1090static float64 QEMU_SOFTFLOAT_ATTR
1091soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
1092{
1093    FloatParts pa = float64_unpack_canonical(a, status);
1094    FloatParts pb = float64_unpack_canonical(b, status);
1095    FloatParts pr = addsub_floats(pa, pb, subtract, status);
1096
1097    return float64_round_pack_canonical(pr, status);
1098}
1099
1100static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
1101{
1102    return soft_f64_addsub(a, b, false, status);
1103}
1104
1105static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1106{
1107    return soft_f64_addsub(a, b, true, status);
1108}
1109
1110static float hard_f32_add(float a, float b)
1111{
1112    return a + b;
1113}
1114
1115static float hard_f32_sub(float a, float b)
1116{
1117    return a - b;
1118}
1119
1120static double hard_f64_add(double a, double b)
1121{
1122    return a + b;
1123}
1124
1125static double hard_f64_sub(double a, double b)
1126{
1127    return a - b;
1128}
1129
1130static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1131{
1132    if (QEMU_HARDFLOAT_2F32_USE_FP) {
1133        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1134    }
1135    return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1136}
1137
1138static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1139{
1140    if (QEMU_HARDFLOAT_2F64_USE_FP) {
1141        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1142    } else {
1143        return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1144    }
1145}
1146
1147static float32 float32_addsub(float32 a, float32 b, float_status *s,
1148                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1149{
1150    return float32_gen2(a, b, s, hard, soft,
1151                        f32_is_zon2, f32_addsubmul_post);
1152}
1153
1154static float64 float64_addsub(float64 a, float64 b, float_status *s,
1155                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1156{
1157    return float64_gen2(a, b, s, hard, soft,
1158                        f64_is_zon2, f64_addsubmul_post);
1159}
1160
1161float32 QEMU_FLATTEN
1162float32_add(float32 a, float32 b, float_status *s)
1163{
1164    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1165}
1166
1167float32 QEMU_FLATTEN
1168float32_sub(float32 a, float32 b, float_status *s)
1169{
1170    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1171}
1172
1173float64 QEMU_FLATTEN
1174float64_add(float64 a, float64 b, float_status *s)
1175{
1176    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1177}
1178
1179float64 QEMU_FLATTEN
1180float64_sub(float64 a, float64 b, float_status *s)
1181{
1182    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1183}
1184
1185/*
1186 * Returns the result of adding or subtracting the bfloat16
1187 * values `a' and `b'.
1188 */
1189bfloat16 QEMU_FLATTEN bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1190{
1191    FloatParts pa = bfloat16_unpack_canonical(a, status);
1192    FloatParts pb = bfloat16_unpack_canonical(b, status);
1193    FloatParts pr = addsub_floats(pa, pb, false, status);
1194
1195    return bfloat16_round_pack_canonical(pr, status);
1196}
1197
1198bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1199{
1200    FloatParts pa = bfloat16_unpack_canonical(a, status);
1201    FloatParts pb = bfloat16_unpack_canonical(b, status);
1202    FloatParts pr = addsub_floats(pa, pb, true, status);
1203
1204    return bfloat16_round_pack_canonical(pr, status);
1205}
1206
1207/*
1208 * Returns the result of multiplying the floating-point values `a' and
1209 * `b'. The operation is performed according to the IEC/IEEE Standard
1210 * for Binary Floating-Point Arithmetic.
1211 */
1212
1213static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
1214{
1215    bool sign = a.sign ^ b.sign;
1216
1217    if (a.cls == float_class_normal && b.cls == float_class_normal) {
1218        uint64_t hi, lo;
1219        int exp = a.exp + b.exp;
1220
1221        mul64To128(a.frac, b.frac, &hi, &lo);
1222        shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1223        if (lo & DECOMPOSED_OVERFLOW_BIT) {
1224            shift64RightJamming(lo, 1, &lo);
1225            exp += 1;
1226        }
1227
1228        /* Re-use a */
1229        a.exp = exp;
1230        a.sign = sign;
1231        a.frac = lo;
1232        return a;
1233    }
1234    /* handle all the NaN cases */
1235    if (is_nan(a.cls) || is_nan(b.cls)) {
1236        return pick_nan(a, b, s);
1237    }
1238    /* Inf * Zero == NaN */
1239    if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
1240        (a.cls == float_class_zero && b.cls == float_class_inf)) {
1241        s->float_exception_flags |= float_flag_invalid;
1242        return parts_default_nan(s);
1243    }
1244    /* Multiply by 0 or Inf */
1245    if (a.cls == float_class_inf || a.cls == float_class_zero) {
1246        a.sign = sign;
1247        return a;
1248    }
1249    if (b.cls == float_class_inf || b.cls == float_class_zero) {
1250        b.sign = sign;
1251        return b;
1252    }
1253    g_assert_not_reached();
1254}
1255
1256float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1257{
1258    FloatParts pa = float16_unpack_canonical(a, status);
1259    FloatParts pb = float16_unpack_canonical(b, status);
1260    FloatParts pr = mul_floats(pa, pb, status);
1261
1262    return float16_round_pack_canonical(pr, status);
1263}
1264
1265static float32 QEMU_SOFTFLOAT_ATTR
1266soft_f32_mul(float32 a, float32 b, float_status *status)
1267{
1268    FloatParts pa = float32_unpack_canonical(a, status);
1269    FloatParts pb = float32_unpack_canonical(b, status);
1270    FloatParts pr = mul_floats(pa, pb, status);
1271
1272    return float32_round_pack_canonical(pr, status);
1273}
1274
1275static float64 QEMU_SOFTFLOAT_ATTR
1276soft_f64_mul(float64 a, float64 b, float_status *status)
1277{
1278    FloatParts pa = float64_unpack_canonical(a, status);
1279    FloatParts pb = float64_unpack_canonical(b, status);
1280    FloatParts pr = mul_floats(pa, pb, status);
1281
1282    return float64_round_pack_canonical(pr, status);
1283}
1284
1285static float hard_f32_mul(float a, float b)
1286{
1287    return a * b;
1288}
1289
1290static double hard_f64_mul(double a, double b)
1291{
1292    return a * b;
1293}
1294
1295float32 QEMU_FLATTEN
1296float32_mul(float32 a, float32 b, float_status *s)
1297{
1298    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
1299                        f32_is_zon2, f32_addsubmul_post);
1300}
1301
1302float64 QEMU_FLATTEN
1303float64_mul(float64 a, float64 b, float_status *s)
1304{
1305    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
1306                        f64_is_zon2, f64_addsubmul_post);
1307}
1308
1309/*
1310 * Returns the result of multiplying the bfloat16
1311 * values `a' and `b'.
1312 */
1313
1314bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
1315{
1316    FloatParts pa = bfloat16_unpack_canonical(a, status);
1317    FloatParts pb = bfloat16_unpack_canonical(b, status);
1318    FloatParts pr = mul_floats(pa, pb, status);
1319
1320    return bfloat16_round_pack_canonical(pr, status);
1321}
1322
1323/*
1324 * Returns the result of multiplying the floating-point values `a' and
1325 * `b' then adding 'c', with no intermediate rounding step after the
1326 * multiplication. The operation is performed according to the
1327 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
1328 * The flags argument allows the caller to select negation of the
1329 * addend, the intermediate product, or the final result. (The
1330 * difference between this and having the caller do a separate
1331 * negation is that negating externally will flip the sign bit on
1332 * NaNs.)
1333 */
1334
1335static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
1336                                int flags, float_status *s)
1337{
1338    bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
1339                    ((1 << float_class_inf) | (1 << float_class_zero));
1340    bool p_sign;
1341    bool sign_flip = flags & float_muladd_negate_result;
1342    FloatClass p_class;
1343    uint64_t hi, lo;
1344    int p_exp;
1345
1346    /* It is implementation-defined whether the cases of (0,inf,qnan)
1347     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
1348     * they return if they do), so we have to hand this information
1349     * off to the target-specific pick-a-NaN routine.
1350     */
1351    if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
1352        return pick_nan_muladd(a, b, c, inf_zero, s);
1353    }
1354
1355    if (inf_zero) {
1356        s->float_exception_flags |= float_flag_invalid;
1357        return parts_default_nan(s);
1358    }
1359
1360    if (flags & float_muladd_negate_c) {
1361        c.sign ^= 1;
1362    }
1363
1364    p_sign = a.sign ^ b.sign;
1365
1366    if (flags & float_muladd_negate_product) {
1367        p_sign ^= 1;
1368    }
1369
1370    if (a.cls == float_class_inf || b.cls == float_class_inf) {
1371        p_class = float_class_inf;
1372    } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
1373        p_class = float_class_zero;
1374    } else {
1375        p_class = float_class_normal;
1376    }
1377
1378    if (c.cls == float_class_inf) {
1379        if (p_class == float_class_inf && p_sign != c.sign) {
1380            s->float_exception_flags |= float_flag_invalid;
1381            return parts_default_nan(s);
1382        } else {
1383            a.cls = float_class_inf;
1384            a.sign = c.sign ^ sign_flip;
1385            return a;
1386        }
1387    }
1388
1389    if (p_class == float_class_inf) {
1390        a.cls = float_class_inf;
1391        a.sign = p_sign ^ sign_flip;
1392        return a;
1393    }
1394
1395    if (p_class == float_class_zero) {
1396        if (c.cls == float_class_zero) {
1397            if (p_sign != c.sign) {
1398                p_sign = s->float_rounding_mode == float_round_down;
1399            }
1400            c.sign = p_sign;
1401        } else if (flags & float_muladd_halve_result) {
1402            c.exp -= 1;
1403        }
1404        c.sign ^= sign_flip;
1405        return c;
1406    }
1407
1408    /* a & b should be normals now... */
1409    assert(a.cls == float_class_normal &&
1410           b.cls == float_class_normal);
1411
1412    p_exp = a.exp + b.exp;
1413
1414    /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
1415     * result.
1416     */
1417    mul64To128(a.frac, b.frac, &hi, &lo);
1418    /* binary point now at bit 124 */
1419
1420    /* check for overflow */
1421    if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
1422        shift128RightJamming(hi, lo, 1, &hi, &lo);
1423        p_exp += 1;
1424    }
1425
1426    /* + add/sub */
1427    if (c.cls == float_class_zero) {
1428        /* move binary point back to 62 */
1429        shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1430    } else {
1431        int exp_diff = p_exp - c.exp;
1432        if (p_sign == c.sign) {
1433            /* Addition */
1434            if (exp_diff <= 0) {
1435                shift128RightJamming(hi, lo,
1436                                     DECOMPOSED_BINARY_POINT - exp_diff,
1437                                     &hi, &lo);
1438                lo += c.frac;
1439                p_exp = c.exp;
1440            } else {
1441                uint64_t c_hi, c_lo;
1442                /* shift c to the same binary point as the product (124) */
1443                c_hi = c.frac >> 2;
1444                c_lo = 0;
1445                shift128RightJamming(c_hi, c_lo,
1446                                     exp_diff,
1447                                     &c_hi, &c_lo);
1448                add128(hi, lo, c_hi, c_lo, &hi, &lo);
1449                /* move binary point back to 62 */
1450                shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1451            }
1452
1453            if (lo & DECOMPOSED_OVERFLOW_BIT) {
1454                shift64RightJamming(lo, 1, &lo);
1455                p_exp += 1;
1456            }
1457
1458        } else {
1459            /* Subtraction */
1460            uint64_t c_hi, c_lo;
1461            /* make C binary point match product at bit 124 */
1462            c_hi = c.frac >> 2;
1463            c_lo = 0;
1464
1465            if (exp_diff <= 0) {
1466                shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1467                if (exp_diff == 0
1468                    &&
1469                    (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1470                    sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1471                } else {
1472                    sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1473                    p_sign ^= 1;
1474                    p_exp = c.exp;
1475                }
1476            } else {
1477                shift128RightJamming(c_hi, c_lo,
1478                                     exp_diff,
1479                                     &c_hi, &c_lo);
1480                sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1481            }
1482
1483            if (hi == 0 && lo == 0) {
1484                a.cls = float_class_zero;
1485                a.sign = s->float_rounding_mode == float_round_down;
1486                a.sign ^= sign_flip;
1487                return a;
1488            } else {
1489                int shift;
1490                if (hi != 0) {
1491                    shift = clz64(hi);
1492                } else {
1493                    shift = clz64(lo) + 64;
1494                }
1495                /* Normalizing to a binary point of 124 is the
1496                   correct adjust for the exponent.  However since we're
1497                   shifting, we might as well put the binary point back
1498                   at 62 where we really want it.  Therefore shift as
1499                   if we're leaving 1 bit at the top of the word, but
1500                   adjust the exponent as if we're leaving 3 bits.  */
1501                shift -= 1;
1502                if (shift >= 64) {
1503                    lo = lo << (shift - 64);
1504                } else {
1505                    hi = (hi << shift) | (lo >> (64 - shift));
1506                    lo = hi | ((lo << shift) != 0);
1507                }
1508                p_exp -= shift - 2;
1509            }
1510        }
1511    }
1512
1513    if (flags & float_muladd_halve_result) {
1514        p_exp -= 1;
1515    }
1516
1517    /* finally prepare our result */
1518    a.cls = float_class_normal;
1519    a.sign = p_sign ^ sign_flip;
1520    a.exp = p_exp;
1521    a.frac = lo;
1522
1523    return a;
1524}
1525
1526float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1527                                                int flags, float_status *status)
1528{
1529    FloatParts pa = float16_unpack_canonical(a, status);
1530    FloatParts pb = float16_unpack_canonical(b, status);
1531    FloatParts pc = float16_unpack_canonical(c, status);
1532    FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1533
1534    return float16_round_pack_canonical(pr, status);
1535}
1536
1537static float32 QEMU_SOFTFLOAT_ATTR
1538soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1539                float_status *status)
1540{
1541    FloatParts pa = float32_unpack_canonical(a, status);
1542    FloatParts pb = float32_unpack_canonical(b, status);
1543    FloatParts pc = float32_unpack_canonical(c, status);
1544    FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1545
1546    return float32_round_pack_canonical(pr, status);
1547}
1548
1549static float64 QEMU_SOFTFLOAT_ATTR
1550soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1551                float_status *status)
1552{
1553    FloatParts pa = float64_unpack_canonical(a, status);
1554    FloatParts pb = float64_unpack_canonical(b, status);
1555    FloatParts pc = float64_unpack_canonical(c, status);
1556    FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1557
1558    return float64_round_pack_canonical(pr, status);
1559}
1560
1561static bool force_soft_fma;
1562
1563float32 QEMU_FLATTEN
1564float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1565{
1566    union_float32 ua, ub, uc, ur;
1567
1568    ua.s = xa;
1569    ub.s = xb;
1570    uc.s = xc;
1571
1572    if (unlikely(!can_use_fpu(s))) {
1573        goto soft;
1574    }
1575    if (unlikely(flags & float_muladd_halve_result)) {
1576        goto soft;
1577    }
1578
1579    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1580    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1581        goto soft;
1582    }
1583
1584    if (unlikely(force_soft_fma)) {
1585        goto soft;
1586    }
1587
1588    /*
1589     * When (a || b) == 0, there's no need to check for under/over flow,
1590     * since we know the addend is (normal || 0) and the product is 0.
1591     */
1592    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
1593        union_float32 up;
1594        bool prod_sign;
1595
1596        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1597        prod_sign ^= !!(flags & float_muladd_negate_product);
1598        up.s = float32_set_sign(float32_zero, prod_sign);
1599
1600        if (flags & float_muladd_negate_c) {
1601            uc.h = -uc.h;
1602        }
1603        ur.h = up.h + uc.h;
1604    } else {
1605        union_float32 ua_orig = ua;
1606        union_float32 uc_orig = uc;
1607
1608        if (flags & float_muladd_negate_product) {
1609            ua.h = -ua.h;
1610        }
1611        if (flags & float_muladd_negate_c) {
1612            uc.h = -uc.h;
1613        }
1614
1615        ur.h = fmaf(ua.h, ub.h, uc.h);
1616
1617        if (unlikely(f32_is_inf(ur))) {
1618            s->float_exception_flags |= float_flag_overflow;
1619        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
1620            ua = ua_orig;
1621            uc = uc_orig;
1622            goto soft;
1623        }
1624    }
1625    if (flags & float_muladd_negate_result) {
1626        return float32_chs(ur.s);
1627    }
1628    return ur.s;
1629
1630 soft:
1631    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1632}
1633
1634float64 QEMU_FLATTEN
1635float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1636{
1637    union_float64 ua, ub, uc, ur;
1638
1639    ua.s = xa;
1640    ub.s = xb;
1641    uc.s = xc;
1642
1643    if (unlikely(!can_use_fpu(s))) {
1644        goto soft;
1645    }
1646    if (unlikely(flags & float_muladd_halve_result)) {
1647        goto soft;
1648    }
1649
1650    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1651    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1652        goto soft;
1653    }
1654
1655    if (unlikely(force_soft_fma)) {
1656        goto soft;
1657    }
1658
1659    /*
1660     * When (a || b) == 0, there's no need to check for under/over flow,
1661     * since we know the addend is (normal || 0) and the product is 0.
1662     */
1663    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
1664        union_float64 up;
1665        bool prod_sign;
1666
1667        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1668        prod_sign ^= !!(flags & float_muladd_negate_product);
1669        up.s = float64_set_sign(float64_zero, prod_sign);
1670
1671        if (flags & float_muladd_negate_c) {
1672            uc.h = -uc.h;
1673        }
1674        ur.h = up.h + uc.h;
1675    } else {
1676        union_float64 ua_orig = ua;
1677        union_float64 uc_orig = uc;
1678
1679        if (flags & float_muladd_negate_product) {
1680            ua.h = -ua.h;
1681        }
1682        if (flags & float_muladd_negate_c) {
1683            uc.h = -uc.h;
1684        }
1685
1686        ur.h = fma(ua.h, ub.h, uc.h);
1687
1688        if (unlikely(f64_is_inf(ur))) {
1689            s->float_exception_flags |= float_flag_overflow;
1690        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
1691            ua = ua_orig;
1692            uc = uc_orig;
1693            goto soft;
1694        }
1695    }
1696    if (flags & float_muladd_negate_result) {
1697        return float64_chs(ur.s);
1698    }
1699    return ur.s;
1700
1701 soft:
1702    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1703}
1704
1705/*
1706 * Returns the result of multiplying the bfloat16 values `a'
1707 * and `b' then adding 'c', with no intermediate rounding step after the
1708 * multiplication.
1709 */
1710
1711bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
1712                                      int flags, float_status *status)
1713{
1714    FloatParts pa = bfloat16_unpack_canonical(a, status);
1715    FloatParts pb = bfloat16_unpack_canonical(b, status);
1716    FloatParts pc = bfloat16_unpack_canonical(c, status);
1717    FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1718
1719    return bfloat16_round_pack_canonical(pr, status);
1720}
1721
1722/*
1723 * Returns the result of dividing the floating-point value `a' by the
1724 * corresponding value `b'. The operation is performed according to
1725 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1726 */
1727
1728static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1729{
1730    bool sign = a.sign ^ b.sign;
1731
1732    if (a.cls == float_class_normal && b.cls == float_class_normal) {
1733        uint64_t n0, n1, q, r;
1734        int exp = a.exp - b.exp;
1735
1736        /*
1737         * We want a 2*N / N-bit division to produce exactly an N-bit
1738         * result, so that we do not lose any precision and so that we
1739         * do not have to renormalize afterward.  If A.frac < B.frac,
1740         * then division would produce an (N-1)-bit result; shift A left
1741         * by one to produce the an N-bit result, and decrement the
1742         * exponent to match.
1743         *
1744         * The udiv_qrnnd algorithm that we're using requires normalization,
1745         * i.e. the msb of the denominator must be set.  Since we know that
1746         * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
1747         * by one (more), and the remainder must be shifted right by one.
1748         */
1749        if (a.frac < b.frac) {
1750            exp -= 1;
1751            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
1752        } else {
1753            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
1754        }
1755        q = udiv_qrnnd(&r, n1, n0, b.frac << 1);
1756
1757        /*
1758         * Set lsb if there is a remainder, to set inexact.
1759         * As mentioned above, to find the actual value of the remainder we
1760         * would need to shift right, but (1) we are only concerned about
1761         * non-zero-ness, and (2) the remainder will always be even because
1762         * both inputs to the division primitive are even.
1763         */
1764        a.frac = q | (r != 0);
1765        a.sign = sign;
1766        a.exp = exp;
1767        return a;
1768    }
1769    /* handle all the NaN cases */
1770    if (is_nan(a.cls) || is_nan(b.cls)) {
1771        return pick_nan(a, b, s);
1772    }
1773    /* 0/0 or Inf/Inf */
1774    if (a.cls == b.cls
1775        &&
1776        (a.cls == float_class_inf || a.cls == float_class_zero)) {
1777        s->float_exception_flags |= float_flag_invalid;
1778        return parts_default_nan(s);
1779    }
1780    /* Inf / x or 0 / x */
1781    if (a.cls == float_class_inf || a.cls == float_class_zero) {
1782        a.sign = sign;
1783        return a;
1784    }
1785    /* Div 0 => Inf */
1786    if (b.cls == float_class_zero) {
1787        s->float_exception_flags |= float_flag_divbyzero;
1788        a.cls = float_class_inf;
1789        a.sign = sign;
1790        return a;
1791    }
1792    /* Div by Inf */
1793    if (b.cls == float_class_inf) {
1794        a.cls = float_class_zero;
1795        a.sign = sign;
1796        return a;
1797    }
1798    g_assert_not_reached();
1799}
1800
1801float16 float16_div(float16 a, float16 b, float_status *status)
1802{
1803    FloatParts pa = float16_unpack_canonical(a, status);
1804    FloatParts pb = float16_unpack_canonical(b, status);
1805    FloatParts pr = div_floats(pa, pb, status);
1806
1807    return float16_round_pack_canonical(pr, status);
1808}
1809
1810static float32 QEMU_SOFTFLOAT_ATTR
1811soft_f32_div(float32 a, float32 b, float_status *status)
1812{
1813    FloatParts pa = float32_unpack_canonical(a, status);
1814    FloatParts pb = float32_unpack_canonical(b, status);
1815    FloatParts pr = div_floats(pa, pb, status);
1816
1817    return float32_round_pack_canonical(pr, status);
1818}
1819
1820static float64 QEMU_SOFTFLOAT_ATTR
1821soft_f64_div(float64 a, float64 b, float_status *status)
1822{
1823    FloatParts pa = float64_unpack_canonical(a, status);
1824    FloatParts pb = float64_unpack_canonical(b, status);
1825    FloatParts pr = div_floats(pa, pb, status);
1826
1827    return float64_round_pack_canonical(pr, status);
1828}
1829
1830static float hard_f32_div(float a, float b)
1831{
1832    return a / b;
1833}
1834
1835static double hard_f64_div(double a, double b)
1836{
1837    return a / b;
1838}
1839
1840static bool f32_div_pre(union_float32 a, union_float32 b)
1841{
1842    if (QEMU_HARDFLOAT_2F32_USE_FP) {
1843        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1844               fpclassify(b.h) == FP_NORMAL;
1845    }
1846    return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1847}
1848
1849static bool f64_div_pre(union_float64 a, union_float64 b)
1850{
1851    if (QEMU_HARDFLOAT_2F64_USE_FP) {
1852        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
1853               fpclassify(b.h) == FP_NORMAL;
1854    }
1855    return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1856}
1857
1858static bool f32_div_post(union_float32 a, union_float32 b)
1859{
1860    if (QEMU_HARDFLOAT_2F32_USE_FP) {
1861        return fpclassify(a.h) != FP_ZERO;
1862    }
1863    return !float32_is_zero(a.s);
1864}
1865
1866static bool f64_div_post(union_float64 a, union_float64 b)
1867{
1868    if (QEMU_HARDFLOAT_2F64_USE_FP) {
1869        return fpclassify(a.h) != FP_ZERO;
1870    }
1871    return !float64_is_zero(a.s);
1872}
1873
1874float32 QEMU_FLATTEN
1875float32_div(float32 a, float32 b, float_status *s)
1876{
1877    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
1878                        f32_div_pre, f32_div_post);
1879}
1880
1881float64 QEMU_FLATTEN
1882float64_div(float64 a, float64 b, float_status *s)
1883{
1884    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
1885                        f64_div_pre, f64_div_post);
1886}
1887
1888/*
1889 * Returns the result of dividing the bfloat16
1890 * value `a' by the corresponding value `b'.
1891 */
1892
1893bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
1894{
1895    FloatParts pa = bfloat16_unpack_canonical(a, status);
1896    FloatParts pb = bfloat16_unpack_canonical(b, status);
1897    FloatParts pr = div_floats(pa, pb, status);
1898
1899    return bfloat16_round_pack_canonical(pr, status);
1900}
1901
1902/*
1903 * Float to Float conversions
1904 *
1905 * Returns the result of converting one float format to another. The
1906 * conversion is performed according to the IEC/IEEE Standard for
1907 * Binary Floating-Point Arithmetic.
1908 *
1909 * The float_to_float helper only needs to take care of raising
1910 * invalid exceptions and handling the conversion on NaNs.
1911 */
1912
1913static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1914                                 float_status *s)
1915{
1916    if (dstf->arm_althp) {
1917        switch (a.cls) {
1918        case float_class_qnan:
1919        case float_class_snan:
1920            /* There is no NaN in the destination format.  Raise Invalid
1921             * and return a zero with the sign of the input NaN.
1922             */
1923            s->float_exception_flags |= float_flag_invalid;
1924            a.cls = float_class_zero;
1925            a.frac = 0;
1926            a.exp = 0;
1927            break;
1928
1929        case float_class_inf:
1930            /* There is no Inf in the destination format.  Raise Invalid
1931             * and return the maximum normal with the correct sign.
1932             */
1933            s->float_exception_flags |= float_flag_invalid;
1934            a.cls = float_class_normal;
1935            a.exp = dstf->exp_max;
1936            a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1937            break;
1938
1939        default:
1940            break;
1941        }
1942    } else if (is_nan(a.cls)) {
1943        if (is_snan(a.cls)) {
1944            s->float_exception_flags |= float_flag_invalid;
1945            a = parts_silence_nan(a, s);
1946        }
1947        if (s->default_nan_mode) {
1948            return parts_default_nan(s);
1949        }
1950    }
1951    return a;
1952}
1953
1954float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1955{
1956    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1957    FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1958    FloatParts pr = float_to_float(p, &float32_params, s);
1959    return float32_round_pack_canonical(pr, s);
1960}
1961
1962float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1963{
1964    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1965    FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1966    FloatParts pr = float_to_float(p, &float64_params, s);
1967    return float64_round_pack_canonical(pr, s);
1968}
1969
1970float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1971{
1972    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1973    FloatParts p = float32_unpack_canonical(a, s);
1974    FloatParts pr = float_to_float(p, fmt16, s);
1975    return float16a_round_pack_canonical(pr, s, fmt16);
1976}
1977
1978static float64 QEMU_SOFTFLOAT_ATTR
1979soft_float32_to_float64(float32 a, float_status *s)
1980{
1981    FloatParts p = float32_unpack_canonical(a, s);
1982    FloatParts pr = float_to_float(p, &float64_params, s);
1983    return float64_round_pack_canonical(pr, s);
1984}
1985
1986float64 float32_to_float64(float32 a, float_status *s)
1987{
1988    if (likely(float32_is_normal(a))) {
1989        /* Widening conversion can never produce inexact results.  */
1990        union_float32 uf;
1991        union_float64 ud;
1992        uf.s = a;
1993        ud.h = uf.h;
1994        return ud.s;
1995    } else if (float32_is_zero(a)) {
1996        return float64_set_sign(float64_zero, float32_is_neg(a));
1997    } else {
1998        return soft_float32_to_float64(a, s);
1999    }
2000}
2001
2002float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2003{
2004    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2005    FloatParts p = float64_unpack_canonical(a, s);
2006    FloatParts pr = float_to_float(p, fmt16, s);
2007    return float16a_round_pack_canonical(pr, s, fmt16);
2008}
2009
2010float32 float64_to_float32(float64 a, float_status *s)
2011{
2012    FloatParts p = float64_unpack_canonical(a, s);
2013    FloatParts pr = float_to_float(p, &float32_params, s);
2014    return float32_round_pack_canonical(pr, s);
2015}
2016
2017float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2018{
2019    FloatParts p = bfloat16_unpack_canonical(a, s);
2020    FloatParts pr = float_to_float(p, &float32_params, s);
2021    return float32_round_pack_canonical(pr, s);
2022}
2023
2024float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2025{
2026    FloatParts p = bfloat16_unpack_canonical(a, s);
2027    FloatParts pr = float_to_float(p, &float64_params, s);
2028    return float64_round_pack_canonical(pr, s);
2029}
2030
2031bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2032{
2033    FloatParts p = float32_unpack_canonical(a, s);
2034    FloatParts pr = float_to_float(p, &bfloat16_params, s);
2035    return bfloat16_round_pack_canonical(pr, s);
2036}
2037
2038bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2039{
2040    FloatParts p = float64_unpack_canonical(a, s);
2041    FloatParts pr = float_to_float(p, &bfloat16_params, s);
2042    return bfloat16_round_pack_canonical(pr, s);
2043}
2044
2045/*
2046 * Rounds the floating-point value `a' to an integer, and returns the
2047 * result as a floating-point value. The operation is performed
2048 * according to the IEC/IEEE Standard for Binary Floating-Point
2049 * Arithmetic.
2050 */
2051
2052static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode,
2053                               int scale, float_status *s)
2054{
2055    switch (a.cls) {
2056    case float_class_qnan:
2057    case float_class_snan:
2058        return return_nan(a, s);
2059
2060    case float_class_zero:
2061    case float_class_inf:
2062        /* already "integral" */
2063        break;
2064
2065    case float_class_normal:
2066        scale = MIN(MAX(scale, -0x10000), 0x10000);
2067        a.exp += scale;
2068
2069        if (a.exp >= DECOMPOSED_BINARY_POINT) {
2070            /* already integral */
2071            break;
2072        }
2073        if (a.exp < 0) {
2074            bool one;
2075            /* all fractional */
2076            s->float_exception_flags |= float_flag_inexact;
2077            switch (rmode) {
2078            case float_round_nearest_even:
2079                one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
2080                break;
2081            case float_round_ties_away:
2082                one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
2083                break;
2084            case float_round_to_zero:
2085                one = false;
2086                break;
2087            case float_round_up:
2088                one = !a.sign;
2089                break;
2090            case float_round_down:
2091                one = a.sign;
2092                break;
2093            case float_round_to_odd:
2094                one = true;
2095                break;
2096            default:
2097                g_assert_not_reached();
2098            }
2099
2100            if (one) {
2101                a.frac = DECOMPOSED_IMPLICIT_BIT;
2102                a.exp = 0;
2103            } else {
2104                a.cls = float_class_zero;
2105            }
2106        } else {
2107            uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
2108            uint64_t frac_lsbm1 = frac_lsb >> 1;
2109            uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
2110            uint64_t rnd_mask = rnd_even_mask >> 1;
2111            uint64_t inc;
2112
2113            switch (rmode) {
2114            case float_round_nearest_even:
2115                inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
2116                break;
2117            case float_round_ties_away:
2118                inc = frac_lsbm1;
2119                break;
2120            case float_round_to_zero:
2121                inc = 0;
2122                break;
2123            case float_round_up:
2124                inc = a.sign ? 0 : rnd_mask;
2125                break;
2126            case float_round_down:
2127                inc = a.sign ? rnd_mask : 0;
2128                break;
2129            case float_round_to_odd:
2130                inc = a.frac & frac_lsb ? 0 : rnd_mask;
2131                break;
2132            default:
2133                g_assert_not_reached();
2134            }
2135
2136            if (a.frac & rnd_mask) {
2137                s->float_exception_flags |= float_flag_inexact;
2138                a.frac += inc;
2139                a.frac &= ~rnd_mask;
2140                if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
2141                    a.frac >>= 1;
2142                    a.exp++;
2143                }
2144            }
2145        }
2146        break;
2147    default:
2148        g_assert_not_reached();
2149    }
2150    return a;
2151}
2152
2153float16 float16_round_to_int(float16 a, float_status *s)
2154{
2155    FloatParts pa = float16_unpack_canonical(a, s);
2156    FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2157    return float16_round_pack_canonical(pr, s);
2158}
2159
2160float32 float32_round_to_int(float32 a, float_status *s)
2161{
2162    FloatParts pa = float32_unpack_canonical(a, s);
2163    FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2164    return float32_round_pack_canonical(pr, s);
2165}
2166
2167float64 float64_round_to_int(float64 a, float_status *s)
2168{
2169    FloatParts pa = float64_unpack_canonical(a, s);
2170    FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2171    return float64_round_pack_canonical(pr, s);
2172}
2173
2174/*
2175 * Rounds the bfloat16 value `a' to an integer, and returns the
2176 * result as a bfloat16 value.
2177 */
2178
2179bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2180{
2181    FloatParts pa = bfloat16_unpack_canonical(a, s);
2182    FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
2183    return bfloat16_round_pack_canonical(pr, s);
2184}
2185
2186/*
2187 * Returns the result of converting the floating-point value `a' to
2188 * the two's complement integer format. The conversion is performed
2189 * according to the IEC/IEEE Standard for Binary Floating-Point
2190 * Arithmetic---which means in particular that the conversion is
2191 * rounded according to the current rounding mode. If `a' is a NaN,
2192 * the largest positive integer is returned. Otherwise, if the
2193 * conversion overflows, the largest integer with the same sign as `a'
2194 * is returned.
2195*/
2196
2197static int64_t round_to_int_and_pack(FloatParts in, FloatRoundMode rmode,
2198                                     int scale, int64_t min, int64_t max,
2199                                     float_status *s)
2200{
2201    uint64_t r;
2202    int orig_flags = get_float_exception_flags(s);
2203    FloatParts p = round_to_int(in, rmode, scale, s);
2204
2205    switch (p.cls) {
2206    case float_class_snan:
2207    case float_class_qnan:
2208        s->float_exception_flags = orig_flags | float_flag_invalid;
2209        return max;
2210    case float_class_inf:
2211        s->float_exception_flags = orig_flags | float_flag_invalid;
2212        return p.sign ? min : max;
2213    case float_class_zero:
2214        return 0;
2215    case float_class_normal:
2216        if (p.exp < DECOMPOSED_BINARY_POINT) {
2217            r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2218        } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2219            r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2220        } else {
2221            r = UINT64_MAX;
2222        }
2223        if (p.sign) {
2224            if (r <= -(uint64_t) min) {
2225                return -r;
2226            } else {
2227                s->float_exception_flags = orig_flags | float_flag_invalid;
2228                return min;
2229            }
2230        } else {
2231            if (r <= max) {
2232                return r;
2233            } else {
2234                s->float_exception_flags = orig_flags | float_flag_invalid;
2235                return max;
2236            }
2237        }
2238    default:
2239        g_assert_not_reached();
2240    }
2241}
2242
2243int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2244                              float_status *s)
2245{
2246    return round_to_int_and_pack(float16_unpack_canonical(a, s),
2247                                 rmode, scale, INT8_MIN, INT8_MAX, s);
2248}
2249
2250int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2251                                float_status *s)
2252{
2253    return round_to_int_and_pack(float16_unpack_canonical(a, s),
2254                                 rmode, scale, INT16_MIN, INT16_MAX, s);
2255}
2256
2257int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2258                                float_status *s)
2259{
2260    return round_to_int_and_pack(float16_unpack_canonical(a, s),
2261                                 rmode, scale, INT32_MIN, INT32_MAX, s);
2262}
2263
2264int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2265                                float_status *s)
2266{
2267    return round_to_int_and_pack(float16_unpack_canonical(a, s),
2268                                 rmode, scale, INT64_MIN, INT64_MAX, s);
2269}
2270
2271int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2272                                float_status *s)
2273{
2274    return round_to_int_and_pack(float32_unpack_canonical(a, s),
2275                                 rmode, scale, INT16_MIN, INT16_MAX, s);
2276}
2277
2278int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2279                                float_status *s)
2280{
2281    return round_to_int_and_pack(float32_unpack_canonical(a, s),
2282                                 rmode, scale, INT32_MIN, INT32_MAX, s);
2283}
2284
2285int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2286                                float_status *s)
2287{
2288    return round_to_int_and_pack(float32_unpack_canonical(a, s),
2289                                 rmode, scale, INT64_MIN, INT64_MAX, s);
2290}
2291
2292int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2293                                float_status *s)
2294{
2295    return round_to_int_and_pack(float64_unpack_canonical(a, s),
2296                                 rmode, scale, INT16_MIN, INT16_MAX, s);
2297}
2298
2299int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2300                                float_status *s)
2301{
2302    return round_to_int_and_pack(float64_unpack_canonical(a, s),
2303                                 rmode, scale, INT32_MIN, INT32_MAX, s);
2304}
2305
2306int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2307                                float_status *s)
2308{
2309    return round_to_int_and_pack(float64_unpack_canonical(a, s),
2310                                 rmode, scale, INT64_MIN, INT64_MAX, s);
2311}
2312
2313int8_t float16_to_int8(float16 a, float_status *s)
2314{
2315    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
2316}
2317
2318int16_t float16_to_int16(float16 a, float_status *s)
2319{
2320    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2321}
2322
2323int32_t float16_to_int32(float16 a, float_status *s)
2324{
2325    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2326}
2327
2328int64_t float16_to_int64(float16 a, float_status *s)
2329{
2330    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2331}
2332
2333int16_t float32_to_int16(float32 a, float_status *s)
2334{
2335    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2336}
2337
2338int32_t float32_to_int32(float32 a, float_status *s)
2339{
2340    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2341}
2342
2343int64_t float32_to_int64(float32 a, float_status *s)
2344{
2345    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2346}
2347
2348int16_t float64_to_int16(float64 a, float_status *s)
2349{
2350    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2351}
2352
2353int32_t float64_to_int32(float64 a, float_status *s)
2354{
2355    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2356}
2357
2358int64_t float64_to_int64(float64 a, float_status *s)
2359{
2360    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2361}
2362
2363int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2364{
2365    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2366}
2367
2368int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2369{
2370    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2371}
2372
2373int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2374{
2375    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2376}
2377
2378int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2379{
2380    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
2381}
2382
2383int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2384{
2385    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
2386}
2387
2388int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2389{
2390    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
2391}
2392
2393int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2394{
2395    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
2396}
2397
2398int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2399{
2400    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
2401}
2402
2403int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2404{
2405    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
2406}
2407
2408/*
2409 * Returns the result of converting the floating-point value `a' to
2410 * the two's complement integer format.
2411 */
2412
2413int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2414                                 float_status *s)
2415{
2416    return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
2417                                 rmode, scale, INT16_MIN, INT16_MAX, s);
2418}
2419
2420int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2421                                 float_status *s)
2422{
2423    return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
2424                                 rmode, scale, INT32_MIN, INT32_MAX, s);
2425}
2426
2427int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
2428                                 float_status *s)
2429{
2430    return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
2431                                 rmode, scale, INT64_MIN, INT64_MAX, s);
2432}
2433
2434int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
2435{
2436    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
2437}
2438
2439int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
2440{
2441    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
2442}
2443
2444int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
2445{
2446    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
2447}
2448
2449int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
2450{
2451    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
2452}
2453
2454int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
2455{
2456    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
2457}
2458
2459int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
2460{
2461    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
2462}
2463
2464/*
2465 *  Returns the result of converting the floating-point value `a' to
2466 *  the unsigned integer format. The conversion is performed according
2467 *  to the IEC/IEEE Standard for Binary Floating-Point
2468 *  Arithmetic---which means in particular that the conversion is
2469 *  rounded according to the current rounding mode. If `a' is a NaN,
2470 *  the largest unsigned integer is returned. Otherwise, if the
2471 *  conversion overflows, the largest unsigned integer is returned. If
2472 *  the 'a' is negative, the result is rounded and zero is returned;
2473 *  values that do not round to zero will raise the inexact exception
2474 *  flag.
2475 */
2476
2477static uint64_t round_to_uint_and_pack(FloatParts in, FloatRoundMode rmode,
2478                                       int scale, uint64_t max,
2479                                       float_status *s)
2480{
2481    int orig_flags = get_float_exception_flags(s);
2482    FloatParts p = round_to_int(in, rmode, scale, s);
2483    uint64_t r;
2484
2485    switch (p.cls) {
2486    case float_class_snan:
2487    case float_class_qnan:
2488        s->float_exception_flags = orig_flags | float_flag_invalid;
2489        return max;
2490    case float_class_inf:
2491        s->float_exception_flags = orig_flags | float_flag_invalid;
2492        return p.sign ? 0 : max;
2493    case float_class_zero:
2494        return 0;
2495    case float_class_normal:
2496        if (p.sign) {
2497            s->float_exception_flags = orig_flags | float_flag_invalid;
2498            return 0;
2499        }
2500
2501        if (p.exp < DECOMPOSED_BINARY_POINT) {
2502            r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2503        } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
2504            r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2505        } else {
2506            s->float_exception_flags = orig_flags | float_flag_invalid;
2507            return max;
2508        }
2509
2510        /* For uint64 this will never trip, but if p.exp is too large
2511         * to shift a decomposed fraction we shall have exited via the
2512         * 3rd leg above.
2513         */
2514        if (r > max) {
2515            s->float_exception_flags = orig_flags | float_flag_invalid;
2516            return max;
2517        }
2518        return r;
2519    default:
2520        g_assert_not_reached();
2521    }
2522}
2523
2524uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2525                                float_status *s)
2526{
2527    return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2528                                  rmode, scale, UINT8_MAX, s);
2529}
2530
2531uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2532                                  float_status *s)
2533{
2534    return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2535                                  rmode, scale, UINT16_MAX, s);
2536}
2537
2538uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2539                                  float_status *s)
2540{
2541    return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2542                                  rmode, scale, UINT32_MAX, s);
2543}
2544
2545uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2546                                  float_status *s)
2547{
2548    return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2549                                  rmode, scale, UINT64_MAX, s);
2550}
2551
2552uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2553                                  float_status *s)
2554{
2555    return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2556                                  rmode, scale, UINT16_MAX, s);
2557}
2558
2559uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2560                                  float_status *s)
2561{
2562    return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2563                                  rmode, scale, UINT32_MAX, s);
2564}
2565
2566uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2567                                  float_status *s)
2568{
2569    return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2570                                  rmode, scale, UINT64_MAX, s);
2571}
2572
2573uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2574                                  float_status *s)
2575{
2576    return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2577                                  rmode, scale, UINT16_MAX, s);
2578}
2579
2580uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2581                                  float_status *s)
2582{
2583    return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2584                                  rmode, scale, UINT32_MAX, s);
2585}
2586
2587uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
2588                                  float_status *s)
2589{
2590    return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2591                                  rmode, scale, UINT64_MAX, s);
2592}
2593
2594uint8_t float16_to_uint8(float16 a, float_status *s)
2595{
2596    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
2597}
2598
2599uint16_t float16_to_uint16(float16 a, float_status *s)
2600{
2601    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2602}
2603
2604uint32_t float16_to_uint32(float16 a, float_status *s)
2605{
2606    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2607}
2608
2609uint64_t float16_to_uint64(float16 a, float_status *s)
2610{
2611    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2612}
2613
2614uint16_t float32_to_uint16(float32 a, float_status *s)
2615{
2616    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2617}
2618
2619uint32_t float32_to_uint32(float32 a, float_status *s)
2620{
2621    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2622}
2623
2624uint64_t float32_to_uint64(float32 a, float_status *s)
2625{
2626    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2627}
2628
2629uint16_t float64_to_uint16(float64 a, float_status *s)
2630{
2631    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2632}
2633
2634uint32_t float64_to_uint32(float64 a, float_status *s)
2635{
2636    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2637}
2638
2639uint64_t float64_to_uint64(float64 a, float_status *s)
2640{
2641    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2642}
2643
2644uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2645{
2646    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2647}
2648
2649uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2650{
2651    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2652}
2653
2654uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2655{
2656    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2657}
2658
2659uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2660{
2661    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2662}
2663
2664uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2665{
2666    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2667}
2668
2669uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2670{
2671    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2672}
2673
2674uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2675{
2676    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2677}
2678
2679uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2680{
2681    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2682}
2683
2684uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2685{
2686    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2687}
2688
2689/*
2690 *  Returns the result of converting the bfloat16 value `a' to
2691 *  the unsigned integer format.
2692 */
2693
2694uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
2695                                   int scale, float_status *s)
2696{
2697    return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
2698                                  rmode, scale, UINT16_MAX, s);
2699}
2700
2701uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
2702                                   int scale, float_status *s)
2703{
2704    return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
2705                                  rmode, scale, UINT32_MAX, s);
2706}
2707
2708uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
2709                                   int scale, float_status *s)
2710{
2711    return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
2712                                  rmode, scale, UINT64_MAX, s);
2713}
2714
2715uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
2716{
2717    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
2718}
2719
2720uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
2721{
2722    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
2723}
2724
2725uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
2726{
2727    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
2728}
2729
2730uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
2731{
2732    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
2733}
2734
2735uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
2736{
2737    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
2738}
2739
2740uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
2741{
2742    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
2743}
2744
2745/*
2746 * Integer to float conversions
2747 *
2748 * Returns the result of converting the two's complement integer `a'
2749 * to the floating-point format. The conversion is performed according
2750 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2751 */
2752
2753static FloatParts int_to_float(int64_t a, int scale, float_status *status)
2754{
2755    FloatParts r = { .sign = false };
2756
2757    if (a == 0) {
2758        r.cls = float_class_zero;
2759    } else {
2760        uint64_t f = a;
2761        int shift;
2762
2763        r.cls = float_class_normal;
2764        if (a < 0) {
2765            f = -f;
2766            r.sign = true;
2767        }
2768        shift = clz64(f) - 1;
2769        scale = MIN(MAX(scale, -0x10000), 0x10000);
2770
2771        r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2772        r.frac = (shift < 0 ? DECOMPOSED_IMPLICIT_BIT : f << shift);
2773    }
2774
2775    return r;
2776}
2777
2778float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
2779{
2780    FloatParts pa = int_to_float(a, scale, status);
2781    return float16_round_pack_canonical(pa, status);
2782}
2783
2784float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2785{
2786    return int64_to_float16_scalbn(a, scale, status);
2787}
2788
2789float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2790{
2791    return int64_to_float16_scalbn(a, scale, status);
2792}
2793
2794float16 int64_to_float16(int64_t a, float_status *status)
2795{
2796    return int64_to_float16_scalbn(a, 0, status);
2797}
2798
2799float16 int32_to_float16(int32_t a, float_status *status)
2800{
2801    return int64_to_float16_scalbn(a, 0, status);
2802}
2803
2804float16 int16_to_float16(int16_t a, float_status *status)
2805{
2806    return int64_to_float16_scalbn(a, 0, status);
2807}
2808
2809float16 int8_to_float16(int8_t a, float_status *status)
2810{
2811    return int64_to_float16_scalbn(a, 0, status);
2812}
2813
2814float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
2815{
2816    FloatParts pa = int_to_float(a, scale, status);
2817    return float32_round_pack_canonical(pa, status);
2818}
2819
2820float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
2821{
2822    return int64_to_float32_scalbn(a, scale, status);
2823}
2824
2825float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
2826{
2827    return int64_to_float32_scalbn(a, scale, status);
2828}
2829
2830float32 int64_to_float32(int64_t a, float_status *status)
2831{
2832    return int64_to_float32_scalbn(a, 0, status);
2833}
2834
2835float32 int32_to_float32(int32_t a, float_status *status)
2836{
2837    return int64_to_float32_scalbn(a, 0, status);
2838}
2839
2840float32 int16_to_float32(int16_t a, float_status *status)
2841{
2842    return int64_to_float32_scalbn(a, 0, status);
2843}
2844
2845float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
2846{
2847    FloatParts pa = int_to_float(a, scale, status);
2848    return float64_round_pack_canonical(pa, status);
2849}
2850
2851float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
2852{
2853    return int64_to_float64_scalbn(a, scale, status);
2854}
2855
2856float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
2857{
2858    return int64_to_float64_scalbn(a, scale, status);
2859}
2860
2861float64 int64_to_float64(int64_t a, float_status *status)
2862{
2863    return int64_to_float64_scalbn(a, 0, status);
2864}
2865
2866float64 int32_to_float64(int32_t a, float_status *status)
2867{
2868    return int64_to_float64_scalbn(a, 0, status);
2869}
2870
2871float64 int16_to_float64(int16_t a, float_status *status)
2872{
2873    return int64_to_float64_scalbn(a, 0, status);
2874}
2875
2876/*
2877 * Returns the result of converting the two's complement integer `a'
2878 * to the bfloat16 format.
2879 */
2880
2881bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
2882{
2883    FloatParts pa = int_to_float(a, scale, status);
2884    return bfloat16_round_pack_canonical(pa, status);
2885}
2886
2887bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
2888{
2889    return int64_to_bfloat16_scalbn(a, scale, status);
2890}
2891
2892bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
2893{
2894    return int64_to_bfloat16_scalbn(a, scale, status);
2895}
2896
2897bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
2898{
2899    return int64_to_bfloat16_scalbn(a, 0, status);
2900}
2901
2902bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
2903{
2904    return int64_to_bfloat16_scalbn(a, 0, status);
2905}
2906
2907bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
2908{
2909    return int64_to_bfloat16_scalbn(a, 0, status);
2910}
2911
2912/*
2913 * Unsigned Integer to float conversions
2914 *
2915 * Returns the result of converting the unsigned integer `a' to the
2916 * floating-point format. The conversion is performed according to the
2917 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2918 */
2919
2920static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
2921{
2922    FloatParts r = { .sign = false };
2923
2924    if (a == 0) {
2925        r.cls = float_class_zero;
2926    } else {
2927        scale = MIN(MAX(scale, -0x10000), 0x10000);
2928        r.cls = float_class_normal;
2929        if ((int64_t)a < 0) {
2930            r.exp = DECOMPOSED_BINARY_POINT + 1 + scale;
2931            shift64RightJamming(a, 1, &a);
2932            r.frac = a;
2933        } else {
2934            int shift = clz64(a) - 1;
2935            r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2936            r.frac = a << shift;
2937        }
2938    }
2939
2940    return r;
2941}
2942
2943float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
2944{
2945    FloatParts pa = uint_to_float(a, scale, status);
2946    return float16_round_pack_canonical(pa, status);
2947}
2948
2949float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
2950{
2951    return uint64_to_float16_scalbn(a, scale, status);
2952}
2953
2954float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
2955{
2956    return uint64_to_float16_scalbn(a, scale, status);
2957}
2958
2959float16 uint64_to_float16(uint64_t a, float_status *status)
2960{
2961    return uint64_to_float16_scalbn(a, 0, status);
2962}
2963
2964float16 uint32_to_float16(uint32_t a, float_status *status)
2965{
2966    return uint64_to_float16_scalbn(a, 0, status);
2967}
2968
2969float16 uint16_to_float16(uint16_t a, float_status *status)
2970{
2971    return uint64_to_float16_scalbn(a, 0, status);
2972}
2973
2974float16 uint8_to_float16(uint8_t a, float_status *status)
2975{
2976    return uint64_to_float16_scalbn(a, 0, status);
2977}
2978
2979float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
2980{
2981    FloatParts pa = uint_to_float(a, scale, status);
2982    return float32_round_pack_canonical(pa, status);
2983}
2984
2985float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
2986{
2987    return uint64_to_float32_scalbn(a, scale, status);
2988}
2989
2990float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
2991{
2992    return uint64_to_float32_scalbn(a, scale, status);
2993}
2994
2995float32 uint64_to_float32(uint64_t a, float_status *status)
2996{
2997    return uint64_to_float32_scalbn(a, 0, status);
2998}
2999
3000float32 uint32_to_float32(uint32_t a, float_status *status)
3001{
3002    return uint64_to_float32_scalbn(a, 0, status);
3003}
3004
3005float32 uint16_to_float32(uint16_t a, float_status *status)
3006{
3007    return uint64_to_float32_scalbn(a, 0, status);
3008}
3009
3010float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3011{
3012    FloatParts pa = uint_to_float(a, scale, status);
3013    return float64_round_pack_canonical(pa, status);
3014}
3015
3016float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3017{
3018    return uint64_to_float64_scalbn(a, scale, status);
3019}
3020
3021float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3022{
3023    return uint64_to_float64_scalbn(a, scale, status);
3024}
3025
3026float64 uint64_to_float64(uint64_t a, float_status *status)
3027{
3028    return uint64_to_float64_scalbn(a, 0, status);
3029}
3030
3031float64 uint32_to_float64(uint32_t a, float_status *status)
3032{
3033    return uint64_to_float64_scalbn(a, 0, status);
3034}
3035
3036float64 uint16_to_float64(uint16_t a, float_status *status)
3037{
3038    return uint64_to_float64_scalbn(a, 0, status);
3039}
3040
3041/*
3042 * Returns the result of converting the unsigned integer `a' to the
3043 * bfloat16 format.
3044 */
3045
3046bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3047{
3048    FloatParts pa = uint_to_float(a, scale, status);
3049    return bfloat16_round_pack_canonical(pa, status);
3050}
3051
3052bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3053{
3054    return uint64_to_bfloat16_scalbn(a, scale, status);
3055}
3056
3057bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3058{
3059    return uint64_to_bfloat16_scalbn(a, scale, status);
3060}
3061
3062bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3063{
3064    return uint64_to_bfloat16_scalbn(a, 0, status);
3065}
3066
3067bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3068{
3069    return uint64_to_bfloat16_scalbn(a, 0, status);
3070}
3071
3072bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3073{
3074    return uint64_to_bfloat16_scalbn(a, 0, status);
3075}
3076
3077/* Float Min/Max */
3078/* min() and max() functions. These can't be implemented as
3079 * 'compare and pick one input' because that would mishandle
3080 * NaNs and +0 vs -0.
3081 *
3082 * minnum() and maxnum() functions. These are similar to the min()
3083 * and max() functions but if one of the arguments is a QNaN and
3084 * the other is numerical then the numerical argument is returned.
3085 * SNaNs will get quietened before being returned.
3086 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
3087 * and maxNum() operations. min() and max() are the typical min/max
3088 * semantics provided by many CPUs which predate that specification.
3089 *
3090 * minnummag() and maxnummag() functions correspond to minNumMag()
3091 * and minNumMag() from the IEEE-754 2008.
3092 */
3093static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
3094                                bool ieee, bool ismag, float_status *s)
3095{
3096    if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
3097        if (ieee) {
3098            /* Takes two floating-point values `a' and `b', one of
3099             * which is a NaN, and returns the appropriate NaN
3100             * result. If either `a' or `b' is a signaling NaN,
3101             * the invalid exception is raised.
3102             */
3103            if (is_snan(a.cls) || is_snan(b.cls)) {
3104                return pick_nan(a, b, s);
3105            } else if (is_nan(a.cls) && !is_nan(b.cls)) {
3106                return b;
3107            } else if (is_nan(b.cls) && !is_nan(a.cls)) {
3108                return a;
3109            }
3110        }
3111        return pick_nan(a, b, s);
3112    } else {
3113        int a_exp, b_exp;
3114
3115        switch (a.cls) {
3116        case float_class_normal:
3117            a_exp = a.exp;
3118            break;
3119        case float_class_inf:
3120            a_exp = INT_MAX;
3121            break;
3122        case float_class_zero:
3123            a_exp = INT_MIN;
3124            break;
3125        default:
3126            g_assert_not_reached();
3127            break;
3128        }
3129        switch (b.cls) {
3130        case float_class_normal:
3131            b_exp = b.exp;
3132            break;
3133        case float_class_inf:
3134            b_exp = INT_MAX;
3135            break;
3136        case float_class_zero:
3137            b_exp = INT_MIN;
3138            break;
3139        default:
3140            g_assert_not_reached();
3141            break;
3142        }
3143
3144        if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
3145            bool a_less = a_exp < b_exp;
3146            if (a_exp == b_exp) {
3147                a_less = a.frac < b.frac;
3148            }
3149            return a_less ^ ismin ? b : a;
3150        }
3151
3152        if (a.sign == b.sign) {
3153            bool a_less = a_exp < b_exp;
3154            if (a_exp == b_exp) {
3155                a_less = a.frac < b.frac;
3156            }
3157            return a.sign ^ a_less ^ ismin ? b : a;
3158        } else {
3159            return a.sign ^ ismin ? b : a;
3160        }
3161    }
3162}
3163
3164#define MINMAX(sz, name, ismin, isiee, ismag)                           \
3165float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b,      \
3166                                     float_status *s)                   \
3167{                                                                       \
3168    FloatParts pa = float ## sz ## _unpack_canonical(a, s);             \
3169    FloatParts pb = float ## sz ## _unpack_canonical(b, s);             \
3170    FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);      \
3171                                                                        \
3172    return float ## sz ## _round_pack_canonical(pr, s);                 \
3173}
3174
3175MINMAX(16, min, true, false, false)
3176MINMAX(16, minnum, true, true, false)
3177MINMAX(16, minnummag, true, true, true)
3178MINMAX(16, max, false, false, false)
3179MINMAX(16, maxnum, false, true, false)
3180MINMAX(16, maxnummag, false, true, true)
3181
3182MINMAX(32, min, true, false, false)
3183MINMAX(32, minnum, true, true, false)
3184MINMAX(32, minnummag, true, true, true)
3185MINMAX(32, max, false, false, false)
3186MINMAX(32, maxnum, false, true, false)
3187MINMAX(32, maxnummag, false, true, true)
3188
3189MINMAX(64, min, true, false, false)
3190MINMAX(64, minnum, true, true, false)
3191MINMAX(64, minnummag, true, true, true)
3192MINMAX(64, max, false, false, false)
3193MINMAX(64, maxnum, false, true, false)
3194MINMAX(64, maxnummag, false, true, true)
3195
3196#undef MINMAX
3197
3198#define BF16_MINMAX(name, ismin, isiee, ismag)                          \
3199bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s)     \
3200{                                                                       \
3201    FloatParts pa = bfloat16_unpack_canonical(a, s);                    \
3202    FloatParts pb = bfloat16_unpack_canonical(b, s);                    \
3203    FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);      \
3204                                                                        \
3205    return bfloat16_round_pack_canonical(pr, s);                        \
3206}
3207
3208BF16_MINMAX(min, true, false, false)
3209BF16_MINMAX(minnum, true, true, false)
3210BF16_MINMAX(minnummag, true, true, true)
3211BF16_MINMAX(max, false, false, false)
3212BF16_MINMAX(maxnum, false, true, false)
3213BF16_MINMAX(maxnummag, false, true, true)
3214
3215#undef BF16_MINMAX
3216
3217/* Floating point compare */
3218static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet,
3219                                    float_status *s)
3220{
3221    if (is_nan(a.cls) || is_nan(b.cls)) {
3222        if (!is_quiet ||
3223            a.cls == float_class_snan ||
3224            b.cls == float_class_snan) {
3225            s->float_exception_flags |= float_flag_invalid;
3226        }
3227        return float_relation_unordered;
3228    }
3229
3230    if (a.cls == float_class_zero) {
3231        if (b.cls == float_class_zero) {
3232            return float_relation_equal;
3233        }
3234        return b.sign ? float_relation_greater : float_relation_less;
3235    } else if (b.cls == float_class_zero) {
3236        return a.sign ? float_relation_less : float_relation_greater;
3237    }
3238
3239    /* The only really important thing about infinity is its sign. If
3240     * both are infinities the sign marks the smallest of the two.
3241     */
3242    if (a.cls == float_class_inf) {
3243        if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
3244            return float_relation_equal;
3245        }
3246        return a.sign ? float_relation_less : float_relation_greater;
3247    } else if (b.cls == float_class_inf) {
3248        return b.sign ? float_relation_greater : float_relation_less;
3249    }
3250
3251    if (a.sign != b.sign) {
3252        return a.sign ? float_relation_less : float_relation_greater;
3253    }
3254
3255    if (a.exp == b.exp) {
3256        if (a.frac == b.frac) {
3257            return float_relation_equal;
3258        }
3259        if (a.sign) {
3260            return a.frac > b.frac ?
3261                float_relation_less : float_relation_greater;
3262        } else {
3263            return a.frac > b.frac ?
3264                float_relation_greater : float_relation_less;
3265        }
3266    } else {
3267        if (a.sign) {
3268            return a.exp > b.exp ? float_relation_less : float_relation_greater;
3269        } else {
3270            return a.exp > b.exp ? float_relation_greater : float_relation_less;
3271        }
3272    }
3273}
3274
3275#define COMPARE(name, attr, sz)                                         \
3276static int attr                                                         \
3277name(float ## sz a, float ## sz b, bool is_quiet, float_status *s)      \
3278{                                                                       \
3279    FloatParts pa = float ## sz ## _unpack_canonical(a, s);             \
3280    FloatParts pb = float ## sz ## _unpack_canonical(b, s);             \
3281    return compare_floats(pa, pb, is_quiet, s);                         \
3282}
3283
3284COMPARE(soft_f16_compare, QEMU_FLATTEN, 16)
3285COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, 32)
3286COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, 64)
3287
3288#undef COMPARE
3289
3290FloatRelation float16_compare(float16 a, float16 b, float_status *s)
3291{
3292    return soft_f16_compare(a, b, false, s);
3293}
3294
3295FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
3296{
3297    return soft_f16_compare(a, b, true, s);
3298}
3299
3300static FloatRelation QEMU_FLATTEN
3301f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
3302{
3303    union_float32 ua, ub;
3304
3305    ua.s = xa;
3306    ub.s = xb;
3307
3308    if (QEMU_NO_HARDFLOAT) {
3309        goto soft;
3310    }
3311
3312    float32_input_flush2(&ua.s, &ub.s, s);
3313    if (isgreaterequal(ua.h, ub.h)) {
3314        if (isgreater(ua.h, ub.h)) {
3315            return float_relation_greater;
3316        }
3317        return float_relation_equal;
3318    }
3319    if (likely(isless(ua.h, ub.h))) {
3320        return float_relation_less;
3321    }
3322    /* The only condition remaining is unordered.
3323     * Fall through to set flags.
3324     */
3325 soft:
3326    return soft_f32_compare(ua.s, ub.s, is_quiet, s);
3327}
3328
3329FloatRelation float32_compare(float32 a, float32 b, float_status *s)
3330{
3331    return f32_compare(a, b, false, s);
3332}
3333
3334FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
3335{
3336    return f32_compare(a, b, true, s);
3337}
3338
3339static FloatRelation QEMU_FLATTEN
3340f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
3341{
3342    union_float64 ua, ub;
3343
3344    ua.s = xa;
3345    ub.s = xb;
3346
3347    if (QEMU_NO_HARDFLOAT) {
3348        goto soft;
3349    }
3350
3351    float64_input_flush2(&ua.s, &ub.s, s);
3352    if (isgreaterequal(ua.h, ub.h)) {
3353        if (isgreater(ua.h, ub.h)) {
3354            return float_relation_greater;
3355        }
3356        return float_relation_equal;
3357    }
3358    if (likely(isless(ua.h, ub.h))) {
3359        return float_relation_less;
3360    }
3361    /* The only condition remaining is unordered.
3362     * Fall through to set flags.
3363     */
3364 soft:
3365    return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3366}
3367
3368FloatRelation float64_compare(float64 a, float64 b, float_status *s)
3369{
3370    return f64_compare(a, b, false, s);
3371}
3372
3373FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
3374{
3375    return f64_compare(a, b, true, s);
3376}
3377
3378static FloatRelation QEMU_FLATTEN
3379soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
3380{
3381    FloatParts pa = bfloat16_unpack_canonical(a, s);
3382    FloatParts pb = bfloat16_unpack_canonical(b, s);
3383    return compare_floats(pa, pb, is_quiet, s);
3384}
3385
3386FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
3387{
3388    return soft_bf16_compare(a, b, false, s);
3389}
3390
3391FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
3392{
3393    return soft_bf16_compare(a, b, true, s);
3394}
3395
3396/* Multiply A by 2 raised to the power N.  */
3397static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
3398{
3399    if (unlikely(is_nan(a.cls))) {
3400        return return_nan(a, s);
3401    }
3402    if (a.cls == float_class_normal) {
3403        /* The largest float type (even though not supported by FloatParts)
3404         * is float128, which has a 15 bit exponent.  Bounding N to 16 bits
3405         * still allows rounding to infinity, without allowing overflow
3406         * within the int32_t that backs FloatParts.exp.
3407         */
3408        n = MIN(MAX(n, -0x10000), 0x10000);
3409        a.exp += n;
3410    }
3411    return a;
3412}
3413
3414float16 float16_scalbn(float16 a, int n, float_status *status)
3415{
3416    FloatParts pa = float16_unpack_canonical(a, status);
3417    FloatParts pr = scalbn_decomposed(pa, n, status);
3418    return float16_round_pack_canonical(pr, status);
3419}
3420
3421float32 float32_scalbn(float32 a, int n, float_status *status)
3422{
3423    FloatParts pa = float32_unpack_canonical(a, status);
3424    FloatParts pr = scalbn_decomposed(pa, n, status);
3425    return float32_round_pack_canonical(pr, status);
3426}
3427
3428float64 float64_scalbn(float64 a, int n, float_status *status)
3429{
3430    FloatParts pa = float64_unpack_canonical(a, status);
3431    FloatParts pr = scalbn_decomposed(pa, n, status);
3432    return float64_round_pack_canonical(pr, status);
3433}
3434
3435bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
3436{
3437    FloatParts pa = bfloat16_unpack_canonical(a, status);
3438    FloatParts pr = scalbn_decomposed(pa, n, status);
3439    return bfloat16_round_pack_canonical(pr, status);
3440}
3441
3442/*
3443 * Square Root
3444 *
3445 * The old softfloat code did an approximation step before zeroing in
3446 * on the final result. However for simpleness we just compute the
3447 * square root by iterating down from the implicit bit to enough extra
3448 * bits to ensure we get a correctly rounded result.
3449 *
3450 * This does mean however the calculation is slower than before,
3451 * especially for 64 bit floats.
3452 */
3453
3454static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
3455{
3456    uint64_t a_frac, r_frac, s_frac;
3457    int bit, last_bit;
3458
3459    if (is_nan(a.cls)) {
3460        return return_nan(a, s);
3461    }
3462    if (a.cls == float_class_zero) {
3463        return a;  /* sqrt(+-0) = +-0 */
3464    }
3465    if (a.sign) {
3466        s->float_exception_flags |= float_flag_invalid;
3467        return parts_default_nan(s);
3468    }
3469    if (a.cls == float_class_inf) {
3470        return a;  /* sqrt(+inf) = +inf */
3471    }
3472
3473    assert(a.cls == float_class_normal);
3474
3475    /* We need two overflow bits at the top. Adding room for that is a
3476     * right shift. If the exponent is odd, we can discard the low bit
3477     * by multiplying the fraction by 2; that's a left shift. Combine
3478     * those and we shift right if the exponent is even.
3479     */
3480    a_frac = a.frac;
3481    if (!(a.exp & 1)) {
3482        a_frac >>= 1;
3483    }
3484    a.exp >>= 1;
3485
3486    /* Bit-by-bit computation of sqrt.  */
3487    r_frac = 0;
3488    s_frac = 0;
3489
3490    /* Iterate from implicit bit down to the 3 extra bits to compute a
3491     * properly rounded result. Remember we've inserted one more bit
3492     * at the top, so these positions are one less.
3493     */
3494    bit = DECOMPOSED_BINARY_POINT - 1;
3495    last_bit = MAX(p->frac_shift - 4, 0);
3496    do {
3497        uint64_t q = 1ULL << bit;
3498        uint64_t t_frac = s_frac + q;
3499        if (t_frac <= a_frac) {
3500            s_frac = t_frac + q;
3501            a_frac -= t_frac;
3502            r_frac += q;
3503        }
3504        a_frac <<= 1;
3505    } while (--bit >= last_bit);
3506
3507    /* Undo the right shift done above. If there is any remaining
3508     * fraction, the result is inexact. Set the sticky bit.
3509     */
3510    a.frac = (r_frac << 1) + (a_frac != 0);
3511
3512    return a;
3513}
3514
3515float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
3516{
3517    FloatParts pa = float16_unpack_canonical(a, status);
3518    FloatParts pr = sqrt_float(pa, status, &float16_params);
3519    return float16_round_pack_canonical(pr, status);
3520}
3521
3522static float32 QEMU_SOFTFLOAT_ATTR
3523soft_f32_sqrt(float32 a, float_status *status)
3524{
3525    FloatParts pa = float32_unpack_canonical(a, status);
3526    FloatParts pr = sqrt_float(pa, status, &float32_params);
3527    return float32_round_pack_canonical(pr, status);
3528}
3529
3530static float64 QEMU_SOFTFLOAT_ATTR
3531soft_f64_sqrt(float64 a, float_status *status)
3532{
3533    FloatParts pa = float64_unpack_canonical(a, status);
3534    FloatParts pr = sqrt_float(pa, status, &float64_params);
3535    return float64_round_pack_canonical(pr, status);
3536}
3537
3538float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3539{
3540    union_float32 ua, ur;
3541
3542    ua.s = xa;
3543    if (unlikely(!can_use_fpu(s))) {
3544        goto soft;
3545    }
3546
3547    float32_input_flush1(&ua.s, s);
3548    if (QEMU_HARDFLOAT_1F32_USE_FP) {
3549        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3550                       fpclassify(ua.h) == FP_ZERO) ||
3551                     signbit(ua.h))) {
3552            goto soft;
3553        }
3554    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
3555                        float32_is_neg(ua.s))) {
3556        goto soft;
3557    }
3558    ur.h = sqrtf(ua.h);
3559    return ur.s;
3560
3561 soft:
3562    return soft_f32_sqrt(ua.s, s);
3563}
3564
3565float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3566{
3567    union_float64 ua, ur;
3568
3569    ua.s = xa;
3570    if (unlikely(!can_use_fpu(s))) {
3571        goto soft;
3572    }
3573
3574    float64_input_flush1(&ua.s, s);
3575    if (QEMU_HARDFLOAT_1F64_USE_FP) {
3576        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
3577                       fpclassify(ua.h) == FP_ZERO) ||
3578                     signbit(ua.h))) {
3579            goto soft;
3580        }
3581    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
3582                        float64_is_neg(ua.s))) {
3583        goto soft;
3584    }
3585    ur.h = sqrt(ua.h);
3586    return ur.s;
3587
3588 soft:
3589    return soft_f64_sqrt(ua.s, s);
3590}
3591
3592bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
3593{
3594    FloatParts pa = bfloat16_unpack_canonical(a, status);
3595    FloatParts pr = sqrt_float(pa, status, &bfloat16_params);
3596    return bfloat16_round_pack_canonical(pr, status);
3597}
3598
3599/*----------------------------------------------------------------------------
3600| The pattern for a default generated NaN.
3601*----------------------------------------------------------------------------*/
3602
3603float16 float16_default_nan(float_status *status)
3604{
3605    FloatParts p = parts_default_nan(status);
3606    p.frac >>= float16_params.frac_shift;
3607    return float16_pack_raw(p);
3608}
3609
3610float32 float32_default_nan(float_status *status)
3611{
3612    FloatParts p = parts_default_nan(status);
3613    p.frac >>= float32_params.frac_shift;
3614    return float32_pack_raw(p);
3615}
3616
3617float64 float64_default_nan(float_status *status)
3618{
3619    FloatParts p = parts_default_nan(status);
3620    p.frac >>= float64_params.frac_shift;
3621    return float64_pack_raw(p);
3622}
3623
3624float128 float128_default_nan(float_status *status)
3625{
3626    FloatParts p = parts_default_nan(status);
3627    float128 r;
3628
3629    /* Extrapolate from the choices made by parts_default_nan to fill
3630     * in the quad-floating format.  If the low bit is set, assume we
3631     * want to set all non-snan bits.
3632     */
3633    r.low = -(p.frac & 1);
3634    r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
3635    r.high |= UINT64_C(0x7FFF000000000000);
3636    r.high |= (uint64_t)p.sign << 63;
3637
3638    return r;
3639}
3640
3641bfloat16 bfloat16_default_nan(float_status *status)
3642{
3643    FloatParts p = parts_default_nan(status);
3644    p.frac >>= bfloat16_params.frac_shift;
3645    return bfloat16_pack_raw(p);
3646}
3647
3648/*----------------------------------------------------------------------------
3649| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
3650*----------------------------------------------------------------------------*/
3651
3652float16 float16_silence_nan(float16 a, float_status *status)
3653{
3654    FloatParts p = float16_unpack_raw(a);
3655    p.frac <<= float16_params.frac_shift;
3656    p = parts_silence_nan(p, status);
3657    p.frac >>= float16_params.frac_shift;
3658    return float16_pack_raw(p);
3659}
3660
3661float32 float32_silence_nan(float32 a, float_status *status)
3662{
3663    FloatParts p = float32_unpack_raw(a);
3664    p.frac <<= float32_params.frac_shift;
3665    p = parts_silence_nan(p, status);
3666    p.frac >>= float32_params.frac_shift;
3667    return float32_pack_raw(p);
3668}
3669
3670float64 float64_silence_nan(float64 a, float_status *status)
3671{
3672    FloatParts p = float64_unpack_raw(a);
3673    p.frac <<= float64_params.frac_shift;
3674    p = parts_silence_nan(p, status);
3675    p.frac >>= float64_params.frac_shift;
3676    return float64_pack_raw(p);
3677}
3678
3679bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
3680{
3681    FloatParts p = bfloat16_unpack_raw(a);
3682    p.frac <<= bfloat16_params.frac_shift;
3683    p = parts_silence_nan(p, status);
3684    p.frac >>= bfloat16_params.frac_shift;
3685    return bfloat16_pack_raw(p);
3686}
3687
3688/*----------------------------------------------------------------------------
3689| If `a' is denormal and we are in flush-to-zero mode then set the
3690| input-denormal exception and return zero. Otherwise just return the value.
3691*----------------------------------------------------------------------------*/
3692
3693static bool parts_squash_denormal(FloatParts p, float_status *status)
3694{
3695    if (p.exp == 0 && p.frac != 0) {
3696        float_raise(float_flag_input_denormal, status);
3697        return true;
3698    }
3699
3700    return false;
3701}
3702
3703float16 float16_squash_input_denormal(float16 a, float_status *status)
3704{
3705    if (status->flush_inputs_to_zero) {
3706        FloatParts p = float16_unpack_raw(a);
3707        if (parts_squash_denormal(p, status)) {
3708            return float16_set_sign(float16_zero, p.sign);
3709        }
3710    }
3711    return a;
3712}
3713
3714float32 float32_squash_input_denormal(float32 a, float_status *status)
3715{
3716    if (status->flush_inputs_to_zero) {
3717        FloatParts p = float32_unpack_raw(a);
3718        if (parts_squash_denormal(p, status)) {
3719            return float32_set_sign(float32_zero, p.sign);
3720        }
3721    }
3722    return a;
3723}
3724
3725float64 float64_squash_input_denormal(float64 a, float_status *status)
3726{
3727    if (status->flush_inputs_to_zero) {
3728        FloatParts p = float64_unpack_raw(a);
3729        if (parts_squash_denormal(p, status)) {
3730            return float64_set_sign(float64_zero, p.sign);
3731        }
3732    }
3733    return a;
3734}
3735
3736bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
3737{
3738    if (status->flush_inputs_to_zero) {
3739        FloatParts p = bfloat16_unpack_raw(a);
3740        if (parts_squash_denormal(p, status)) {
3741            return bfloat16_set_sign(bfloat16_zero, p.sign);
3742        }
3743    }
3744    return a;
3745}
3746
3747/*----------------------------------------------------------------------------
3748| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3749| and 7, and returns the properly rounded 32-bit integer corresponding to the
3750| input.  If `zSign' is 1, the input is negated before being converted to an
3751| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
3752| is simply rounded to an integer, with the inexact exception raised if the
3753| input cannot be represented exactly as an integer.  However, if the fixed-
3754| point input is too large, the invalid exception is raised and the largest
3755| positive or negative integer is returned.
3756*----------------------------------------------------------------------------*/
3757
3758static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
3759                                 float_status *status)
3760{
3761    int8_t roundingMode;
3762    bool roundNearestEven;
3763    int8_t roundIncrement, roundBits;
3764    int32_t z;
3765
3766    roundingMode = status->float_rounding_mode;
3767    roundNearestEven = ( roundingMode == float_round_nearest_even );
3768    switch (roundingMode) {
3769    case float_round_nearest_even:
3770    case float_round_ties_away:
3771        roundIncrement = 0x40;
3772        break;
3773    case float_round_to_zero:
3774        roundIncrement = 0;
3775        break;
3776    case float_round_up:
3777        roundIncrement = zSign ? 0 : 0x7f;
3778        break;
3779    case float_round_down:
3780        roundIncrement = zSign ? 0x7f : 0;
3781        break;
3782    case float_round_to_odd:
3783        roundIncrement = absZ & 0x80 ? 0 : 0x7f;
3784        break;
3785    default:
3786        abort();
3787    }
3788    roundBits = absZ & 0x7F;
3789    absZ = ( absZ + roundIncrement )>>7;
3790    if (!(roundBits ^ 0x40) && roundNearestEven) {
3791        absZ &= ~1;
3792    }
3793    z = absZ;
3794    if ( zSign ) z = - z;
3795    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
3796        float_raise(float_flag_invalid, status);
3797        return zSign ? INT32_MIN : INT32_MAX;
3798    }
3799    if (roundBits) {
3800        status->float_exception_flags |= float_flag_inexact;
3801    }
3802    return z;
3803
3804}
3805
3806/*----------------------------------------------------------------------------
3807| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3808| `absZ1', with binary point between bits 63 and 64 (between the input words),
3809| and returns the properly rounded 64-bit integer corresponding to the input.
3810| If `zSign' is 1, the input is negated before being converted to an integer.
3811| Ordinarily, the fixed-point input is simply rounded to an integer, with
3812| the inexact exception raised if the input cannot be represented exactly as
3813| an integer.  However, if the fixed-point input is too large, the invalid
3814| exception is raised and the largest positive or negative integer is
3815| returned.
3816*----------------------------------------------------------------------------*/
3817
3818static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
3819                               float_status *status)
3820{
3821    int8_t roundingMode;
3822    bool roundNearestEven, increment;
3823    int64_t z;
3824
3825    roundingMode = status->float_rounding_mode;
3826    roundNearestEven = ( roundingMode == float_round_nearest_even );
3827    switch (roundingMode) {
3828    case float_round_nearest_even:
3829    case float_round_ties_away:
3830        increment = ((int64_t) absZ1 < 0);
3831        break;
3832    case float_round_to_zero:
3833        increment = 0;
3834        break;
3835    case float_round_up:
3836        increment = !zSign && absZ1;
3837        break;
3838    case float_round_down:
3839        increment = zSign && absZ1;
3840        break;
3841    case float_round_to_odd:
3842        increment = !(absZ0 & 1) && absZ1;
3843        break;
3844    default:
3845        abort();
3846    }
3847    if ( increment ) {
3848        ++absZ0;
3849        if ( absZ0 == 0 ) goto overflow;
3850        if (!(absZ1 << 1) && roundNearestEven) {
3851            absZ0 &= ~1;
3852        }
3853    }
3854    z = absZ0;
3855    if ( zSign ) z = - z;
3856    if ( z && ( ( z < 0 ) ^ zSign ) ) {
3857 overflow:
3858        float_raise(float_flag_invalid, status);
3859        return zSign ? INT64_MIN : INT64_MAX;
3860    }
3861    if (absZ1) {
3862        status->float_exception_flags |= float_flag_inexact;
3863    }
3864    return z;
3865
3866}
3867
3868/*----------------------------------------------------------------------------
3869| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3870| `absZ1', with binary point between bits 63 and 64 (between the input words),
3871| and returns the properly rounded 64-bit unsigned integer corresponding to the
3872| input.  Ordinarily, the fixed-point input is simply rounded to an integer,
3873| with the inexact exception raised if the input cannot be represented exactly
3874| as an integer.  However, if the fixed-point input is too large, the invalid
3875| exception is raised and the largest unsigned integer is returned.
3876*----------------------------------------------------------------------------*/
3877
3878static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
3879                                uint64_t absZ1, float_status *status)
3880{
3881    int8_t roundingMode;
3882    bool roundNearestEven, increment;
3883
3884    roundingMode = status->float_rounding_mode;
3885    roundNearestEven = (roundingMode == float_round_nearest_even);
3886    switch (roundingMode) {
3887    case float_round_nearest_even:
3888    case float_round_ties_away:
3889        increment = ((int64_t)absZ1 < 0);
3890        break;
3891    case float_round_to_zero:
3892        increment = 0;
3893        break;
3894    case float_round_up:
3895        increment = !zSign && absZ1;
3896        break;
3897    case float_round_down:
3898        increment = zSign && absZ1;
3899        break;
3900    case float_round_to_odd:
3901        increment = !(absZ0 & 1) && absZ1;
3902        break;
3903    default:
3904        abort();
3905    }
3906    if (increment) {
3907        ++absZ0;
3908        if (absZ0 == 0) {
3909            float_raise(float_flag_invalid, status);
3910            return UINT64_MAX;
3911        }
3912        if (!(absZ1 << 1) && roundNearestEven) {
3913            absZ0 &= ~1;
3914        }
3915    }
3916
3917    if (zSign && absZ0) {
3918        float_raise(float_flag_invalid, status);
3919        return 0;
3920    }
3921
3922    if (absZ1) {
3923        status->float_exception_flags |= float_flag_inexact;
3924    }
3925    return absZ0;
3926}
3927
3928/*----------------------------------------------------------------------------
3929| Normalizes the subnormal single-precision floating-point value represented
3930| by the denormalized significand `aSig'.  The normalized exponent and
3931| significand are stored at the locations pointed to by `zExpPtr' and
3932| `zSigPtr', respectively.
3933*----------------------------------------------------------------------------*/
3934
3935static void
3936 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
3937{
3938    int8_t shiftCount;
3939
3940    shiftCount = clz32(aSig) - 8;
3941    *zSigPtr = aSig<<shiftCount;
3942    *zExpPtr = 1 - shiftCount;
3943
3944}
3945
3946/*----------------------------------------------------------------------------
3947| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3948| and significand `zSig', and returns the proper single-precision floating-
3949| point value corresponding to the abstract input.  Ordinarily, the abstract
3950| value is simply rounded and packed into the single-precision format, with
3951| the inexact exception raised if the abstract input cannot be represented
3952| exactly.  However, if the abstract value is too large, the overflow and
3953| inexact exceptions are raised and an infinity or maximal finite value is
3954| returned.  If the abstract value is too small, the input value is rounded to
3955| a subnormal number, and the underflow and inexact exceptions are raised if
3956| the abstract input cannot be represented exactly as a subnormal single-
3957| precision floating-point number.
3958|     The input significand `zSig' has its binary point between bits 30
3959| and 29, which is 7 bits to the left of the usual location.  This shifted
3960| significand must be normalized or smaller.  If `zSig' is not normalized,
3961| `zExp' must be 0; in that case, the result returned is a subnormal number,
3962| and it must not require rounding.  In the usual case that `zSig' is
3963| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3964| The handling of underflow and overflow follows the IEC/IEEE Standard for
3965| Binary Floating-Point Arithmetic.
3966*----------------------------------------------------------------------------*/
3967
3968static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
3969                                   float_status *status)
3970{
3971    int8_t roundingMode;
3972    bool roundNearestEven;
3973    int8_t roundIncrement, roundBits;
3974    bool isTiny;
3975
3976    roundingMode = status->float_rounding_mode;
3977    roundNearestEven = ( roundingMode == float_round_nearest_even );
3978    switch (roundingMode) {
3979    case float_round_nearest_even:
3980    case float_round_ties_away:
3981        roundIncrement = 0x40;
3982        break;
3983    case float_round_to_zero:
3984        roundIncrement = 0;
3985        break;
3986    case float_round_up:
3987        roundIncrement = zSign ? 0 : 0x7f;
3988        break;
3989    case float_round_down:
3990        roundIncrement = zSign ? 0x7f : 0;
3991        break;
3992    case float_round_to_odd:
3993        roundIncrement = zSig & 0x80 ? 0 : 0x7f;
3994        break;
3995    default:
3996        abort();
3997        break;
3998    }
3999    roundBits = zSig & 0x7F;
4000    if ( 0xFD <= (uint16_t) zExp ) {
4001        if (    ( 0xFD < zExp )
4002             || (    ( zExp == 0xFD )
4003                  && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
4004           ) {
4005            bool overflow_to_inf = roundingMode != float_round_to_odd &&
4006                                   roundIncrement != 0;
4007            float_raise(float_flag_overflow | float_flag_inexact, status);
4008            return packFloat32(zSign, 0xFF, -!overflow_to_inf);
4009        }
4010        if ( zExp < 0 ) {
4011            if (status->flush_to_zero) {
4012                float_raise(float_flag_output_denormal, status);
4013                return packFloat32(zSign, 0, 0);
4014            }
4015            isTiny = status->tininess_before_rounding
4016                  || (zExp < -1)
4017                  || (zSig + roundIncrement < 0x80000000);
4018            shift32RightJamming( zSig, - zExp, &zSig );
4019            zExp = 0;
4020            roundBits = zSig & 0x7F;
4021            if (isTiny && roundBits) {
4022                float_raise(float_flag_underflow, status);
4023            }
4024            if (roundingMode == float_round_to_odd) {
4025                /*
4026                 * For round-to-odd case, the roundIncrement depends on
4027                 * zSig which just changed.
4028                 */
4029                roundIncrement = zSig & 0x80 ? 0 : 0x7f;
4030            }
4031        }
4032    }
4033    if (roundBits) {
4034        status->float_exception_flags |= float_flag_inexact;
4035    }
4036    zSig = ( zSig + roundIncrement )>>7;
4037    if (!(roundBits ^ 0x40) && roundNearestEven) {
4038        zSig &= ~1;
4039    }
4040    if ( zSig == 0 ) zExp = 0;
4041    return packFloat32( zSign, zExp, zSig );
4042
4043}
4044
4045/*----------------------------------------------------------------------------
4046| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4047| and significand `zSig', and returns the proper single-precision floating-
4048| point value corresponding to the abstract input.  This routine is just like
4049| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
4050| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4051| floating-point exponent.
4052*----------------------------------------------------------------------------*/
4053
4054static float32
4055 normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
4056                              float_status *status)
4057{
4058    int8_t shiftCount;
4059
4060    shiftCount = clz32(zSig) - 1;
4061    return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
4062                               status);
4063
4064}
4065
4066/*----------------------------------------------------------------------------
4067| Normalizes the subnormal double-precision floating-point value represented
4068| by the denormalized significand `aSig'.  The normalized exponent and
4069| significand are stored at the locations pointed to by `zExpPtr' and
4070| `zSigPtr', respectively.
4071*----------------------------------------------------------------------------*/
4072
4073static void
4074 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
4075{
4076    int8_t shiftCount;
4077
4078    shiftCount = clz64(aSig) - 11;
4079    *zSigPtr = aSig<<shiftCount;
4080    *zExpPtr = 1 - shiftCount;
4081
4082}
4083
4084/*----------------------------------------------------------------------------
4085| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
4086| double-precision floating-point value, returning the result.  After being
4087| shifted into the proper positions, the three fields are simply added
4088| together to form the result.  This means that any integer portion of `zSig'
4089| will be added into the exponent.  Since a properly normalized significand
4090| will have an integer portion equal to 1, the `zExp' input should be 1 less
4091| than the desired result exponent whenever `zSig' is a complete, normalized
4092| significand.
4093*----------------------------------------------------------------------------*/
4094
4095static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
4096{
4097
4098    return make_float64(
4099        ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
4100
4101}
4102
4103/*----------------------------------------------------------------------------
4104| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4105| and significand `zSig', and returns the proper double-precision floating-
4106| point value corresponding to the abstract input.  Ordinarily, the abstract
4107| value is simply rounded and packed into the double-precision format, with
4108| the inexact exception raised if the abstract input cannot be represented
4109| exactly.  However, if the abstract value is too large, the overflow and
4110| inexact exceptions are raised and an infinity or maximal finite value is
4111| returned.  If the abstract value is too small, the input value is rounded to
4112| a subnormal number, and the underflow and inexact exceptions are raised if
4113| the abstract input cannot be represented exactly as a subnormal double-
4114| precision floating-point number.
4115|     The input significand `zSig' has its binary point between bits 62
4116| and 61, which is 10 bits to the left of the usual location.  This shifted
4117| significand must be normalized or smaller.  If `zSig' is not normalized,
4118| `zExp' must be 0; in that case, the result returned is a subnormal number,
4119| and it must not require rounding.  In the usual case that `zSig' is
4120| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
4121| The handling of underflow and overflow follows the IEC/IEEE Standard for
4122| Binary Floating-Point Arithmetic.
4123*----------------------------------------------------------------------------*/
4124
4125static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4126                                   float_status *status)
4127{
4128    int8_t roundingMode;
4129    bool roundNearestEven;
4130    int roundIncrement, roundBits;
4131    bool isTiny;
4132
4133    roundingMode = status->float_rounding_mode;
4134    roundNearestEven = ( roundingMode == float_round_nearest_even );
4135    switch (roundingMode) {
4136    case float_round_nearest_even:
4137    case float_round_ties_away:
4138        roundIncrement = 0x200;
4139        break;
4140    case float_round_to_zero:
4141        roundIncrement = 0;
4142        break;
4143    case float_round_up:
4144        roundIncrement = zSign ? 0 : 0x3ff;
4145        break;
4146    case float_round_down:
4147        roundIncrement = zSign ? 0x3ff : 0;
4148        break;
4149    case float_round_to_odd:
4150        roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4151        break;
4152    default:
4153        abort();
4154    }
4155    roundBits = zSig & 0x3FF;
4156    if ( 0x7FD <= (uint16_t) zExp ) {
4157        if (    ( 0x7FD < zExp )
4158             || (    ( zExp == 0x7FD )
4159                  && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
4160           ) {
4161            bool overflow_to_inf = roundingMode != float_round_to_odd &&
4162                                   roundIncrement != 0;
4163            float_raise(float_flag_overflow | float_flag_inexact, status);
4164            return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
4165        }
4166        if ( zExp < 0 ) {
4167            if (status->flush_to_zero) {
4168                float_raise(float_flag_output_denormal, status);
4169                return packFloat64(zSign, 0, 0);
4170            }
4171            isTiny = status->tininess_before_rounding
4172                  || (zExp < -1)
4173                  || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
4174            shift64RightJamming( zSig, - zExp, &zSig );
4175            zExp = 0;
4176            roundBits = zSig & 0x3FF;
4177            if (isTiny && roundBits) {
4178                float_raise(float_flag_underflow, status);
4179            }
4180            if (roundingMode == float_round_to_odd) {
4181                /*
4182                 * For round-to-odd case, the roundIncrement depends on
4183                 * zSig which just changed.
4184                 */
4185                roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
4186            }
4187        }
4188    }
4189    if (roundBits) {
4190        status->float_exception_flags |= float_flag_inexact;
4191    }
4192    zSig = ( zSig + roundIncrement )>>10;
4193    if (!(roundBits ^ 0x200) && roundNearestEven) {
4194        zSig &= ~1;
4195    }
4196    if ( zSig == 0 ) zExp = 0;
4197    return packFloat64( zSign, zExp, zSig );
4198
4199}
4200
4201/*----------------------------------------------------------------------------
4202| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4203| and significand `zSig', and returns the proper double-precision floating-
4204| point value corresponding to the abstract input.  This routine is just like
4205| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
4206| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
4207| floating-point exponent.
4208*----------------------------------------------------------------------------*/
4209
4210static float64
4211 normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
4212                              float_status *status)
4213{
4214    int8_t shiftCount;
4215
4216    shiftCount = clz64(zSig) - 1;
4217    return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
4218                               status);
4219
4220}
4221
4222/*----------------------------------------------------------------------------
4223| Normalizes the subnormal extended double-precision floating-point value
4224| represented by the denormalized significand `aSig'.  The normalized exponent
4225| and significand are stored at the locations pointed to by `zExpPtr' and
4226| `zSigPtr', respectively.
4227*----------------------------------------------------------------------------*/
4228
4229void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4230                                uint64_t *zSigPtr)
4231{
4232    int8_t shiftCount;
4233
4234    shiftCount = clz64(aSig);
4235    *zSigPtr = aSig<<shiftCount;
4236    *zExpPtr = 1 - shiftCount;
4237}
4238
4239/*----------------------------------------------------------------------------
4240| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4241| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4242| and returns the proper extended double-precision floating-point value
4243| corresponding to the abstract input.  Ordinarily, the abstract value is
4244| rounded and packed into the extended double-precision format, with the
4245| inexact exception raised if the abstract input cannot be represented
4246| exactly.  However, if the abstract value is too large, the overflow and
4247| inexact exceptions are raised and an infinity or maximal finite value is
4248| returned.  If the abstract value is too small, the input value is rounded to
4249| a subnormal number, and the underflow and inexact exceptions are raised if
4250| the abstract input cannot be represented exactly as a subnormal extended
4251| double-precision floating-point number.
4252|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
4253| number of bits as single or double precision, respectively.  Otherwise, the
4254| result is rounded to the full precision of the extended double-precision
4255| format.
4256|     The input significand must be normalized or smaller.  If the input
4257| significand is not normalized, `zExp' must be 0; in that case, the result
4258| returned is a subnormal number, and it must not require rounding.  The
4259| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4260| Floating-Point Arithmetic.
4261*----------------------------------------------------------------------------*/
4262
4263floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
4264                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4265                              float_status *status)
4266{
4267    int8_t roundingMode;
4268    bool roundNearestEven, increment, isTiny;
4269    int64_t roundIncrement, roundMask, roundBits;
4270
4271    roundingMode = status->float_rounding_mode;
4272    roundNearestEven = ( roundingMode == float_round_nearest_even );
4273    if ( roundingPrecision == 80 ) goto precision80;
4274    if ( roundingPrecision == 64 ) {
4275        roundIncrement = UINT64_C(0x0000000000000400);
4276        roundMask = UINT64_C(0x00000000000007FF);
4277    }
4278    else if ( roundingPrecision == 32 ) {
4279        roundIncrement = UINT64_C(0x0000008000000000);
4280        roundMask = UINT64_C(0x000000FFFFFFFFFF);
4281    }
4282    else {
4283        goto precision80;
4284    }
4285    zSig0 |= ( zSig1 != 0 );
4286    switch (roundingMode) {
4287    case float_round_nearest_even:
4288    case float_round_ties_away:
4289        break;
4290    case float_round_to_zero:
4291        roundIncrement = 0;
4292        break;
4293    case float_round_up:
4294        roundIncrement = zSign ? 0 : roundMask;
4295        break;
4296    case float_round_down:
4297        roundIncrement = zSign ? roundMask : 0;
4298        break;
4299    default:
4300        abort();
4301    }
4302    roundBits = zSig0 & roundMask;
4303    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4304        if (    ( 0x7FFE < zExp )
4305             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4306           ) {
4307            goto overflow;
4308        }
4309        if ( zExp <= 0 ) {
4310            if (status->flush_to_zero) {
4311                float_raise(float_flag_output_denormal, status);
4312                return packFloatx80(zSign, 0, 0);
4313            }
4314            isTiny = status->tininess_before_rounding
4315                  || (zExp < 0 )
4316                  || (zSig0 <= zSig0 + roundIncrement);
4317            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4318            zExp = 0;
4319            roundBits = zSig0 & roundMask;
4320            if (isTiny && roundBits) {
4321                float_raise(float_flag_underflow, status);
4322            }
4323            if (roundBits) {
4324                status->float_exception_flags |= float_flag_inexact;
4325            }
4326            zSig0 += roundIncrement;
4327            if ( (int64_t) zSig0 < 0 ) zExp = 1;
4328            roundIncrement = roundMask + 1;
4329            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4330                roundMask |= roundIncrement;
4331            }
4332            zSig0 &= ~ roundMask;
4333            return packFloatx80( zSign, zExp, zSig0 );
4334        }
4335    }
4336    if (roundBits) {
4337        status->float_exception_flags |= float_flag_inexact;
4338    }
4339    zSig0 += roundIncrement;
4340    if ( zSig0 < roundIncrement ) {
4341        ++zExp;
4342        zSig0 = UINT64_C(0x8000000000000000);
4343    }
4344    roundIncrement = roundMask + 1;
4345    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4346        roundMask |= roundIncrement;
4347    }
4348    zSig0 &= ~ roundMask;
4349    if ( zSig0 == 0 ) zExp = 0;
4350    return packFloatx80( zSign, zExp, zSig0 );
4351 precision80:
4352    switch (roundingMode) {
4353    case float_round_nearest_even:
4354    case float_round_ties_away:
4355        increment = ((int64_t)zSig1 < 0);
4356        break;
4357    case float_round_to_zero:
4358        increment = 0;
4359        break;
4360    case float_round_up:
4361        increment = !zSign && zSig1;
4362        break;
4363    case float_round_down:
4364        increment = zSign && zSig1;
4365        break;
4366    default:
4367        abort();
4368    }
4369    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4370        if (    ( 0x7FFE < zExp )
4371             || (    ( zExp == 0x7FFE )
4372                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4373                  && increment
4374                )
4375           ) {
4376            roundMask = 0;
4377 overflow:
4378            float_raise(float_flag_overflow | float_flag_inexact, status);
4379            if (    ( roundingMode == float_round_to_zero )
4380                 || ( zSign && ( roundingMode == float_round_up ) )
4381                 || ( ! zSign && ( roundingMode == float_round_down ) )
4382               ) {
4383                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4384            }
4385            return packFloatx80(zSign,
4386                                floatx80_infinity_high,
4387                                floatx80_infinity_low);
4388        }
4389        if ( zExp <= 0 ) {
4390            isTiny = status->tininess_before_rounding
4391                  || (zExp < 0)
4392                  || !increment
4393                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4394            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4395            zExp = 0;
4396            if (isTiny && zSig1) {
4397                float_raise(float_flag_underflow, status);
4398            }
4399            if (zSig1) {
4400                status->float_exception_flags |= float_flag_inexact;
4401            }
4402            switch (roundingMode) {
4403            case float_round_nearest_even:
4404            case float_round_ties_away:
4405                increment = ((int64_t)zSig1 < 0);
4406                break;
4407            case float_round_to_zero:
4408                increment = 0;
4409                break;
4410            case float_round_up:
4411                increment = !zSign && zSig1;
4412                break;
4413            case float_round_down:
4414                increment = zSign && zSig1;
4415                break;
4416            default:
4417                abort();
4418            }
4419            if ( increment ) {
4420                ++zSig0;
4421                if (!(zSig1 << 1) && roundNearestEven) {
4422                    zSig0 &= ~1;
4423                }
4424                if ( (int64_t) zSig0 < 0 ) zExp = 1;
4425            }
4426            return packFloatx80( zSign, zExp, zSig0 );
4427        }
4428    }
4429    if (zSig1) {
4430        status->float_exception_flags |= float_flag_inexact;
4431    }
4432    if ( increment ) {
4433        ++zSig0;
4434        if ( zSig0 == 0 ) {
4435            ++zExp;
4436            zSig0 = UINT64_C(0x8000000000000000);
4437        }
4438        else {
4439            if (!(zSig1 << 1) && roundNearestEven) {
4440                zSig0 &= ~1;
4441            }
4442        }
4443    }
4444    else {
4445        if ( zSig0 == 0 ) zExp = 0;
4446    }
4447    return packFloatx80( zSign, zExp, zSig0 );
4448
4449}
4450
4451/*----------------------------------------------------------------------------
4452| Takes an abstract floating-point value having sign `zSign', exponent
4453| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4454| and returns the proper extended double-precision floating-point value
4455| corresponding to the abstract input.  This routine is just like
4456| `roundAndPackFloatx80' except that the input significand does not have to be
4457| normalized.
4458*----------------------------------------------------------------------------*/
4459
4460floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4461                                       bool zSign, int32_t zExp,
4462                                       uint64_t zSig0, uint64_t zSig1,
4463                                       float_status *status)
4464{
4465    int8_t shiftCount;
4466
4467    if ( zSig0 == 0 ) {
4468        zSig0 = zSig1;
4469        zSig1 = 0;
4470        zExp -= 64;
4471    }
4472    shiftCount = clz64(zSig0);
4473    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4474    zExp -= shiftCount;
4475    return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4476                                zSig0, zSig1, status);
4477
4478}
4479
4480/*----------------------------------------------------------------------------
4481| Returns the least-significant 64 fraction bits of the quadruple-precision
4482| floating-point value `a'.
4483*----------------------------------------------------------------------------*/
4484
4485static inline uint64_t extractFloat128Frac1( float128 a )
4486{
4487
4488    return a.low;
4489
4490}
4491
4492/*----------------------------------------------------------------------------
4493| Returns the most-significant 48 fraction bits of the quadruple-precision
4494| floating-point value `a'.
4495*----------------------------------------------------------------------------*/
4496
4497static inline uint64_t extractFloat128Frac0( float128 a )
4498{
4499
4500    return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
4501
4502}
4503
4504/*----------------------------------------------------------------------------
4505| Returns the exponent bits of the quadruple-precision floating-point value
4506| `a'.
4507*----------------------------------------------------------------------------*/
4508
4509static inline int32_t extractFloat128Exp( float128 a )
4510{
4511
4512    return ( a.high>>48 ) & 0x7FFF;
4513
4514}
4515
4516/*----------------------------------------------------------------------------
4517| Returns the sign bit of the quadruple-precision floating-point value `a'.
4518*----------------------------------------------------------------------------*/
4519
4520static inline bool extractFloat128Sign(float128 a)
4521{
4522    return a.high >> 63;
4523}
4524
4525/*----------------------------------------------------------------------------
4526| Normalizes the subnormal quadruple-precision floating-point value
4527| represented by the denormalized significand formed by the concatenation of
4528| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
4529| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
4530| significand are stored at the location pointed to by `zSig0Ptr', and the
4531| least significant 64 bits of the normalized significand are stored at the
4532| location pointed to by `zSig1Ptr'.
4533*----------------------------------------------------------------------------*/
4534
4535static void
4536 normalizeFloat128Subnormal(
4537     uint64_t aSig0,
4538     uint64_t aSig1,
4539     int32_t *zExpPtr,
4540     uint64_t *zSig0Ptr,
4541     uint64_t *zSig1Ptr
4542 )
4543{
4544    int8_t shiftCount;
4545
4546    if ( aSig0 == 0 ) {
4547        shiftCount = clz64(aSig1) - 15;
4548        if ( shiftCount < 0 ) {
4549            *zSig0Ptr = aSig1>>( - shiftCount );
4550            *zSig1Ptr = aSig1<<( shiftCount & 63 );
4551        }
4552        else {
4553            *zSig0Ptr = aSig1<<shiftCount;
4554            *zSig1Ptr = 0;
4555        }
4556        *zExpPtr = - shiftCount - 63;
4557    }
4558    else {
4559        shiftCount = clz64(aSig0) - 15;
4560        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
4561        *zExpPtr = 1 - shiftCount;
4562    }
4563
4564}
4565
4566/*----------------------------------------------------------------------------
4567| Packs the sign `zSign', the exponent `zExp', and the significand formed
4568| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4569| floating-point value, returning the result.  After being shifted into the
4570| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4571| added together to form the most significant 32 bits of the result.  This
4572| means that any integer portion of `zSig0' will be added into the exponent.
4573| Since a properly normalized significand will have an integer portion equal
4574| to 1, the `zExp' input should be 1 less than the desired result exponent
4575| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4576| significand.
4577*----------------------------------------------------------------------------*/
4578
4579static inline float128
4580packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
4581{
4582    float128 z;
4583
4584    z.low = zSig1;
4585    z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
4586    return z;
4587}
4588
4589/*----------------------------------------------------------------------------
4590| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4591| and extended significand formed by the concatenation of `zSig0', `zSig1',
4592| and `zSig2', and returns the proper quadruple-precision floating-point value
4593| corresponding to the abstract input.  Ordinarily, the abstract value is
4594| simply rounded and packed into the quadruple-precision format, with the
4595| inexact exception raised if the abstract input cannot be represented
4596| exactly.  However, if the abstract value is too large, the overflow and
4597| inexact exceptions are raised and an infinity or maximal finite value is
4598| returned.  If the abstract value is too small, the input value is rounded to
4599| a subnormal number, and the underflow and inexact exceptions are raised if
4600| the abstract input cannot be represented exactly as a subnormal quadruple-
4601| precision floating-point number.
4602|     The input significand must be normalized or smaller.  If the input
4603| significand is not normalized, `zExp' must be 0; in that case, the result
4604| returned is a subnormal number, and it must not require rounding.  In the
4605| usual case that the input significand is normalized, `zExp' must be 1 less
4606| than the ``true'' floating-point exponent.  The handling of underflow and
4607| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4608*----------------------------------------------------------------------------*/
4609
4610static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
4611                                     uint64_t zSig0, uint64_t zSig1,
4612                                     uint64_t zSig2, float_status *status)
4613{
4614    int8_t roundingMode;
4615    bool roundNearestEven, increment, isTiny;
4616
4617    roundingMode = status->float_rounding_mode;
4618    roundNearestEven = ( roundingMode == float_round_nearest_even );
4619    switch (roundingMode) {
4620    case float_round_nearest_even:
4621    case float_round_ties_away:
4622        increment = ((int64_t)zSig2 < 0);
4623        break;
4624    case float_round_to_zero:
4625        increment = 0;
4626        break;
4627    case float_round_up:
4628        increment = !zSign && zSig2;
4629        break;
4630    case float_round_down:
4631        increment = zSign && zSig2;
4632        break;
4633    case float_round_to_odd:
4634        increment = !(zSig1 & 0x1) && zSig2;
4635        break;
4636    default:
4637        abort();
4638    }
4639    if ( 0x7FFD <= (uint32_t) zExp ) {
4640        if (    ( 0x7FFD < zExp )
4641             || (    ( zExp == 0x7FFD )
4642                  && eq128(
4643                         UINT64_C(0x0001FFFFFFFFFFFF),
4644                         UINT64_C(0xFFFFFFFFFFFFFFFF),
4645                         zSig0,
4646                         zSig1
4647                     )
4648                  && increment
4649                )
4650           ) {
4651            float_raise(float_flag_overflow | float_flag_inexact, status);
4652            if (    ( roundingMode == float_round_to_zero )
4653                 || ( zSign && ( roundingMode == float_round_up ) )
4654                 || ( ! zSign && ( roundingMode == float_round_down ) )
4655                 || (roundingMode == float_round_to_odd)
4656               ) {
4657                return
4658                    packFloat128(
4659                        zSign,
4660                        0x7FFE,
4661                        UINT64_C(0x0000FFFFFFFFFFFF),
4662                        UINT64_C(0xFFFFFFFFFFFFFFFF)
4663                    );
4664            }
4665            return packFloat128( zSign, 0x7FFF, 0, 0 );
4666        }
4667        if ( zExp < 0 ) {
4668            if (status->flush_to_zero) {
4669                float_raise(float_flag_output_denormal, status);
4670                return packFloat128(zSign, 0, 0, 0);
4671            }
4672            isTiny = status->tininess_before_rounding
4673                  || (zExp < -1)
4674                  || !increment
4675                  || lt128(zSig0, zSig1,
4676                           UINT64_C(0x0001FFFFFFFFFFFF),
4677                           UINT64_C(0xFFFFFFFFFFFFFFFF));
4678            shift128ExtraRightJamming(
4679                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4680            zExp = 0;
4681            if (isTiny && zSig2) {
4682                float_raise(float_flag_underflow, status);
4683            }
4684            switch (roundingMode) {
4685            case float_round_nearest_even:
4686            case float_round_ties_away:
4687                increment = ((int64_t)zSig2 < 0);
4688                break;
4689            case float_round_to_zero:
4690                increment = 0;
4691                break;
4692            case float_round_up:
4693                increment = !zSign && zSig2;
4694                break;
4695            case float_round_down:
4696                increment = zSign && zSig2;
4697                break;
4698            case float_round_to_odd:
4699                increment = !(zSig1 & 0x1) && zSig2;
4700                break;
4701            default:
4702                abort();
4703            }
4704        }
4705    }
4706    if (zSig2) {
4707        status->float_exception_flags |= float_flag_inexact;
4708    }
4709    if ( increment ) {
4710        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
4711        if ((zSig2 + zSig2 == 0) && roundNearestEven) {
4712            zSig1 &= ~1;
4713        }
4714    }
4715    else {
4716        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
4717    }
4718    return packFloat128( zSign, zExp, zSig0, zSig1 );
4719
4720}
4721
4722/*----------------------------------------------------------------------------
4723| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4724| and significand formed by the concatenation of `zSig0' and `zSig1', and
4725| returns the proper quadruple-precision floating-point value corresponding
4726| to the abstract input.  This routine is just like `roundAndPackFloat128'
4727| except that the input significand has fewer bits and does not have to be
4728| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
4729| point exponent.
4730*----------------------------------------------------------------------------*/
4731
4732static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
4733                                              uint64_t zSig0, uint64_t zSig1,
4734                                              float_status *status)
4735{
4736    int8_t shiftCount;
4737    uint64_t zSig2;
4738
4739    if ( zSig0 == 0 ) {
4740        zSig0 = zSig1;
4741        zSig1 = 0;
4742        zExp -= 64;
4743    }
4744    shiftCount = clz64(zSig0) - 15;
4745    if ( 0 <= shiftCount ) {
4746        zSig2 = 0;
4747        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4748    }
4749    else {
4750        shift128ExtraRightJamming(
4751            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
4752    }
4753    zExp -= shiftCount;
4754    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
4755
4756}
4757
4758
4759/*----------------------------------------------------------------------------
4760| Returns the result of converting the 32-bit two's complement integer `a'
4761| to the extended double-precision floating-point format.  The conversion
4762| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4763| Arithmetic.
4764*----------------------------------------------------------------------------*/
4765
4766floatx80 int32_to_floatx80(int32_t a, float_status *status)
4767{
4768    bool zSign;
4769    uint32_t absA;
4770    int8_t shiftCount;
4771    uint64_t zSig;
4772
4773    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4774    zSign = ( a < 0 );
4775    absA = zSign ? - a : a;
4776    shiftCount = clz32(absA) + 32;
4777    zSig = absA;
4778    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
4779
4780}
4781
4782/*----------------------------------------------------------------------------
4783| Returns the result of converting the 32-bit two's complement integer `a' to
4784| the quadruple-precision floating-point format.  The conversion is performed
4785| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4786*----------------------------------------------------------------------------*/
4787
4788float128 int32_to_float128(int32_t a, float_status *status)
4789{
4790    bool zSign;
4791    uint32_t absA;
4792    int8_t shiftCount;
4793    uint64_t zSig0;
4794
4795    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4796    zSign = ( a < 0 );
4797    absA = zSign ? - a : a;
4798    shiftCount = clz32(absA) + 17;
4799    zSig0 = absA;
4800    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
4801
4802}
4803
4804/*----------------------------------------------------------------------------
4805| Returns the result of converting the 64-bit two's complement integer `a'
4806| to the extended double-precision floating-point format.  The conversion
4807| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4808| Arithmetic.
4809*----------------------------------------------------------------------------*/
4810
4811floatx80 int64_to_floatx80(int64_t a, float_status *status)
4812{
4813    bool zSign;
4814    uint64_t absA;
4815    int8_t shiftCount;
4816
4817    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
4818    zSign = ( a < 0 );
4819    absA = zSign ? - a : a;
4820    shiftCount = clz64(absA);
4821    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
4822
4823}
4824
4825/*----------------------------------------------------------------------------
4826| Returns the result of converting the 64-bit two's complement integer `a' to
4827| the quadruple-precision floating-point format.  The conversion is performed
4828| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4829*----------------------------------------------------------------------------*/
4830
4831float128 int64_to_float128(int64_t a, float_status *status)
4832{
4833    bool zSign;
4834    uint64_t absA;
4835    int8_t shiftCount;
4836    int32_t zExp;
4837    uint64_t zSig0, zSig1;
4838
4839    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
4840    zSign = ( a < 0 );
4841    absA = zSign ? - a : a;
4842    shiftCount = clz64(absA) + 49;
4843    zExp = 0x406E - shiftCount;
4844    if ( 64 <= shiftCount ) {
4845        zSig1 = 0;
4846        zSig0 = absA;
4847        shiftCount -= 64;
4848    }
4849    else {
4850        zSig1 = absA;
4851        zSig0 = 0;
4852    }
4853    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4854    return packFloat128( zSign, zExp, zSig0, zSig1 );
4855
4856}
4857
4858/*----------------------------------------------------------------------------
4859| Returns the result of converting the 64-bit unsigned integer `a'
4860| to the quadruple-precision floating-point format.  The conversion is performed
4861| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4862*----------------------------------------------------------------------------*/
4863
4864float128 uint64_to_float128(uint64_t a, float_status *status)
4865{
4866    if (a == 0) {
4867        return float128_zero;
4868    }
4869    return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
4870}
4871
4872/*----------------------------------------------------------------------------
4873| Returns the result of converting the single-precision floating-point value
4874| `a' to the extended double-precision floating-point format.  The conversion
4875| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4876| Arithmetic.
4877*----------------------------------------------------------------------------*/
4878
4879floatx80 float32_to_floatx80(float32 a, float_status *status)
4880{
4881    bool aSign;
4882    int aExp;
4883    uint32_t aSig;
4884
4885    a = float32_squash_input_denormal(a, status);
4886    aSig = extractFloat32Frac( a );
4887    aExp = extractFloat32Exp( a );
4888    aSign = extractFloat32Sign( a );
4889    if ( aExp == 0xFF ) {
4890        if (aSig) {
4891            floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
4892                                               status);
4893            return floatx80_silence_nan(res, status);
4894        }
4895        return packFloatx80(aSign,
4896                            floatx80_infinity_high,
4897                            floatx80_infinity_low);
4898    }
4899    if ( aExp == 0 ) {
4900        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
4901        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4902    }
4903    aSig |= 0x00800000;
4904    return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
4905
4906}
4907
4908/*----------------------------------------------------------------------------
4909| Returns the result of converting the single-precision floating-point value
4910| `a' to the double-precision floating-point format.  The conversion is
4911| performed according to the IEC/IEEE Standard for Binary Floating-Point
4912| Arithmetic.
4913*----------------------------------------------------------------------------*/
4914
4915float128 float32_to_float128(float32 a, float_status *status)
4916{
4917    bool aSign;
4918    int aExp;
4919    uint32_t aSig;
4920
4921    a = float32_squash_input_denormal(a, status);
4922    aSig = extractFloat32Frac( a );
4923    aExp = extractFloat32Exp( a );
4924    aSign = extractFloat32Sign( a );
4925    if ( aExp == 0xFF ) {
4926        if (aSig) {
4927            return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
4928        }
4929        return packFloat128( aSign, 0x7FFF, 0, 0 );
4930    }
4931    if ( aExp == 0 ) {
4932        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
4933        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4934        --aExp;
4935    }
4936    return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
4937
4938}
4939
4940/*----------------------------------------------------------------------------
4941| Returns the remainder of the single-precision floating-point value `a'
4942| with respect to the corresponding value `b'.  The operation is performed
4943| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4944*----------------------------------------------------------------------------*/
4945
4946float32 float32_rem(float32 a, float32 b, float_status *status)
4947{
4948    bool aSign, zSign;
4949    int aExp, bExp, expDiff;
4950    uint32_t aSig, bSig;
4951    uint32_t q;
4952    uint64_t aSig64, bSig64, q64;
4953    uint32_t alternateASig;
4954    int32_t sigMean;
4955    a = float32_squash_input_denormal(a, status);
4956    b = float32_squash_input_denormal(b, status);
4957
4958    aSig = extractFloat32Frac( a );
4959    aExp = extractFloat32Exp( a );
4960    aSign = extractFloat32Sign( a );
4961    bSig = extractFloat32Frac( b );
4962    bExp = extractFloat32Exp( b );
4963    if ( aExp == 0xFF ) {
4964        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
4965            return propagateFloat32NaN(a, b, status);
4966        }
4967        float_raise(float_flag_invalid, status);
4968        return float32_default_nan(status);
4969    }
4970    if ( bExp == 0xFF ) {
4971        if (bSig) {
4972            return propagateFloat32NaN(a, b, status);
4973        }
4974        return a;
4975    }
4976    if ( bExp == 0 ) {
4977        if ( bSig == 0 ) {
4978            float_raise(float_flag_invalid, status);
4979            return float32_default_nan(status);
4980        }
4981        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
4982    }
4983    if ( aExp == 0 ) {
4984        if ( aSig == 0 ) return a;
4985        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4986    }
4987    expDiff = aExp - bExp;
4988    aSig |= 0x00800000;
4989    bSig |= 0x00800000;
4990    if ( expDiff < 32 ) {
4991        aSig <<= 8;
4992        bSig <<= 8;
4993        if ( expDiff < 0 ) {
4994            if ( expDiff < -1 ) return a;
4995            aSig >>= 1;
4996        }
4997        q = ( bSig <= aSig );
4998        if ( q ) aSig -= bSig;
4999        if ( 0 < expDiff ) {
5000            q = ( ( (uint64_t) aSig )<<32 ) / bSig;
5001            q >>= 32 - expDiff;
5002            bSig >>= 2;
5003            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5004        }
5005        else {
5006            aSig >>= 2;
5007            bSig >>= 2;
5008        }
5009    }
5010    else {
5011        if ( bSig <= aSig ) aSig -= bSig;
5012        aSig64 = ( (uint64_t) aSig )<<40;
5013        bSig64 = ( (uint64_t) bSig )<<40;
5014        expDiff -= 64;
5015        while ( 0 < expDiff ) {
5016            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5017            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5018            aSig64 = - ( ( bSig * q64 )<<38 );
5019            expDiff -= 62;
5020        }
5021        expDiff += 64;
5022        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
5023        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
5024        q = q64>>( 64 - expDiff );
5025        bSig <<= 6;
5026        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
5027    }
5028    do {
5029        alternateASig = aSig;
5030        ++q;
5031        aSig -= bSig;
5032    } while ( 0 <= (int32_t) aSig );
5033    sigMean = aSig + alternateASig;
5034    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5035        aSig = alternateASig;
5036    }
5037    zSign = ( (int32_t) aSig < 0 );
5038    if ( zSign ) aSig = - aSig;
5039    return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
5040}
5041
5042
5043
5044/*----------------------------------------------------------------------------
5045| Returns the binary exponential of the single-precision floating-point value
5046| `a'. The operation is performed according to the IEC/IEEE Standard for
5047| Binary Floating-Point Arithmetic.
5048|
5049| Uses the following identities:
5050|
5051| 1. -------------------------------------------------------------------------
5052|      x    x*ln(2)
5053|     2  = e
5054|
5055| 2. -------------------------------------------------------------------------
5056|                      2     3     4     5           n
5057|      x        x     x     x     x     x           x
5058|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5059|               1!    2!    3!    4!    5!          n!
5060*----------------------------------------------------------------------------*/
5061
5062static const float64 float32_exp2_coefficients[15] =
5063{
5064    const_float64( 0x3ff0000000000000ll ), /*  1 */
5065    const_float64( 0x3fe0000000000000ll ), /*  2 */
5066    const_float64( 0x3fc5555555555555ll ), /*  3 */
5067    const_float64( 0x3fa5555555555555ll ), /*  4 */
5068    const_float64( 0x3f81111111111111ll ), /*  5 */
5069    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5070    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5071    const_float64( 0x3efa01a01a01a01all ), /*  8 */
5072    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5073    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5074    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5075    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5076    const_float64( 0x3de6124613a86d09ll ), /* 13 */
5077    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5078    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5079};
5080
5081float32 float32_exp2(float32 a, float_status *status)
5082{
5083    bool aSign;
5084    int aExp;
5085    uint32_t aSig;
5086    float64 r, x, xn;
5087    int i;
5088    a = float32_squash_input_denormal(a, status);
5089
5090    aSig = extractFloat32Frac( a );
5091    aExp = extractFloat32Exp( a );
5092    aSign = extractFloat32Sign( a );
5093
5094    if ( aExp == 0xFF) {
5095        if (aSig) {
5096            return propagateFloat32NaN(a, float32_zero, status);
5097        }
5098        return (aSign) ? float32_zero : a;
5099    }
5100    if (aExp == 0) {
5101        if (aSig == 0) return float32_one;
5102    }
5103
5104    float_raise(float_flag_inexact, status);
5105
5106    /* ******************************* */
5107    /* using float64 for approximation */
5108    /* ******************************* */
5109    x = float32_to_float64(a, status);
5110    x = float64_mul(x, float64_ln2, status);
5111
5112    xn = x;
5113    r = float64_one;
5114    for (i = 0 ; i < 15 ; i++) {
5115        float64 f;
5116
5117        f = float64_mul(xn, float32_exp2_coefficients[i], status);
5118        r = float64_add(r, f, status);
5119
5120        xn = float64_mul(xn, x, status);
5121    }
5122
5123    return float64_to_float32(r, status);
5124}
5125
5126/*----------------------------------------------------------------------------
5127| Returns the binary log of the single-precision floating-point value `a'.
5128| The operation is performed according to the IEC/IEEE Standard for Binary
5129| Floating-Point Arithmetic.
5130*----------------------------------------------------------------------------*/
5131float32 float32_log2(float32 a, float_status *status)
5132{
5133    bool aSign, zSign;
5134    int aExp;
5135    uint32_t aSig, zSig, i;
5136
5137    a = float32_squash_input_denormal(a, status);
5138    aSig = extractFloat32Frac( a );
5139    aExp = extractFloat32Exp( a );
5140    aSign = extractFloat32Sign( a );
5141
5142    if ( aExp == 0 ) {
5143        if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
5144        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
5145    }
5146    if ( aSign ) {
5147        float_raise(float_flag_invalid, status);
5148        return float32_default_nan(status);
5149    }
5150    if ( aExp == 0xFF ) {
5151        if (aSig) {
5152            return propagateFloat32NaN(a, float32_zero, status);
5153        }
5154        return a;
5155    }
5156
5157    aExp -= 0x7F;
5158    aSig |= 0x00800000;
5159    zSign = aExp < 0;
5160    zSig = aExp << 23;
5161
5162    for (i = 1 << 22; i > 0; i >>= 1) {
5163        aSig = ( (uint64_t)aSig * aSig ) >> 23;
5164        if ( aSig & 0x01000000 ) {
5165            aSig >>= 1;
5166            zSig |= i;
5167        }
5168    }
5169
5170    if ( zSign )
5171        zSig = -zSig;
5172
5173    return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
5174}
5175
5176/*----------------------------------------------------------------------------
5177| Returns the result of converting the double-precision floating-point value
5178| `a' to the extended double-precision floating-point format.  The conversion
5179| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5180| Arithmetic.
5181*----------------------------------------------------------------------------*/
5182
5183floatx80 float64_to_floatx80(float64 a, float_status *status)
5184{
5185    bool aSign;
5186    int aExp;
5187    uint64_t aSig;
5188
5189    a = float64_squash_input_denormal(a, status);
5190    aSig = extractFloat64Frac( a );
5191    aExp = extractFloat64Exp( a );
5192    aSign = extractFloat64Sign( a );
5193    if ( aExp == 0x7FF ) {
5194        if (aSig) {
5195            floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
5196                                               status);
5197            return floatx80_silence_nan(res, status);
5198        }
5199        return packFloatx80(aSign,
5200                            floatx80_infinity_high,
5201                            floatx80_infinity_low);
5202    }
5203    if ( aExp == 0 ) {
5204        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
5205        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5206    }
5207    return
5208        packFloatx80(
5209            aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
5210
5211}
5212
5213/*----------------------------------------------------------------------------
5214| Returns the result of converting the double-precision floating-point value
5215| `a' to the quadruple-precision floating-point format.  The conversion is
5216| performed according to the IEC/IEEE Standard for Binary Floating-Point
5217| Arithmetic.
5218*----------------------------------------------------------------------------*/
5219
5220float128 float64_to_float128(float64 a, float_status *status)
5221{
5222    bool aSign;
5223    int aExp;
5224    uint64_t aSig, zSig0, zSig1;
5225
5226    a = float64_squash_input_denormal(a, status);
5227    aSig = extractFloat64Frac( a );
5228    aExp = extractFloat64Exp( a );
5229    aSign = extractFloat64Sign( a );
5230    if ( aExp == 0x7FF ) {
5231        if (aSig) {
5232            return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5233        }
5234        return packFloat128( aSign, 0x7FFF, 0, 0 );
5235    }
5236    if ( aExp == 0 ) {
5237        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
5238        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5239        --aExp;
5240    }
5241    shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
5242    return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
5243
5244}
5245
5246
5247/*----------------------------------------------------------------------------
5248| Returns the remainder of the double-precision floating-point value `a'
5249| with respect to the corresponding value `b'.  The operation is performed
5250| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5251*----------------------------------------------------------------------------*/
5252
5253float64 float64_rem(float64 a, float64 b, float_status *status)
5254{
5255    bool aSign, zSign;
5256    int aExp, bExp, expDiff;
5257    uint64_t aSig, bSig;
5258    uint64_t q, alternateASig;
5259    int64_t sigMean;
5260
5261    a = float64_squash_input_denormal(a, status);
5262    b = float64_squash_input_denormal(b, status);
5263    aSig = extractFloat64Frac( a );
5264    aExp = extractFloat64Exp( a );
5265    aSign = extractFloat64Sign( a );
5266    bSig = extractFloat64Frac( b );
5267    bExp = extractFloat64Exp( b );
5268    if ( aExp == 0x7FF ) {
5269        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
5270            return propagateFloat64NaN(a, b, status);
5271        }
5272        float_raise(float_flag_invalid, status);
5273        return float64_default_nan(status);
5274    }
5275    if ( bExp == 0x7FF ) {
5276        if (bSig) {
5277            return propagateFloat64NaN(a, b, status);
5278        }
5279        return a;
5280    }
5281    if ( bExp == 0 ) {
5282        if ( bSig == 0 ) {
5283            float_raise(float_flag_invalid, status);
5284            return float64_default_nan(status);
5285        }
5286        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5287    }
5288    if ( aExp == 0 ) {
5289        if ( aSig == 0 ) return a;
5290        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5291    }
5292    expDiff = aExp - bExp;
5293    aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
5294    bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
5295    if ( expDiff < 0 ) {
5296        if ( expDiff < -1 ) return a;
5297        aSig >>= 1;
5298    }
5299    q = ( bSig <= aSig );
5300    if ( q ) aSig -= bSig;
5301    expDiff -= 64;
5302    while ( 0 < expDiff ) {
5303        q = estimateDiv128To64( aSig, 0, bSig );
5304        q = ( 2 < q ) ? q - 2 : 0;
5305        aSig = - ( ( bSig>>2 ) * q );
5306        expDiff -= 62;
5307    }
5308    expDiff += 64;
5309    if ( 0 < expDiff ) {
5310        q = estimateDiv128To64( aSig, 0, bSig );
5311        q = ( 2 < q ) ? q - 2 : 0;
5312        q >>= 64 - expDiff;
5313        bSig >>= 2;
5314        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
5315    }
5316    else {
5317        aSig >>= 2;
5318        bSig >>= 2;
5319    }
5320    do {
5321        alternateASig = aSig;
5322        ++q;
5323        aSig -= bSig;
5324    } while ( 0 <= (int64_t) aSig );
5325    sigMean = aSig + alternateASig;
5326    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
5327        aSig = alternateASig;
5328    }
5329    zSign = ( (int64_t) aSig < 0 );
5330    if ( zSign ) aSig = - aSig;
5331    return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
5332
5333}
5334
5335/*----------------------------------------------------------------------------
5336| Returns the binary log of the double-precision floating-point value `a'.
5337| The operation is performed according to the IEC/IEEE Standard for Binary
5338| Floating-Point Arithmetic.
5339*----------------------------------------------------------------------------*/
5340float64 float64_log2(float64 a, float_status *status)
5341{
5342    bool aSign, zSign;
5343    int aExp;
5344    uint64_t aSig, aSig0, aSig1, zSig, i;
5345    a = float64_squash_input_denormal(a, status);
5346
5347    aSig = extractFloat64Frac( a );
5348    aExp = extractFloat64Exp( a );
5349    aSign = extractFloat64Sign( a );
5350
5351    if ( aExp == 0 ) {
5352        if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
5353        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5354    }
5355    if ( aSign ) {
5356        float_raise(float_flag_invalid, status);
5357        return float64_default_nan(status);
5358    }
5359    if ( aExp == 0x7FF ) {
5360        if (aSig) {
5361            return propagateFloat64NaN(a, float64_zero, status);
5362        }
5363        return a;
5364    }
5365
5366    aExp -= 0x3FF;
5367    aSig |= UINT64_C(0x0010000000000000);
5368    zSign = aExp < 0;
5369    zSig = (uint64_t)aExp << 52;
5370    for (i = 1LL << 51; i > 0; i >>= 1) {
5371        mul64To128( aSig, aSig, &aSig0, &aSig1 );
5372        aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
5373        if ( aSig & UINT64_C(0x0020000000000000) ) {
5374            aSig >>= 1;
5375            zSig |= i;
5376        }
5377    }
5378
5379    if ( zSign )
5380        zSig = -zSig;
5381    return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
5382}
5383
5384/*----------------------------------------------------------------------------
5385| Returns the result of converting the extended double-precision floating-
5386| point value `a' to the 32-bit two's complement integer format.  The
5387| conversion is performed according to the IEC/IEEE Standard for Binary
5388| Floating-Point Arithmetic---which means in particular that the conversion
5389| is rounded according to the current rounding mode.  If `a' is a NaN, the
5390| largest positive integer is returned.  Otherwise, if the conversion
5391| overflows, the largest integer with the same sign as `a' is returned.
5392*----------------------------------------------------------------------------*/
5393
5394int32_t floatx80_to_int32(floatx80 a, float_status *status)
5395{
5396    bool aSign;
5397    int32_t aExp, shiftCount;
5398    uint64_t aSig;
5399
5400    if (floatx80_invalid_encoding(a)) {
5401        float_raise(float_flag_invalid, status);
5402        return 1 << 31;
5403    }
5404    aSig = extractFloatx80Frac( a );
5405    aExp = extractFloatx80Exp( a );
5406    aSign = extractFloatx80Sign( a );
5407    if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5408    shiftCount = 0x4037 - aExp;
5409    if ( shiftCount <= 0 ) shiftCount = 1;
5410    shift64RightJamming( aSig, shiftCount, &aSig );
5411    return roundAndPackInt32(aSign, aSig, status);
5412
5413}
5414
5415/*----------------------------------------------------------------------------
5416| Returns the result of converting the extended double-precision floating-
5417| point value `a' to the 32-bit two's complement integer format.  The
5418| conversion is performed according to the IEC/IEEE Standard for Binary
5419| Floating-Point Arithmetic, except that the conversion is always rounded
5420| toward zero.  If `a' is a NaN, the largest positive integer is returned.
5421| Otherwise, if the conversion overflows, the largest integer with the same
5422| sign as `a' is returned.
5423*----------------------------------------------------------------------------*/
5424
5425int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5426{
5427    bool aSign;
5428    int32_t aExp, shiftCount;
5429    uint64_t aSig, savedASig;
5430    int32_t z;
5431
5432    if (floatx80_invalid_encoding(a)) {
5433        float_raise(float_flag_invalid, status);
5434        return 1 << 31;
5435    }
5436    aSig = extractFloatx80Frac( a );
5437    aExp = extractFloatx80Exp( a );
5438    aSign = extractFloatx80Sign( a );
5439    if ( 0x401E < aExp ) {
5440        if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
5441        goto invalid;
5442    }
5443    else if ( aExp < 0x3FFF ) {
5444        if (aExp || aSig) {
5445            status->float_exception_flags |= float_flag_inexact;
5446        }
5447        return 0;
5448    }
5449    shiftCount = 0x403E - aExp;
5450    savedASig = aSig;
5451    aSig >>= shiftCount;
5452    z = aSig;
5453    if ( aSign ) z = - z;
5454    if ( ( z < 0 ) ^ aSign ) {
5455 invalid:
5456        float_raise(float_flag_invalid, status);
5457        return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
5458    }
5459    if ( ( aSig<<shiftCount ) != savedASig ) {
5460        status->float_exception_flags |= float_flag_inexact;
5461    }
5462    return z;
5463
5464}
5465
5466/*----------------------------------------------------------------------------
5467| Returns the result of converting the extended double-precision floating-
5468| point value `a' to the 64-bit two's complement integer format.  The
5469| conversion is performed according to the IEC/IEEE Standard for Binary
5470| Floating-Point Arithmetic---which means in particular that the conversion
5471| is rounded according to the current rounding mode.  If `a' is a NaN,
5472| the largest positive integer is returned.  Otherwise, if the conversion
5473| overflows, the largest integer with the same sign as `a' is returned.
5474*----------------------------------------------------------------------------*/
5475
5476int64_t floatx80_to_int64(floatx80 a, float_status *status)
5477{
5478    bool aSign;
5479    int32_t aExp, shiftCount;
5480    uint64_t aSig, aSigExtra;
5481
5482    if (floatx80_invalid_encoding(a)) {
5483        float_raise(float_flag_invalid, status);
5484        return 1ULL << 63;
5485    }
5486    aSig = extractFloatx80Frac( a );
5487    aExp = extractFloatx80Exp( a );
5488    aSign = extractFloatx80Sign( a );
5489    shiftCount = 0x403E - aExp;
5490    if ( shiftCount <= 0 ) {
5491        if ( shiftCount ) {
5492            float_raise(float_flag_invalid, status);
5493            if (!aSign || floatx80_is_any_nan(a)) {
5494                return INT64_MAX;
5495            }
5496            return INT64_MIN;
5497        }
5498        aSigExtra = 0;
5499    }
5500    else {
5501        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
5502    }
5503    return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5504
5505}
5506
5507/*----------------------------------------------------------------------------
5508| Returns the result of converting the extended double-precision floating-
5509| point value `a' to the 64-bit two's complement integer format.  The
5510| conversion is performed according to the IEC/IEEE Standard for Binary
5511| Floating-Point Arithmetic, except that the conversion is always rounded
5512| toward zero.  If `a' is a NaN, the largest positive integer is returned.
5513| Otherwise, if the conversion overflows, the largest integer with the same
5514| sign as `a' is returned.
5515*----------------------------------------------------------------------------*/
5516
5517int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5518{
5519    bool aSign;
5520    int32_t aExp, shiftCount;
5521    uint64_t aSig;
5522    int64_t z;
5523
5524    if (floatx80_invalid_encoding(a)) {
5525        float_raise(float_flag_invalid, status);
5526        return 1ULL << 63;
5527    }
5528    aSig = extractFloatx80Frac( a );
5529    aExp = extractFloatx80Exp( a );
5530    aSign = extractFloatx80Sign( a );
5531    shiftCount = aExp - 0x403E;
5532    if ( 0 <= shiftCount ) {
5533        aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
5534        if ( ( a.high != 0xC03E ) || aSig ) {
5535            float_raise(float_flag_invalid, status);
5536            if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
5537                return INT64_MAX;
5538            }
5539        }
5540        return INT64_MIN;
5541    }
5542    else if ( aExp < 0x3FFF ) {
5543        if (aExp | aSig) {
5544            status->float_exception_flags |= float_flag_inexact;
5545        }
5546        return 0;
5547    }
5548    z = aSig>>( - shiftCount );
5549    if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
5550        status->float_exception_flags |= float_flag_inexact;
5551    }
5552    if ( aSign ) z = - z;
5553    return z;
5554
5555}
5556
5557/*----------------------------------------------------------------------------
5558| Returns the result of converting the extended double-precision floating-
5559| point value `a' to the single-precision floating-point format.  The
5560| conversion is performed according to the IEC/IEEE Standard for Binary
5561| Floating-Point Arithmetic.
5562*----------------------------------------------------------------------------*/
5563
5564float32 floatx80_to_float32(floatx80 a, float_status *status)
5565{
5566    bool aSign;
5567    int32_t aExp;
5568    uint64_t aSig;
5569
5570    if (floatx80_invalid_encoding(a)) {
5571        float_raise(float_flag_invalid, status);
5572        return float32_default_nan(status);
5573    }
5574    aSig = extractFloatx80Frac( a );
5575    aExp = extractFloatx80Exp( a );
5576    aSign = extractFloatx80Sign( a );
5577    if ( aExp == 0x7FFF ) {
5578        if ( (uint64_t) ( aSig<<1 ) ) {
5579            float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
5580                                             status);
5581            return float32_silence_nan(res, status);
5582        }
5583        return packFloat32( aSign, 0xFF, 0 );
5584    }
5585    shift64RightJamming( aSig, 33, &aSig );
5586    if ( aExp || aSig ) aExp -= 0x3F81;
5587    return roundAndPackFloat32(aSign, aExp, aSig, status);
5588
5589}
5590
5591/*----------------------------------------------------------------------------
5592| Returns the result of converting the extended double-precision floating-
5593| point value `a' to the double-precision floating-point format.  The
5594| conversion is performed according to the IEC/IEEE Standard for Binary
5595| Floating-Point Arithmetic.
5596*----------------------------------------------------------------------------*/
5597
5598float64 floatx80_to_float64(floatx80 a, float_status *status)
5599{
5600    bool aSign;
5601    int32_t aExp;
5602    uint64_t aSig, zSig;
5603
5604    if (floatx80_invalid_encoding(a)) {
5605        float_raise(float_flag_invalid, status);
5606        return float64_default_nan(status);
5607    }
5608    aSig = extractFloatx80Frac( a );
5609    aExp = extractFloatx80Exp( a );
5610    aSign = extractFloatx80Sign( a );
5611    if ( aExp == 0x7FFF ) {
5612        if ( (uint64_t) ( aSig<<1 ) ) {
5613            float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
5614                                             status);
5615            return float64_silence_nan(res, status);
5616        }
5617        return packFloat64( aSign, 0x7FF, 0 );
5618    }
5619    shift64RightJamming( aSig, 1, &zSig );
5620    if ( aExp || aSig ) aExp -= 0x3C01;
5621    return roundAndPackFloat64(aSign, aExp, zSig, status);
5622
5623}
5624
5625/*----------------------------------------------------------------------------
5626| Returns the result of converting the extended double-precision floating-
5627| point value `a' to the quadruple-precision floating-point format.  The
5628| conversion is performed according to the IEC/IEEE Standard for Binary
5629| Floating-Point Arithmetic.
5630*----------------------------------------------------------------------------*/
5631
5632float128 floatx80_to_float128(floatx80 a, float_status *status)
5633{
5634    bool aSign;
5635    int aExp;
5636    uint64_t aSig, zSig0, zSig1;
5637
5638    if (floatx80_invalid_encoding(a)) {
5639        float_raise(float_flag_invalid, status);
5640        return float128_default_nan(status);
5641    }
5642    aSig = extractFloatx80Frac( a );
5643    aExp = extractFloatx80Exp( a );
5644    aSign = extractFloatx80Sign( a );
5645    if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
5646        float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
5647                                           status);
5648        return float128_silence_nan(res, status);
5649    }
5650    shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
5651    return packFloat128( aSign, aExp, zSig0, zSig1 );
5652
5653}
5654
5655/*----------------------------------------------------------------------------
5656| Rounds the extended double-precision floating-point value `a'
5657| to the precision provided by floatx80_rounding_precision and returns the
5658| result as an extended double-precision floating-point value.
5659| The operation is performed according to the IEC/IEEE Standard for Binary
5660| Floating-Point Arithmetic.
5661*----------------------------------------------------------------------------*/
5662
5663floatx80 floatx80_round(floatx80 a, float_status *status)
5664{
5665    return roundAndPackFloatx80(status->floatx80_rounding_precision,
5666                                extractFloatx80Sign(a),
5667                                extractFloatx80Exp(a),
5668                                extractFloatx80Frac(a), 0, status);
5669}
5670
5671/*----------------------------------------------------------------------------
5672| Rounds the extended double-precision floating-point value `a' to an integer,
5673| and returns the result as an extended quadruple-precision floating-point
5674| value.  The operation is performed according to the IEC/IEEE Standard for
5675| Binary Floating-Point Arithmetic.
5676*----------------------------------------------------------------------------*/
5677
5678floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5679{
5680    bool aSign;
5681    int32_t aExp;
5682    uint64_t lastBitMask, roundBitsMask;
5683    floatx80 z;
5684
5685    if (floatx80_invalid_encoding(a)) {
5686        float_raise(float_flag_invalid, status);
5687        return floatx80_default_nan(status);
5688    }
5689    aExp = extractFloatx80Exp( a );
5690    if ( 0x403E <= aExp ) {
5691        if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
5692            return propagateFloatx80NaN(a, a, status);
5693        }
5694        return a;
5695    }
5696    if ( aExp < 0x3FFF ) {
5697        if (    ( aExp == 0 )
5698             && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
5699            return a;
5700        }
5701        status->float_exception_flags |= float_flag_inexact;
5702        aSign = extractFloatx80Sign( a );
5703        switch (status->float_rounding_mode) {
5704         case float_round_nearest_even:
5705            if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
5706               ) {
5707                return
5708                    packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5709            }
5710            break;
5711        case float_round_ties_away:
5712            if (aExp == 0x3FFE) {
5713                return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
5714            }
5715            break;
5716         case float_round_down:
5717            return
5718                  aSign ?
5719                      packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
5720                : packFloatx80( 0, 0, 0 );
5721         case float_round_up:
5722            return
5723                  aSign ? packFloatx80( 1, 0, 0 )
5724                : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
5725
5726        case float_round_to_zero:
5727            break;
5728        default:
5729            g_assert_not_reached();
5730        }
5731        return packFloatx80( aSign, 0, 0 );
5732    }
5733    lastBitMask = 1;
5734    lastBitMask <<= 0x403E - aExp;
5735    roundBitsMask = lastBitMask - 1;
5736    z = a;
5737    switch (status->float_rounding_mode) {
5738    case float_round_nearest_even:
5739        z.low += lastBitMask>>1;
5740        if ((z.low & roundBitsMask) == 0) {
5741            z.low &= ~lastBitMask;
5742        }
5743        break;
5744    case float_round_ties_away:
5745        z.low += lastBitMask >> 1;
5746        break;
5747    case float_round_to_zero:
5748        break;
5749    case float_round_up:
5750        if (!extractFloatx80Sign(z)) {
5751            z.low += roundBitsMask;
5752        }
5753        break;
5754    case float_round_down:
5755        if (extractFloatx80Sign(z)) {
5756            z.low += roundBitsMask;
5757        }
5758        break;
5759    default:
5760        abort();
5761    }
5762    z.low &= ~ roundBitsMask;
5763    if ( z.low == 0 ) {
5764        ++z.high;
5765        z.low = UINT64_C(0x8000000000000000);
5766    }
5767    if (z.low != a.low) {
5768        status->float_exception_flags |= float_flag_inexact;
5769    }
5770    return z;
5771
5772}
5773
5774/*----------------------------------------------------------------------------
5775| Returns the result of adding the absolute values of the extended double-
5776| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
5777| negated before being returned.  `zSign' is ignored if the result is a NaN.
5778| The addition is performed according to the IEC/IEEE Standard for Binary
5779| Floating-Point Arithmetic.
5780*----------------------------------------------------------------------------*/
5781
5782static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
5783                                float_status *status)
5784{
5785    int32_t aExp, bExp, zExp;
5786    uint64_t aSig, bSig, zSig0, zSig1;
5787    int32_t expDiff;
5788
5789    aSig = extractFloatx80Frac( a );
5790    aExp = extractFloatx80Exp( a );
5791    bSig = extractFloatx80Frac( b );
5792    bExp = extractFloatx80Exp( b );
5793    expDiff = aExp - bExp;
5794    if ( 0 < expDiff ) {
5795        if ( aExp == 0x7FFF ) {
5796            if ((uint64_t)(aSig << 1)) {
5797                return propagateFloatx80NaN(a, b, status);
5798            }
5799            return a;
5800        }
5801        if ( bExp == 0 ) --expDiff;
5802        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5803        zExp = aExp;
5804    }
5805    else if ( expDiff < 0 ) {
5806        if ( bExp == 0x7FFF ) {
5807            if ((uint64_t)(bSig << 1)) {
5808                return propagateFloatx80NaN(a, b, status);
5809            }
5810            return packFloatx80(zSign,
5811                                floatx80_infinity_high,
5812                                floatx80_infinity_low);
5813        }
5814        if ( aExp == 0 ) ++expDiff;
5815        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5816        zExp = bExp;
5817    }
5818    else {
5819        if ( aExp == 0x7FFF ) {
5820            if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
5821                return propagateFloatx80NaN(a, b, status);
5822            }
5823            return a;
5824        }
5825        zSig1 = 0;
5826        zSig0 = aSig + bSig;
5827        if ( aExp == 0 ) {
5828            if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
5829                /* At least one of the values is a pseudo-denormal,
5830                 * and there is a carry out of the result.  */
5831                zExp = 1;
5832                goto shiftRight1;
5833            }
5834            if (zSig0 == 0) {
5835                return packFloatx80(zSign, 0, 0);
5836            }
5837            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5838            goto roundAndPack;
5839        }
5840        zExp = aExp;
5841        goto shiftRight1;
5842    }
5843    zSig0 = aSig + bSig;
5844    if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
5845 shiftRight1:
5846    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
5847    zSig0 |= UINT64_C(0x8000000000000000);
5848    ++zExp;
5849 roundAndPack:
5850    return roundAndPackFloatx80(status->floatx80_rounding_precision,
5851                                zSign, zExp, zSig0, zSig1, status);
5852}
5853
5854/*----------------------------------------------------------------------------
5855| Returns the result of subtracting the absolute values of the extended
5856| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
5857| difference is negated before being returned.  `zSign' is ignored if the
5858| result is a NaN.  The subtraction is performed according to the IEC/IEEE
5859| Standard for Binary Floating-Point Arithmetic.
5860*----------------------------------------------------------------------------*/
5861
5862static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
5863                                float_status *status)
5864{
5865    int32_t aExp, bExp, zExp;
5866    uint64_t aSig, bSig, zSig0, zSig1;
5867    int32_t expDiff;
5868
5869    aSig = extractFloatx80Frac( a );
5870    aExp = extractFloatx80Exp( a );
5871    bSig = extractFloatx80Frac( b );
5872    bExp = extractFloatx80Exp( b );
5873    expDiff = aExp - bExp;
5874    if ( 0 < expDiff ) goto aExpBigger;
5875    if ( expDiff < 0 ) goto bExpBigger;
5876    if ( aExp == 0x7FFF ) {
5877        if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
5878            return propagateFloatx80NaN(a, b, status);
5879        }
5880        float_raise(float_flag_invalid, status);
5881        return floatx80_default_nan(status);
5882    }
5883    if ( aExp == 0 ) {
5884        aExp = 1;
5885        bExp = 1;
5886    }
5887    zSig1 = 0;
5888    if ( bSig < aSig ) goto aBigger;
5889    if ( aSig < bSig ) goto bBigger;
5890    return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
5891 bExpBigger:
5892    if ( bExp == 0x7FFF ) {
5893        if ((uint64_t)(bSig << 1)) {
5894            return propagateFloatx80NaN(a, b, status);
5895        }
5896        return packFloatx80(zSign ^ 1, floatx80_infinity_high,
5897                            floatx80_infinity_low);
5898    }
5899    if ( aExp == 0 ) ++expDiff;
5900    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5901 bBigger:
5902    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5903    zExp = bExp;
5904    zSign ^= 1;
5905    goto normalizeRoundAndPack;
5906 aExpBigger:
5907    if ( aExp == 0x7FFF ) {
5908        if ((uint64_t)(aSig << 1)) {
5909            return propagateFloatx80NaN(a, b, status);
5910        }
5911        return a;
5912    }
5913    if ( bExp == 0 ) --expDiff;
5914    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5915 aBigger:
5916    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5917    zExp = aExp;
5918 normalizeRoundAndPack:
5919    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
5920                                         zSign, zExp, zSig0, zSig1, status);
5921}
5922
5923/*----------------------------------------------------------------------------
5924| Returns the result of adding the extended double-precision floating-point
5925| values `a' and `b'.  The operation is performed according to the IEC/IEEE
5926| Standard for Binary Floating-Point Arithmetic.
5927*----------------------------------------------------------------------------*/
5928
5929floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
5930{
5931    bool aSign, bSign;
5932
5933    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5934        float_raise(float_flag_invalid, status);
5935        return floatx80_default_nan(status);
5936    }
5937    aSign = extractFloatx80Sign( a );
5938    bSign = extractFloatx80Sign( b );
5939    if ( aSign == bSign ) {
5940        return addFloatx80Sigs(a, b, aSign, status);
5941    }
5942    else {
5943        return subFloatx80Sigs(a, b, aSign, status);
5944    }
5945
5946}
5947
5948/*----------------------------------------------------------------------------
5949| Returns the result of subtracting the extended double-precision floating-
5950| point values `a' and `b'.  The operation is performed according to the
5951| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5952*----------------------------------------------------------------------------*/
5953
5954floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
5955{
5956    bool aSign, bSign;
5957
5958    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5959        float_raise(float_flag_invalid, status);
5960        return floatx80_default_nan(status);
5961    }
5962    aSign = extractFloatx80Sign( a );
5963    bSign = extractFloatx80Sign( b );
5964    if ( aSign == bSign ) {
5965        return subFloatx80Sigs(a, b, aSign, status);
5966    }
5967    else {
5968        return addFloatx80Sigs(a, b, aSign, status);
5969    }
5970
5971}
5972
5973/*----------------------------------------------------------------------------
5974| Returns the result of multiplying the extended double-precision floating-
5975| point values `a' and `b'.  The operation is performed according to the
5976| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5977*----------------------------------------------------------------------------*/
5978
5979floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
5980{
5981    bool aSign, bSign, zSign;
5982    int32_t aExp, bExp, zExp;
5983    uint64_t aSig, bSig, zSig0, zSig1;
5984
5985    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5986        float_raise(float_flag_invalid, status);
5987        return floatx80_default_nan(status);
5988    }
5989    aSig = extractFloatx80Frac( a );
5990    aExp = extractFloatx80Exp( a );
5991    aSign = extractFloatx80Sign( a );
5992    bSig = extractFloatx80Frac( b );
5993    bExp = extractFloatx80Exp( b );
5994    bSign = extractFloatx80Sign( b );
5995    zSign = aSign ^ bSign;
5996    if ( aExp == 0x7FFF ) {
5997        if (    (uint64_t) ( aSig<<1 )
5998             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
5999            return propagateFloatx80NaN(a, b, status);
6000        }
6001        if ( ( bExp | bSig ) == 0 ) goto invalid;
6002        return packFloatx80(zSign, floatx80_infinity_high,
6003                                   floatx80_infinity_low);
6004    }
6005    if ( bExp == 0x7FFF ) {
6006        if ((uint64_t)(bSig << 1)) {
6007            return propagateFloatx80NaN(a, b, status);
6008        }
6009        if ( ( aExp | aSig ) == 0 ) {
6010 invalid:
6011            float_raise(float_flag_invalid, status);
6012            return floatx80_default_nan(status);
6013        }
6014        return packFloatx80(zSign, floatx80_infinity_high,
6015                                   floatx80_infinity_low);
6016    }
6017    if ( aExp == 0 ) {
6018        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6019        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6020    }
6021    if ( bExp == 0 ) {
6022        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
6023        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6024    }
6025    zExp = aExp + bExp - 0x3FFE;
6026    mul64To128( aSig, bSig, &zSig0, &zSig1 );
6027    if ( 0 < (int64_t) zSig0 ) {
6028        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
6029        --zExp;
6030    }
6031    return roundAndPackFloatx80(status->floatx80_rounding_precision,
6032                                zSign, zExp, zSig0, zSig1, status);
6033}
6034
6035/*----------------------------------------------------------------------------
6036| Returns the result of dividing the extended double-precision floating-point
6037| value `a' by the corresponding value `b'.  The operation is performed
6038| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6039*----------------------------------------------------------------------------*/
6040
6041floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
6042{
6043    bool aSign, bSign, zSign;
6044    int32_t aExp, bExp, zExp;
6045    uint64_t aSig, bSig, zSig0, zSig1;
6046    uint64_t rem0, rem1, rem2, term0, term1, term2;
6047
6048    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6049        float_raise(float_flag_invalid, status);
6050        return floatx80_default_nan(status);
6051    }
6052    aSig = extractFloatx80Frac( a );
6053    aExp = extractFloatx80Exp( a );
6054    aSign = extractFloatx80Sign( a );
6055    bSig = extractFloatx80Frac( b );
6056    bExp = extractFloatx80Exp( b );
6057    bSign = extractFloatx80Sign( b );
6058    zSign = aSign ^ bSign;
6059    if ( aExp == 0x7FFF ) {
6060        if ((uint64_t)(aSig << 1)) {
6061            return propagateFloatx80NaN(a, b, status);
6062        }
6063        if ( bExp == 0x7FFF ) {
6064            if ((uint64_t)(bSig << 1)) {
6065                return propagateFloatx80NaN(a, b, status);
6066            }
6067            goto invalid;
6068        }
6069        return packFloatx80(zSign, floatx80_infinity_high,
6070                                   floatx80_infinity_low);
6071    }
6072    if ( bExp == 0x7FFF ) {
6073        if ((uint64_t)(bSig << 1)) {
6074            return propagateFloatx80NaN(a, b, status);
6075        }
6076        return packFloatx80( zSign, 0, 0 );
6077    }
6078    if ( bExp == 0 ) {
6079        if ( bSig == 0 ) {
6080            if ( ( aExp | aSig ) == 0 ) {
6081 invalid:
6082                float_raise(float_flag_invalid, status);
6083                return floatx80_default_nan(status);
6084            }
6085            float_raise(float_flag_divbyzero, status);
6086            return packFloatx80(zSign, floatx80_infinity_high,
6087                                       floatx80_infinity_low);
6088        }
6089        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6090    }
6091    if ( aExp == 0 ) {
6092        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
6093        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6094    }
6095    zExp = aExp - bExp + 0x3FFE;
6096    rem1 = 0;
6097    if ( bSig <= aSig ) {
6098        shift128Right( aSig, 0, 1, &aSig, &rem1 );
6099        ++zExp;
6100    }
6101    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6102    mul64To128( bSig, zSig0, &term0, &term1 );
6103    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
6104    while ( (int64_t) rem0 < 0 ) {
6105        --zSig0;
6106        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
6107    }
6108    zSig1 = estimateDiv128To64( rem1, 0, bSig );
6109    if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
6110        mul64To128( bSig, zSig1, &term1, &term2 );
6111        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6112        while ( (int64_t) rem1 < 0 ) {
6113            --zSig1;
6114            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
6115        }
6116        zSig1 |= ( ( rem1 | rem2 ) != 0 );
6117    }
6118    return roundAndPackFloatx80(status->floatx80_rounding_precision,
6119                                zSign, zExp, zSig0, zSig1, status);
6120}
6121
6122/*----------------------------------------------------------------------------
6123| Returns the remainder of the extended double-precision floating-point value
6124| `a' with respect to the corresponding value `b'.  The operation is performed
6125| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic,
6126| if 'mod' is false; if 'mod' is true, return the remainder based on truncating
6127| the quotient toward zero instead.  '*quotient' is set to the low 64 bits of
6128| the absolute value of the integer quotient.
6129*----------------------------------------------------------------------------*/
6130
6131floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod, uint64_t *quotient,
6132                         float_status *status)
6133{
6134    bool aSign, zSign;
6135    int32_t aExp, bExp, expDiff, aExpOrig;
6136    uint64_t aSig0, aSig1, bSig;
6137    uint64_t q, term0, term1, alternateASig0, alternateASig1;
6138
6139    *quotient = 0;
6140    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6141        float_raise(float_flag_invalid, status);
6142        return floatx80_default_nan(status);
6143    }
6144    aSig0 = extractFloatx80Frac( a );
6145    aExpOrig = aExp = extractFloatx80Exp( a );
6146    aSign = extractFloatx80Sign( a );
6147    bSig = extractFloatx80Frac( b );
6148    bExp = extractFloatx80Exp( b );
6149    if ( aExp == 0x7FFF ) {
6150        if (    (uint64_t) ( aSig0<<1 )
6151             || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
6152            return propagateFloatx80NaN(a, b, status);
6153        }
6154        goto invalid;
6155    }
6156    if ( bExp == 0x7FFF ) {
6157        if ((uint64_t)(bSig << 1)) {
6158            return propagateFloatx80NaN(a, b, status);
6159        }
6160        if (aExp == 0 && aSig0 >> 63) {
6161            /*
6162             * Pseudo-denormal argument must be returned in normalized
6163             * form.
6164             */
6165            return packFloatx80(aSign, 1, aSig0);
6166        }
6167        return a;
6168    }
6169    if ( bExp == 0 ) {
6170        if ( bSig == 0 ) {
6171 invalid:
6172            float_raise(float_flag_invalid, status);
6173            return floatx80_default_nan(status);
6174        }
6175        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6176    }
6177    if ( aExp == 0 ) {
6178        if ( aSig0 == 0 ) return a;
6179        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6180    }
6181    zSign = aSign;
6182    expDiff = aExp - bExp;
6183    aSig1 = 0;
6184    if ( expDiff < 0 ) {
6185        if ( mod || expDiff < -1 ) {
6186            if (aExp == 1 && aExpOrig == 0) {
6187                /*
6188                 * Pseudo-denormal argument must be returned in
6189                 * normalized form.
6190                 */
6191                return packFloatx80(aSign, aExp, aSig0);
6192            }
6193            return a;
6194        }
6195        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
6196        expDiff = 0;
6197    }
6198    *quotient = q = ( bSig <= aSig0 );
6199    if ( q ) aSig0 -= bSig;
6200    expDiff -= 64;
6201    while ( 0 < expDiff ) {
6202        q = estimateDiv128To64( aSig0, aSig1, bSig );
6203        q = ( 2 < q ) ? q - 2 : 0;
6204        mul64To128( bSig, q, &term0, &term1 );
6205        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6206        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
6207        expDiff -= 62;
6208        *quotient <<= 62;
6209        *quotient += q;
6210    }
6211    expDiff += 64;
6212    if ( 0 < expDiff ) {
6213        q = estimateDiv128To64( aSig0, aSig1, bSig );
6214        q = ( 2 < q ) ? q - 2 : 0;
6215        q >>= 64 - expDiff;
6216        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
6217        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6218        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
6219        while ( le128( term0, term1, aSig0, aSig1 ) ) {
6220            ++q;
6221            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6222        }
6223        if (expDiff < 64) {
6224            *quotient <<= expDiff;
6225        } else {
6226            *quotient = 0;
6227        }
6228        *quotient += q;
6229    }
6230    else {
6231        term1 = 0;
6232        term0 = bSig;
6233    }
6234    if (!mod) {
6235        sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6236        if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6237                || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6238                        && ( q & 1 ) )
6239            ) {
6240            aSig0 = alternateASig0;
6241            aSig1 = alternateASig1;
6242            zSign = ! zSign;
6243            ++*quotient;
6244        }
6245    }
6246    return
6247        normalizeRoundAndPackFloatx80(
6248            80, zSign, bExp + expDiff, aSig0, aSig1, status);
6249
6250}
6251
6252/*----------------------------------------------------------------------------
6253| Returns the remainder of the extended double-precision floating-point value
6254| `a' with respect to the corresponding value `b'.  The operation is performed
6255| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6256*----------------------------------------------------------------------------*/
6257
6258floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6259{
6260    uint64_t quotient;
6261    return floatx80_modrem(a, b, false, &quotient, status);
6262}
6263
6264/*----------------------------------------------------------------------------
6265| Returns the remainder of the extended double-precision floating-point value
6266| `a' with respect to the corresponding value `b', with the quotient truncated
6267| toward zero.
6268*----------------------------------------------------------------------------*/
6269
6270floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
6271{
6272    uint64_t quotient;
6273    return floatx80_modrem(a, b, true, &quotient, status);
6274}
6275
6276/*----------------------------------------------------------------------------
6277| Returns the square root of the extended double-precision floating-point
6278| value `a'.  The operation is performed according to the IEC/IEEE Standard
6279| for Binary Floating-Point Arithmetic.
6280*----------------------------------------------------------------------------*/
6281
6282floatx80 floatx80_sqrt(floatx80 a, float_status *status)
6283{
6284    bool aSign;
6285    int32_t aExp, zExp;
6286    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6287    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6288
6289    if (floatx80_invalid_encoding(a)) {
6290        float_raise(float_flag_invalid, status);
6291        return floatx80_default_nan(status);
6292    }
6293    aSig0 = extractFloatx80Frac( a );
6294    aExp = extractFloatx80Exp( a );
6295    aSign = extractFloatx80Sign( a );
6296    if ( aExp == 0x7FFF ) {
6297        if ((uint64_t)(aSig0 << 1)) {
6298            return propagateFloatx80NaN(a, a, status);
6299        }
6300        if ( ! aSign ) return a;
6301        goto invalid;
6302    }
6303    if ( aSign ) {
6304        if ( ( aExp | aSig0 ) == 0 ) return a;
6305 invalid:
6306        float_raise(float_flag_invalid, status);
6307        return floatx80_default_nan(status);
6308    }
6309    if ( aExp == 0 ) {
6310        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
6311        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6312    }
6313    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
6314    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
6315    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
6316    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6317    doubleZSig0 = zSig0<<1;
6318    mul64To128( zSig0, zSig0, &term0, &term1 );
6319    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6320    while ( (int64_t) rem0 < 0 ) {
6321        --zSig0;
6322        doubleZSig0 -= 2;
6323        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6324    }
6325    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6326    if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
6327        if ( zSig1 == 0 ) zSig1 = 1;
6328        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6329        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6330        mul64To128( zSig1, zSig1, &term2, &term3 );
6331        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
6332        while ( (int64_t) rem1 < 0 ) {
6333            --zSig1;
6334            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6335            term3 |= 1;
6336            term2 |= doubleZSig0;
6337            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6338        }
6339        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6340    }
6341    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
6342    zSig0 |= doubleZSig0;
6343    return roundAndPackFloatx80(status->floatx80_rounding_precision,
6344                                0, zExp, zSig0, zSig1, status);
6345}
6346
6347/*----------------------------------------------------------------------------
6348| Returns the result of converting the quadruple-precision floating-point
6349| value `a' to the 32-bit two's complement integer format.  The conversion
6350| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6351| Arithmetic---which means in particular that the conversion is rounded
6352| according to the current rounding mode.  If `a' is a NaN, the largest
6353| positive integer is returned.  Otherwise, if the conversion overflows, the
6354| largest integer with the same sign as `a' is returned.
6355*----------------------------------------------------------------------------*/
6356
6357int32_t float128_to_int32(float128 a, float_status *status)
6358{
6359    bool aSign;
6360    int32_t aExp, shiftCount;
6361    uint64_t aSig0, aSig1;
6362
6363    aSig1 = extractFloat128Frac1( a );
6364    aSig0 = extractFloat128Frac0( a );
6365    aExp = extractFloat128Exp( a );
6366    aSign = extractFloat128Sign( a );
6367    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
6368    if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
6369    aSig0 |= ( aSig1 != 0 );
6370    shiftCount = 0x4028 - aExp;
6371    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
6372    return roundAndPackInt32(aSign, aSig0, status);
6373
6374}
6375
6376/*----------------------------------------------------------------------------
6377| Returns the result of converting the quadruple-precision floating-point
6378| value `a' to the 32-bit two's complement integer format.  The conversion
6379| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6380| Arithmetic, except that the conversion is always rounded toward zero.  If
6381| `a' is a NaN, the largest positive integer is returned.  Otherwise, if the
6382| conversion overflows, the largest integer with the same sign as `a' is
6383| returned.
6384*----------------------------------------------------------------------------*/
6385
6386int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
6387{
6388    bool aSign;
6389    int32_t aExp, shiftCount;
6390    uint64_t aSig0, aSig1, savedASig;
6391    int32_t z;
6392
6393    aSig1 = extractFloat128Frac1( a );
6394    aSig0 = extractFloat128Frac0( a );
6395    aExp = extractFloat128Exp( a );
6396    aSign = extractFloat128Sign( a );
6397    aSig0 |= ( aSig1 != 0 );
6398    if ( 0x401E < aExp ) {
6399        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
6400        goto invalid;
6401    }
6402    else if ( aExp < 0x3FFF ) {
6403        if (aExp || aSig0) {
6404            status->float_exception_flags |= float_flag_inexact;
6405        }
6406        return 0;
6407    }
6408    aSig0 |= UINT64_C(0x0001000000000000);
6409    shiftCount = 0x402F - aExp;
6410    savedASig = aSig0;
6411    aSig0 >>= shiftCount;
6412    z = aSig0;
6413    if ( aSign ) z = - z;
6414    if ( ( z < 0 ) ^ aSign ) {
6415 invalid:
6416        float_raise(float_flag_invalid, status);
6417        return aSign ? INT32_MIN : INT32_MAX;
6418    }
6419    if ( ( aSig0<<shiftCount ) != savedASig ) {
6420        status->float_exception_flags |= float_flag_inexact;
6421    }
6422    return z;
6423
6424}
6425
6426/*----------------------------------------------------------------------------
6427| Returns the result of converting the quadruple-precision floating-point
6428| value `a' to the 64-bit two's complement integer format.  The conversion
6429| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6430| Arithmetic---which means in particular that the conversion is rounded
6431| according to the current rounding mode.  If `a' is a NaN, the largest
6432| positive integer is returned.  Otherwise, if the conversion overflows, the
6433| largest integer with the same sign as `a' is returned.
6434*----------------------------------------------------------------------------*/
6435
6436int64_t float128_to_int64(float128 a, float_status *status)
6437{
6438    bool aSign;
6439    int32_t aExp, shiftCount;
6440    uint64_t aSig0, aSig1;
6441
6442    aSig1 = extractFloat128Frac1( a );
6443    aSig0 = extractFloat128Frac0( a );
6444    aExp = extractFloat128Exp( a );
6445    aSign = extractFloat128Sign( a );
6446    if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
6447    shiftCount = 0x402F - aExp;
6448    if ( shiftCount <= 0 ) {
6449        if ( 0x403E < aExp ) {
6450            float_raise(float_flag_invalid, status);
6451            if (    ! aSign
6452                 || (    ( aExp == 0x7FFF )
6453                      && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
6454                    )
6455               ) {
6456                return INT64_MAX;
6457            }
6458            return INT64_MIN;
6459        }
6460        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6461    }
6462    else {
6463        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6464    }
6465    return roundAndPackInt64(aSign, aSig0, aSig1, status);
6466
6467}
6468
6469/*----------------------------------------------------------------------------
6470| Returns the result of converting the quadruple-precision floating-point
6471| value `a' to the 64-bit two's complement integer format.  The conversion
6472| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6473| Arithmetic, except that the conversion is always rounded toward zero.
6474| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
6475| the conversion overflows, the largest integer with the same sign as `a' is
6476| returned.
6477*----------------------------------------------------------------------------*/
6478
6479int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
6480{
6481    bool aSign;
6482    int32_t aExp, shiftCount;
6483    uint64_t aSig0, aSig1;
6484    int64_t z;
6485
6486    aSig1 = extractFloat128Frac1( a );
6487    aSig0 = extractFloat128Frac0( a );
6488    aExp = extractFloat128Exp( a );
6489    aSign = extractFloat128Sign( a );
6490    if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
6491    shiftCount = aExp - 0x402F;
6492    if ( 0 < shiftCount ) {
6493        if ( 0x403E <= aExp ) {
6494            aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
6495            if (    ( a.high == UINT64_C(0xC03E000000000000) )
6496                 && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
6497                if (aSig1) {
6498                    status->float_exception_flags |= float_flag_inexact;
6499                }
6500            }
6501            else {
6502                float_raise(float_flag_invalid, status);
6503                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
6504                    return INT64_MAX;
6505                }
6506            }
6507            return INT64_MIN;
6508        }
6509        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
6510        if ( (uint64_t) ( aSig1<<shiftCount ) ) {
6511            status->float_exception_flags |= float_flag_inexact;
6512        }
6513    }
6514    else {
6515        if ( aExp < 0x3FFF ) {
6516            if ( aExp | aSig0 | aSig1 ) {
6517                status->float_exception_flags |= float_flag_inexact;
6518            }
6519            return 0;
6520        }
6521        z = aSig0>>( - shiftCount );
6522        if (    aSig1
6523             || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
6524            status->float_exception_flags |= float_flag_inexact;
6525        }
6526    }
6527    if ( aSign ) z = - z;
6528    return z;
6529
6530}
6531
6532/*----------------------------------------------------------------------------
6533| Returns the result of converting the quadruple-precision floating-point value
6534| `a' to the 64-bit unsigned integer format.  The conversion is
6535| performed according to the IEC/IEEE Standard for Binary Floating-Point
6536| Arithmetic---which means in particular that the conversion is rounded
6537| according to the current rounding mode.  If `a' is a NaN, the largest
6538| positive integer is returned.  If the conversion overflows, the
6539| largest unsigned integer is returned.  If 'a' is negative, the value is
6540| rounded and zero is returned; negative values that do not round to zero
6541| will raise the inexact exception.
6542*----------------------------------------------------------------------------*/
6543
6544uint64_t float128_to_uint64(float128 a, float_status *status)
6545{
6546    bool aSign;
6547    int aExp;
6548    int shiftCount;
6549    uint64_t aSig0, aSig1;
6550
6551    aSig0 = extractFloat128Frac0(a);
6552    aSig1 = extractFloat128Frac1(a);
6553    aExp = extractFloat128Exp(a);
6554    aSign = extractFloat128Sign(a);
6555    if (aSign && (aExp > 0x3FFE)) {
6556        float_raise(float_flag_invalid, status);
6557        if (float128_is_any_nan(a)) {
6558            return UINT64_MAX;
6559        } else {
6560            return 0;
6561        }
6562    }
6563    if (aExp) {
6564        aSig0 |= UINT64_C(0x0001000000000000);
6565    }
6566    shiftCount = 0x402F - aExp;
6567    if (shiftCount <= 0) {
6568        if (0x403E < aExp) {
6569            float_raise(float_flag_invalid, status);
6570            return UINT64_MAX;
6571        }
6572        shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6573    } else {
6574        shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6575    }
6576    return roundAndPackUint64(aSign, aSig0, aSig1, status);
6577}
6578
6579uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6580{
6581    uint64_t v;
6582    signed char current_rounding_mode = status->float_rounding_mode;
6583
6584    set_float_rounding_mode(float_round_to_zero, status);
6585    v = float128_to_uint64(a, status);
6586    set_float_rounding_mode(current_rounding_mode, status);
6587
6588    return v;
6589}
6590
6591/*----------------------------------------------------------------------------
6592| Returns the result of converting the quadruple-precision floating-point
6593| value `a' to the 32-bit unsigned integer format.  The conversion
6594| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6595| Arithmetic except that the conversion is always rounded toward zero.
6596| If `a' is a NaN, the largest positive integer is returned.  Otherwise,
6597| if the conversion overflows, the largest unsigned integer is returned.
6598| If 'a' is negative, the value is rounded and zero is returned; negative
6599| values that do not round to zero will raise the inexact exception.
6600*----------------------------------------------------------------------------*/
6601
6602uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6603{
6604    uint64_t v;
6605    uint32_t res;
6606    int old_exc_flags = get_float_exception_flags(status);
6607
6608    v = float128_to_uint64_round_to_zero(a, status);
6609    if (v > 0xffffffff) {
6610        res = 0xffffffff;
6611    } else {
6612        return v;
6613    }
6614    set_float_exception_flags(old_exc_flags, status);
6615    float_raise(float_flag_invalid, status);
6616    return res;
6617}
6618
6619/*----------------------------------------------------------------------------
6620| Returns the result of converting the quadruple-precision floating-point value
6621| `a' to the 32-bit unsigned integer format.  The conversion is
6622| performed according to the IEC/IEEE Standard for Binary Floating-Point
6623| Arithmetic---which means in particular that the conversion is rounded
6624| according to the current rounding mode.  If `a' is a NaN, the largest
6625| positive integer is returned.  If the conversion overflows, the
6626| largest unsigned integer is returned.  If 'a' is negative, the value is
6627| rounded and zero is returned; negative values that do not round to zero
6628| will raise the inexact exception.
6629*----------------------------------------------------------------------------*/
6630
6631uint32_t float128_to_uint32(float128 a, float_status *status)
6632{
6633    uint64_t v;
6634    uint32_t res;
6635    int old_exc_flags = get_float_exception_flags(status);
6636
6637    v = float128_to_uint64(a, status);
6638    if (v > 0xffffffff) {
6639        res = 0xffffffff;
6640    } else {
6641        return v;
6642    }
6643    set_float_exception_flags(old_exc_flags, status);
6644    float_raise(float_flag_invalid, status);
6645    return res;
6646}
6647
6648/*----------------------------------------------------------------------------
6649| Returns the result of converting the quadruple-precision floating-point
6650| value `a' to the single-precision floating-point format.  The conversion
6651| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6652| Arithmetic.
6653*----------------------------------------------------------------------------*/
6654
6655float32 float128_to_float32(float128 a, float_status *status)
6656{
6657    bool aSign;
6658    int32_t aExp;
6659    uint64_t aSig0, aSig1;
6660    uint32_t zSig;
6661
6662    aSig1 = extractFloat128Frac1( a );
6663    aSig0 = extractFloat128Frac0( a );
6664    aExp = extractFloat128Exp( a );
6665    aSign = extractFloat128Sign( a );
6666    if ( aExp == 0x7FFF ) {
6667        if ( aSig0 | aSig1 ) {
6668            return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
6669        }
6670        return packFloat32( aSign, 0xFF, 0 );
6671    }
6672    aSig0 |= ( aSig1 != 0 );
6673    shift64RightJamming( aSig0, 18, &aSig0 );
6674    zSig = aSig0;
6675    if ( aExp || zSig ) {
6676        zSig |= 0x40000000;
6677        aExp -= 0x3F81;
6678    }
6679    return roundAndPackFloat32(aSign, aExp, zSig, status);
6680
6681}
6682
6683/*----------------------------------------------------------------------------
6684| Returns the result of converting the quadruple-precision floating-point
6685| value `a' to the double-precision floating-point format.  The conversion
6686| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6687| Arithmetic.
6688*----------------------------------------------------------------------------*/
6689
6690float64 float128_to_float64(float128 a, float_status *status)
6691{
6692    bool aSign;
6693    int32_t aExp;
6694    uint64_t aSig0, aSig1;
6695
6696    aSig1 = extractFloat128Frac1( a );
6697    aSig0 = extractFloat128Frac0( a );
6698    aExp = extractFloat128Exp( a );
6699    aSign = extractFloat128Sign( a );
6700    if ( aExp == 0x7FFF ) {
6701        if ( aSig0 | aSig1 ) {
6702            return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
6703        }
6704        return packFloat64( aSign, 0x7FF, 0 );
6705    }
6706    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6707    aSig0 |= ( aSig1 != 0 );
6708    if ( aExp || aSig0 ) {
6709        aSig0 |= UINT64_C(0x4000000000000000);
6710        aExp -= 0x3C01;
6711    }
6712    return roundAndPackFloat64(aSign, aExp, aSig0, status);
6713
6714}
6715
6716/*----------------------------------------------------------------------------
6717| Returns the result of converting the quadruple-precision floating-point
6718| value `a' to the extended double-precision floating-point format.  The
6719| conversion is performed according to the IEC/IEEE Standard for Binary
6720| Floating-Point Arithmetic.
6721*----------------------------------------------------------------------------*/
6722
6723floatx80 float128_to_floatx80(float128 a, float_status *status)
6724{
6725    bool aSign;
6726    int32_t aExp;
6727    uint64_t aSig0, aSig1;
6728
6729    aSig1 = extractFloat128Frac1( a );
6730    aSig0 = extractFloat128Frac0( a );
6731    aExp = extractFloat128Exp( a );
6732    aSign = extractFloat128Sign( a );
6733    if ( aExp == 0x7FFF ) {
6734        if ( aSig0 | aSig1 ) {
6735            floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
6736                                               status);
6737            return floatx80_silence_nan(res, status);
6738        }
6739        return packFloatx80(aSign, floatx80_infinity_high,
6740                                   floatx80_infinity_low);
6741    }
6742    if ( aExp == 0 ) {
6743        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
6744        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6745    }
6746    else {
6747        aSig0 |= UINT64_C(0x0001000000000000);
6748    }
6749    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
6750    return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
6751
6752}
6753
6754/*----------------------------------------------------------------------------
6755| Rounds the quadruple-precision floating-point value `a' to an integer, and
6756| returns the result as a quadruple-precision floating-point value.  The
6757| operation is performed according to the IEC/IEEE Standard for Binary
6758| Floating-Point Arithmetic.
6759*----------------------------------------------------------------------------*/
6760
6761float128 float128_round_to_int(float128 a, float_status *status)
6762{
6763    bool aSign;
6764    int32_t aExp;
6765    uint64_t lastBitMask, roundBitsMask;
6766    float128 z;
6767
6768    aExp = extractFloat128Exp( a );
6769    if ( 0x402F <= aExp ) {
6770        if ( 0x406F <= aExp ) {
6771            if (    ( aExp == 0x7FFF )
6772                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
6773               ) {
6774                return propagateFloat128NaN(a, a, status);
6775            }
6776            return a;
6777        }
6778        lastBitMask = 1;
6779        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
6780        roundBitsMask = lastBitMask - 1;
6781        z = a;
6782        switch (status->float_rounding_mode) {
6783        case float_round_nearest_even:
6784            if ( lastBitMask ) {
6785                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
6786                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
6787            }
6788            else {
6789                if ( (int64_t) z.low < 0 ) {
6790                    ++z.high;
6791                    if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
6792                }
6793            }
6794            break;
6795        case float_round_ties_away:
6796            if (lastBitMask) {
6797                add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
6798            } else {
6799                if ((int64_t) z.low < 0) {
6800                    ++z.high;
6801                }
6802            }
6803            break;
6804        case float_round_to_zero:
6805            break;
6806        case float_round_up:
6807            if (!extractFloat128Sign(z)) {
6808                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6809            }
6810            break;
6811        case float_round_down:
6812            if (extractFloat128Sign(z)) {
6813                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6814            }
6815            break;
6816        case float_round_to_odd:
6817            /*
6818             * Note that if lastBitMask == 0, the last bit is the lsb
6819             * of high, and roundBitsMask == -1.
6820             */
6821            if ((lastBitMask ? z.low & lastBitMask : z.high & 1) == 0) {
6822                add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
6823            }
6824            break;
6825        default:
6826            abort();
6827        }
6828        z.low &= ~ roundBitsMask;
6829    }
6830    else {
6831        if ( aExp < 0x3FFF ) {
6832            if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
6833            status->float_exception_flags |= float_flag_inexact;
6834            aSign = extractFloat128Sign( a );
6835            switch (status->float_rounding_mode) {
6836            case float_round_nearest_even:
6837                if (    ( aExp == 0x3FFE )
6838                     && (   extractFloat128Frac0( a )
6839                          | extractFloat128Frac1( a ) )
6840                   ) {
6841                    return packFloat128( aSign, 0x3FFF, 0, 0 );
6842                }
6843                break;
6844            case float_round_ties_away:
6845                if (aExp == 0x3FFE) {
6846                    return packFloat128(aSign, 0x3FFF, 0, 0);
6847                }
6848                break;
6849            case float_round_down:
6850                return
6851                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6852                    : packFloat128( 0, 0, 0, 0 );
6853            case float_round_up:
6854                return
6855                      aSign ? packFloat128( 1, 0, 0, 0 )
6856                    : packFloat128( 0, 0x3FFF, 0, 0 );
6857
6858            case float_round_to_odd:
6859                return packFloat128(aSign, 0x3FFF, 0, 0);
6860
6861            case float_round_to_zero:
6862                break;
6863            }
6864            return packFloat128( aSign, 0, 0, 0 );
6865        }
6866        lastBitMask = 1;
6867        lastBitMask <<= 0x402F - aExp;
6868        roundBitsMask = lastBitMask - 1;
6869        z.low = 0;
6870        z.high = a.high;
6871        switch (status->float_rounding_mode) {
6872        case float_round_nearest_even:
6873            z.high += lastBitMask>>1;
6874            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6875                z.high &= ~ lastBitMask;
6876            }
6877            break;
6878        case float_round_ties_away:
6879            z.high += lastBitMask>>1;
6880            break;
6881        case float_round_to_zero:
6882            break;
6883        case float_round_up:
6884            if (!extractFloat128Sign(z)) {
6885                z.high |= ( a.low != 0 );
6886                z.high += roundBitsMask;
6887            }
6888            break;
6889        case float_round_down:
6890            if (extractFloat128Sign(z)) {
6891                z.high |= (a.low != 0);
6892                z.high += roundBitsMask;
6893            }
6894            break;
6895        case float_round_to_odd:
6896            if ((z.high & lastBitMask) == 0) {
6897                z.high |= (a.low != 0);
6898                z.high += roundBitsMask;
6899            }
6900            break;
6901        default:
6902            abort();
6903        }
6904        z.high &= ~ roundBitsMask;
6905    }
6906    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
6907        status->float_exception_flags |= float_flag_inexact;
6908    }
6909    return z;
6910
6911}
6912
6913/*----------------------------------------------------------------------------
6914| Returns the result of adding the absolute values of the quadruple-precision
6915| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
6916| before being returned.  `zSign' is ignored if the result is a NaN.
6917| The addition is performed according to the IEC/IEEE Standard for Binary
6918| Floating-Point Arithmetic.
6919*----------------------------------------------------------------------------*/
6920
6921static float128 addFloat128Sigs(float128 a, float128 b, bool zSign,
6922                                float_status *status)
6923{
6924    int32_t aExp, bExp, zExp;
6925    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6926    int32_t expDiff;
6927
6928    aSig1 = extractFloat128Frac1( a );
6929    aSig0 = extractFloat128Frac0( a );
6930    aExp = extractFloat128Exp( a );
6931    bSig1 = extractFloat128Frac1( b );
6932    bSig0 = extractFloat128Frac0( b );
6933    bExp = extractFloat128Exp( b );
6934    expDiff = aExp - bExp;
6935    if ( 0 < expDiff ) {
6936        if ( aExp == 0x7FFF ) {
6937            if (aSig0 | aSig1) {
6938                return propagateFloat128NaN(a, b, status);
6939            }
6940            return a;
6941        }
6942        if ( bExp == 0 ) {
6943            --expDiff;
6944        }
6945        else {
6946            bSig0 |= UINT64_C(0x0001000000000000);
6947        }
6948        shift128ExtraRightJamming(
6949            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6950        zExp = aExp;
6951    }
6952    else if ( expDiff < 0 ) {
6953        if ( bExp == 0x7FFF ) {
6954            if (bSig0 | bSig1) {
6955                return propagateFloat128NaN(a, b, status);
6956            }
6957            return packFloat128( zSign, 0x7FFF, 0, 0 );
6958        }
6959        if ( aExp == 0 ) {
6960            ++expDiff;
6961        }
6962        else {
6963            aSig0 |= UINT64_C(0x0001000000000000);
6964        }
6965        shift128ExtraRightJamming(
6966            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6967        zExp = bExp;
6968    }
6969    else {
6970        if ( aExp == 0x7FFF ) {
6971            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
6972                return propagateFloat128NaN(a, b, status);
6973            }
6974            return a;
6975        }
6976        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6977        if ( aExp == 0 ) {
6978            if (status->flush_to_zero) {
6979                if (zSig0 | zSig1) {
6980                    float_raise(float_flag_output_denormal, status);
6981                }
6982                return packFloat128(zSign, 0, 0, 0);
6983            }
6984            return packFloat128( zSign, 0, zSig0, zSig1 );
6985        }
6986        zSig2 = 0;
6987        zSig0 |= UINT64_C(0x0002000000000000);
6988        zExp = aExp;
6989        goto shiftRight1;
6990    }
6991    aSig0 |= UINT64_C(0x0001000000000000);
6992    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6993    --zExp;
6994    if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack;
6995    ++zExp;
6996 shiftRight1:
6997    shift128ExtraRightJamming(
6998        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6999 roundAndPack:
7000    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
7001
7002}
7003
7004/*----------------------------------------------------------------------------
7005| Returns the result of subtracting the absolute values of the quadruple-
7006| precision floating-point values `a' and `b'.  If `zSign' is 1, the
7007| difference is negated before being returned.  `zSign' is ignored if the
7008| result is a NaN.  The subtraction is performed according to the IEC/IEEE
7009| Standard for Binary Floating-Point Arithmetic.
7010*----------------------------------------------------------------------------*/
7011
7012static float128 subFloat128Sigs(float128 a, float128 b, bool zSign,
7013                                float_status *status)
7014{
7015    int32_t aExp, bExp, zExp;
7016    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
7017    int32_t expDiff;
7018
7019    aSig1 = extractFloat128Frac1( a );
7020    aSig0 = extractFloat128Frac0( a );
7021    aExp = extractFloat128Exp( a );
7022    bSig1 = extractFloat128Frac1( b );
7023    bSig0 = extractFloat128Frac0( b );
7024    bExp = extractFloat128Exp( b );
7025    expDiff = aExp - bExp;
7026    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
7027    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
7028    if ( 0 < expDiff ) goto aExpBigger;
7029    if ( expDiff < 0 ) goto bExpBigger;
7030    if ( aExp == 0x7FFF ) {
7031        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
7032            return propagateFloat128NaN(a, b, status);
7033        }
7034        float_raise(float_flag_invalid, status);
7035        return float128_default_nan(status);
7036    }
7037    if ( aExp == 0 ) {
7038        aExp = 1;
7039        bExp = 1;
7040    }
7041    if ( bSig0 < aSig0 ) goto aBigger;
7042    if ( aSig0 < bSig0 ) goto bBigger;
7043    if ( bSig1 < aSig1 ) goto aBigger;
7044    if ( aSig1 < bSig1 ) goto bBigger;
7045    return packFloat128(status->float_rounding_mode == float_round_down,
7046                        0, 0, 0);
7047 bExpBigger:
7048    if ( bExp == 0x7FFF ) {
7049        if (bSig0 | bSig1) {
7050            return propagateFloat128NaN(a, b, status);
7051        }
7052        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
7053    }
7054    if ( aExp == 0 ) {
7055        ++expDiff;
7056    }
7057    else {
7058        aSig0 |= UINT64_C(0x4000000000000000);
7059    }
7060    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7061    bSig0 |= UINT64_C(0x4000000000000000);
7062 bBigger:
7063    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
7064    zExp = bExp;
7065    zSign ^= 1;
7066    goto normalizeRoundAndPack;
7067 aExpBigger:
7068    if ( aExp == 0x7FFF ) {
7069        if (aSig0 | aSig1) {
7070            return propagateFloat128NaN(a, b, status);
7071        }
7072        return a;
7073    }
7074    if ( bExp == 0 ) {
7075        --expDiff;
7076    }
7077    else {
7078        bSig0 |= UINT64_C(0x4000000000000000);
7079    }
7080    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
7081    aSig0 |= UINT64_C(0x4000000000000000);
7082 aBigger:
7083    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7084    zExp = aExp;
7085 normalizeRoundAndPack:
7086    --zExp;
7087    return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
7088                                         status);
7089
7090}
7091
7092/*----------------------------------------------------------------------------
7093| Returns the result of adding the quadruple-precision floating-point values
7094| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
7095| for Binary Floating-Point Arithmetic.
7096*----------------------------------------------------------------------------*/
7097
7098float128 float128_add(float128 a, float128 b, float_status *status)
7099{
7100    bool aSign, bSign;
7101
7102    aSign = extractFloat128Sign( a );
7103    bSign = extractFloat128Sign( b );
7104    if ( aSign == bSign ) {
7105        return addFloat128Sigs(a, b, aSign, status);
7106    }
7107    else {
7108        return subFloat128Sigs(a, b, aSign, status);
7109    }
7110
7111}
7112
7113/*----------------------------------------------------------------------------
7114| Returns the result of subtracting the quadruple-precision floating-point
7115| values `a' and `b'.  The operation is performed according to the IEC/IEEE
7116| Standard for Binary Floating-Point Arithmetic.
7117*----------------------------------------------------------------------------*/
7118
7119float128 float128_sub(float128 a, float128 b, float_status *status)
7120{
7121    bool aSign, bSign;
7122
7123    aSign = extractFloat128Sign( a );
7124    bSign = extractFloat128Sign( b );
7125    if ( aSign == bSign ) {
7126        return subFloat128Sigs(a, b, aSign, status);
7127    }
7128    else {
7129        return addFloat128Sigs(a, b, aSign, status);
7130    }
7131
7132}
7133
7134/*----------------------------------------------------------------------------
7135| Returns the result of multiplying the quadruple-precision floating-point
7136| values `a' and `b'.  The operation is performed according to the IEC/IEEE
7137| Standard for Binary Floating-Point Arithmetic.
7138*----------------------------------------------------------------------------*/
7139
7140float128 float128_mul(float128 a, float128 b, float_status *status)
7141{
7142    bool aSign, bSign, zSign;
7143    int32_t aExp, bExp, zExp;
7144    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
7145
7146    aSig1 = extractFloat128Frac1( a );
7147    aSig0 = extractFloat128Frac0( a );
7148    aExp = extractFloat128Exp( a );
7149    aSign = extractFloat128Sign( a );
7150    bSig1 = extractFloat128Frac1( b );
7151    bSig0 = extractFloat128Frac0( b );
7152    bExp = extractFloat128Exp( b );
7153    bSign = extractFloat128Sign( b );
7154    zSign = aSign ^ bSign;
7155    if ( aExp == 0x7FFF ) {
7156        if (    ( aSig0 | aSig1 )
7157             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
7158            return propagateFloat128NaN(a, b, status);
7159        }
7160        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
7161        return packFloat128( zSign, 0x7FFF, 0, 0 );
7162    }
7163    if ( bExp == 0x7FFF ) {
7164        if (bSig0 | bSig1) {
7165            return propagateFloat128NaN(a, b, status);
7166        }
7167        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7168 invalid:
7169            float_raise(float_flag_invalid, status);
7170            return float128_default_nan(status);
7171        }
7172        return packFloat128( zSign, 0x7FFF, 0, 0 );
7173    }
7174    if ( aExp == 0 ) {
7175        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7176        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7177    }
7178    if ( bExp == 0 ) {
7179        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7180        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7181    }
7182    zExp = aExp + bExp - 0x4000;
7183    aSig0 |= UINT64_C(0x0001000000000000);
7184    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
7185    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
7186    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
7187    zSig2 |= ( zSig3 != 0 );
7188    if (UINT64_C( 0x0002000000000000) <= zSig0 ) {
7189        shift128ExtraRightJamming(
7190            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
7191        ++zExp;
7192    }
7193    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
7194
7195}
7196
7197/*----------------------------------------------------------------------------
7198| Returns the result of dividing the quadruple-precision floating-point value
7199| `a' by the corresponding value `b'.  The operation is performed according to
7200| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7201*----------------------------------------------------------------------------*/
7202
7203float128 float128_div(float128 a, float128 b, float_status *status)
7204{
7205    bool aSign, bSign, zSign;
7206    int32_t aExp, bExp, zExp;
7207    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
7208    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
7209
7210    aSig1 = extractFloat128Frac1( a );
7211    aSig0 = extractFloat128Frac0( a );
7212    aExp = extractFloat128Exp( a );
7213    aSign = extractFloat128Sign( a );
7214    bSig1 = extractFloat128Frac1( b );
7215    bSig0 = extractFloat128Frac0( b );
7216    bExp = extractFloat128Exp( b );
7217    bSign = extractFloat128Sign( b );
7218    zSign = aSign ^ bSign;
7219    if ( aExp == 0x7FFF ) {
7220        if (aSig0 | aSig1) {
7221            return propagateFloat128NaN(a, b, status);
7222        }
7223        if ( bExp == 0x7FFF ) {
7224            if (bSig0 | bSig1) {
7225                return propagateFloat128NaN(a, b, status);
7226            }
7227            goto invalid;
7228        }
7229        return packFloat128( zSign, 0x7FFF, 0, 0 );
7230    }
7231    if ( bExp == 0x7FFF ) {
7232        if (bSig0 | bSig1) {
7233            return propagateFloat128NaN(a, b, status);
7234        }
7235        return packFloat128( zSign, 0, 0, 0 );
7236    }
7237    if ( bExp == 0 ) {
7238        if ( ( bSig0 | bSig1 ) == 0 ) {
7239            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
7240 invalid:
7241                float_raise(float_flag_invalid, status);
7242                return float128_default_nan(status);
7243            }
7244            float_raise(float_flag_divbyzero, status);
7245            return packFloat128( zSign, 0x7FFF, 0, 0 );
7246        }
7247        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7248    }
7249    if ( aExp == 0 ) {
7250        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
7251        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7252    }
7253    zExp = aExp - bExp + 0x3FFD;
7254    shortShift128Left(
7255        aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 );
7256    shortShift128Left(
7257        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
7258    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
7259        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
7260        ++zExp;
7261    }
7262    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
7263    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
7264    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
7265    while ( (int64_t) rem0 < 0 ) {
7266        --zSig0;
7267        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
7268    }
7269    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
7270    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
7271        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
7272        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
7273        while ( (int64_t) rem1 < 0 ) {
7274            --zSig1;
7275            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
7276        }
7277        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7278    }
7279    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
7280    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
7281
7282}
7283
7284/*----------------------------------------------------------------------------
7285| Returns the remainder of the quadruple-precision floating-point value `a'
7286| with respect to the corresponding value `b'.  The operation is performed
7287| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7288*----------------------------------------------------------------------------*/
7289
7290float128 float128_rem(float128 a, float128 b, float_status *status)
7291{
7292    bool aSign, zSign;
7293    int32_t aExp, bExp, expDiff;
7294    uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7295    uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
7296    int64_t sigMean0;
7297
7298    aSig1 = extractFloat128Frac1( a );
7299    aSig0 = extractFloat128Frac0( a );
7300    aExp = extractFloat128Exp( a );
7301    aSign = extractFloat128Sign( a );
7302    bSig1 = extractFloat128Frac1( b );
7303    bSig0 = extractFloat128Frac0( b );
7304    bExp = extractFloat128Exp( b );
7305    if ( aExp == 0x7FFF ) {
7306        if (    ( aSig0 | aSig1 )
7307             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
7308            return propagateFloat128NaN(a, b, status);
7309        }
7310        goto invalid;
7311    }
7312    if ( bExp == 0x7FFF ) {
7313        if (bSig0 | bSig1) {
7314            return propagateFloat128NaN(a, b, status);
7315        }
7316        return a;
7317    }
7318    if ( bExp == 0 ) {
7319        if ( ( bSig0 | bSig1 ) == 0 ) {
7320 invalid:
7321            float_raise(float_flag_invalid, status);
7322            return float128_default_nan(status);
7323        }
7324        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7325    }
7326    if ( aExp == 0 ) {
7327        if ( ( aSig0 | aSig1 ) == 0 ) return a;
7328        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7329    }
7330    expDiff = aExp - bExp;
7331    if ( expDiff < -1 ) return a;
7332    shortShift128Left(
7333        aSig0 | UINT64_C(0x0001000000000000),
7334        aSig1,
7335        15 - ( expDiff < 0 ),
7336        &aSig0,
7337        &aSig1
7338    );
7339    shortShift128Left(
7340        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
7341    q = le128( bSig0, bSig1, aSig0, aSig1 );
7342    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7343    expDiff -= 64;
7344    while ( 0 < expDiff ) {
7345        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7346        q = ( 4 < q ) ? q - 4 : 0;
7347        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7348        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
7349        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
7350        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
7351        expDiff -= 61;
7352    }
7353    if ( -64 < expDiff ) {
7354        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7355        q = ( 4 < q ) ? q - 4 : 0;
7356        q >>= - expDiff;
7357        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7358        expDiff += 52;
7359        if ( expDiff < 0 ) {
7360            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7361        }
7362        else {
7363            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7364        }
7365        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7366        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7367    }
7368    else {
7369        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
7370        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
7371    }
7372    do {
7373        alternateASig0 = aSig0;
7374        alternateASig1 = aSig1;
7375        ++q;
7376        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7377    } while ( 0 <= (int64_t) aSig0 );
7378    add128(
7379        aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
7380    if (    ( sigMean0 < 0 )
7381         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
7382        aSig0 = alternateASig0;
7383        aSig1 = alternateASig1;
7384    }
7385    zSign = ( (int64_t) aSig0 < 0 );
7386    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
7387    return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
7388                                         status);
7389}
7390
7391/*----------------------------------------------------------------------------
7392| Returns the square root of the quadruple-precision floating-point value `a'.
7393| The operation is performed according to the IEC/IEEE Standard for Binary
7394| Floating-Point Arithmetic.
7395*----------------------------------------------------------------------------*/
7396
7397float128 float128_sqrt(float128 a, float_status *status)
7398{
7399    bool aSign;
7400    int32_t aExp, zExp;
7401    uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7402    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
7403
7404    aSig1 = extractFloat128Frac1( a );
7405    aSig0 = extractFloat128Frac0( a );
7406    aExp = extractFloat128Exp( a );
7407    aSign = extractFloat128Sign( a );
7408    if ( aExp == 0x7FFF ) {
7409        if (aSig0 | aSig1) {
7410            return propagateFloat128NaN(a, a, status);
7411        }
7412        if ( ! aSign ) return a;
7413        goto invalid;
7414    }
7415    if ( aSign ) {
7416        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
7417 invalid:
7418        float_raise(float_flag_invalid, status);
7419        return float128_default_nan(status);
7420    }
7421    if ( aExp == 0 ) {
7422        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
7423        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7424    }
7425    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
7426    aSig0 |= UINT64_C(0x0001000000000000);
7427    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
7428    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
7429    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
7430    doubleZSig0 = zSig0<<1;
7431    mul64To128( zSig0, zSig0, &term0, &term1 );
7432    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
7433    while ( (int64_t) rem0 < 0 ) {
7434        --zSig0;
7435        doubleZSig0 -= 2;
7436        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
7437    }
7438    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
7439    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
7440        if ( zSig1 == 0 ) zSig1 = 1;
7441        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7442        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
7443        mul64To128( zSig1, zSig1, &term2, &term3 );
7444        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
7445        while ( (int64_t) rem1 < 0 ) {
7446            --zSig1;
7447            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
7448            term3 |= 1;
7449            term2 |= doubleZSig0;
7450            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
7451        }
7452        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
7453    }
7454    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
7455    return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
7456
7457}
7458
7459static inline FloatRelation
7460floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
7461                          float_status *status)
7462{
7463    bool aSign, bSign;
7464
7465    if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
7466        float_raise(float_flag_invalid, status);
7467        return float_relation_unordered;
7468    }
7469    if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7470          ( extractFloatx80Frac( a )<<1 ) ) ||
7471        ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7472          ( extractFloatx80Frac( b )<<1 ) )) {
7473        if (!is_quiet ||
7474            floatx80_is_signaling_nan(a, status) ||
7475            floatx80_is_signaling_nan(b, status)) {
7476            float_raise(float_flag_invalid, status);
7477        }
7478        return float_relation_unordered;
7479    }
7480    aSign = extractFloatx80Sign( a );
7481    bSign = extractFloatx80Sign( b );
7482    if ( aSign != bSign ) {
7483
7484        if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7485             ( ( a.low | b.low ) == 0 ) ) {
7486            /* zero case */
7487            return float_relation_equal;
7488        } else {
7489            return 1 - (2 * aSign);
7490        }
7491    } else {
7492        /* Normalize pseudo-denormals before comparison.  */
7493        if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
7494            ++a.high;
7495        }
7496        if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
7497            ++b.high;
7498        }
7499        if (a.low == b.low && a.high == b.high) {
7500            return float_relation_equal;
7501        } else {
7502            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7503        }
7504    }
7505}
7506
7507FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
7508{
7509    return floatx80_compare_internal(a, b, 0, status);
7510}
7511
7512FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
7513                                     float_status *status)
7514{
7515    return floatx80_compare_internal(a, b, 1, status);
7516}
7517
7518static inline FloatRelation
7519float128_compare_internal(float128 a, float128 b, bool is_quiet,
7520                          float_status *status)
7521{
7522    bool aSign, bSign;
7523
7524    if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7525          ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7526        ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7527          ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7528        if (!is_quiet ||
7529            float128_is_signaling_nan(a, status) ||
7530            float128_is_signaling_nan(b, status)) {
7531            float_raise(float_flag_invalid, status);
7532        }
7533        return float_relation_unordered;
7534    }
7535    aSign = extractFloat128Sign( a );
7536    bSign = extractFloat128Sign( b );
7537    if ( aSign != bSign ) {
7538        if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7539            /* zero case */
7540            return float_relation_equal;
7541        } else {
7542            return 1 - (2 * aSign);
7543        }
7544    } else {
7545        if (a.low == b.low && a.high == b.high) {
7546            return float_relation_equal;
7547        } else {
7548            return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7549        }
7550    }
7551}
7552
7553FloatRelation float128_compare(float128 a, float128 b, float_status *status)
7554{
7555    return float128_compare_internal(a, b, 0, status);
7556}
7557
7558FloatRelation float128_compare_quiet(float128 a, float128 b,
7559                                     float_status *status)
7560{
7561    return float128_compare_internal(a, b, 1, status);
7562}
7563
7564floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
7565{
7566    bool aSign;
7567    int32_t aExp;
7568    uint64_t aSig;
7569
7570    if (floatx80_invalid_encoding(a)) {
7571        float_raise(float_flag_invalid, status);
7572        return floatx80_default_nan(status);
7573    }
7574    aSig = extractFloatx80Frac( a );
7575    aExp = extractFloatx80Exp( a );
7576    aSign = extractFloatx80Sign( a );
7577
7578    if ( aExp == 0x7FFF ) {
7579        if ( aSig<<1 ) {
7580            return propagateFloatx80NaN(a, a, status);
7581        }
7582        return a;
7583    }
7584
7585    if (aExp == 0) {
7586        if (aSig == 0) {
7587            return a;
7588        }
7589        aExp++;
7590    }
7591
7592    if (n > 0x10000) {
7593        n = 0x10000;
7594    } else if (n < -0x10000) {
7595        n = -0x10000;
7596    }
7597
7598    aExp += n;
7599    return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
7600                                         aSign, aExp, aSig, 0, status);
7601}
7602
7603float128 float128_scalbn(float128 a, int n, float_status *status)
7604{
7605    bool aSign;
7606    int32_t aExp;
7607    uint64_t aSig0, aSig1;
7608
7609    aSig1 = extractFloat128Frac1( a );
7610    aSig0 = extractFloat128Frac0( a );
7611    aExp = extractFloat128Exp( a );
7612    aSign = extractFloat128Sign( a );
7613    if ( aExp == 0x7FFF ) {
7614        if ( aSig0 | aSig1 ) {
7615            return propagateFloat128NaN(a, a, status);
7616        }
7617        return a;
7618    }
7619    if (aExp != 0) {
7620        aSig0 |= UINT64_C(0x0001000000000000);
7621    } else if (aSig0 == 0 && aSig1 == 0) {
7622        return a;
7623    } else {
7624        aExp++;
7625    }
7626
7627    if (n > 0x10000) {
7628        n = 0x10000;
7629    } else if (n < -0x10000) {
7630        n = -0x10000;
7631    }
7632
7633    aExp += n - 1;
7634    return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
7635                                         , status);
7636
7637}
7638
7639static void __attribute__((constructor)) softfloat_init(void)
7640{
7641    union_float64 ua, ub, uc, ur;
7642
7643    if (QEMU_NO_HARDFLOAT) {
7644        return;
7645    }
7646    /*
7647     * Test that the host's FMA is not obviously broken. For example,
7648     * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
7649     *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
7650     */
7651    ua.s = 0x0020000000000001ULL;
7652    ub.s = 0x3ca0000000000000ULL;
7653    uc.s = 0x0020000000000000ULL;
7654    ur.h = fma(ua.h, ub.h, uc.h);
7655    if (ur.s != 0x0020000000000001ULL) {
7656        force_soft_fma = true;
7657    }
7658}
7659