/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

/*
===============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

/* BSD licensing:
 * Copyright (c) 2006, Fabrice Bellard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

/* softfloat (and in particular the code in softfloat-specialize.h) is
 * target-dependent and needs the TARGET_* macros.
 */
#include "qemu/osdep.h"
#include <math.h>
#include "qemu/bitops.h"
#include "fpu/softfloat.h"

/* We only need stdlib for abort() */

/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations.  (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
#include "fpu/softfloat-macros.h"

/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, FP instruction semantics are similar but not identical, particularly
 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 * exception flags is not trivial: reading the host's flags register with a
 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is not fast nor pleasant to work with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is < the minimum normal.
 */
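
/*
 * A sketch of the resulting fast path (pseudo-code only; see
 * float32_gen2() below for the real implementation):
 *
 *   if (can_use_fpu(status) && both inputs are zero or normal) {
 *       result = host FPU op;           // value computed natively
 *       if (result is not inf and not suspiciously tiny) {
 *           return result;              // inexact flag already set
 *       }
 *   }
 *   fall back to the exact soft-fp implementation;
 */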
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK

#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1

#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2

#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3
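
/*
 * For example, GEN_INPUT_FLUSH2 above generates float32_input_flush2()
 * and float64_input_flush2(), each of which replaces any denormal input
 * with a correctly-signed zero and raises float_flag_input_denormal,
 * but only when s->flush_inputs_to_zero is set.
 */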

/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif

static inline bool can_use_fpu(const float_status *s)
{
    if (QEMU_NO_HARDFLOAT) {
        return false;
    }
    return likely(s->float_exception_flags & float_flag_inexact &&
                  s->float_rounding_mode == float_round_nearest_even);
}

/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);

/* 2-input is-zero-or-normal */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}

static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}

/* 3-input is-zero-or-normal */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}

static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}

static inline bool f32_is_inf(union_float32 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float32_is_infinity(a.s);
}

static inline bool f64_is_inf(union_float64 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float64_is_infinity(a.s);
}

static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
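
/*
 * Example (illustrative): with ab_mask = float_cmask(a->cls) |
 * float_cmask(b->cls), "either operand is a NaN" is simply
 * (ab_mask & float_cmask_anynan) != 0, and the inf * 0 case in muladd
 * is ab_mask == float_cmask_infzero.
 */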

/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 8,
};

/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}

/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   round_mask: bits below lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    uint64_t round_mask;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = E,                                \
    .exp_bias       = ((1 << E) - 1) >> 1,              \
    .exp_max        = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = F,                                \
    .frac_shift     = (-F - 1) & 63,                    \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1
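
/*
 * Worked example (illustrative): FLOAT_PARAMS(8, 23) for float32 gives
 * exp_bias = 127, exp_max = 255, frac_shift = (-23 - 1) & 63 = 40 and
 * round_mask = (1ull << 40) - 1, i.e. the 23-bit fraction is shifted up
 * against the implicit bit at DECOMPOSED_BINARY_POINT (bit 63).
 */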

static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

#define FLOATX80_PARAMS(R)              \
    FLOAT_PARAMS_(15),                  \
    .frac_size = R == 64 ? 63 : R,      \
    .frac_shift = 0,                    \
    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1

static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
};

/* Unpack a float to parts, but do not canonicalize.  */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;

    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, f_size + e_size, 1),
        .exp = extract64(raw, f_size, e_size),
        .frac = extract64(raw, 0, f_size)
    };
}
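
/*
 * Worked example (illustrative, not from the original source): the
 * float32 bit pattern 0xc0490fdb (approximately -pi) unpacks with
 * f_size = 23 and e_size = 8 to sign = 1, exp = 0x80, frac = 0x490fdb.
 */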

static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}

static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
{
    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract32(f.high, 15, 1),
        .exp = extract32(f.high, 0, 15),
        .frac_hi = f.low
    };
}

static void float128_unpack_raw(FloatParts128 *p, float128 f)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}

/* Pack a float from parts, but do not canonicalize.  */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;
    uint64_t ret;

    ret = (uint64_t)p->sign << (f_size + e_size);
    ret = deposit64(ret, f_size, e_size, p->exp);
    ret = deposit64(ret, 0, f_size, p->frac);
    return ret;
}

static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}

static float128 float128_pack_raw(const FloatParts128 *p)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}

/*----------------------------------------------------------------------------
| Functions and definitions to determine:  (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output.  These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.c.inc"

#define PARTS_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME, \
                  FloatParts256 *: parts256_##NAME)
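
/*
 * For example (illustrative): with FloatParts64 *p, the expansion of
 * PARTS_GENERIC_64_128(default_nan, p) resolves at compile time to
 * parts64_default_nan; _Generic involves no runtime dispatch.
 */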

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
                                   const FloatFmt *fmt);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
                                    const FloatFmt *fmt);

#define parts_uncanon_normal(A, S, F) \
    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
                                    uint64_t *mod_quot, float_status *s);
static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
                                      uint64_t *mod_quot, float_status *s);

#define parts_modrem(A, B, Q, S) \
    PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)

static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                      int scale, int64_t min, int64_t max,
                                      float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
                           float_status *s, bool q);
static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
                            float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)

static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_log2(A, S, F) \
    PARTS_GENERIC_64_128(log2, A)(A, S, F)

/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

#define FRAC_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME, \
                  FloatParts256 *: frac256_##NAME)

static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)

static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)

static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;
}

static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}

#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)

static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
{
    return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
}

static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t ta = a->frac_hi, tb = b->frac_hi;
    if (ta == tb) {
        ta = a->frac_lo, tb = b->frac_lo;
        if (ta == tb) {
            return 0;
        }
    }
    return ta < tb ? -1 : 1;
}

#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)

static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)

static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}
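
/*
 * Worked example (illustrative): dividing 0.5 by 0.75 in decomposed
 * form gives a->frac = 0x8000000000000000 (1.0) and b->frac =
 * 0xc000000000000000 (1.5).  Since a->frac < b->frac, A is used
 * unshifted; udiv_qrnnd yields q = 0xaaaaaaaaaaaaaaaa (1.333...) with
 * a nonzero remainder (so the lsb is jammed), and the returned true
 * decrements the exponent, giving 0.666....
 */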

static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)

static bool frac64_eqz(FloatParts64 *a)
{
    return a->frac == 0;
}

static bool frac128_eqz(FloatParts128 *a)
{
    return (a->frac_hi | a->frac_lo) == 0;
}

#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)

static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)

static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)

static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}

static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}

static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
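
/*
 * Fraction-word remainder.  If mod_quot is non-NULL, compute a
 * truncated (fmod-style) quotient and return its low bits in
 * *mod_quot; otherwise compute the IEEE remainder, in which the
 * quotient is rounded to nearest and the result's sign may flip.
 */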
static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
{
    uint64_t a0, a1, b0, t0, t1, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac;
    a1 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        a0 >>= 1;
        exp_diff = 0;
    }

    b0 = b->frac;
    quot = q = b0 <= a0;
    if (q) {
        a0 -= b0;
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? q - 2 : 0;
        mul64To128(b0, q, &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(a0, a1, 62, &a0, &a1);
        exp_diff -= 62;
        quot = (quot << 62) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
        while (le128(t0, t1, a0, a1)) {
            ++q;
            sub128(a0, a1, t0, t1, &a0, &a1);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub128(t0, t1, a0, a1, &t0, &t1);
        if (lt128(t0, t1, a0, a1) ||
            (eq128(t0, t1, a0, a1) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift128Left(a0, a1, shift, &a0, &a1);
    } else if (likely(a1)) {
        shift = clz64(a1);
        a0 = a1 << shift;
        a1 = 0;
        shift += 64;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac = a0 | (a1 != 0);
}

static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
                           uint64_t *mod_quot)
{
    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac_hi;
    a1 = a->frac_lo;
    a2 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        shift128Right(a0, a1, 1, &a0, &a1);
        exp_diff = 0;
    }

    b0 = b->frac_hi;
    b1 = b->frac_lo;

    quot = q = le128(b0, b1, a0, a1);
    if (q) {
        sub128(a0, a1, b0, b1, &a0, &a1);
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? q - 4 : 0;
        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
        exp_diff -= 61;
        quot = (quot << 61) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
        while (le192(t0, t1, t2, a0, a1, a2)) {
            ++q;
            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = b1;
        t2 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
        if (lt192(t0, t1, t2, a0, a1, a2) ||
            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a2 = t2;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
    } else if (likely(a1)) {
        shift = clz64(a1);
        shortShift128Left(a1, a2, shift, &a0, &a1);
        a2 = 0;
        shift += 64;
    } else if (likely(a2)) {
        shift = clz64(a2);
        a0 = a2 << shift;
        a1 = a2 = 0;
        shift += 128;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac_hi = a0;
    a->frac_lo = a1 | (a2 != 0);
}

#define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)

static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)

static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}

static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
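
/*
 * Shift right and "jam": any bits shifted out are OR-ed into the least
 * significant bit, preserving stickiness so that rounding still sees
 * that the value was inexact.
 */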
static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}

static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}

static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)

static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)

static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)

static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
/*
 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licensed.
 */
1574static const uint16_t rsqrt_tab[128] = {
1575    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1576    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1577    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1578    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1579    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1580    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1581    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1582    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1583    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1584    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1585    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1586    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1587    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1588    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1589    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1590    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1591};
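/*
 * Illustrative sketch only (hypothetical names; the actual lookup lives
 * in the parts code included further below): the 7-bit index pairs the
 * low exponent bit with the top six fraction bits, and the table entry
 * seeds an iterative refinement of 1/sqrt:
 *
 *     int idx = ((exp & 1) << 6) | top6_frac;
 *     uint32_t r0 = rsqrt_tab[idx];    // ~16-bit estimate of 1/sqrt
 */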
1592
1593#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1594#define FloatPartsN    glue(FloatParts,N)
1595#define FloatPartsW    glue(FloatParts,W)
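/*
 * With N == 64 and W == 128, for example, partsN(addsub) expands to
 * parts64_addsub, FloatPartsN to FloatParts64, and FloatPartsW to
 * FloatParts128.
 */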
1596
1597#define N 64
1598#define W 128
1599
1600#include "softfloat-parts-addsub.c.inc"
1601#include "softfloat-parts.c.inc"
1602
1603#undef  N
1604#undef  W
1605#define N 128
1606#define W 256
1607
1608#include "softfloat-parts-addsub.c.inc"
1609#include "softfloat-parts.c.inc"
1610
1611#undef  N
1612#undef  W
1613#define N            256
1614
1615#include "softfloat-parts-addsub.c.inc"
1616
1617#undef  N
1618#undef  W
1619#undef  partsN
1620#undef  FloatPartsN
1621#undef  FloatPartsW
1622
1623/*
1624 * Pack/unpack routines with a specific FloatFmt.
1625 */
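/*
 * A minimal sketch of the pattern used throughout the rest of this file:
 * unpack to canonical parts, operate via a parts_* helper, then round
 * and repack.
 *
 *     FloatParts64 p;
 *     float16_unpack_canonical(&p, f, status);
 *     // ... operate on p ...
 *     return float16_round_pack_canonical(&p, status);
 */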
1626
1627static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1628                                      float_status *s, const FloatFmt *params)
1629{
1630    float16_unpack_raw(p, f);
1631    parts_canonicalize(p, s, params);
1632}
1633
1634static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1635                                     float_status *s)
1636{
1637    float16a_unpack_canonical(p, f, s, &float16_params);
1638}
1639
1640static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1641                                      float_status *s)
1642{
1643    bfloat16_unpack_raw(p, f);
1644    parts_canonicalize(p, s, &bfloat16_params);
1645}
1646
1647static float16 float16a_round_pack_canonical(FloatParts64 *p,
1648                                             float_status *s,
1649                                             const FloatFmt *params)
1650{
1651    parts_uncanon(p, s, params);
1652    return float16_pack_raw(p);
1653}
1654
1655static float16 float16_round_pack_canonical(FloatParts64 *p,
1656                                            float_status *s)
1657{
1658    return float16a_round_pack_canonical(p, s, &float16_params);
1659}
1660
1661static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1662                                              float_status *s)
1663{
1664    parts_uncanon(p, s, &bfloat16_params);
1665    return bfloat16_pack_raw(p);
1666}
1667
1668static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1669                                     float_status *s)
1670{
1671    float32_unpack_raw(p, f);
1672    parts_canonicalize(p, s, &float32_params);
1673}
1674
1675static float32 float32_round_pack_canonical(FloatParts64 *p,
1676                                            float_status *s)
1677{
1678    parts_uncanon(p, s, &float32_params);
1679    return float32_pack_raw(p);
1680}
1681
1682static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1683                                     float_status *s)
1684{
1685    float64_unpack_raw(p, f);
1686    parts_canonicalize(p, s, &float64_params);
1687}
1688
1689static float64 float64_round_pack_canonical(FloatParts64 *p,
1690                                            float_status *s)
1691{
1692    parts_uncanon(p, s, &float64_params);
1693    return float64_pack_raw(p);
1694}
1695
1696static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1697                                               float_status *s)
1698{
1699    parts_uncanon(p, s, &float32_params);
1700
1701    /*
1702     * In parts_uncanon, we placed the fraction for float32 at the lsb.
1703     * We need to adjust the fraction higher so that the least N bits are
1704     * zero, and the fraction is adjacent to the float64 implicit bit.
1705     */
1706    switch (p->cls) {
1707    case float_class_normal:
1708        if (unlikely(p->exp == 0)) {
1709            /*
1710             * The result is denormal for float32, but can be represented
1711             * in normalized form for float64.  Adjust, per canonicalize.
1712             */
1713            int shift = frac_normalize(p);
1714            p->exp = (float32_params.frac_shift -
1715                      float32_params.exp_bias - shift + 1 +
1716                      float64_params.exp_bias);
1717            frac_shr(p, float64_params.frac_shift);
1718        } else {
1719            frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1720            p->exp += float64_params.exp_bias - float32_params.exp_bias;
1721        }
1722        break;
1723    case float_class_snan:
1724    case float_class_qnan:
1725        frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1726        p->exp = float64_params.exp_max;
1727        break;
1728    case float_class_inf:
1729        p->exp = float64_params.exp_max;
1730        break;
1731    case float_class_zero:
1732        break;
1733    default:
1734        g_assert_not_reached();
1735    }
1736
1737    return float64_pack_raw(p);
1738}
1739
1740static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1741                                      float_status *s)
1742{
1743    float128_unpack_raw(p, f);
1744    parts_canonicalize(p, s, &float128_params);
1745}
1746
1747static float128 float128_round_pack_canonical(FloatParts128 *p,
1748                                              float_status *s)
1749{
1750    parts_uncanon(p, s, &float128_params);
1751    return float128_pack_raw(p);
1752}
1753
1754/* Returns false if the encoding is invalid. */
1755static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1756                                      float_status *s)
1757{
1758    /* Ensure rounding precision is set before beginning. */
1759    switch (s->floatx80_rounding_precision) {
1760    case floatx80_precision_x:
1761    case floatx80_precision_d:
1762    case floatx80_precision_s:
1763        break;
1764    default:
1765        g_assert_not_reached();
1766    }
1767
1768    if (unlikely(floatx80_invalid_encoding(f))) {
1769        float_raise(float_flag_invalid, s);
1770        return false;
1771    }
1772
1773    floatx80_unpack_raw(p, f);
1774
1775    if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1776        parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1777    } else {
1778        /* The explicit integer bit is ignored, after invalid checks. */
1779        p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1780        p->cls = (p->frac_hi == 0 ? float_class_inf
1781                  : parts_is_snan_frac(p->frac_hi, s)
1782                  ? float_class_snan : float_class_qnan);
1783    }
1784    return true;
1785}
1786
1787static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1788                                              float_status *s)
1789{
1790    const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1791    uint64_t frac;
1792    int exp;
1793
1794    switch (p->cls) {
1795    case float_class_normal:
1796        if (s->floatx80_rounding_precision == floatx80_precision_x) {
1797            parts_uncanon_normal(p, s, fmt);
1798            frac = p->frac_hi;
1799            exp = p->exp;
1800        } else {
1801            FloatParts64 p64;
1802
1803            p64.sign = p->sign;
1804            p64.exp = p->exp;
1805            frac_truncjam(&p64, p);
1806            parts_uncanon_normal(&p64, s, fmt);
1807            frac = p64.frac;
1808            exp = p64.exp;
1809        }
1810        if (exp != fmt->exp_max) {
1811            break;
1812        }
1813        /* rounded to inf -- fall through to set frac correctly */
1814
1815    case float_class_inf:
1816        /* x86 and m68k differ in the setting of the integer bit. */
1817        frac = floatx80_infinity_low;
1818        exp = fmt->exp_max;
1819        break;
1820
1821    case float_class_zero:
1822        frac = 0;
1823        exp = 0;
1824        break;
1825
1826    case float_class_snan:
1827    case float_class_qnan:
1828        /* NaNs have the integer bit set. */
1829        frac = p->frac_hi | (1ull << 63);
1830        exp = fmt->exp_max;
1831        break;
1832
1833    default:
1834        g_assert_not_reached();
1835    }
1836
1837    return packFloatx80(p->sign, exp, frac);
1838}
1839
1840/*
1841 * Addition and subtraction
1842 */
1843
1844static float16 QEMU_FLATTEN
1845float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1846{
1847    FloatParts64 pa, pb, *pr;
1848
1849    float16_unpack_canonical(&pa, a, status);
1850    float16_unpack_canonical(&pb, b, status);
1851    pr = parts_addsub(&pa, &pb, status, subtract);
1852
1853    return float16_round_pack_canonical(pr, status);
1854}
1855
1856float16 float16_add(float16 a, float16 b, float_status *status)
1857{
1858    return float16_addsub(a, b, status, false);
1859}
1860
1861float16 float16_sub(float16 a, float16 b, float_status *status)
1862{
1863    return float16_addsub(a, b, status, true);
1864}
1865
1866static float32 QEMU_SOFTFLOAT_ATTR
1867soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1868{
1869    FloatParts64 pa, pb, *pr;
1870
1871    float32_unpack_canonical(&pa, a, status);
1872    float32_unpack_canonical(&pb, b, status);
1873    pr = parts_addsub(&pa, &pb, status, subtract);
1874
1875    return float32_round_pack_canonical(pr, status);
1876}
1877
1878static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1879{
1880    return soft_f32_addsub(a, b, status, false);
1881}
1882
1883static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1884{
1885    return soft_f32_addsub(a, b, status, true);
1886}
1887
1888static float64 QEMU_SOFTFLOAT_ATTR
1889soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1890{
1891    FloatParts64 pa, pb, *pr;
1892
1893    float64_unpack_canonical(&pa, a, status);
1894    float64_unpack_canonical(&pb, b, status);
1895    pr = parts_addsub(&pa, &pb, status, subtract);
1896
1897    return float64_round_pack_canonical(pr, status);
1898}
1899
1900static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1901{
1902    return soft_f64_addsub(a, b, status, false);
1903}
1904
1905static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1906{
1907    return soft_f64_addsub(a, b, status, true);
1908}
1909
1910static float hard_f32_add(float a, float b)
1911{
1912    return a + b;
1913}
1914
1915static float hard_f32_sub(float a, float b)
1916{
1917    return a - b;
1918}
1919
1920static double hard_f64_add(double a, double b)
1921{
1922    return a + b;
1923}
1924
1925static double hard_f64_sub(double a, double b)
1926{
1927    return a - b;
1928}
1929
1930static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1931{
1932    if (QEMU_HARDFLOAT_2F32_USE_FP) {
1933        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1934    }
1935    return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1936}
1937
1938static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1939{
1940    if (QEMU_HARDFLOAT_2F64_USE_FP) {
1941        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1942    } else {
1943        return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1944    }
1945}
1946
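/*
 * Hardfloat dispatch: use the host FPU when both operands are zero or
 * normal (f32_is_zon2 / f64_is_zon2), and fall back to softfloat
 * otherwise.  The *_addsubmul_post checks reject the all-zero case,
 * since the sign of an exactly zero result depends on the rounding mode.
 */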
1947static float32 float32_addsub(float32 a, float32 b, float_status *s,
1948                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1949{
1950    return float32_gen2(a, b, s, hard, soft,
1951                        f32_is_zon2, f32_addsubmul_post);
1952}
1953
1954static float64 float64_addsub(float64 a, float64 b, float_status *s,
1955                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1956{
1957    return float64_gen2(a, b, s, hard, soft,
1958                        f64_is_zon2, f64_addsubmul_post);
1959}
1960
1961float32 QEMU_FLATTEN
1962float32_add(float32 a, float32 b, float_status *s)
1963{
1964    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1965}
1966
1967float32 QEMU_FLATTEN
1968float32_sub(float32 a, float32 b, float_status *s)
1969{
1970    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1971}
1972
1973float64 QEMU_FLATTEN
1974float64_add(float64 a, float64 b, float_status *s)
1975{
1976    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1977}
1978
1979float64 QEMU_FLATTEN
1980float64_sub(float64 a, float64 b, float_status *s)
1981{
1982    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1983}
1984
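/*
 * Compute at float64 input precision, but round the result to float32
 * range and precision while still returning a float64 encoding (see
 * float64r32_round_pack_canonical above).
 */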
1985static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
1986                                 bool subtract)
1987{
1988    FloatParts64 pa, pb, *pr;
1989
1990    float64_unpack_canonical(&pa, a, status);
1991    float64_unpack_canonical(&pb, b, status);
1992    pr = parts_addsub(&pa, &pb, status, subtract);
1993
1994    return float64r32_round_pack_canonical(pr, status);
1995}
1996
1997float64 float64r32_add(float64 a, float64 b, float_status *status)
1998{
1999    return float64r32_addsub(a, b, status, false);
2000}
2001
2002float64 float64r32_sub(float64 a, float64 b, float_status *status)
2003{
2004    return float64r32_addsub(a, b, status, true);
2005}
2006
2007static bfloat16 QEMU_FLATTEN
2008bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2009{
2010    FloatParts64 pa, pb, *pr;
2011
2012    bfloat16_unpack_canonical(&pa, a, status);
2013    bfloat16_unpack_canonical(&pb, b, status);
2014    pr = parts_addsub(&pa, &pb, status, subtract);
2015
2016    return bfloat16_round_pack_canonical(pr, status);
2017}
2018
2019bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2020{
2021    return bfloat16_addsub(a, b, status, false);
2022}
2023
2024bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2025{
2026    return bfloat16_addsub(a, b, status, true);
2027}
2028
2029static float128 QEMU_FLATTEN
2030float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2031{
2032    FloatParts128 pa, pb, *pr;
2033
2034    float128_unpack_canonical(&pa, a, status);
2035    float128_unpack_canonical(&pb, b, status);
2036    pr = parts_addsub(&pa, &pb, status, subtract);
2037
2038    return float128_round_pack_canonical(pr, status);
2039}
2040
2041float128 float128_add(float128 a, float128 b, float_status *status)
2042{
2043    return float128_addsub(a, b, status, false);
2044}
2045
2046float128 float128_sub(float128 a, float128 b, float_status *status)
2047{
2048    return float128_addsub(a, b, status, true);
2049}
2050
2051static floatx80 QEMU_FLATTEN
2052floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2053{
2054    FloatParts128 pa, pb, *pr;
2055
2056    if (!floatx80_unpack_canonical(&pa, a, status) ||
2057        !floatx80_unpack_canonical(&pb, b, status)) {
2058        return floatx80_default_nan(status);
2059    }
2060
2061    pr = parts_addsub(&pa, &pb, status, subtract);
2062    return floatx80_round_pack_canonical(pr, status);
2063}
2064
2065floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2066{
2067    return floatx80_addsub(a, b, status, false);
2068}
2069
2070floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2071{
2072    return floatx80_addsub(a, b, status, true);
2073}
2074
2075/*
2076 * Multiplication
2077 */
2078
2079float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2080{
2081    FloatParts64 pa, pb, *pr;
2082
2083    float16_unpack_canonical(&pa, a, status);
2084    float16_unpack_canonical(&pb, b, status);
2085    pr = parts_mul(&pa, &pb, status);
2086
2087    return float16_round_pack_canonical(pr, status);
2088}
2089
2090static float32 QEMU_SOFTFLOAT_ATTR
2091soft_f32_mul(float32 a, float32 b, float_status *status)
2092{
2093    FloatParts64 pa, pb, *pr;
2094
2095    float32_unpack_canonical(&pa, a, status);
2096    float32_unpack_canonical(&pb, b, status);
2097    pr = parts_mul(&pa, &pb, status);
2098
2099    return float32_round_pack_canonical(pr, status);
2100}
2101
2102static float64 QEMU_SOFTFLOAT_ATTR
2103soft_f64_mul(float64 a, float64 b, float_status *status)
2104{
2105    FloatParts64 pa, pb, *pr;
2106
2107    float64_unpack_canonical(&pa, a, status);
2108    float64_unpack_canonical(&pb, b, status);
2109    pr = parts_mul(&pa, &pb, status);
2110
2111    return float64_round_pack_canonical(pr, status);
2112}
2113
2114static float hard_f32_mul(float a, float b)
2115{
2116    return a * b;
2117}
2118
2119static double hard_f64_mul(double a, double b)
2120{
2121    return a * b;
2122}
2123
2124float32 QEMU_FLATTEN
2125float32_mul(float32 a, float32 b, float_status *s)
2126{
2127    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2128                        f32_is_zon2, f32_addsubmul_post);
2129}
2130
2131float64 QEMU_FLATTEN
2132float64_mul(float64 a, float64 b, float_status *s)
2133{
2134    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2135                        f64_is_zon2, f64_addsubmul_post);
2136}
2137
2138float64 float64r32_mul(float64 a, float64 b, float_status *status)
2139{
2140    FloatParts64 pa, pb, *pr;
2141
2142    float64_unpack_canonical(&pa, a, status);
2143    float64_unpack_canonical(&pb, b, status);
2144    pr = parts_mul(&pa, &pb, status);
2145
2146    return float64r32_round_pack_canonical(pr, status);
2147}
2148
2149bfloat16 QEMU_FLATTEN
2150bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2151{
2152    FloatParts64 pa, pb, *pr;
2153
2154    bfloat16_unpack_canonical(&pa, a, status);
2155    bfloat16_unpack_canonical(&pb, b, status);
2156    pr = parts_mul(&pa, &pb, status);
2157
2158    return bfloat16_round_pack_canonical(pr, status);
2159}
2160
2161float128 QEMU_FLATTEN
2162float128_mul(float128 a, float128 b, float_status *status)
2163{
2164    FloatParts128 pa, pb, *pr;
2165
2166    float128_unpack_canonical(&pa, a, status);
2167    float128_unpack_canonical(&pb, b, status);
2168    pr = parts_mul(&pa, &pb, status);
2169
2170    return float128_round_pack_canonical(pr, status);
2171}
2172
2173floatx80 QEMU_FLATTEN
2174floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2175{
2176    FloatParts128 pa, pb, *pr;
2177
2178    if (!floatx80_unpack_canonical(&pa, a, status) ||
2179        !floatx80_unpack_canonical(&pb, b, status)) {
2180        return floatx80_default_nan(status);
2181    }
2182
2183    pr = parts_mul(&pa, &pb, status);
2184    return floatx80_round_pack_canonical(pr, status);
2185}
2186
2187/*
2188 * Fused multiply-add
2189 */
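/*
 * The 'flags' argument carries the float_muladd_* modifiers used below
 * (negate_c, negate_product, negate_result, halve_result), which adjust
 * the fused computation of a * b + c without introducing an extra
 * rounding step.
 */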
2190
2191float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2192                                    int flags, float_status *status)
2193{
2194    FloatParts64 pa, pb, pc, *pr;
2195
2196    float16_unpack_canonical(&pa, a, status);
2197    float16_unpack_canonical(&pb, b, status);
2198    float16_unpack_canonical(&pc, c, status);
2199    pr = parts_muladd(&pa, &pb, &pc, flags, status);
2200
2201    return float16_round_pack_canonical(pr, status);
2202}
2203
2204static float32 QEMU_SOFTFLOAT_ATTR
2205soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2206                float_status *status)
2207{
2208    FloatParts64 pa, pb, pc, *pr;
2209
2210    float32_unpack_canonical(&pa, a, status);
2211    float32_unpack_canonical(&pb, b, status);
2212    float32_unpack_canonical(&pc, c, status);
2213    pr = parts_muladd(&pa, &pb, &pc, flags, status);
2214
2215    return float32_round_pack_canonical(pr, status);
2216}
2217
2218static float64 QEMU_SOFTFLOAT_ATTR
2219soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2220                float_status *status)
2221{
2222    FloatParts64 pa, pb, pc, *pr;
2223
2224    float64_unpack_canonical(&pa, a, status);
2225    float64_unpack_canonical(&pb, b, status);
2226    float64_unpack_canonical(&pc, c, status);
2227    pr = parts_muladd(&pa, &pb, &pc, flags, status);
2228
2229    return float64_round_pack_canonical(pr, status);
2230}
2231
2232static bool force_soft_fma;
2233
2234float32 QEMU_FLATTEN
2235float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2236{
2237    union_float32 ua, ub, uc, ur;
2238
2239    ua.s = xa;
2240    ub.s = xb;
2241    uc.s = xc;
2242
2243    if (unlikely(!can_use_fpu(s))) {
2244        goto soft;
2245    }
2246    if (unlikely(flags & float_muladd_halve_result)) {
2247        goto soft;
2248    }
2249
2250    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2251    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2252        goto soft;
2253    }
2254
2255    if (unlikely(force_soft_fma)) {
2256        goto soft;
2257    }
2258
2259    /*
2260     * When either a or b is zero, there's no need to check for under/over
2261     * flow, since we know the addend is normal or zero and the product is 0.
2262     */
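    /*
     * The sign of the zero product still matters: under the default
     * rounding mode, (+0) + (-0) is +0 while (-0) + (-0) is -0, which
     * is why prod_sign is computed explicitly below, including the
     * effect of float_muladd_negate_product.
     */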
2263    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2264        union_float32 up;
2265        bool prod_sign;
2266
2267        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2268        prod_sign ^= !!(flags & float_muladd_negate_product);
2269        up.s = float32_set_sign(float32_zero, prod_sign);
2270
2271        if (flags & float_muladd_negate_c) {
2272            uc.h = -uc.h;
2273        }
2274        ur.h = up.h + uc.h;
2275    } else {
2276        union_float32 ua_orig = ua;
2277        union_float32 uc_orig = uc;
2278
2279        if (flags & float_muladd_negate_product) {
2280            ua.h = -ua.h;
2281        }
2282        if (flags & float_muladd_negate_c) {
2283            uc.h = -uc.h;
2284        }
2285
2286        ur.h = fmaf(ua.h, ub.h, uc.h);
2287
2288        if (unlikely(f32_is_inf(ur))) {
2289            float_raise(float_flag_overflow, s);
2290        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2291            ua = ua_orig;
2292            uc = uc_orig;
2293            goto soft;
2294        }
2295    }
2296    if (flags & float_muladd_negate_result) {
2297        return float32_chs(ur.s);
2298    }
2299    return ur.s;
2300
2301 soft:
2302    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2303}
2304
2305float64 QEMU_FLATTEN
2306float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2307{
2308    union_float64 ua, ub, uc, ur;
2309
2310    ua.s = xa;
2311    ub.s = xb;
2312    uc.s = xc;
2313
2314    if (unlikely(!can_use_fpu(s))) {
2315        goto soft;
2316    }
2317    if (unlikely(flags & float_muladd_halve_result)) {
2318        goto soft;
2319    }
2320
2321    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2322    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2323        goto soft;
2324    }
2325
2326    if (unlikely(force_soft_fma)) {
2327        goto soft;
2328    }
2329
2330    /*
2331     * When either a or b is zero, there's no need to check for under/over
2332     * flow, since we know the addend is normal or zero and the product is 0.
2333     */
2334    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2335        union_float64 up;
2336        bool prod_sign;
2337
2338        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2339        prod_sign ^= !!(flags & float_muladd_negate_product);
2340        up.s = float64_set_sign(float64_zero, prod_sign);
2341
2342        if (flags & float_muladd_negate_c) {
2343            uc.h = -uc.h;
2344        }
2345        ur.h = up.h + uc.h;
2346    } else {
2347        union_float64 ua_orig = ua;
2348        union_float64 uc_orig = uc;
2349
2350        if (flags & float_muladd_negate_product) {
2351            ua.h = -ua.h;
2352        }
2353        if (flags & float_muladd_negate_c) {
2354            uc.h = -uc.h;
2355        }
2356
2357        ur.h = fma(ua.h, ub.h, uc.h);
2358
2359        if (unlikely(f64_is_inf(ur))) {
2360            float_raise(float_flag_overflow, s);
2361        } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
2362            ua = ua_orig;
2363            uc = uc_orig;
2364            goto soft;
2365        }
2366    }
2367    if (flags & float_muladd_negate_result) {
2368        return float64_chs(ur.s);
2369    }
2370    return ur.s;
2371
2372 soft:
2373    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2374}
2375
2376float64 float64r32_muladd(float64 a, float64 b, float64 c,
2377                          int flags, float_status *status)
2378{
2379    FloatParts64 pa, pb, pc, *pr;
2380
2381    float64_unpack_canonical(&pa, a, status);
2382    float64_unpack_canonical(&pb, b, status);
2383    float64_unpack_canonical(&pc, c, status);
2384    pr = parts_muladd(&pa, &pb, &pc, flags, status);
2385
2386    return float64r32_round_pack_canonical(pr, status);
2387}
2388
2389bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2390                                      int flags, float_status *status)
2391{
2392    FloatParts64 pa, pb, pc, *pr;
2393
2394    bfloat16_unpack_canonical(&pa, a, status);
2395    bfloat16_unpack_canonical(&pb, b, status);
2396    bfloat16_unpack_canonical(&pc, c, status);
2397    pr = parts_muladd(&pa, &pb, &pc, flags, status);
2398
2399    return bfloat16_round_pack_canonical(pr, status);
2400}
2401
2402float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2403                                      int flags, float_status *status)
2404{
2405    FloatParts128 pa, pb, pc, *pr;
2406
2407    float128_unpack_canonical(&pa, a, status);
2408    float128_unpack_canonical(&pb, b, status);
2409    float128_unpack_canonical(&pc, c, status);
2410    pr = parts_muladd(&pa, &pb, &pc, flags, status);
2411
2412    return float128_round_pack_canonical(pr, status);
2413}
2414
2415/*
2416 * Division
2417 */
2418
2419float16 float16_div(float16 a, float16 b, float_status *status)
2420{
2421    FloatParts64 pa, pb, *pr;
2422
2423    float16_unpack_canonical(&pa, a, status);
2424    float16_unpack_canonical(&pb, b, status);
2425    pr = parts_div(&pa, &pb, status);
2426
2427    return float16_round_pack_canonical(pr, status);
2428}
2429
2430static float32 QEMU_SOFTFLOAT_ATTR
2431soft_f32_div(float32 a, float32 b, float_status *status)
2432{
2433    FloatParts64 pa, pb, *pr;
2434
2435    float32_unpack_canonical(&pa, a, status);
2436    float32_unpack_canonical(&pb, b, status);
2437    pr = parts_div(&pa, &pb, status);
2438
2439    return float32_round_pack_canonical(pr, status);
2440}
2441
2442static float64 QEMU_SOFTFLOAT_ATTR
2443soft_f64_div(float64 a, float64 b, float_status *status)
2444{
2445    FloatParts64 pa, pb, *pr;
2446
2447    float64_unpack_canonical(&pa, a, status);
2448    float64_unpack_canonical(&pb, b, status);
2449    pr = parts_div(&pa, &pb, status);
2450
2451    return float64_round_pack_canonical(pr, status);
2452}
2453
2454static float hard_f32_div(float a, float b)
2455{
2456    return a / b;
2457}
2458
2459static double hard_f64_div(double a, double b)
2460{
2461    return a / b;
2462}
2463
2464static bool f32_div_pre(union_float32 a, union_float32 b)
2465{
2466    if (QEMU_HARDFLOAT_2F32_USE_FP) {
2467        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2468               fpclassify(b.h) == FP_NORMAL;
2469    }
2470    return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2471}
2472
2473static bool f64_div_pre(union_float64 a, union_float64 b)
2474{
2475    if (QEMU_HARDFLOAT_2F64_USE_FP) {
2476        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2477               fpclassify(b.h) == FP_NORMAL;
2478    }
2479    return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2480}
2481
2482static bool f32_div_post(union_float32 a, union_float32 b)
2483{
2484    if (QEMU_HARDFLOAT_2F32_USE_FP) {
2485        return fpclassify(a.h) != FP_ZERO;
2486    }
2487    return !float32_is_zero(a.s);
2488}
2489
2490static bool f64_div_post(union_float64 a, union_float64 b)
2491{
2492    if (QEMU_HARDFLOAT_2F64_USE_FP) {
2493        return fpclassify(a.h) != FP_ZERO;
2494    }
2495    return !float64_is_zero(a.s);
2496}
2497
2498float32 QEMU_FLATTEN
2499float32_div(float32 a, float32 b, float_status *s)
2500{
2501    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2502                        f32_div_pre, f32_div_post);
2503}
2504
2505float64 QEMU_FLATTEN
2506float64_div(float64 a, float64 b, float_status *s)
2507{
2508    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2509                        f64_div_pre, f64_div_post);
2510}
2511
2512float64 float64r32_div(float64 a, float64 b, float_status *status)
2513{
2514    FloatParts64 pa, pb, *pr;
2515
2516    float64_unpack_canonical(&pa, a, status);
2517    float64_unpack_canonical(&pb, b, status);
2518    pr = parts_div(&pa, &pb, status);
2519
2520    return float64r32_round_pack_canonical(pr, status);
2521}
2522
2523bfloat16 QEMU_FLATTEN
2524bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2525{
2526    FloatParts64 pa, pb, *pr;
2527
2528    bfloat16_unpack_canonical(&pa, a, status);
2529    bfloat16_unpack_canonical(&pb, b, status);
2530    pr = parts_div(&pa, &pb, status);
2531
2532    return bfloat16_round_pack_canonical(pr, status);
2533}
2534
2535float128 QEMU_FLATTEN
2536float128_div(float128 a, float128 b, float_status *status)
2537{
2538    FloatParts128 pa, pb, *pr;
2539
2540    float128_unpack_canonical(&pa, a, status);
2541    float128_unpack_canonical(&pb, b, status);
2542    pr = parts_div(&pa, &pb, status);
2543
2544    return float128_round_pack_canonical(pr, status);
2545}
2546
2547floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2548{
2549    FloatParts128 pa, pb, *pr;
2550
2551    if (!floatx80_unpack_canonical(&pa, a, status) ||
2552        !floatx80_unpack_canonical(&pb, b, status)) {
2553        return floatx80_default_nan(status);
2554    }
2555
2556    pr = parts_div(&pa, &pb, status);
2557    return floatx80_round_pack_canonical(pr, status);
2558}
2559
2560/*
2561 * Remainder
2562 */
2563
2564float32 float32_rem(float32 a, float32 b, float_status *status)
2565{
2566    FloatParts64 pa, pb, *pr;
2567
2568    float32_unpack_canonical(&pa, a, status);
2569    float32_unpack_canonical(&pb, b, status);
2570    pr = parts_modrem(&pa, &pb, NULL, status);
2571
2572    return float32_round_pack_canonical(pr, status);
2573}
2574
2575float64 float64_rem(float64 a, float64 b, float_status *status)
2576{
2577    FloatParts64 pa, pb, *pr;
2578
2579    float64_unpack_canonical(&pa, a, status);
2580    float64_unpack_canonical(&pb, b, status);
2581    pr = parts_modrem(&pa, &pb, NULL, status);
2582
2583    return float64_round_pack_canonical(pr, status);
2584}
2585
2586float128 float128_rem(float128 a, float128 b, float_status *status)
2587{
2588    FloatParts128 pa, pb, *pr;
2589
2590    float128_unpack_canonical(&pa, a, status);
2591    float128_unpack_canonical(&pb, b, status);
2592    pr = parts_modrem(&pa, &pb, NULL, status);
2593
2594    return float128_round_pack_canonical(pr, status);
2595}
2596
2597/*
2598 * Returns the remainder of the extended double-precision floating-point value
2599 * `a' with respect to the corresponding value `b'.
2600 * If 'mod' is false, the operation is performed according to the IEC/IEEE
2601 * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, the
2602 * remainder is instead computed by truncating the quotient toward zero,
2603 * and *quotient is set to the low 64 bits of the absolute value of the
2604 * integer quotient.
2605 */
2606floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2607                         uint64_t *quotient, float_status *status)
2608{
2609    FloatParts128 pa, pb, *pr;
2610
2611    *quotient = 0;
2612    if (!floatx80_unpack_canonical(&pa, a, status) ||
2613        !floatx80_unpack_canonical(&pb, b, status)) {
2614        return floatx80_default_nan(status);
2615    }
2616    pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2617
2618    return floatx80_round_pack_canonical(pr, status);
2619}
2620
2621floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2622{
2623    uint64_t quotient;
2624    return floatx80_modrem(a, b, false, &quotient, status);
2625}
2626
2627floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2628{
2629    uint64_t quotient;
2630    return floatx80_modrem(a, b, true, &quotient, status);
2631}
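/*
 * For illustration (a sketch, not part of the original source): obtaining
 * both the truncated remainder and the low 64 bits of the integer
 * quotient, given a caller-provided float_status env:
 *
 *     uint64_t q;
 *     floatx80 r = floatx80_modrem(a, b, true, &q, &env);
 */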
2632
2633/*
2634 * Float to Float conversions
2635 *
2636 * Returns the result of converting one float format to another. The
2637 * conversion is performed according to the IEC/IEEE Standard for
2638 * Binary Floating-Point Arithmetic.
2639 *
2640 * Usually this only needs to take care of raising invalid exceptions
2641 * and handling the conversion on NaNs.
2642 */
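/*
 * Three shapes are handled below: same-size conversions
 * (parts_float_to_float), narrowing conversions
 * (parts_float_to_float_narrow, which jams discarded fraction bits), and
 * widening conversions (parts_float_to_float_widen, which zero-extends
 * the fraction).
 */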
2643
2644static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2645{
2646    switch (a->cls) {
2647    case float_class_snan:
2648        float_raise(float_flag_invalid_snan, s);
2649        /* fall through */
2650    case float_class_qnan:
2651        /*
2652         * There is no NaN in the destination format.  Raise Invalid
2653         * and return a zero with the sign of the input NaN.
2654         */
2655        float_raise(float_flag_invalid, s);
2656        a->cls = float_class_zero;
2657        break;
2658
2659    case float_class_inf:
2660        /*
2661         * There is no Inf in the destination format.  Raise Invalid
2662         * and return the maximum normal with the correct sign.
2663         */
2664        float_raise(float_flag_invalid, s);
2665        a->cls = float_class_normal;
2666        a->exp = float16_params_ahp.exp_max;
2667        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2668                                  float16_params_ahp.frac_size + 1);
2669        break;
2670
2671    case float_class_normal:
2672    case float_class_zero:
2673        break;
2674
2675    default:
2676        g_assert_not_reached();
2677    }
2678}
2679
2680static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2681{
2682    if (is_nan(a->cls)) {
2683        parts_return_nan(a, s);
2684    }
2685}
2686
2687static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2688{
2689    if (is_nan(a->cls)) {
2690        parts_return_nan(a, s);
2691    }
2692}
2693
2694#define parts_float_to_float(P, S) \
2695    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2696
2697static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2698                                        float_status *s)
2699{
2700    a->cls = b->cls;
2701    a->sign = b->sign;
2702    a->exp = b->exp;
2703
2704    if (a->cls == float_class_normal) {
2705        frac_truncjam(a, b);
2706    } else if (is_nan(a->cls)) {
2707        /* Discard the low bits of the NaN. */
2708        a->frac = b->frac_hi;
2709        parts_return_nan(a, s);
2710    }
2711}
2712
2713static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2714                                       float_status *s)
2715{
2716    a->cls = b->cls;
2717    a->sign = b->sign;
2718    a->exp = b->exp;
2719    frac_widen(a, b);
2720
2721    if (is_nan(a->cls)) {
2722        parts_return_nan(a, s);
2723    }
2724}
2725
2726float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2727{
2728    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2729    FloatParts64 p;
2730
2731    float16a_unpack_canonical(&p, a, s, fmt16);
2732    parts_float_to_float(&p, s);
2733    return float32_round_pack_canonical(&p, s);
2734}
2735
2736float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2737{
2738    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2739    FloatParts64 p;
2740
2741    float16a_unpack_canonical(&p, a, s, fmt16);
2742    parts_float_to_float(&p, s);
2743    return float64_round_pack_canonical(&p, s);
2744}
2745
2746float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2747{
2748    FloatParts64 p;
2749    const FloatFmt *fmt;
2750
2751    float32_unpack_canonical(&p, a, s);
2752    if (ieee) {
2753        parts_float_to_float(&p, s);
2754        fmt = &float16_params;
2755    } else {
2756        parts_float_to_ahp(&p, s);
2757        fmt = &float16_params_ahp;
2758    }
2759    return float16a_round_pack_canonical(&p, s, fmt);
2760}
2761
2762static float64 QEMU_SOFTFLOAT_ATTR
2763soft_float32_to_float64(float32 a, float_status *s)
2764{
2765    FloatParts64 p;
2766
2767    float32_unpack_canonical(&p, a, s);
2768    parts_float_to_float(&p, s);
2769    return float64_round_pack_canonical(&p, s);
2770}
2771
2772float64 float32_to_float64(float32 a, float_status *s)
2773{
2774    if (likely(float32_is_normal(a))) {
2775        /* Widening conversion can never produce inexact results.  */
2776        union_float32 uf;
2777        union_float64 ud;
2778        uf.s = a;
2779        ud.h = uf.h;
2780        return ud.s;
2781    } else if (float32_is_zero(a)) {
2782        return float64_set_sign(float64_zero, float32_is_neg(a));
2783    } else {
2784        return soft_float32_to_float64(a, s);
2785    }
2786}
2787
2788float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2789{
2790    FloatParts64 p;
2791    const FloatFmt *fmt;
2792
2793    float64_unpack_canonical(&p, a, s);
2794    if (ieee) {
2795        parts_float_to_float(&p, s);
2796        fmt = &float16_params;
2797    } else {
2798        parts_float_to_ahp(&p, s);
2799        fmt = &float16_params_ahp;
2800    }
2801    return float16a_round_pack_canonical(&p, s, fmt);
2802}
2803
2804float32 float64_to_float32(float64 a, float_status *s)
2805{
2806    FloatParts64 p;
2807
2808    float64_unpack_canonical(&p, a, s);
2809    parts_float_to_float(&p, s);
2810    return float32_round_pack_canonical(&p, s);
2811}
2812
2813float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2814{
2815    FloatParts64 p;
2816
2817    bfloat16_unpack_canonical(&p, a, s);
2818    parts_float_to_float(&p, s);
2819    return float32_round_pack_canonical(&p, s);
2820}
2821
2822float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2823{
2824    FloatParts64 p;
2825
2826    bfloat16_unpack_canonical(&p, a, s);
2827    parts_float_to_float(&p, s);
2828    return float64_round_pack_canonical(&p, s);
2829}
2830
2831bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2832{
2833    FloatParts64 p;
2834
2835    float32_unpack_canonical(&p, a, s);
2836    parts_float_to_float(&p, s);
2837    return bfloat16_round_pack_canonical(&p, s);
2838}
2839
2840bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2841{
2842    FloatParts64 p;
2843
2844    float64_unpack_canonical(&p, a, s);
2845    parts_float_to_float(&p, s);
2846    return bfloat16_round_pack_canonical(&p, s);
2847}
2848
2849float32 float128_to_float32(float128 a, float_status *s)
2850{
2851    FloatParts64 p64;
2852    FloatParts128 p128;
2853
2854    float128_unpack_canonical(&p128, a, s);
2855    parts_float_to_float_narrow(&p64, &p128, s);
2856    return float32_round_pack_canonical(&p64, s);
2857}
2858
2859float64 float128_to_float64(float128 a, float_status *s)
2860{
2861    FloatParts64 p64;
2862    FloatParts128 p128;
2863
2864    float128_unpack_canonical(&p128, a, s);
2865    parts_float_to_float_narrow(&p64, &p128, s);
2866    return float64_round_pack_canonical(&p64, s);
2867}
2868
2869float128 float32_to_float128(float32 a, float_status *s)
2870{
2871    FloatParts64 p64;
2872    FloatParts128 p128;
2873
2874    float32_unpack_canonical(&p64, a, s);
2875    parts_float_to_float_widen(&p128, &p64, s);
2876    return float128_round_pack_canonical(&p128, s);
2877}
2878
2879float128 float64_to_float128(float64 a, float_status *s)
2880{
2881    FloatParts64 p64;
2882    FloatParts128 p128;
2883
2884    float64_unpack_canonical(&p64, a, s);
2885    parts_float_to_float_widen(&p128, &p64, s);
2886    return float128_round_pack_canonical(&p128, s);
2887}
2888
2889float32 floatx80_to_float32(floatx80 a, float_status *s)
2890{
2891    FloatParts64 p64;
2892    FloatParts128 p128;
2893
2894    if (floatx80_unpack_canonical(&p128, a, s)) {
2895        parts_float_to_float_narrow(&p64, &p128, s);
2896    } else {
2897        parts_default_nan(&p64, s);
2898    }
2899    return float32_round_pack_canonical(&p64, s);
2900}
2901
2902float64 floatx80_to_float64(floatx80 a, float_status *s)
2903{
2904    FloatParts64 p64;
2905    FloatParts128 p128;
2906
2907    if (floatx80_unpack_canonical(&p128, a, s)) {
2908        parts_float_to_float_narrow(&p64, &p128, s);
2909    } else {
2910        parts_default_nan(&p64, s);
2911    }
2912    return float64_round_pack_canonical(&p64, s);
2913}
2914
2915float128 floatx80_to_float128(floatx80 a, float_status *s)
2916{
2917    FloatParts128 p;
2918
2919    if (floatx80_unpack_canonical(&p, a, s)) {
2920        parts_float_to_float(&p, s);
2921    } else {
2922        parts_default_nan(&p, s);
2923    }
2924    return float128_round_pack_canonical(&p, s);
2925}
2926
2927floatx80 float32_to_floatx80(float32 a, float_status *s)
2928{
2929    FloatParts64 p64;
2930    FloatParts128 p128;
2931
2932    float32_unpack_canonical(&p64, a, s);
2933    parts_float_to_float_widen(&p128, &p64, s);
2934    return floatx80_round_pack_canonical(&p128, s);
2935}
2936
2937floatx80 float64_to_floatx80(float64 a, float_status *s)
2938{
2939    FloatParts64 p64;
2940    FloatParts128 p128;
2941
2942    float64_unpack_canonical(&p64, a, s);
2943    parts_float_to_float_widen(&p128, &p64, s);
2944    return floatx80_round_pack_canonical(&p128, s);
2945}
2946
2947floatx80 float128_to_floatx80(float128 a, float_status *s)
2948{
2949    FloatParts128 p;
2950
2951    float128_unpack_canonical(&p, a, s);
2952    parts_float_to_float(&p, s);
2953    return floatx80_round_pack_canonical(&p, s);
2954}
2955
2956/*
2957 * Round to integral value
2958 */
2959
2960float16 float16_round_to_int(float16 a, float_status *s)
2961{
2962    FloatParts64 p;
2963
2964    float16_unpack_canonical(&p, a, s);
2965    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2966    return float16_round_pack_canonical(&p, s);
2967}
2968
2969float32 float32_round_to_int(float32 a, float_status *s)
2970{
2971    FloatParts64 p;
2972
2973    float32_unpack_canonical(&p, a, s);
2974    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2975    return float32_round_pack_canonical(&p, s);
2976}
2977
2978float64 float64_round_to_int(float64 a, float_status *s)
2979{
2980    FloatParts64 p;
2981
2982    float64_unpack_canonical(&p, a, s);
2983    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2984    return float64_round_pack_canonical(&p, s);
2985}
2986
2987bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2988{
2989    FloatParts64 p;
2990
2991    bfloat16_unpack_canonical(&p, a, s);
2992    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2993    return bfloat16_round_pack_canonical(&p, s);
2994}
2995
2996float128 float128_round_to_int(float128 a, float_status *s)
2997{
2998    FloatParts128 p;
2999
3000    float128_unpack_canonical(&p, a, s);
3001    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3002    return float128_round_pack_canonical(&p, s);
3003}
3004
3005floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3006{
3007    FloatParts128 p;
3008
3009    if (!floatx80_unpack_canonical(&p, a, status)) {
3010        return floatx80_default_nan(status);
3011    }
3012
3013    parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3014                       &floatx80_params[status->floatx80_rounding_precision]);
3015    return floatx80_round_pack_canonical(&p, status);
3016}
3017
3018/*
3019 * Floating-point to signed integer conversions
3020 */
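/*
 * The *_scalbn variants convert a * 2**scale, which is how targets
 * express float-to-fixed-point conversions; the plain and
 * _round_to_zero wrappers further below simply pass scale == 0.
 */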
3021
3022int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3023                              float_status *s)
3024{
3025    FloatParts64 p;
3026
3027    float16_unpack_canonical(&p, a, s);
3028    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3029}
3030
3031int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3032                                float_status *s)
3033{
3034    FloatParts64 p;
3035
3036    float16_unpack_canonical(&p, a, s);
3037    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3038}
3039
3040int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3041                                float_status *s)
3042{
3043    FloatParts64 p;
3044
3045    float16_unpack_canonical(&p, a, s);
3046    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3047}
3048
3049int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3050                                float_status *s)
3051{
3052    FloatParts64 p;
3053
3054    float16_unpack_canonical(&p, a, s);
3055    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3056}
3057
3058int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3059                                float_status *s)
3060{
3061    FloatParts64 p;
3062
3063    float32_unpack_canonical(&p, a, s);
3064    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3065}
3066
3067int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3068                                float_status *s)
3069{
3070    FloatParts64 p;
3071
3072    float32_unpack_canonical(&p, a, s);
3073    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3074}
3075
3076int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3077                                float_status *s)
3078{
3079    FloatParts64 p;
3080
3081    float32_unpack_canonical(&p, a, s);
3082    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3083}
3084
3085int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3086                                float_status *s)
3087{
3088    FloatParts64 p;
3089
3090    float64_unpack_canonical(&p, a, s);
3091    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3092}
3093
3094int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3095                                float_status *s)
3096{
3097    FloatParts64 p;
3098
3099    float64_unpack_canonical(&p, a, s);
3100    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3101}
3102
3103int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3104                                float_status *s)
3105{
3106    FloatParts64 p;
3107
3108    float64_unpack_canonical(&p, a, s);
3109    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3110}
3111
3112int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3113                                 float_status *s)
3114{
3115    FloatParts64 p;
3116
3117    bfloat16_unpack_canonical(&p, a, s);
3118    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3119}
3120
3121int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3122                                 float_status *s)
3123{
3124    FloatParts64 p;
3125
3126    bfloat16_unpack_canonical(&p, a, s);
3127    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3128}
3129
3130int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3131                                 float_status *s)
3132{
3133    FloatParts64 p;
3134
3135    bfloat16_unpack_canonical(&p, a, s);
3136    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3137}
3138
3139static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3140                                        int scale, float_status *s)
3141{
3142    FloatParts128 p;
3143
3144    float128_unpack_canonical(&p, a, s);
3145    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3146}
3147
3148static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3149                                        int scale, float_status *s)
3150{
3151    FloatParts128 p;
3152
3153    float128_unpack_canonical(&p, a, s);
3154    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3155}
3156
3157static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3158                                        int scale, float_status *s)
3159{
3160    FloatParts128 p;
3161
3162    if (!floatx80_unpack_canonical(&p, a, s)) {
3163        parts_default_nan(&p, s);
3164    }
3165    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3166}
3167
3168static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3169                                        int scale, float_status *s)
3170{
3171    FloatParts128 p;
3172
3173    if (!floatx80_unpack_canonical(&p, a, s)) {
3174        parts_default_nan(&p, s);
3175    }
3176    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3177}
3178
3179int8_t float16_to_int8(float16 a, float_status *s)
3180{
3181    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3182}
3183
3184int16_t float16_to_int16(float16 a, float_status *s)
3185{
3186    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3187}
3188
3189int32_t float16_to_int32(float16 a, float_status *s)
3190{
3191    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3192}
3193
3194int64_t float16_to_int64(float16 a, float_status *s)
3195{
3196    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3197}
3198
3199int16_t float32_to_int16(float32 a, float_status *s)
3200{
3201    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3202}
3203
3204int32_t float32_to_int32(float32 a, float_status *s)
3205{
3206    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3207}
3208
3209int64_t float32_to_int64(float32 a, float_status *s)
3210{
3211    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3212}
3213
3214int16_t float64_to_int16(float64 a, float_status *s)
3215{
3216    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3217}
3218
3219int32_t float64_to_int32(float64 a, float_status *s)
3220{
3221    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3222}
3223
3224int64_t float64_to_int64(float64 a, float_status *s)
3225{
3226    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3227}
3228
3229int32_t float128_to_int32(float128 a, float_status *s)
3230{
3231    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3232}
3233
3234int64_t float128_to_int64(float128 a, float_status *s)
3235{
3236    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3237}
3238
3239int32_t floatx80_to_int32(floatx80 a, float_status *s)
3240{
3241    return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3242}
3243
3244int64_t floatx80_to_int64(floatx80 a, float_status *s)
3245{
3246    return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3247}
3248
3249int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3250{
3251    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3252}
3253
3254int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3255{
3256    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3257}
3258
3259int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3260{
3261    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3262}
3263
3264int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3265{
3266    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3267}
3268
3269int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3270{
3271    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3272}
3273
3274int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3275{
3276    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3277}
3278
3279int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3280{
3281    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3282}
3283
3284int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3285{
3286    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3287}
3288
3289int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3290{
3291    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3292}
3293
3294int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3295{
3296    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3297}
3298
3299int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3300{
3301    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3302}
3303
3304int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3305{
3306    return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3307}
3308
3309int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3310{
3311    return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3312}
3313
3314int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3315{
3316    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3317}
3318
3319int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3320{
3321    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3322}
3323
3324int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3325{
3326    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3327}
3328
3329int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3330{
3331    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3332}
3333
3334int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3335{
3336    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3337}
3338
3339int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3340{
3341    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3342}
3343
3344/*
3345 * Floating-point to unsigned integer conversions
3346 */
3347
3348uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3349                                float_status *s)
3350{
3351    FloatParts64 p;
3352
3353    float16_unpack_canonical(&p, a, s);
3354    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3355}
3356
3357uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3358                                  float_status *s)
3359{
3360    FloatParts64 p;
3361
3362    float16_unpack_canonical(&p, a, s);
3363    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3364}
3365
3366uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3367                                  float_status *s)
3368{
3369    FloatParts64 p;
3370
3371    float16_unpack_canonical(&p, a, s);
3372    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3373}
3374
3375uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3376                                  float_status *s)
3377{
3378    FloatParts64 p;
3379
3380    float16_unpack_canonical(&p, a, s);
3381    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3382}
3383
3384uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3385                                  float_status *s)
3386{
3387    FloatParts64 p;
3388
3389    float32_unpack_canonical(&p, a, s);
3390    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3391}
3392
3393uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3394                                  float_status *s)
3395{
3396    FloatParts64 p;
3397
3398    float32_unpack_canonical(&p, a, s);
3399    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3400}
3401
3402uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3403                                  float_status *s)
3404{
3405    FloatParts64 p;
3406
3407    float32_unpack_canonical(&p, a, s);
3408    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3409}
3410
3411uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3412                                  float_status *s)
3413{
3414    FloatParts64 p;
3415
3416    float64_unpack_canonical(&p, a, s);
3417    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3418}
3419
3420uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3421                                  float_status *s)
3422{
3423    FloatParts64 p;
3424
3425    float64_unpack_canonical(&p, a, s);
3426    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3427}
3428
3429uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3430                                  float_status *s)
3431{
3432    FloatParts64 p;
3433
3434    float64_unpack_canonical(&p, a, s);
3435    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3436}
3437
3438uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3439                                   int scale, float_status *s)
3440{
3441    FloatParts64 p;
3442
3443    bfloat16_unpack_canonical(&p, a, s);
3444    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3445}
3446
3447uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3448                                   int scale, float_status *s)
3449{
3450    FloatParts64 p;
3451
3452    bfloat16_unpack_canonical(&p, a, s);
3453    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3454}
3455
3456uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3457                                   int scale, float_status *s)
3458{
3459    FloatParts64 p;
3460
3461    bfloat16_unpack_canonical(&p, a, s);
3462    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3463}
3464
3465static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3466                                          int scale, float_status *s)
3467{
3468    FloatParts128 p;
3469
3470    float128_unpack_canonical(&p, a, s);
3471    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3472}
3473
3474static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3475                                          int scale, float_status *s)
3476{
3477    FloatParts128 p;
3478
3479    float128_unpack_canonical(&p, a, s);
3480    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3481}
3482
3483uint8_t float16_to_uint8(float16 a, float_status *s)
3484{
3485    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3486}
3487
3488uint16_t float16_to_uint16(float16 a, float_status *s)
3489{
3490    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3491}
3492
3493uint32_t float16_to_uint32(float16 a, float_status *s)
3494{
3495    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3496}
3497
3498uint64_t float16_to_uint64(float16 a, float_status *s)
3499{
3500    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3501}
3502
3503uint16_t float32_to_uint16(float32 a, float_status *s)
3504{
3505    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3506}
3507
3508uint32_t float32_to_uint32(float32 a, float_status *s)
3509{
3510    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3511}
3512
3513uint64_t float32_to_uint64(float32 a, float_status *s)
3514{
3515    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3516}
3517
3518uint16_t float64_to_uint16(float64 a, float_status *s)
3519{
3520    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3521}
3522
3523uint32_t float64_to_uint32(float64 a, float_status *s)
3524{
3525    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3526}
3527
3528uint64_t float64_to_uint64(float64 a, float_status *s)
3529{
3530    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3531}
3532
3533uint32_t float128_to_uint32(float128 a, float_status *s)
3534{
3535    return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3536}
3537
3538uint64_t float128_to_uint64(float128 a, float_status *s)
3539{
3540    return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3541}
3542
3543uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3544{
3545    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3546}
3547
3548uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3549{
3550    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3551}
3552
3553uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3554{
3555    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3556}
3557
3558uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3559{
3560    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3561}
3562
3563uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3564{
3565    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3566}
3567
3568uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3569{
3570    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3571}
3572
3573uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3574{
3575    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3576}
3577
3578uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3579{
3580    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3581}
3582
3583uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3584{
3585    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3586}
3587
3588uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3589{
3590    return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3591}
3592
3593uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3594{
3595    return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3596}
3597
3598uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3599{
3600    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3601}
3602
3603uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3604{
3605    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3606}
3607
3608uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3609{
3610    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3611}
3612
3613uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3614{
3615    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3616}
3617
3618uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3619{
3620    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3621}
3622
3623uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3624{
3625    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3626}
3627
3628/*
3629 * Signed integer to floating-point conversions
3630 */
3631
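    /*
     * Worked example (added for exposition): the `scale' argument shifts
     * the input by 2**scale before rounding, so these helpers compute
     * round(a * 2**scale).  For instance, given a default float_status s:
     *
     *     int32_to_float64_scalbn(3, -1, s) == 1.5
     *     int32_to_float64_scalbn(3, 10, s) == 3072.0
     *
     * With scale == 0 they reduce to the plain int-to-float conversions.
     */
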
3632float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3633{
3634    FloatParts64 p;
3635
3636    parts_sint_to_float(&p, a, scale, status);
3637    return float16_round_pack_canonical(&p, status);
3638}
3639
3640float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3641{
3642    return int64_to_float16_scalbn(a, scale, status);
3643}
3644
3645float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3646{
3647    return int64_to_float16_scalbn(a, scale, status);
3648}
3649
3650float16 int64_to_float16(int64_t a, float_status *status)
3651{
3652    return int64_to_float16_scalbn(a, 0, status);
3653}
3654
3655float16 int32_to_float16(int32_t a, float_status *status)
3656{
3657    return int64_to_float16_scalbn(a, 0, status);
3658}
3659
3660float16 int16_to_float16(int16_t a, float_status *status)
3661{
3662    return int64_to_float16_scalbn(a, 0, status);
3663}
3664
3665float16 int8_to_float16(int8_t a, float_status *status)
3666{
3667    return int64_to_float16_scalbn(a, 0, status);
3668}
3669
3670float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3671{
3672    FloatParts64 p;
3673
3674    /* Without scaling, there are no overflow concerns. */
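        /*
         * Note (added for exposition): can_use_fpu(), defined earlier in
         * this file, has also checked for round-to-nearest-even with the
         * inexact flag already set, so the host cast below matches the
         * softfloat result and loses no flag information.
         */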
3675    if (likely(scale == 0) && can_use_fpu(status)) {
3676        union_float32 ur;
3677        ur.h = a;
3678        return ur.s;
3679    }
3680
3681    parts_sint_to_float(&p, a, scale, status);
3682    return float32_round_pack_canonical(&p, status);
3683}
3684
3685float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3686{
3687    return int64_to_float32_scalbn(a, scale, status);
3688}
3689
3690float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3691{
3692    return int64_to_float32_scalbn(a, scale, status);
3693}
3694
3695float32 int64_to_float32(int64_t a, float_status *status)
3696{
3697    return int64_to_float32_scalbn(a, 0, status);
3698}
3699
3700float32 int32_to_float32(int32_t a, float_status *status)
3701{
3702    return int64_to_float32_scalbn(a, 0, status);
3703}
3704
3705float32 int16_to_float32(int16_t a, float_status *status)
3706{
3707    return int64_to_float32_scalbn(a, 0, status);
3708}
3709
3710float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3711{
3712    FloatParts64 p;
3713
3714    /* Without scaling, there are no overflow concerns. */
3715    if (likely(scale == 0) && can_use_fpu(status)) {
3716        union_float64 ur;
3717        ur.h = a;
3718        return ur.s;
3719    }
3720
3721    parts_sint_to_float(&p, a, scale, status);
3722    return float64_round_pack_canonical(&p, status);
3723}
3724
3725float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3726{
3727    return int64_to_float64_scalbn(a, scale, status);
3728}
3729
3730float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3731{
3732    return int64_to_float64_scalbn(a, scale, status);
3733}
3734
3735float64 int64_to_float64(int64_t a, float_status *status)
3736{
3737    return int64_to_float64_scalbn(a, 0, status);
3738}
3739
3740float64 int32_to_float64(int32_t a, float_status *status)
3741{
3742    return int64_to_float64_scalbn(a, 0, status);
3743}
3744
3745float64 int16_to_float64(int16_t a, float_status *status)
3746{
3747    return int64_to_float64_scalbn(a, 0, status);
3748}
3749
3750bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3751{
3752    FloatParts64 p;
3753
3754    parts_sint_to_float(&p, a, scale, status);
3755    return bfloat16_round_pack_canonical(&p, status);
3756}
3757
3758bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3759{
3760    return int64_to_bfloat16_scalbn(a, scale, status);
3761}
3762
3763bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3764{
3765    return int64_to_bfloat16_scalbn(a, scale, status);
3766}
3767
3768bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3769{
3770    return int64_to_bfloat16_scalbn(a, 0, status);
3771}
3772
3773bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3774{
3775    return int64_to_bfloat16_scalbn(a, 0, status);
3776}
3777
3778bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3779{
3780    return int64_to_bfloat16_scalbn(a, 0, status);
3781}
3782
3783float128 int64_to_float128(int64_t a, float_status *status)
3784{
3785    FloatParts128 p;
3786
3787    parts_sint_to_float(&p, a, 0, status);
3788    return float128_round_pack_canonical(&p, status);
3789}
3790
3791float128 int32_to_float128(int32_t a, float_status *status)
3792{
3793    return int64_to_float128(a, status);
3794}
3795
3796floatx80 int64_to_floatx80(int64_t a, float_status *status)
3797{
3798    FloatParts128 p;
3799
3800    parts_sint_to_float(&p, a, 0, status);
3801    return floatx80_round_pack_canonical(&p, status);
3802}
3803
3804floatx80 int32_to_floatx80(int32_t a, float_status *status)
3805{
3806    return int64_to_floatx80(a, status);
3807}
3808
3809/*
3810 * Unsigned integer to floating-point conversions
3811 */
3812
3813float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3814{
3815    FloatParts64 p;
3816
3817    parts_uint_to_float(&p, a, scale, status);
3818    return float16_round_pack_canonical(&p, status);
3819}
3820
3821float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3822{
3823    return uint64_to_float16_scalbn(a, scale, status);
3824}
3825
3826float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3827{
3828    return uint64_to_float16_scalbn(a, scale, status);
3829}
3830
3831float16 uint64_to_float16(uint64_t a, float_status *status)
3832{
3833    return uint64_to_float16_scalbn(a, 0, status);
3834}
3835
3836float16 uint32_to_float16(uint32_t a, float_status *status)
3837{
3838    return uint64_to_float16_scalbn(a, 0, status);
3839}
3840
3841float16 uint16_to_float16(uint16_t a, float_status *status)
3842{
3843    return uint64_to_float16_scalbn(a, 0, status);
3844}
3845
3846float16 uint8_to_float16(uint8_t a, float_status *status)
3847{
3848    return uint64_to_float16_scalbn(a, 0, status);
3849}
3850
3851float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3852{
3853    FloatParts64 p;
3854
3855    /* Without scaling, there are no overflow concerns. */
3856    if (likely(scale == 0) && can_use_fpu(status)) {
3857        union_float32 ur;
3858        ur.h = a;
3859        return ur.s;
3860    }
3861
3862    parts_uint_to_float(&p, a, scale, status);
3863    return float32_round_pack_canonical(&p, status);
3864}
3865
3866float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3867{
3868    return uint64_to_float32_scalbn(a, scale, status);
3869}
3870
3871float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3872{
3873    return uint64_to_float32_scalbn(a, scale, status);
3874}
3875
3876float32 uint64_to_float32(uint64_t a, float_status *status)
3877{
3878    return uint64_to_float32_scalbn(a, 0, status);
3879}
3880
3881float32 uint32_to_float32(uint32_t a, float_status *status)
3882{
3883    return uint64_to_float32_scalbn(a, 0, status);
3884}
3885
3886float32 uint16_to_float32(uint16_t a, float_status *status)
3887{
3888    return uint64_to_float32_scalbn(a, 0, status);
3889}
3890
3891float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3892{
3893    FloatParts64 p;
3894
3895    /* Without scaling, there are no overflow concerns. */
3896    if (likely(scale == 0) && can_use_fpu(status)) {
3897        union_float64 ur;
3898        ur.h = a;
3899        return ur.s;
3900    }
3901
3902    parts_uint_to_float(&p, a, scale, status);
3903    return float64_round_pack_canonical(&p, status);
3904}
3905
3906float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3907{
3908    return uint64_to_float64_scalbn(a, scale, status);
3909}
3910
3911float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3912{
3913    return uint64_to_float64_scalbn(a, scale, status);
3914}
3915
3916float64 uint64_to_float64(uint64_t a, float_status *status)
3917{
3918    return uint64_to_float64_scalbn(a, 0, status);
3919}
3920
3921float64 uint32_to_float64(uint32_t a, float_status *status)
3922{
3923    return uint64_to_float64_scalbn(a, 0, status);
3924}
3925
3926float64 uint16_to_float64(uint16_t a, float_status *status)
3927{
3928    return uint64_to_float64_scalbn(a, 0, status);
3929}
3930
3931bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3932{
3933    FloatParts64 p;
3934
3935    parts_uint_to_float(&p, a, scale, status);
3936    return bfloat16_round_pack_canonical(&p, status);
3937}
3938
3939bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3940{
3941    return uint64_to_bfloat16_scalbn(a, scale, status);
3942}
3943
3944bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3945{
3946    return uint64_to_bfloat16_scalbn(a, scale, status);
3947}
3948
3949bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3950{
3951    return uint64_to_bfloat16_scalbn(a, 0, status);
3952}
3953
3954bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3955{
3956    return uint64_to_bfloat16_scalbn(a, 0, status);
3957}
3958
3959bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3960{
3961    return uint64_to_bfloat16_scalbn(a, 0, status);
3962}
3963
3964float128 uint64_to_float128(uint64_t a, float_status *status)
3965{
3966    FloatParts128 p;
3967
3968    parts_uint_to_float(&p, a, 0, status);
3969    return float128_round_pack_canonical(&p, status);
3970}
3971
3972/*
3973 * Minimum and maximum
3974 */
3975
3976static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3977{
3978    FloatParts64 pa, pb, *pr;
3979
3980    float16_unpack_canonical(&pa, a, s);
3981    float16_unpack_canonical(&pb, b, s);
3982    pr = parts_minmax(&pa, &pb, s, flags);
3983
3984    return float16_round_pack_canonical(pr, s);
3985}
3986
3987static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3988                                float_status *s, int flags)
3989{
3990    FloatParts64 pa, pb, *pr;
3991
3992    bfloat16_unpack_canonical(&pa, a, s);
3993    bfloat16_unpack_canonical(&pb, b, s);
3994    pr = parts_minmax(&pa, &pb, s, flags);
3995
3996    return bfloat16_round_pack_canonical(pr, s);
3997}
3998
3999static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4000{
4001    FloatParts64 pa, pb, *pr;
4002
4003    float32_unpack_canonical(&pa, a, s);
4004    float32_unpack_canonical(&pb, b, s);
4005    pr = parts_minmax(&pa, &pb, s, flags);
4006
4007    return float32_round_pack_canonical(pr, s);
4008}
4009
4010static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4011{
4012    FloatParts64 pa, pb, *pr;
4013
4014    float64_unpack_canonical(&pa, a, s);
4015    float64_unpack_canonical(&pb, b, s);
4016    pr = parts_minmax(&pa, &pb, s, flags);
4017
4018    return float64_round_pack_canonical(pr, s);
4019}
4020
4021static float128 float128_minmax(float128 a, float128 b,
4022                                float_status *s, int flags)
4023{
4024    FloatParts128 pa, pb, *pr;
4025
4026    float128_unpack_canonical(&pa, a, s);
4027    float128_unpack_canonical(&pb, b, s);
4028    pr = parts_minmax(&pa, &pb, s, flags);
4029
4030    return float128_round_pack_canonical(pr, s);
4031}
4032
4033#define MINMAX_1(type, name, flags) \
4034    type type##_##name(type a, type b, float_status *s) \
4035    { return type##_minmax(a, b, s, flags); }
4036
4037#define MINMAX_2(type) \
4038    MINMAX_1(type, max, 0)                                                \
4039    MINMAX_1(type, maxnum, minmax_isnum)                                  \
4040    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4041    MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4042    MINMAX_1(type, min, minmax_ismin)                                     \
4043    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4044    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4045    MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4046
4047MINMAX_2(float16)
4048MINMAX_2(bfloat16)
4049MINMAX_2(float32)
4050MINMAX_2(float64)
4051MINMAX_2(float128)
4052
4053#undef MINMAX_1
4054#undef MINMAX_2
4055
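    /*
     * Exposition (not in the original file): the generated flavours
     * differ only in NaN handling, per IEEE 754-2008 minNum/maxNum and
     * IEEE 754-2019 minimum/maximumNumber:
     *
     *     float32_min(qnan, one)             -> qnan  (NaN propagates)
     *     float32_minnum(qnan, one)          -> one   (quiet NaN ignored;
     *                                                  an sNaN still wins)
     *     float32_minimum_number(snan, one)  -> one   (number preferred
     *                                                  even over an sNaN)
     *
     * The *mag variants compare |a| and |b| first, falling back to the
     * signed comparison on equal magnitudes.
     */
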
4056/*
4057 * Floating point compare
4058 */
4059
4060static FloatRelation QEMU_FLATTEN
4061float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4062{
4063    FloatParts64 pa, pb;
4064
4065    float16_unpack_canonical(&pa, a, s);
4066    float16_unpack_canonical(&pb, b, s);
4067    return parts_compare(&pa, &pb, s, is_quiet);
4068}
4069
4070FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4071{
4072    return float16_do_compare(a, b, s, false);
4073}
4074
4075FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4076{
4077    return float16_do_compare(a, b, s, true);
4078}
4079
4080static FloatRelation QEMU_SOFTFLOAT_ATTR
4081float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4082{
4083    FloatParts64 pa, pb;
4084
4085    float32_unpack_canonical(&pa, a, s);
4086    float32_unpack_canonical(&pb, b, s);
4087    return parts_compare(&pa, &pb, s, is_quiet);
4088}
4089
4090static FloatRelation QEMU_FLATTEN
4091float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4092{
4093    union_float32 ua, ub;
4094
4095    ua.s = xa;
4096    ub.s = xb;
4097
4098    if (QEMU_NO_HARDFLOAT) {
4099        goto soft;
4100    }
4101
4102    float32_input_flush2(&ua.s, &ub.s, s);
4103    if (isgreaterequal(ua.h, ub.h)) {
4104        if (isgreater(ua.h, ub.h)) {
4105            return float_relation_greater;
4106        }
4107        return float_relation_equal;
4108    }
4109    if (likely(isless(ua.h, ub.h))) {
4110        return float_relation_less;
4111    }
4112    /*
4113     * The only condition remaining is unordered.
4114     * Fall through to set flags.
4115     */
4116 soft:
4117    return float32_do_compare(ua.s, ub.s, s, is_quiet);
4118}
4119
4120FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4121{
4122    return float32_hs_compare(a, b, s, false);
4123}
4124
4125FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4126{
4127    return float32_hs_compare(a, b, s, true);
4128}
4129
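    /*
     * Exposition (not in the original file): both entry points return a
     * FloatRelation (less/equal/greater/unordered).  The difference is
     * NaN signalling: float32_compare() raises float_flag_invalid for
     * any NaN operand, while float32_compare_quiet() raises it only for
     * signalling NaNs, as IEEE 754 specifies for quiet predicates.
     * The helper below is a hypothetical usage sketch only.
     */
    static bool G_GNUC_UNUSED example_f32_le_quiet(float32 a, float32 b,
                                                   float_status *s)
    {
        /* A hypothetical "a <= b" predicate treating unordered as false. */
        FloatRelation r = float32_compare_quiet(a, b, s);
        return r == float_relation_less || r == float_relation_equal;
    }
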
4130static FloatRelation QEMU_SOFTFLOAT_ATTR
4131float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4132{
4133    FloatParts64 pa, pb;
4134
4135    float64_unpack_canonical(&pa, a, s);
4136    float64_unpack_canonical(&pb, b, s);
4137    return parts_compare(&pa, &pb, s, is_quiet);
4138}
4139
4140static FloatRelation QEMU_FLATTEN
4141float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4142{
4143    union_float64 ua, ub;
4144
4145    ua.s = xa;
4146    ub.s = xb;
4147
4148    if (QEMU_NO_HARDFLOAT) {
4149        goto soft;
4150    }
4151
4152    float64_input_flush2(&ua.s, &ub.s, s);
4153    if (isgreaterequal(ua.h, ub.h)) {
4154        if (isgreater(ua.h, ub.h)) {
4155            return float_relation_greater;
4156        }
4157        return float_relation_equal;
4158    }
4159    if (likely(isless(ua.h, ub.h))) {
4160        return float_relation_less;
4161    }
4162    /*
4163     * The only condition remaining is unordered.
4164     * Fall through to set flags.
4165     */
4166 soft:
4167    return float64_do_compare(ua.s, ub.s, s, is_quiet);
4168}
4169
4170FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4171{
4172    return float64_hs_compare(a, b, s, false);
4173}
4174
4175FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4176{
4177    return float64_hs_compare(a, b, s, true);
4178}
4179
4180static FloatRelation QEMU_FLATTEN
4181bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4182{
4183    FloatParts64 pa, pb;
4184
4185    bfloat16_unpack_canonical(&pa, a, s);
4186    bfloat16_unpack_canonical(&pb, b, s);
4187    return parts_compare(&pa, &pb, s, is_quiet);
4188}
4189
4190FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4191{
4192    return bfloat16_do_compare(a, b, s, false);
4193}
4194
4195FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4196{
4197    return bfloat16_do_compare(a, b, s, true);
4198}
4199
4200static FloatRelation QEMU_FLATTEN
4201float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4202{
4203    FloatParts128 pa, pb;
4204
4205    float128_unpack_canonical(&pa, a, s);
4206    float128_unpack_canonical(&pb, b, s);
4207    return parts_compare(&pa, &pb, s, is_quiet);
4208}
4209
4210FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4211{
4212    return float128_do_compare(a, b, s, false);
4213}
4214
4215FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4216{
4217    return float128_do_compare(a, b, s, true);
4218}
4219
4220static FloatRelation QEMU_FLATTEN
4221floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4222{
4223    FloatParts128 pa, pb;
4224
4225    if (!floatx80_unpack_canonical(&pa, a, s) ||
4226        !floatx80_unpack_canonical(&pb, b, s)) {
4227        return float_relation_unordered;
4228    }
4229    return parts_compare(&pa, &pb, s, is_quiet);
4230}
4231
4232FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4233{
4234    return floatx80_do_compare(a, b, s, false);
4235}
4236
4237FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4238{
4239    return floatx80_do_compare(a, b, s, true);
4240}
4241
4242/*
4243 * Scale by 2**N
4244 */
4245
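    /*
     * Worked example (added for exposition): scalbn computes a * 2**n
     * with a single rounding, so scaling is exact unless the result
     * overflows or underflows.  float64_scalbn(1.5, 10) == 1536.0 with
     * no flags set; scaling the largest finite float64 up by one more
     * power of two overflows to infinity under round-to-nearest-even
     * and raises overflow and inexact.
     */
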
4246float16 float16_scalbn(float16 a, int n, float_status *status)
4247{
4248    FloatParts64 p;
4249
4250    float16_unpack_canonical(&p, a, status);
4251    parts_scalbn(&p, n, status);
4252    return float16_round_pack_canonical(&p, status);
4253}
4254
4255float32 float32_scalbn(float32 a, int n, float_status *status)
4256{
4257    FloatParts64 p;
4258
4259    float32_unpack_canonical(&p, a, status);
4260    parts_scalbn(&p, n, status);
4261    return float32_round_pack_canonical(&p, status);
4262}
4263
4264float64 float64_scalbn(float64 a, int n, float_status *status)
4265{
4266    FloatParts64 p;
4267
4268    float64_unpack_canonical(&p, a, status);
4269    parts_scalbn(&p, n, status);
4270    return float64_round_pack_canonical(&p, status);
4271}
4272
4273bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4274{
4275    FloatParts64 p;
4276
4277    bfloat16_unpack_canonical(&p, a, status);
4278    parts_scalbn(&p, n, status);
4279    return bfloat16_round_pack_canonical(&p, status);
4280}
4281
4282float128 float128_scalbn(float128 a, int n, float_status *status)
4283{
4284    FloatParts128 p;
4285
4286    float128_unpack_canonical(&p, a, status);
4287    parts_scalbn(&p, n, status);
4288    return float128_round_pack_canonical(&p, status);
4289}
4290
4291floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4292{
4293    FloatParts128 p;
4294
4295    if (!floatx80_unpack_canonical(&p, a, status)) {
4296        return floatx80_default_nan(status);
4297    }
4298    parts_scalbn(&p, n, status);
4299    return floatx80_round_pack_canonical(&p, status);
4300}
4301
4302/*
4303 * Square Root
4304 */
4305
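    /*
     * Exposition (not in the original file): parts_sqrt() implements the
     * IEEE 754 special cases.  sqrt(+-0) is +-0 and sqrt(+inf) is +inf;
     * any other negative input returns the default NaN and raises
     * float_flag_invalid.  This is also why the hardfloat fast paths
     * below must exclude negative inputs before calling the host sqrt().
     */
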
4306float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4307{
4308    FloatParts64 p;
4309
4310    float16_unpack_canonical(&p, a, status);
4311    parts_sqrt(&p, status, &float16_params);
4312    return float16_round_pack_canonical(&p, status);
4313}
4314
4315static float32 QEMU_SOFTFLOAT_ATTR
4316soft_f32_sqrt(float32 a, float_status *status)
4317{
4318    FloatParts64 p;
4319
4320    float32_unpack_canonical(&p, a, status);
4321    parts_sqrt(&p, status, &float32_params);
4322    return float32_round_pack_canonical(&p, status);
4323}
4324
4325static float64 QEMU_SOFTFLOAT_ATTR
4326soft_f64_sqrt(float64 a, float_status *status)
4327{
4328    FloatParts64 p;
4329
4330    float64_unpack_canonical(&p, a, status);
4331    parts_sqrt(&p, status, &float64_params);
4332    return float64_round_pack_canonical(&p, status);
4333}
4334
4335float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4336{
4337    union_float32 ua, ur;
4338
4339    ua.s = xa;
4340    if (unlikely(!can_use_fpu(s))) {
4341        goto soft;
4342    }
4343
4344    float32_input_flush1(&ua.s, s);
4345    if (QEMU_HARDFLOAT_1F32_USE_FP) {
4346        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4347                       fpclassify(ua.h) == FP_ZERO) ||
4348                     signbit(ua.h))) {
4349            goto soft;
4350        }
4351    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4352                        float32_is_neg(ua.s))) {
4353        goto soft;
4354    }
4355    ur.h = sqrtf(ua.h);
4356    return ur.s;
4357
4358 soft:
4359    return soft_f32_sqrt(ua.s, s);
4360}
4361
4362float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4363{
4364    union_float64 ua, ur;
4365
4366    ua.s = xa;
4367    if (unlikely(!can_use_fpu(s))) {
4368        goto soft;
4369    }
4370
4371    float64_input_flush1(&ua.s, s);
4372    if (QEMU_HARDFLOAT_1F64_USE_FP) {
4373        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4374                       fpclassify(ua.h) == FP_ZERO) ||
4375                     signbit(ua.h))) {
4376            goto soft;
4377        }
4378    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4379                        float64_is_neg(ua.s))) {
4380        goto soft;
4381    }
4382    ur.h = sqrt(ua.h);
4383    return ur.s;
4384
4385 soft:
4386    return soft_f64_sqrt(ua.s, s);
4387}
4388
4389float64 float64r32_sqrt(float64 a, float_status *status)
4390{
4391    FloatParts64 p;
4392
4393    float64_unpack_canonical(&p, a, status);
4394    parts_sqrt(&p, status, &float64_params);
4395    return float64r32_round_pack_canonical(&p, status);
4396}
4397
4398bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4399{
4400    FloatParts64 p;
4401
4402    bfloat16_unpack_canonical(&p, a, status);
4403    parts_sqrt(&p, status, &bfloat16_params);
4404    return bfloat16_round_pack_canonical(&p, status);
4405}
4406
4407float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4408{
4409    FloatParts128 p;
4410
4411    float128_unpack_canonical(&p, a, status);
4412    parts_sqrt(&p, status, &float128_params);
4413    return float128_round_pack_canonical(&p, status);
4414}
4415
4416floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4417{
4418    FloatParts128 p;
4419
4420    if (!floatx80_unpack_canonical(&p, a, s)) {
4421        return floatx80_default_nan(s);
4422    }
4423    parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4424    return floatx80_round_pack_canonical(&p, s);
4425}
4426
4427/*
4428 * log2
4429 */
4430float32 float32_log2(float32 a, float_status *status)
4431{
4432    FloatParts64 p;
4433
4434    float32_unpack_canonical(&p, a, status);
4435    parts_log2(&p, status, &float32_params);
4436    return float32_round_pack_canonical(&p, status);
4437}
4438
4439float64 float64_log2(float64 a, float_status *status)
4440{
4441    FloatParts64 p;
4442
4443    float64_unpack_canonical(&p, a, status);
4444    parts_log2(&p, status, &float64_params);
4445    return float64_round_pack_canonical(&p, status);
4446}
4447
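    /*
     * Exposition (a sketch of the expected behaviour, not from the
     * original file): exact powers of two produce exact results, e.g.
     * float32_log2(8.0) == 3.0.  Zero returns -inf and raises
     * float_flag_divbyzero; negative inputs return the default NaN and
     * raise float_flag_invalid.
     */
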
4448/*----------------------------------------------------------------------------
4449| The pattern for a default generated NaN.
4450*----------------------------------------------------------------------------*/
4451
4452float16 float16_default_nan(float_status *status)
4453{
4454    FloatParts64 p;
4455
4456    parts_default_nan(&p, status);
4457    p.frac >>= float16_params.frac_shift;
4458    return float16_pack_raw(&p);
4459}
4460
4461float32 float32_default_nan(float_status *status)
4462{
4463    FloatParts64 p;
4464
4465    parts_default_nan(&p, status);
4466    p.frac >>= float32_params.frac_shift;
4467    return float32_pack_raw(&p);
4468}
4469
4470float64 float64_default_nan(float_status *status)
4471{
4472    FloatParts64 p;
4473
4474    parts_default_nan(&p, status);
4475    p.frac >>= float64_params.frac_shift;
4476    return float64_pack_raw(&p);
4477}
4478
4479float128 float128_default_nan(float_status *status)
4480{
4481    FloatParts128 p;
4482
4483    parts_default_nan(&p, status);
4484    frac_shr(&p, float128_params.frac_shift);
4485    return float128_pack_raw(&p);
4486}
4487
4488bfloat16 bfloat16_default_nan(float_status *status)
4489{
4490    FloatParts64 p;
4491
4492    parts_default_nan(&p, status);
4493    p.frac >>= bfloat16_params.frac_shift;
4494    return bfloat16_pack_raw(&p);
4495}
4496
4497/*----------------------------------------------------------------------------
4498| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
4499*----------------------------------------------------------------------------*/
4500
4501float16 float16_silence_nan(float16 a, float_status *status)
4502{
4503    FloatParts64 p;
4504
4505    float16_unpack_raw(&p, a);
4506    p.frac <<= float16_params.frac_shift;
4507    parts_silence_nan(&p, status);
4508    p.frac >>= float16_params.frac_shift;
4509    return float16_pack_raw(&p);
4510}
4511
4512float32 float32_silence_nan(float32 a, float_status *status)
4513{
4514    FloatParts64 p;
4515
4516    float32_unpack_raw(&p, a);
4517    p.frac <<= float32_params.frac_shift;
4518    parts_silence_nan(&p, status);
4519    p.frac >>= float32_params.frac_shift;
4520    return float32_pack_raw(&p);
4521}
4522
4523float64 float64_silence_nan(float64 a, float_status *status)
4524{
4525    FloatParts64 p;
4526
4527    float64_unpack_raw(&p, a);
4528    p.frac <<= float64_params.frac_shift;
4529    parts_silence_nan(&p, status);
4530    p.frac >>= float64_params.frac_shift;
4531    return float64_pack_raw(&p);
4532}
4533
4534bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4535{
4536    FloatParts64 p;
4537
4538    bfloat16_unpack_raw(&p, a);
4539    p.frac <<= bfloat16_params.frac_shift;
4540    parts_silence_nan(&p, status);
4541    p.frac >>= bfloat16_params.frac_shift;
4542    return bfloat16_pack_raw(&p);
4543}
4544
4545float128 float128_silence_nan(float128 a, float_status *status)
4546{
4547    FloatParts128 p;
4548
4549    float128_unpack_raw(&p, a);
4550    frac_shl(&p, float128_params.frac_shift);
4551    parts_silence_nan(&p, status);
4552    frac_shr(&p, float128_params.frac_shift);
4553    return float128_pack_raw(&p);
4554}
4555
4556/*----------------------------------------------------------------------------
4557| If `a' is denormal and we are in flush-to-zero mode then set the
4558| input-denormal exception and return zero. Otherwise just return the value.
4559*----------------------------------------------------------------------------*/
4560
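    /*
     * Worked example (added for exposition): with flush_inputs_to_zero
     * set, the float32 pattern 0x00000001 (the smallest positive
     * denormal) squashes to +0.0 and raises float_flag_input_denormal;
     * 0x80000001 squashes to -0.0.  Values with a non-zero exponent
     * field pass through unchanged.
     */
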
4561static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4562{
4563    if (p.exp == 0 && p.frac != 0) {
4564        float_raise(float_flag_input_denormal, status);
4565        return true;
4566    }
4567
4568    return false;
4569}
4570
4571float16 float16_squash_input_denormal(float16 a, float_status *status)
4572{
4573    if (status->flush_inputs_to_zero) {
4574        FloatParts64 p;
4575
4576        float16_unpack_raw(&p, a);
4577        if (parts_squash_denormal(p, status)) {
4578            return float16_set_sign(float16_zero, p.sign);
4579        }
4580    }
4581    return a;
4582}
4583
4584float32 float32_squash_input_denormal(float32 a, float_status *status)
4585{
4586    if (status->flush_inputs_to_zero) {
4587        FloatParts64 p;
4588
4589        float32_unpack_raw(&p, a);
4590        if (parts_squash_denormal(p, status)) {
4591            return float32_set_sign(float32_zero, p.sign);
4592        }
4593    }
4594    return a;
4595}
4596
4597float64 float64_squash_input_denormal(float64 a, float_status *status)
4598{
4599    if (status->flush_inputs_to_zero) {
4600        FloatParts64 p;
4601
4602        float64_unpack_raw(&p, a);
4603        if (parts_squash_denormal(p, status)) {
4604            return float64_set_sign(float64_zero, p.sign);
4605        }
4606    }
4607    return a;
4608}
4609
4610bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4611{
4612    if (status->flush_inputs_to_zero) {
4613        FloatParts64 p;
4614
4615        bfloat16_unpack_raw(&p, a);
4616        if (parts_squash_denormal(p, status)) {
4617            return bfloat16_set_sign(bfloat16_zero, p.sign);
4618        }
4619    }
4620    return a;
4621}
4622
4623/*----------------------------------------------------------------------------
4624| Normalizes the subnormal extended double-precision floating-point value
4625| represented by the denormalized significand `aSig'.  The normalized exponent
4626| and significand are stored at the locations pointed to by `zExpPtr' and
4627| `zSigPtr', respectively.
4628*----------------------------------------------------------------------------*/
4629
4630void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4631                                uint64_t *zSigPtr)
4632{
4633    int8_t shiftCount;
4634
4635    shiftCount = clz64(aSig);
4636    *zSigPtr = aSig<<shiftCount;
4637    *zExpPtr = 1 - shiftCount;
4638}
4639
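    /*
     * Worked example (added for exposition): aSig = 1 has clz64() == 63,
     * so the significand is normalized to 0x8000000000000000 (explicit
     * integer bit set) and the exponent compensates accordingly:
     * *zExpPtr = 1 - 63 = -62.
     */
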
4640/*----------------------------------------------------------------------------
4641| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4642| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4643| and returns the proper extended double-precision floating-point value
4644| corresponding to the abstract input.  Ordinarily, the abstract value is
4645| rounded and packed into the extended double-precision format, with the
4646| inexact exception raised if the abstract input cannot be represented
4647| exactly.  However, if the abstract value is too large, the overflow and
4648| inexact exceptions are raised and an infinity or maximal finite value is
4649| returned.  If the abstract value is too small, the input value is rounded to
4650| a subnormal number, and the underflow and inexact exceptions are raised if
4651| the abstract input cannot be represented exactly as a subnormal extended
4652| double-precision floating-point number.
4653|     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4654| the result is rounded to the same number of bits as single or double
4655| precision, respectively.  Otherwise, the result is rounded to the full
4656| precision of the extended double-precision format.
4657|     The input significand must be normalized or smaller.  If the input
4658| significand is not normalized, `zExp' must be 0; in that case, the result
4659| returned is a subnormal number, and it must not require rounding.  The
4660| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4661| Floating-Point Arithmetic.
4662*----------------------------------------------------------------------------*/
4663
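    /*
     * For exposition: with floatx80_precision_d, 53 significand bits of
     * the 64-bit zSig0 are kept, so roundMask = 0x7FF covers the low
     * 64 - 53 = 11 discarded bits and roundIncrement = 0x400 is half a
     * unit in the last place.  Likewise floatx80_precision_s keeps 24
     * bits: a 40-bit mask of 0xFFFFFFFFFF and an increment of 2**39.
     */
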
4664floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4665                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4666                              float_status *status)
4667{
4668    FloatRoundMode roundingMode;
4669    bool roundNearestEven, increment, isTiny;
4670    int64_t roundIncrement, roundMask, roundBits;
4671
4672    roundingMode = status->float_rounding_mode;
4673    roundNearestEven = ( roundingMode == float_round_nearest_even );
4674    switch (roundingPrecision) {
4675    case floatx80_precision_x:
4676        goto precision80;
4677    case floatx80_precision_d:
4678        roundIncrement = UINT64_C(0x0000000000000400);
4679        roundMask = UINT64_C(0x00000000000007FF);
4680        break;
4681    case floatx80_precision_s:
4682        roundIncrement = UINT64_C(0x0000008000000000);
4683        roundMask = UINT64_C(0x000000FFFFFFFFFF);
4684        break;
4685    default:
4686        g_assert_not_reached();
4687    }
4688    zSig0 |= ( zSig1 != 0 );
4689    switch (roundingMode) {
4690    case float_round_nearest_even:
4691    case float_round_ties_away:
4692        break;
4693    case float_round_to_zero:
4694        roundIncrement = 0;
4695        break;
4696    case float_round_up:
4697        roundIncrement = zSign ? 0 : roundMask;
4698        break;
4699    case float_round_down:
4700        roundIncrement = zSign ? roundMask : 0;
4701        break;
4702    default:
4703        abort();
4704    }
4705    roundBits = zSig0 & roundMask;
4706    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4707        if (    ( 0x7FFE < zExp )
4708             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4709           ) {
4710            goto overflow;
4711        }
4712        if ( zExp <= 0 ) {
4713            if (status->flush_to_zero) {
4714                float_raise(float_flag_output_denormal, status);
4715                return packFloatx80(zSign, 0, 0);
4716            }
4717            isTiny = status->tininess_before_rounding
4718                  || (zExp < 0 )
4719                  || (zSig0 <= zSig0 + roundIncrement);
4720            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4721            zExp = 0;
4722            roundBits = zSig0 & roundMask;
4723            if (isTiny && roundBits) {
4724                float_raise(float_flag_underflow, status);
4725            }
4726            if (roundBits) {
4727                float_raise(float_flag_inexact, status);
4728            }
4729            zSig0 += roundIncrement;
4730            if ( (int64_t) zSig0 < 0 ) zExp = 1;
4731            roundIncrement = roundMask + 1;
4732            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4733                roundMask |= roundIncrement;
4734            }
4735            zSig0 &= ~ roundMask;
4736            return packFloatx80( zSign, zExp, zSig0 );
4737        }
4738    }
4739    if (roundBits) {
4740        float_raise(float_flag_inexact, status);
4741    }
4742    zSig0 += roundIncrement;
4743    if ( zSig0 < roundIncrement ) {
4744        ++zExp;
4745        zSig0 = UINT64_C(0x8000000000000000);
4746    }
4747    roundIncrement = roundMask + 1;
4748    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4749        roundMask |= roundIncrement;
4750    }
4751    zSig0 &= ~ roundMask;
4752    if ( zSig0 == 0 ) zExp = 0;
4753    return packFloatx80( zSign, zExp, zSig0 );
4754 precision80:
4755    switch (roundingMode) {
4756    case float_round_nearest_even:
4757    case float_round_ties_away:
4758        increment = ((int64_t)zSig1 < 0);
4759        break;
4760    case float_round_to_zero:
4761        increment = 0;
4762        break;
4763    case float_round_up:
4764        increment = !zSign && zSig1;
4765        break;
4766    case float_round_down:
4767        increment = zSign && zSig1;
4768        break;
4769    default:
4770        abort();
4771    }
4772    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4773        if (    ( 0x7FFE < zExp )
4774             || (    ( zExp == 0x7FFE )
4775                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4776                  && increment
4777                )
4778           ) {
4779            roundMask = 0;
4780 overflow:
4781            float_raise(float_flag_overflow | float_flag_inexact, status);
4782            if (    ( roundingMode == float_round_to_zero )
4783                 || ( zSign && ( roundingMode == float_round_up ) )
4784                 || ( ! zSign && ( roundingMode == float_round_down ) )
4785               ) {
4786                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4787            }
4788            return packFloatx80(zSign,
4789                                floatx80_infinity_high,
4790                                floatx80_infinity_low);
4791        }
4792        if ( zExp <= 0 ) {
4793            isTiny = status->tininess_before_rounding
4794                  || (zExp < 0)
4795                  || !increment
4796                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4797            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4798            zExp = 0;
4799            if (isTiny && zSig1) {
4800                float_raise(float_flag_underflow, status);
4801            }
4802            if (zSig1) {
4803                float_raise(float_flag_inexact, status);
4804            }
4805            switch (roundingMode) {
4806            case float_round_nearest_even:
4807            case float_round_ties_away:
4808                increment = ((int64_t)zSig1 < 0);
4809                break;
4810            case float_round_to_zero:
4811                increment = 0;
4812                break;
4813            case float_round_up:
4814                increment = !zSign && zSig1;
4815                break;
4816            case float_round_down:
4817                increment = zSign && zSig1;
4818                break;
4819            default:
4820                abort();
4821            }
4822            if ( increment ) {
4823                ++zSig0;
4824                if (!(zSig1 << 1) && roundNearestEven) {
4825                    zSig0 &= ~1;
4826                }
4827                if ( (int64_t) zSig0 < 0 ) zExp = 1;
4828            }
4829            return packFloatx80( zSign, zExp, zSig0 );
4830        }
4831    }
4832    if (zSig1) {
4833        float_raise(float_flag_inexact, status);
4834    }
4835    if ( increment ) {
4836        ++zSig0;
4837        if ( zSig0 == 0 ) {
4838            ++zExp;
4839            zSig0 = UINT64_C(0x8000000000000000);
4840        }
4841        else {
4842            if (!(zSig1 << 1) && roundNearestEven) {
4843                zSig0 &= ~1;
4844            }
4845        }
4846    }
4847    else {
4848        if ( zSig0 == 0 ) zExp = 0;
4849    }
4850    return packFloatx80( zSign, zExp, zSig0 );
4851
4852}
4853
4854/*----------------------------------------------------------------------------
4855| Takes an abstract floating-point value having sign `zSign', exponent
4856| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4857| and returns the proper extended double-precision floating-point value
4858| corresponding to the abstract input.  This routine is just like
4859| `roundAndPackFloatx80' except that the input significand does not have to be
4860| normalized.
4861*----------------------------------------------------------------------------*/
4862
4863floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
4864                                       bool zSign, int32_t zExp,
4865                                       uint64_t zSig0, uint64_t zSig1,
4866                                       float_status *status)
4867{
4868    int8_t shiftCount;
4869
4870    if ( zSig0 == 0 ) {
4871        zSig0 = zSig1;
4872        zSig1 = 0;
4873        zExp -= 64;
4874    }
4875    shiftCount = clz64(zSig0);
4876    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4877    zExp -= shiftCount;
4878    return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4879                                zSig0, zSig1, status);
4880
4881}
4882
4883/*----------------------------------------------------------------------------
4884| Returns the binary exponential of the single-precision floating-point value
4885| `a'. The operation is performed according to the IEC/IEEE Standard for
4886| Binary Floating-Point Arithmetic.
4887|
4888| Uses the following identities:
4889|
4890| 1. -------------------------------------------------------------------------
4891|      x    x*ln(2)
4892|     2  = e
4893|
4894| 2. -------------------------------------------------------------------------
4895|                      2     3     4     5           n
4896|      x        x     x     x     x     x           x
4897|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
4898|               1!    2!    3!    4!    5!          n!
4899*----------------------------------------------------------------------------*/
4900
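    /*
     * For exposition: in the loop below, xnp tracks the successive
     * powers x**(i+1) of x = a * ln(2), so rp accumulates
     *
     *     1 + sum(i = 0..14) float32_exp2_coefficients[i] * x**(i+1)
     *
     * where coefficients[i] = 1/(i+1)!, i.e. the series for e**x above
     * truncated after its first 16 terms.
     */
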
4901static const float64 float32_exp2_coefficients[15] =
4902{
4903    const_float64( 0x3ff0000000000000ll ), /*  1 */
4904    const_float64( 0x3fe0000000000000ll ), /*  2 */
4905    const_float64( 0x3fc5555555555555ll ), /*  3 */
4906    const_float64( 0x3fa5555555555555ll ), /*  4 */
4907    const_float64( 0x3f81111111111111ll ), /*  5 */
4908    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
4909    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
4910    const_float64( 0x3efa01a01a01a01all ), /*  8 */
4911    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
4912    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
4913    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
4914    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
4915    const_float64( 0x3de6124613a86d09ll ), /* 13 */
4916    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
4917    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
4918};
4919
4920float32 float32_exp2(float32 a, float_status *status)
4921{
4922    FloatParts64 xp, xnp, tp, rp;
4923    int i;
4924
4925    float32_unpack_canonical(&xp, a, status);
4926    if (unlikely(xp.cls != float_class_normal)) {
4927        switch (xp.cls) {
4928        case float_class_snan:
4929        case float_class_qnan:
4930            parts_return_nan(&xp, status);
4931            return float32_round_pack_canonical(&xp, status);
4932        case float_class_inf:
4933            return xp.sign ? float32_zero : a;
4934        case float_class_zero:
4935            return float32_one;
4936        default:
4937            break;
4938        }
4939        g_assert_not_reached();
4940    }
4941
4942    float_raise(float_flag_inexact, status);
4943
4944    float64_unpack_canonical(&tp, float64_ln2, status);
4945    xp = *parts_mul(&xp, &tp, status);
4946    xnp = xp;
4947
4948    float64_unpack_canonical(&rp, float64_one, status);
4949    for (i = 0 ; i < 15 ; i++) {
4950        float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
4951        rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
4952        xnp = *parts_mul(&xnp, &xp, status);
4953    }
4954
4955    return float32_round_pack_canonical(&rp, status);
4956}
4957
4958/*----------------------------------------------------------------------------
4959| Rounds the extended double-precision floating-point value `a'
4960| to the precision provided by floatx80_rounding_precision and returns the
4961| result as an extended double-precision floating-point value.
4962| The operation is performed according to the IEC/IEEE Standard for Binary
4963| Floating-Point Arithmetic.
4964*----------------------------------------------------------------------------*/
4965
4966floatx80 floatx80_round(floatx80 a, float_status *status)
4967{
4968    FloatParts128 p;
4969
4970    if (!floatx80_unpack_canonical(&p, a, status)) {
4971        return floatx80_default_nan(status);
4972    }
4973    return floatx80_round_pack_canonical(&p, status);
4974}
4975
4976static void __attribute__((constructor)) softfloat_init(void)
4977{
4978    union_float64 ua, ub, uc, ur;
4979
4980    if (QEMU_NO_HARDFLOAT) {
4981        return;
4982    }
4983    /*
4984     * Test that the host's FMA is not obviously broken. For example,
4985     * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
4986     *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
4987     */
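        /*
         * Spelled out (for exposition): a = 0x1.0000000000001p-1021,
         * b = 0x1p-53 and c = 0x1p-1021, so a*b + c is
         * 2**-1021 * (1 + 2**-53 + 2**-105).  A correct fma() must round
         * this up (the 2**-105 term breaks the nearest-even tie) to
         * 2**-1021 * (1 + 2**-52), recovering the bit pattern of a.
         */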
4988    ua.s = 0x0020000000000001ULL;
4989    ub.s = 0x3ca0000000000000ULL;
4990    uc.s = 0x0020000000000000ULL;
4991    ur.h = fma(ua.h, ub.h, uc.h);
4992    if (ur.s != 0x0020000000000001ULL) {
4993        force_soft_fma = true;
4994    }
4995}
4996