/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the SoftFloat-2a license unless specifically
 * indicated otherwise.
 */

/*
===============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/

/* BSD licensing:
 * Copyright (c) 2006, Fabrice Bellard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

/* softfloat (and in particular the code in softfloat-specialize.c.inc) is
 * target-dependent and needs the TARGET_* macros.
 */
#include "qemu/osdep.h"
#include <math.h>
#include "qemu/bitops.h"
#include "fpu/softfloat.h"

/* We only need stdlib for abort() */

/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations.  (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
#include "fpu/softfloat-macros.h"

/*
 * Hardfloat
 *
 * Fast emulation of guest FP instructions is challenging for two reasons.
 * First, FP instruction semantics are similar but not identical, particularly
 * when handling NaNs. Second, emulating at reasonable speed the guest FP
 * exception flags is not trivial: reading the host's flags register with a
 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 * and trapping on every FP exception is neither fast nor pleasant to work with.
 *
 * We address these challenges by leveraging the host FPU for a subset of the
 * operations. To do this we expand on the idea presented in this paper:
 *
 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 *
 * The idea is thus to leverage the host FPU to (1) compute FP operations
 * and (2) identify whether FP exceptions occurred while avoiding
 * expensive exception flag register accesses.
 *
 * An important optimization shown in the paper is that given that exception
 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 * This is particularly useful for the inexact flag, which is very frequently
 * raised in floating-point workloads.
 *
 * We optimize the code further by deferring to soft-fp whenever FP exception
 * detection might get hairy. Two examples: (1) when at least one operand is
 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 * and the result is < the minimum normal.
 */
#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
    static inline void name(soft_t *a, float_status *s)                 \
    {                                                                   \
        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
                                     soft_t ## _is_neg(*a));            \
            float_raise(float_flag_input_denormal, s);                  \
        }                                                               \
    }

GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
#undef GEN_INPUT_FLUSH__NOCHECK

#define GEN_INPUT_FLUSH1(name, soft_t)                  \
    static inline void name(soft_t *a, float_status *s) \
    {                                                   \
        if (likely(!s->flush_inputs_to_zero)) {         \
            return;                                     \
        }                                               \
        soft_t ## _input_flush__nocheck(a, s);          \
    }

GEN_INPUT_FLUSH1(float32_input_flush1, float32)
GEN_INPUT_FLUSH1(float64_input_flush1, float64)
#undef GEN_INPUT_FLUSH1

#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
    }

GEN_INPUT_FLUSH2(float32_input_flush2, float32)
GEN_INPUT_FLUSH2(float64_input_flush2, float64)
#undef GEN_INPUT_FLUSH2

#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
    {                                                                   \
        if (likely(!s->flush_inputs_to_zero)) {                         \
            return;                                                     \
        }                                                               \
        soft_t ## _input_flush__nocheck(a, s);                          \
        soft_t ## _input_flush__nocheck(b, s);                          \
        soft_t ## _input_flush__nocheck(c, s);                          \
    }

GEN_INPUT_FLUSH3(float32_input_flush3, float32)
GEN_INPUT_FLUSH3(float64_input_flush3, float64)
#undef GEN_INPUT_FLUSH3

/*
 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 * hardfloat functions. Each combination of number of inputs and float size
 * gets its own value.
 */
#if defined(__x86_64__)
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F32_USE_FP 0
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F32_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F32_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif

/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif

/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif

static inline bool can_use_fpu(const float_status *s)
{
    if (QEMU_NO_HARDFLOAT) {
        return false;
    }
    return likely(s->float_exception_flags & float_flag_inexact &&
                  s->float_rounding_mode == float_round_nearest_even);
}

/*
 * Hardfloat generation functions. Each operation can have two flavors:
 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 * most condition checks, or native ones (e.g. fpclassify).
 *
 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 * compiler to propagate constants and inline everything into the callers.
 *
 * We only generate functions for operations with two inputs, since only
 * these are common enough to justify consolidating them into common code.
 */

typedef union {
    float32 s;
    float h;
} union_float32;

typedef union {
    float64 s;
    double h;
} union_float64;

typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);

typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
typedef float   (*hard_f32_op2_fn)(float a, float b);
typedef double  (*hard_f64_op2_fn)(double a, double b);

/* 2-input is-zero-or-normal */
static inline bool f32_is_zon2(union_float32 a, union_float32 b)
{
    if (QEMU_HARDFLOAT_2F32_USE_FP) {
        /*
         * Not using a temp variable for consecutive fpclassify calls ends up
         * generating faster code.
         */
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s);
}

static inline bool f64_is_zon2(union_float64 a, union_float64 b)
{
    if (QEMU_HARDFLOAT_2F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s);
}

/* 3-input is-zero-or-normal */
static inline
bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
{
    if (QEMU_HARDFLOAT_3F32_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float32_is_zero_or_normal(a.s) &&
           float32_is_zero_or_normal(b.s) &&
           float32_is_zero_or_normal(c.s);
}

static inline
bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
{
    if (QEMU_HARDFLOAT_3F64_USE_FP) {
        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
    }
    return float64_is_zero_or_normal(a.s) &&
           float64_is_zero_or_normal(b.s) &&
           float64_is_zero_or_normal(c.s);
}

static inline bool f32_is_inf(union_float32 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float32_is_infinity(a.s);
}

static inline bool f64_is_inf(union_float64 a)
{
    if (QEMU_HARDFLOAT_USE_ISINF) {
        return isinf(a.h);
    }
    return float64_is_infinity(a.s);
}

static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
             f32_check_fn pre, f32_check_fn post)
{
    union_float32 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float32_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f32_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}

static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
             f64_check_fn pre, f64_check_fn post)
{
    union_float64 ua, ub, ur;

    ua.s = xa;
    ub.s = xb;

    if (unlikely(!can_use_fpu(s))) {
        goto soft;
    }

    float64_input_flush2(&ua.s, &ub.s, s);
    if (unlikely(!pre(ua, ub))) {
        goto soft;
    }

    ur.h = hard(ua.h, ub.h);
    if (unlikely(f64_is_inf(ur))) {
        float_raise(float_flag_overflow, s);
    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
        goto soft;
    }
    return ur.s;

 soft:
    return soft(ua.s, ub.s, s);
}
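
/*
 * Usage sketch (not compiled): how a hardfloat wrapper can be composed
 * from float32_gen2.  The names below (hard_f32_mul_sketch,
 * soft_f32_mul_sketch, f32_mul_post_sketch, float32_mul_sketch) are
 * hypothetical illustrations, not definitions made by this file; the
 * real wrappers appear later, built in essentially this shape.
 */
#if 0
/* Soft-fp fallback, with the soft_f32_op2_fn signature, defined elsewhere. */
static float32 soft_f32_mul_sketch(float32 a, float32 b, float_status *s);

/* The host FPU computes the product directly. */
static inline float hard_f32_mul_sketch(float a, float b)
{
    return a * b;
}

/* A tiny product is exact only when one input is zero; otherwise go soft. */
static bool f32_mul_post_sketch(union_float32 a, union_float32 b)
{
    return !(float32_is_zero(a.s) || float32_is_zero(b.s));
}

static float32 float32_mul_sketch(float32 a, float32 b, float_status *s)
{
    return float32_gen2(a, b, s, hard_f32_mul_sketch, soft_f32_mul_sketch,
                        f32_is_zon2, f32_mul_post_sketch);
}
#endif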

/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;

#define float_cmask(bit)  (1u << (bit))

enum {
    float_cmask_zero    = float_cmask(float_class_zero),
    float_cmask_normal  = float_cmask(float_class_normal),
    float_cmask_inf     = float_cmask(float_class_inf),
    float_cmask_qnan    = float_cmask(float_class_qnan),
    float_cmask_snan    = float_cmask(float_class_snan),

    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
};
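
/*
 * Illustrative use of the class masks (a sketch; the real tests live in
 * softfloat-parts.c.inc): ORing the per-operand masks lets several class
 * combinations be checked with one comparison, e.g.
 *
 *     int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 *     if (unlikely(ab_mask & float_cmask_anynan)) {
 *         ... NaN propagation ...
 *     } else if (ab_mask == float_cmask_infzero) {
 *         ... one operand is zero and the other infinite ...
 *     }
 */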

/* Flags for parts_minmax. */
enum {
    /* Set for minimum; clear for maximum. */
    minmax_ismin = 1,
    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
    minmax_isnum = 2,
    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
    minmax_ismag = 4,
    /*
     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
     * operations.
     */
    minmax_isnumber = 8,
};
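
/*
 * For example (illustrative): IEEE 754-2008 minNum() corresponds to
 * (minmax_ismin | minmax_isnum), maxNumMag() to
 * (minmax_isnum | minmax_ismag), and IEEE 754-2019 minimumNumber() to
 * (minmax_ismin | minmax_isnumber).
 */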

/* Simple helpers for checking if, or what kind of, NaN we have */
static inline __attribute__((unused)) bool is_nan(FloatClass c)
{
    return unlikely(c >= float_class_qnan);
}

static inline __attribute__((unused)) bool is_snan(FloatClass c)
{
    return c == float_class_snan;
}

static inline __attribute__((unused)) bool is_qnan(FloatClass c)
{
    return c == float_class_qnan;
}

/*
 * Structure holding all of the decomposed parts of a float.
 * The exponent is unbiased and the fraction is normalized.
 *
 * The fraction words are stored in big-endian word ordering,
 * so that truncation from a larger format to a smaller format
 * can be done simply by ignoring subsequent elements.
 */

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    union {
        /* Routines that know the structure may reference the singular name. */
        uint64_t frac;
        /*
         * Routines expanded with multiple structures reference "hi" and "lo"
         * depending on the operation.  In FloatParts64, "hi" and "lo" are
         * both the same word and aliased here.
         */
        uint64_t frac_hi;
        uint64_t frac_lo;
    };
} FloatParts64;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_lo;
} FloatParts128;

typedef struct {
    FloatClass cls;
    bool sign;
    int32_t exp;
    uint64_t frac_hi;
    uint64_t frac_hm;  /* high-middle */
    uint64_t frac_lm;  /* low-middle */
    uint64_t frac_lo;
} FloatParts256;

/* These apply to the most significant word of each FloatPartsN. */
#define DECOMPOSED_BINARY_POINT    63
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
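
/*
 * For example, after canonicalization the value 1.0 has, in any format,
 * cls == float_class_normal, exp == 0 (unbiased), and a fraction whose
 * most significant word is exactly DECOMPOSED_IMPLICIT_BIT: the implicit
 * integer bit sits at the binary point, bit 63.
 */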

/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   round_mask: bits below lsb which must be rounded
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    bool arm_althp;
    uint64_t round_mask;
} FloatFmt;

/* Expand fields based on the size of exponent and fraction */
#define FLOAT_PARAMS_(E)                                \
    .exp_size       = E,                                \
    .exp_bias       = ((1 << E) - 1) >> 1,              \
    .exp_max        = (1 << E) - 1

#define FLOAT_PARAMS(E, F)                              \
    FLOAT_PARAMS_(E),                                   \
    .frac_size      = F,                                \
    .frac_shift     = (-F - 1) & 63,                    \
    .round_mask     = (1ull << ((-F - 1) & 63)) - 1
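
/*
 * Worked example: FLOAT_PARAMS(8, 23), used for float32 below, yields
 * exp_bias = 127, exp_max = 255, frac_shift = (-24) & 63 = 40 and
 * round_mask = (1ull << 40) - 1.  The 23-bit fraction is left-justified
 * against DECOMPOSED_BINARY_POINT, so the 40 bits that fall below the
 * format's lsb are exactly the ones subject to rounding.
 */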

static const FloatFmt float16_params = {
    FLOAT_PARAMS(5, 10)
};

static const FloatFmt float16_params_ahp = {
    FLOAT_PARAMS(5, 10),
    .arm_althp = true
};

static const FloatFmt bfloat16_params = {
    FLOAT_PARAMS(8, 7)
};

static const FloatFmt float32_params = {
    FLOAT_PARAMS(8, 23)
};

static const FloatFmt float64_params = {
    FLOAT_PARAMS(11, 52)
};

static const FloatFmt float128_params = {
    FLOAT_PARAMS(15, 112)
};

#define FLOATX80_PARAMS(R)              \
    FLOAT_PARAMS_(15),                  \
    .frac_size = R == 64 ? 63 : R,      \
    .frac_shift = 0,                    \
    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1

static const FloatFmt floatx80_params[3] = {
    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
    [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
};

/* Unpack a float to parts, but do not canonicalize.  */
static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;

    *r = (FloatParts64) {
        .cls = float_class_unclassified,
        .sign = extract64(raw, f_size + e_size, 1),
        .exp = extract64(raw, f_size, e_size),
        .frac = extract64(raw, 0, f_size)
    };
}
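
/*
 * For example, unpacking the float32 value 0x3fc00000 (1.5f) with
 * float32_params yields sign = 0, exp = 0x7f and frac = 0x400000;
 * canonicalization below then removes the bias and normalizes the
 * fraction.
 */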

static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
{
    unpack_raw64(p, &float16_params, f);
}

static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
{
    unpack_raw64(p, &bfloat16_params, f);
}

static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
{
    unpack_raw64(p, &float32_params, f);
}

static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
{
    unpack_raw64(p, &float64_params, f);
}

static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
{
    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract32(f.high, 15, 1),
        .exp = extract32(f.high, 0, 15),
        .frac_hi = f.low
    };
}

static void float128_unpack_raw(FloatParts128 *p, float128 f)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;

    *p = (FloatParts128) {
        .cls = float_class_unclassified,
        .sign = extract64(f.high, f_size + e_size, 1),
        .exp = extract64(f.high, f_size, e_size),
        .frac_hi = extract64(f.high, 0, f_size),
        .frac_lo = f.low,
    };
}

/* Pack a float from parts, but do not canonicalize.  */
static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
{
    const int f_size = fmt->frac_size;
    const int e_size = fmt->exp_size;
    uint64_t ret;

    ret = (uint64_t)p->sign << (f_size + e_size);
    ret = deposit64(ret, f_size, e_size, p->exp);
    ret = deposit64(ret, 0, f_size, p->frac);
    return ret;
}

static inline float16 float16_pack_raw(const FloatParts64 *p)
{
    return make_float16(pack_raw64(p, &float16_params));
}

static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
{
    return pack_raw64(p, &bfloat16_params);
}

static inline float32 float32_pack_raw(const FloatParts64 *p)
{
    return make_float32(pack_raw64(p, &float32_params));
}

static inline float64 float64_pack_raw(const FloatParts64 *p)
{
    return make_float64(pack_raw64(p, &float64_params));
}

static float128 float128_pack_raw(const FloatParts128 *p)
{
    const int f_size = float128_params.frac_size - 64;
    const int e_size = float128_params.exp_size;
    uint64_t hi;

    hi = (uint64_t)p->sign << (f_size + e_size);
    hi = deposit64(hi, f_size, e_size, p->exp);
    hi = deposit64(hi, 0, f_size, p->frac_hi);
    return make_float128(hi, p->frac_lo);
}

/*----------------------------------------------------------------------------
| Functions and definitions to determine:  (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output.  These details are target-
| specific.
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.c.inc"

#define PARTS_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME)

#define PARTS_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: parts64_##NAME, \
                  FloatParts128 *: parts128_##NAME, \
                  FloatParts256 *: parts256_##NAME)
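
/*
 * For example, given FloatParts64 *p, parts_default_nan(p, s) below
 * expands to parts64_default_nan(p, s): _Generic resolves the dispatch
 * entirely at compile time, so there is no run-time indirection.
 */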

#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)

static void parts64_return_nan(FloatParts64 *a, float_status *s);
static void parts128_return_nan(FloatParts128 *a, float_status *s);

#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)

static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
                                      float_status *s);
static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
                                        float_status *s);

#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)

static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
                                             FloatParts64 *c, float_status *s,
                                             int ab_mask, int abc_mask);
static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
                                               FloatParts128 *b,
                                               FloatParts128 *c,
                                               float_status *s,
                                               int ab_mask, int abc_mask);

#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)

static void parts64_canonicalize(FloatParts64 *p, float_status *status,
                                 const FloatFmt *fmt);
static void parts128_canonicalize(FloatParts128 *p, float_status *status,
                                  const FloatFmt *fmt);

#define parts_canonicalize(A, S, F) \
    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)

static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
                                   const FloatFmt *fmt);
static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
                                    const FloatFmt *fmt);

#define parts_uncanon_normal(A, S, F) \
    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)

static void parts64_uncanon(FloatParts64 *p, float_status *status,
                            const FloatFmt *fmt);
static void parts128_uncanon(FloatParts128 *p, float_status *status,
                             const FloatFmt *fmt);

#define parts_uncanon(A, S, F) \
    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)

static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_add_normal(A, B) \
    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)

static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);

#define parts_sub_normal(A, B) \
    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)

static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, bool subtract);
static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool subtract);

#define parts_addsub(A, B, S, Z) \
    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)

static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_mul(A, B, S) \
    PARTS_GENERIC_64_128(mul, A)(A, B, S)

static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
                                    FloatParts64 *c, int flags,
                                    float_status *s);
static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
                                      FloatParts128 *c, int flags,
                                      float_status *s);

#define parts_muladd(A, B, C, Z, S) \
    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)

static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
                                 float_status *s);
static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
                                   float_status *s);

#define parts_div(A, B, S) \
    PARTS_GENERIC_64_128(div, A)(A, B, S)

static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
                                    uint64_t *mod_quot, float_status *s);
static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
                                      uint64_t *mod_quot, float_status *s);

#define parts_modrem(A, B, Q, S) \
    PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)

static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_sqrt(A, S, F) \
    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)

static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
                                        int scale, int frac_size);
static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
                                         int scale, int frac_size);

#define parts_round_to_int_normal(A, R, C, F) \
    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)

static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt);
static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
                                  int scale, float_status *s,
                                  const FloatFmt *fmt);

#define parts_round_to_int(A, R, C, S, F) \
    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)

static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);
static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s);

#define parts_float_to_sint(P, R, Z, MN, MX, S) \
    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)

static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
                                      int scale, uint64_t max,
                                      float_status *s);
static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
                                       int scale, uint64_t max,
                                       float_status *s);

#define parts_float_to_uint(P, R, Z, M, S) \
    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)

static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
                                  int scale, float_status *s);
static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
                                   int scale, float_status *s);

#define parts_sint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)

static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
                                  int scale, float_status *s);
static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
                                   int scale, float_status *s);

#define parts_uint_to_float(P, I, Z, S) \
    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)

static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
                                    float_status *s, int flags);
static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, int flags);

#define parts_minmax(A, B, S, F) \
    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)

static FloatRelation parts64_compare(FloatParts64 *a, FloatParts64 *b,
                                     float_status *s, bool q);
static FloatRelation parts128_compare(FloatParts128 *a, FloatParts128 *b,
                                      float_status *s, bool q);

#define parts_compare(A, B, S, Q) \
    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)

static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);

#define parts_scalbn(A, N, S) \
    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)

static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);

#define parts_log2(A, S, F) \
    PARTS_GENERIC_64_128(log2, A)(A, S, F)

/*
 * Helper functions for softfloat-parts.c.inc, per-size operations.
 */

#define FRAC_GENERIC_64_128(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME)

#define FRAC_GENERIC_64_128_256(NAME, P) \
    _Generic((P), FloatParts64 *: frac64_##NAME, \
                  FloatParts128 *: frac128_##NAME, \
                  FloatParts256 *: frac256_##NAME)

static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return uadd64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)

static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
{
    return uadd64_overflow(a->frac, c, &r->frac);
}

static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
{
    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
}

#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)

static void frac64_allones(FloatParts64 *a)
{
    a->frac = -1;
}

static void frac128_allones(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = -1;
}

#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)

static FloatRelation frac64_cmp(FloatParts64 *a, FloatParts64 *b)
{
    return (a->frac == b->frac ? float_relation_equal
            : a->frac < b->frac ? float_relation_less
            : float_relation_greater);
}

static FloatRelation frac128_cmp(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t ta = a->frac_hi, tb = b->frac_hi;
    if (ta == tb) {
        ta = a->frac_lo, tb = b->frac_lo;
        if (ta == tb) {
            return float_relation_equal;
        }
    }
    return ta < tb ? float_relation_less : float_relation_greater;
}

#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)

static void frac64_clear(FloatParts64 *a)
{
    a->frac = 0;
}

static void frac128_clear(FloatParts128 *a)
{
    a->frac_hi = a->frac_lo = 0;
}

#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)

static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
{
    uint64_t n1, n0, r, q;
    bool ret;

    /*
     * We want a 2*N / N-bit division to produce exactly an N-bit
     * result, so that we do not lose any precision and so that we
     * do not have to renormalize afterward.  If A.frac < B.frac,
     * then division would produce an (N-1)-bit result; shift A left
     * by one to produce an N-bit result, and return true to
     * decrement the exponent to match.
     *
     * The udiv_qrnnd algorithm that we're using requires normalization,
     * i.e. the msb of the denominator must be set, which is already true.
     */
    ret = a->frac < b->frac;
    if (ret) {
        n0 = a->frac;
        n1 = 0;
    } else {
        n0 = a->frac >> 1;
        n1 = a->frac << 63;
    }
    q = udiv_qrnnd(&r, n0, n1, b->frac);

    /* Set lsb if there is a remainder, to set inexact. */
    a->frac = q | (r != 0);

    return ret;
}

static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
{
    uint64_t q0, q1, a0, a1, b0, b1;
    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
    bool ret = false;

    a0 = a->frac_hi, a1 = a->frac_lo;
    b0 = b->frac_hi, b1 = b->frac_lo;

    ret = lt128(a0, a1, b0, b1);
    if (!ret) {
        a1 = shr_double(a0, a1, 1);
        a0 = a0 >> 1;
    }

    /* Use 128/64 -> 64 division as estimate for 192/128 -> 128 division. */
    q0 = estimateDiv128To64(a0, a1, b0);

    /*
     * Estimate is high because B1 was not included (unless B1 == 0).
     * Reduce quotient and increase remainder until remainder is non-negative.
     * This loop will execute 0 to 2 times.
     */
    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
    while (r0 != 0) {
        q0--;
        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
    }

    /* Repeat using the remainder, producing a second word of quotient. */
    q1 = estimateDiv128To64(r1, r2, b0);
    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
    while (r1 != 0) {
        q1--;
        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
    }

    /* Any remainder indicates inexact; set sticky bit. */
    q1 |= (r2 | r3) != 0;

    a->frac_hi = q0;
    a->frac_lo = q1;
    return ret;
}

#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)

static bool frac64_eqz(FloatParts64 *a)
{
    return a->frac == 0;
}

static bool frac128_eqz(FloatParts128 *a)
{
    return (a->frac_hi | a->frac_lo) == 0;
}

#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)

static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
{
    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
}

static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
{
    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
}

#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)

static void frac64_neg(FloatParts64 *a)
{
    a->frac = -a->frac;
}

static void frac128_neg(FloatParts128 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

static void frac256_neg(FloatParts256 *a)
{
    bool c = 0;
    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
}

#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)

static int frac64_normalize(FloatParts64 *a)
{
    if (a->frac) {
        int shift = clz64(a->frac);
        a->frac <<= shift;
        return shift;
    }
    return 64;
}

static int frac128_normalize(FloatParts128 *a)
{
    if (a->frac_hi) {
        int shl = clz64(a->frac_hi);
        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
        a->frac_lo <<= shl;
        return shl;
    } else if (a->frac_lo) {
        int shl = clz64(a->frac_lo);
        a->frac_hi = a->frac_lo << shl;
        a->frac_lo = 0;
        return shl + 64;
    }
    return 128;
}

static int frac256_normalize(FloatParts256 *a)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    int ret, shl;

    if (likely(a0)) {
        shl = clz64(a0);
        if (shl == 0) {
            return 0;
        }
        ret = shl;
    } else {
        if (a1) {
            ret = 64;
            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
        } else if (a2) {
            ret = 128;
            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
        } else if (a3) {
            ret = 192;
            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
        } else {
            ret = 256;
            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
            goto done;
        }
        shl = clz64(a0);
        if (shl == 0) {
            goto done;
        }
        ret += shl;
    }

    a0 = shl_double(a0, a1, shl);
    a1 = shl_double(a1, a2, shl);
    a2 = shl_double(a2, a3, shl);
    a3 <<= shl;

 done:
    a->frac_hi = a0;
    a->frac_hm = a1;
    a->frac_lm = a2;
    a->frac_lo = a3;
    return ret;
}

#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
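
/*
 * Worked example: a FloatParts64 with frac = 0x0000000000000001 is
 * normalized by shifting left clz64(1) = 63 places, leaving frac =
 * 0x8000000000000000, and frac64_normalize returns 63 so the caller can
 * adjust the exponent; an all-zero fraction returns the full width, 64.
 */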

static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
{
    uint64_t a0, a1, b0, t0, t1, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac;
    a1 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        a0 >>= 1;
        exp_diff = 0;
    }

    b0 = b->frac;
    quot = q = b0 <= a0;
    if (q) {
        a0 -= b0;
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? q - 2 : 0;
        mul64To128(b0, q, &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(a0, a1, 62, &a0, &a1);
        exp_diff -= 62;
        quot = (quot << 62) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
        sub128(a0, a1, t0, t1, &a0, &a1);
        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
        while (le128(t0, t1, a0, a1)) {
            ++q;
            sub128(a0, a1, t0, t1, &a0, &a1);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub128(t0, t1, a0, a1, &t0, &t1);
        if (lt128(t0, t1, a0, a1) ||
            (eq128(t0, t1, a0, a1) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift128Left(a0, a1, shift, &a0, &a1);
    } else if (likely(a1)) {
        shift = clz64(a1);
        a0 = a1 << shift;
        a1 = 0;
        shift += 64;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac = a0 | (a1 != 0);
}

static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
                           uint64_t *mod_quot)
{
    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
    int exp_diff = a->exp - b->exp;
    int shift;

    a0 = a->frac_hi;
    a1 = a->frac_lo;
    a2 = 0;

    if (exp_diff < -1) {
        if (mod_quot) {
            *mod_quot = 0;
        }
        return;
    }
    if (exp_diff == -1) {
        shift128Right(a0, a1, 1, &a0, &a1);
        exp_diff = 0;
    }

    b0 = b->frac_hi;
    b1 = b->frac_lo;

    quot = q = le128(b0, b1, a0, a1);
    if (q) {
        sub128(a0, a1, b0, b1, &a0, &a1);
    }

    exp_diff -= 64;
    while (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? q - 4 : 0;
        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
        exp_diff -= 61;
        quot = (quot << 61) + q;
    }

    exp_diff += 64;
    if (exp_diff > 0) {
        q = estimateDiv128To64(a0, a1, b0);
        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
        while (le192(t0, t1, t2, a0, a1, a2)) {
            ++q;
            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
        }
        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
    } else {
        t0 = b0;
        t1 = b1;
        t2 = 0;
    }

    if (mod_quot) {
        *mod_quot = quot;
    } else {
        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
        if (lt192(t0, t1, t2, a0, a1, a2) ||
            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
            a0 = t0;
            a1 = t1;
            a2 = t2;
            a->sign = !a->sign;
        }
    }

    if (likely(a0)) {
        shift = clz64(a0);
        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
    } else if (likely(a1)) {
        shift = clz64(a1);
        shortShift128Left(a1, a2, shift, &a0, &a1);
        a2 = 0;
        shift += 64;
    } else if (likely(a2)) {
        shift = clz64(a2);
        a0 = a2 << shift;
        a1 = a2 = 0;
        shift += 128;
    } else {
        a->cls = float_class_zero;
        return;
    }

    a->exp = b->exp + exp_diff - shift;
    a->frac_hi = a0;
    a->frac_lo = a1 | (a2 != 0);
}

#define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)

static void frac64_shl(FloatParts64 *a, int c)
{
    a->frac <<= c;
}

static void frac128_shl(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a0 = a1, a1 = 0;
    }

    c &= 63;
    if (c) {
        a0 = shl_double(a0, a1, c);
        a1 = a1 << c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)

static void frac64_shr(FloatParts64 *a, int c)
{
    a->frac >>= c;
}

static void frac128_shr(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;

    if (c & 64) {
        a1 = a0, a0 = 0;
    }

    c &= 63;
    if (c) {
        a1 = shr_double(a0, a1, c);
        a0 = a0 >> c;
    }

    a->frac_hi = a0;
    a->frac_lo = a1;
}

#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)

static void frac64_shrjam(FloatParts64 *a, int c)
{
    uint64_t a0 = a->frac;

    if (likely(c != 0)) {
        if (likely(c < 64)) {
            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
        } else {
            a0 = a0 != 0;
        }
        a->frac = a0;
    }
}

static void frac128_shrjam(FloatParts128 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 128)) {
        sticky = a1;
        a1 = a0;
        a0 = 0;
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1;
        a0 = a1 = 0;
        goto done;
    }

    sticky |= shr_double(a1, 0, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a1 | (sticky != 0);
    a->frac_hi = a0;
}

static void frac256_shrjam(FloatParts256 *a, int c)
{
    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
    uint64_t sticky = 0;

    if (unlikely(c == 0)) {
        return;
    } else if (likely(c < 64)) {
        /* nothing */
    } else if (likely(c < 256)) {
        if (unlikely(c & 128)) {
            sticky |= a2 | a3;
            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
        }
        if (unlikely(c & 64)) {
            sticky |= a3;
            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
        }
        c &= 63;
        if (c == 0) {
            goto done;
        }
    } else {
        sticky = a0 | a1 | a2 | a3;
        a0 = a1 = a2 = a3 = 0;
        goto done;
    }

    sticky |= shr_double(a3, 0, c);
    a3 = shr_double(a2, a3, c);
    a2 = shr_double(a1, a2, c);
    a1 = shr_double(a0, a1, c);
    a0 = a0 >> c;

 done:
    a->frac_lo = a3 | (sticky != 0);
    a->frac_lm = a2;
    a->frac_hm = a1;
    a->frac_hi = a0;
}

#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
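
/*
 * Worked example: frac64_shrjam with frac = 0x8000000000000001 and c = 4
 * produces 0x0800000000000001.  The low set bit shifted out is "jammed"
 * back into the lsb as a sticky bit, so later rounding still sees that
 * the value was inexact.
 */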

static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
{
    return usub64_overflow(a->frac, b->frac, &r->frac);
}

static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
{
    bool c = 0;
    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
    return c;
}

#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)

static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
{
    r->frac = a->frac_hi | (a->frac_lo != 0);
}

static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
}

#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)

static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
{
    r->frac_hi = a->frac;
    r->frac_lo = 0;
}

static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
{
    r->frac_hi = a->frac_hi;
    r->frac_hm = a->frac_lo;
    r->frac_lm = 0;
    r->frac_lo = 0;
}

#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)

/*
 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
 * and thus MIT licensed.
1575 */
1576static const uint16_t rsqrt_tab[128] = {
1577    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1578    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1579    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1580    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1581    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1582    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1583    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1584    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1585    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1586    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1587    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1588    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1589    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1590    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1591    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1592    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1593};
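
/*
 * (Sketch of the intended use, assuming the musl-style algorithm the
 * table comes from: the low bit of the exponent and the top six fraction
 * bits select a ~16-bit initial estimate of 1/sqrt(m), and each
 * Newton-Raphson step r' = r * (3 - m * r * r) / 2 then roughly doubles
 * the number of correct bits.  The actual iteration is in the parts_sqrt
 * code pulled in by the includes below.)
 */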
1594
1595#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1596#define FloatPartsN    glue(FloatParts,N)
1597#define FloatPartsW    glue(FloatParts,W)
1598
1599#define N 64
1600#define W 128
1601
1602#include "softfloat-parts-addsub.c.inc"
1603#include "softfloat-parts.c.inc"
1604
1605#undef  N
1606#undef  W
1607#define N 128
1608#define W 256
1609
1610#include "softfloat-parts-addsub.c.inc"
1611#include "softfloat-parts.c.inc"
1612
1613#undef  N
1614#undef  W
1615#define N            256
1616
1617#include "softfloat-parts-addsub.c.inc"
1618
1619#undef  N
1620#undef  W
1621#undef  partsN
1622#undef  FloatPartsN
1623#undef  FloatPartsW
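
/*
 * Illustration of the instantiation scheme above: each pair of includes
 * re-expands the same template at one width.  With N=64 and W=128, for
 * example, the glue() macros rewrite
 *     partsN(add_normal)  ->  parts64_add_normal
 *     FloatPartsN         ->  FloatParts64
 *     FloatPartsW         ->  FloatParts128
 * The final N=256 inclusion of softfloat-parts-addsub.c.inc alone
 * provides the wide add/sub helpers that the 128-bit operations use for
 * their double-width intermediates.
 */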
1624
1625/*
1626 * Pack/unpack routines with a specific FloatFmt.
1627 */
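
/*
 * Every operation below follows the same three-step shape: a
 * <fmt>_unpack_canonical() helper converts the input to the common
 * FloatParts representation (flushing input denormals as the status
 * demands), a parts_*() routine performs the arithmetic, and a
 * <fmt>_round_pack_canonical() helper rounds and repacks the result.
 */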
1628
1629static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1630                                      float_status *s, const FloatFmt *params)
1631{
1632    float16_unpack_raw(p, f);
1633    parts_canonicalize(p, s, params);
1634}
1635
1636static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1637                                     float_status *s)
1638{
1639    float16a_unpack_canonical(p, f, s, &float16_params);
1640}
1641
1642static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1643                                      float_status *s)
1644{
1645    bfloat16_unpack_raw(p, f);
1646    parts_canonicalize(p, s, &bfloat16_params);
1647}
1648
1649static float16 float16a_round_pack_canonical(FloatParts64 *p,
1650                                             float_status *s,
1651                                             const FloatFmt *params)
1652{
1653    parts_uncanon(p, s, params);
1654    return float16_pack_raw(p);
1655}
1656
1657static float16 float16_round_pack_canonical(FloatParts64 *p,
1658                                            float_status *s)
1659{
1660    return float16a_round_pack_canonical(p, s, &float16_params);
1661}
1662
1663static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1664                                              float_status *s)
1665{
1666    parts_uncanon(p, s, &bfloat16_params);
1667    return bfloat16_pack_raw(p);
1668}
1669
1670static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1671                                     float_status *s)
1672{
1673    float32_unpack_raw(p, f);
1674    parts_canonicalize(p, s, &float32_params);
1675}
1676
1677static float32 float32_round_pack_canonical(FloatParts64 *p,
1678                                            float_status *s)
1679{
1680    parts_uncanon(p, s, &float32_params);
1681    return float32_pack_raw(p);
1682}
1683
1684static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1685                                     float_status *s)
1686{
1687    float64_unpack_raw(p, f);
1688    parts_canonicalize(p, s, &float64_params);
1689}
1690
1691static float64 float64_round_pack_canonical(FloatParts64 *p,
1692                                            float_status *s)
1693{
1694    parts_uncanon(p, s, &float64_params);
1695    return float64_pack_raw(p);
1696}
1697
1698static float64 float64r32_round_pack_canonical(FloatParts64 *p,
1699                                               float_status *s)
1700{
1701    parts_uncanon(p, s, &float32_params);
1702
1703    /*
1704     * In parts_uncanon, we placed the fraction for float32 at the lsb.
1705     * We need to adjust the fraction higher so that the least N bits are
1706     * zero, and the fraction is adjacent to the float64 implicit bit.
1707     */
1708    switch (p->cls) {
1709    case float_class_normal:
1710        if (unlikely(p->exp == 0)) {
1711            /*
1712             * The result is denormal for float32, but can be represented
1713             * in normalized form for float64.  Adjust, per canonicalize.
1714             */
1715            int shift = frac_normalize(p);
1716            p->exp = (float32_params.frac_shift -
1717                      float32_params.exp_bias - shift + 1 +
1718                      float64_params.exp_bias);
1719            frac_shr(p, float64_params.frac_shift);
1720        } else {
1721            frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1722            p->exp += float64_params.exp_bias - float32_params.exp_bias;
1723        }
1724        break;
1725    case float_class_snan:
1726    case float_class_qnan:
1727        frac_shl(p, float32_params.frac_shift - float64_params.frac_shift);
1728        p->exp = float64_params.exp_max;
1729        break;
1730    case float_class_inf:
1731        p->exp = float64_params.exp_max;
1732        break;
1733    case float_class_zero:
1734        break;
1735    default:
1736        g_assert_not_reached();
1737    }
1738
1739    return float64_pack_raw(p);
1740}
1741
1742static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1743                                      float_status *s)
1744{
1745    float128_unpack_raw(p, f);
1746    parts_canonicalize(p, s, &float128_params);
1747}
1748
1749static float128 float128_round_pack_canonical(FloatParts128 *p,
1750                                              float_status *s)
1751{
1752    parts_uncanon(p, s, &float128_params);
1753    return float128_pack_raw(p);
1754}
1755
1756/* Returns false if the encoding is invalid. */
1757static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1758                                      float_status *s)
1759{
1760    /* Ensure the rounding precision is valid before beginning. */
1761    switch (s->floatx80_rounding_precision) {
1762    case floatx80_precision_x:
1763    case floatx80_precision_d:
1764    case floatx80_precision_s:
1765        break;
1766    default:
1767        g_assert_not_reached();
1768    }
1769
1770    if (unlikely(floatx80_invalid_encoding(f))) {
1771        float_raise(float_flag_invalid, s);
1772        return false;
1773    }
1774
1775    floatx80_unpack_raw(p, f);
1776
1777    if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1778        parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1779    } else {
1780        /* Past the invalid checks, the explicit integer bit is ignored. */
1781        p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1782        p->cls = (p->frac_hi == 0 ? float_class_inf
1783                  : parts_is_snan_frac(p->frac_hi, s)
1784                  ? float_class_snan : float_class_qnan);
1785    }
1786    return true;
1787}
1788
1789static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1790                                              float_status *s)
1791{
1792    const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1793    uint64_t frac;
1794    int exp;
1795
1796    switch (p->cls) {
1797    case float_class_normal:
1798        if (s->floatx80_rounding_precision == floatx80_precision_x) {
1799            parts_uncanon_normal(p, s, fmt);
1800            frac = p->frac_hi;
1801            exp = p->exp;
1802        } else {
1803            FloatParts64 p64;
1804
1805            p64.sign = p->sign;
1806            p64.exp = p->exp;
1807            frac_truncjam(&p64, p);
1808            parts_uncanon_normal(&p64, s, fmt);
1809            frac = p64.frac;
1810            exp = p64.exp;
1811        }
1812        if (exp != fmt->exp_max) {
1813            break;
1814        }
1815        /* rounded to inf -- fall through to set frac correctly */
1816
1817    case float_class_inf:
1818        /* x86 and m68k differ in the setting of the integer bit. */
1819        frac = floatx80_infinity_low;
1820        exp = fmt->exp_max;
1821        break;
1822
1823    case float_class_zero:
1824        frac = 0;
1825        exp = 0;
1826        break;
1827
1828    case float_class_snan:
1829    case float_class_qnan:
1830        /* NaNs have the integer bit set. */
1831        frac = p->frac_hi | (1ull << 63);
1832        exp = fmt->exp_max;
1833        break;
1834
1835    default:
1836        g_assert_not_reached();
1837    }
1838
1839    return packFloatx80(p->sign, exp, frac);
1840}
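
/*
 * The non-x precision cases above implement x87-style precision control:
 * the fraction is truncated (with a sticky bit) into a FloatParts64 and
 * rounded to the narrower fraction width of floatx80_params[precision],
 * while the full floatx80 exponent range is retained.
 */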
1841
1842/*
1843 * Addition and subtraction
1844 */
1845
1846static float16 QEMU_FLATTEN
1847float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1848{
1849    FloatParts64 pa, pb, *pr;
1850
1851    float16_unpack_canonical(&pa, a, status);
1852    float16_unpack_canonical(&pb, b, status);
1853    pr = parts_addsub(&pa, &pb, status, subtract);
1854
1855    return float16_round_pack_canonical(pr, status);
1856}
1857
1858float16 float16_add(float16 a, float16 b, float_status *status)
1859{
1860    return float16_addsub(a, b, status, false);
1861}
1862
1863float16 float16_sub(float16 a, float16 b, float_status *status)
1864{
1865    return float16_addsub(a, b, status, true);
1866}
1867
1868static float32 QEMU_SOFTFLOAT_ATTR
1869soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1870{
1871    FloatParts64 pa, pb, *pr;
1872
1873    float32_unpack_canonical(&pa, a, status);
1874    float32_unpack_canonical(&pb, b, status);
1875    pr = parts_addsub(&pa, &pb, status, subtract);
1876
1877    return float32_round_pack_canonical(pr, status);
1878}
1879
1880static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1881{
1882    return soft_f32_addsub(a, b, status, false);
1883}
1884
1885static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1886{
1887    return soft_f32_addsub(a, b, status, true);
1888}
1889
1890static float64 QEMU_SOFTFLOAT_ATTR
1891soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1892{
1893    FloatParts64 pa, pb, *pr;
1894
1895    float64_unpack_canonical(&pa, a, status);
1896    float64_unpack_canonical(&pb, b, status);
1897    pr = parts_addsub(&pa, &pb, status, subtract);
1898
1899    return float64_round_pack_canonical(pr, status);
1900}
1901
1902static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1903{
1904    return soft_f64_addsub(a, b, status, false);
1905}
1906
1907static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1908{
1909    return soft_f64_addsub(a, b, status, true);
1910}
1911
1912static float hard_f32_add(float a, float b)
1913{
1914    return a + b;
1915}
1916
1917static float hard_f32_sub(float a, float b)
1918{
1919    return a - b;
1920}
1921
1922static double hard_f64_add(double a, double b)
1923{
1924    return a + b;
1925}
1926
1927static double hard_f64_sub(double a, double b)
1928{
1929    return a - b;
1930}
1931
1932static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1933{
1934    if (QEMU_HARDFLOAT_2F32_USE_FP) {
1935        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1936    }
1937    return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1938}
1939
1940static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1941{
1942    if (QEMU_HARDFLOAT_2F64_USE_FP) {
1943        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1944    } else {
1945        return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1946    }
1947}
1948
1949static float32 float32_addsub(float32 a, float32 b, float_status *s,
1950                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1951{
1952    return float32_gen2(a, b, s, hard, soft,
1953                        f32_is_zon2, f32_addsubmul_post);
1954}
1955
1956static float64 float64_addsub(float64 a, float64 b, float_status *s,
1957                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1958{
1959    return float64_gen2(a, b, s, hard, soft,
1960                        f64_is_zon2, f64_addsubmul_post);
1961}
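
/*
 * float32_gen2()/float64_gen2(), defined earlier in this file, evaluate
 * the hard_* host-FPU version when the 'pre' predicate accepts the
 * inputs and the 'post' predicate accepts the result, and fall back to
 * the soft_* implementation otherwise.  Here f32_is_zon2/f64_is_zon2
 * admit only zero-or-normal inputs, and the post hooks above send the
 * all-zero case back to softfloat.
 */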
1962
1963float32 QEMU_FLATTEN
1964float32_add(float32 a, float32 b, float_status *s)
1965{
1966    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1967}
1968
1969float32 QEMU_FLATTEN
1970float32_sub(float32 a, float32 b, float_status *s)
1971{
1972    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1973}
1974
1975float64 QEMU_FLATTEN
1976float64_add(float64 a, float64 b, float_status *s)
1977{
1978    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1979}
1980
1981float64 QEMU_FLATTEN
1982float64_sub(float64 a, float64 b, float_status *s)
1983{
1984    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1985}
1986
1987static float64 float64r32_addsub(float64 a, float64 b, float_status *status,
1988                                 bool subtract)
1989{
1990    FloatParts64 pa, pb, *pr;
1991
1992    float64_unpack_canonical(&pa, a, status);
1993    float64_unpack_canonical(&pb, b, status);
1994    pr = parts_addsub(&pa, &pb, status, subtract);
1995
1996    return float64r32_round_pack_canonical(pr, status);
1997}
1998
1999float64 float64r32_add(float64 a, float64 b, float_status *status)
2000{
2001    return float64r32_addsub(a, b, status, false);
2002}
2003
2004float64 float64r32_sub(float64 a, float64 b, float_status *status)
2005{
2006    return float64r32_addsub(a, b, status, true);
2007}
2008
2009static bfloat16 QEMU_FLATTEN
2010bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
2011{
2012    FloatParts64 pa, pb, *pr;
2013
2014    bfloat16_unpack_canonical(&pa, a, status);
2015    bfloat16_unpack_canonical(&pb, b, status);
2016    pr = parts_addsub(&pa, &pb, status, subtract);
2017
2018    return bfloat16_round_pack_canonical(pr, status);
2019}
2020
2021bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
2022{
2023    return bfloat16_addsub(a, b, status, false);
2024}
2025
2026bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
2027{
2028    return bfloat16_addsub(a, b, status, true);
2029}
2030
2031static float128 QEMU_FLATTEN
2032float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
2033{
2034    FloatParts128 pa, pb, *pr;
2035
2036    float128_unpack_canonical(&pa, a, status);
2037    float128_unpack_canonical(&pb, b, status);
2038    pr = parts_addsub(&pa, &pb, status, subtract);
2039
2040    return float128_round_pack_canonical(pr, status);
2041}
2042
2043float128 float128_add(float128 a, float128 b, float_status *status)
2044{
2045    return float128_addsub(a, b, status, false);
2046}
2047
2048float128 float128_sub(float128 a, float128 b, float_status *status)
2049{
2050    return float128_addsub(a, b, status, true);
2051}
2052
2053static floatx80 QEMU_FLATTEN
2054floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
2055{
2056    FloatParts128 pa, pb, *pr;
2057
2058    if (!floatx80_unpack_canonical(&pa, a, status) ||
2059        !floatx80_unpack_canonical(&pb, b, status)) {
2060        return floatx80_default_nan(status);
2061    }
2062
2063    pr = parts_addsub(&pa, &pb, status, subtract);
2064    return floatx80_round_pack_canonical(pr, status);
2065}
2066
2067floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2068{
2069    return floatx80_addsub(a, b, status, false);
2070}
2071
2072floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2073{
2074    return floatx80_addsub(a, b, status, true);
2075}
2076
2077/*
2078 * Multiplication
2079 */
2080
2081float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2082{
2083    FloatParts64 pa, pb, *pr;
2084
2085    float16_unpack_canonical(&pa, a, status);
2086    float16_unpack_canonical(&pb, b, status);
2087    pr = parts_mul(&pa, &pb, status);
2088
2089    return float16_round_pack_canonical(pr, status);
2090}
2091
2092static float32 QEMU_SOFTFLOAT_ATTR
2093soft_f32_mul(float32 a, float32 b, float_status *status)
2094{
2095    FloatParts64 pa, pb, *pr;
2096
2097    float32_unpack_canonical(&pa, a, status);
2098    float32_unpack_canonical(&pb, b, status);
2099    pr = parts_mul(&pa, &pb, status);
2100
2101    return float32_round_pack_canonical(pr, status);
2102}
2103
2104static float64 QEMU_SOFTFLOAT_ATTR
2105soft_f64_mul(float64 a, float64 b, float_status *status)
2106{
2107    FloatParts64 pa, pb, *pr;
2108
2109    float64_unpack_canonical(&pa, a, status);
2110    float64_unpack_canonical(&pb, b, status);
2111    pr = parts_mul(&pa, &pb, status);
2112
2113    return float64_round_pack_canonical(pr, status);
2114}
2115
2116static float hard_f32_mul(float a, float b)
2117{
2118    return a * b;
2119}
2120
2121static double hard_f64_mul(double a, double b)
2122{
2123    return a * b;
2124}
2125
2126float32 QEMU_FLATTEN
2127float32_mul(float32 a, float32 b, float_status *s)
2128{
2129    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2130                        f32_is_zon2, f32_addsubmul_post);
2131}
2132
2133float64 QEMU_FLATTEN
2134float64_mul(float64 a, float64 b, float_status *s)
2135{
2136    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2137                        f64_is_zon2, f64_addsubmul_post);
2138}
2139
2140float64 float64r32_mul(float64 a, float64 b, float_status *status)
2141{
2142    FloatParts64 pa, pb, *pr;
2143
2144    float64_unpack_canonical(&pa, a, status);
2145    float64_unpack_canonical(&pb, b, status);
2146    pr = parts_mul(&pa, &pb, status);
2147
2148    return float64r32_round_pack_canonical(pr, status);
2149}
2150
2151bfloat16 QEMU_FLATTEN
2152bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2153{
2154    FloatParts64 pa, pb, *pr;
2155
2156    bfloat16_unpack_canonical(&pa, a, status);
2157    bfloat16_unpack_canonical(&pb, b, status);
2158    pr = parts_mul(&pa, &pb, status);
2159
2160    return bfloat16_round_pack_canonical(pr, status);
2161}
2162
2163float128 QEMU_FLATTEN
2164float128_mul(float128 a, float128 b, float_status *status)
2165{
2166    FloatParts128 pa, pb, *pr;
2167
2168    float128_unpack_canonical(&pa, a, status);
2169    float128_unpack_canonical(&pb, b, status);
2170    pr = parts_mul(&pa, &pb, status);
2171
2172    return float128_round_pack_canonical(pr, status);
2173}
2174
2175floatx80 QEMU_FLATTEN
2176floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2177{
2178    FloatParts128 pa, pb, *pr;
2179
2180    if (!floatx80_unpack_canonical(&pa, a, status) ||
2181        !floatx80_unpack_canonical(&pb, b, status)) {
2182        return floatx80_default_nan(status);
2183    }
2184
2185    pr = parts_mul(&pa, &pb, status);
2186    return floatx80_round_pack_canonical(pr, status);
2187}
2188
2189/*
2190 * Fused multiply-add
2191 */
2192
2193float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2194                                    int flags, float_status *status)
2195{
2196    FloatParts64 pa, pb, pc, *pr;
2197
2198    float16_unpack_canonical(&pa, a, status);
2199    float16_unpack_canonical(&pb, b, status);
2200    float16_unpack_canonical(&pc, c, status);
2201    pr = parts_muladd(&pa, &pb, &pc, flags, status);
2202
2203    return float16_round_pack_canonical(pr, status);
2204}
2205
2206static float32 QEMU_SOFTFLOAT_ATTR
2207soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2208                float_status *status)
2209{
2210    FloatParts64 pa, pb, pc, *pr;
2211
2212    float32_unpack_canonical(&pa, a, status);
2213    float32_unpack_canonical(&pb, b, status);
2214    float32_unpack_canonical(&pc, c, status);
2215    pr = parts_muladd(&pa, &pb, &pc, flags, status);
2216
2217    return float32_round_pack_canonical(pr, status);
2218}
2219
2220static float64 QEMU_SOFTFLOAT_ATTR
2221soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2222                float_status *status)
2223{
2224    FloatParts64 pa, pb, pc, *pr;
2225
2226    float64_unpack_canonical(&pa, a, status);
2227    float64_unpack_canonical(&pb, b, status);
2228    float64_unpack_canonical(&pc, c, status);
2229    pr = parts_muladd(&pa, &pb, &pc, flags, status);
2230
2231    return float64_round_pack_canonical(pr, status);
2232}
2233
2234static bool force_soft_fma;
2235
2236float32 QEMU_FLATTEN
2237float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2238{
2239    union_float32 ua, ub, uc, ur;
2240
2241    ua.s = xa;
2242    ub.s = xb;
2243    uc.s = xc;
2244
2245    if (unlikely(!can_use_fpu(s))) {
2246        goto soft;
2247    }
2248    if (unlikely(flags & float_muladd_halve_result)) {
2249        goto soft;
2250    }
2251
2252    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2253    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2254        goto soft;
2255    }
2256
2257    if (unlikely(force_soft_fma)) {
2258        goto soft;
2259    }
2260
2261    /*
2262     * When either a or b is zero, there's no need to check for underflow or
2263     * overflow: we know the addend is (normal || 0) and the product is 0.
2264     */
2265    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2266        union_float32 up;
2267        bool prod_sign;
2268
2269        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2270        prod_sign ^= !!(flags & float_muladd_negate_product);
2271        up.s = float32_set_sign(float32_zero, prod_sign);
2272
2273        if (flags & float_muladd_negate_c) {
2274            uc.h = -uc.h;
2275        }
2276        ur.h = up.h + uc.h;
2277    } else {
2278        union_float32 ua_orig = ua;
2279        union_float32 uc_orig = uc;
2280
2281        if (flags & float_muladd_negate_product) {
2282            ua.h = -ua.h;
2283        }
2284        if (flags & float_muladd_negate_c) {
2285            uc.h = -uc.h;
2286        }
2287
2288        ur.h = fmaf(ua.h, ub.h, uc.h);
2289
2290        if (unlikely(f32_is_inf(ur))) {
2291            float_raise(float_flag_overflow, s);
2292        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2293            ua = ua_orig;
2294            uc = uc_orig;
2295            goto soft;
2296        }
2297    }
2298    if (flags & float_muladd_negate_result) {
2299        return float32_chs(ur.s);
2300    }
2301    return ur.s;
2302
2303 soft:
2304    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2305}
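
/*
 * A minimal usage sketch (illustrative only; example_fnmadd32 is a
 * hypothetical helper, not part of this file): a fused
 * negate-multiply-add r = -(a * b) + c can be expressed with the flag
 * bits tested above.  Because the product sign is flipped before the
 * addition, the whole operation still rounds exactly once.
 */
static inline float32 example_fnmadd32(float32 a, float32 b, float32 c,
                                       float_status *s)
{
    return float32_muladd(a, b, c, float_muladd_negate_product, s);
}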
2306
2307float64 QEMU_FLATTEN
2308float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2309{
2310    union_float64 ua, ub, uc, ur;
2311
2312    ua.s = xa;
2313    ub.s = xb;
2314    uc.s = xc;
2315
2316    if (unlikely(!can_use_fpu(s))) {
2317        goto soft;
2318    }
2319    if (unlikely(flags & float_muladd_halve_result)) {
2320        goto soft;
2321    }
2322
2323    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2324    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2325        goto soft;
2326    }
2327
2328    if (unlikely(force_soft_fma)) {
2329        goto soft;
2330    }
2331
2332    /*
2333     * When either a or b is zero, there's no need to check for underflow or
2334     * overflow: we know the addend is (normal || 0) and the product is 0.
2335     */
2336    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2337        union_float64 up;
2338        bool prod_sign;
2339
2340        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2341        prod_sign ^= !!(flags & float_muladd_negate_product);
2342        up.s = float64_set_sign(float64_zero, prod_sign);
2343
2344        if (flags & float_muladd_negate_c) {
2345            uc.h = -uc.h;
2346        }
2347        ur.h = up.h + uc.h;
2348    } else {
2349        union_float64 ua_orig = ua;
2350        union_float64 uc_orig = uc;
2351
2352        if (flags & float_muladd_negate_product) {
2353            ua.h = -ua.h;
2354        }
2355        if (flags & float_muladd_negate_c) {
2356            uc.h = -uc.h;
2357        }
2358
2359        ur.h = fma(ua.h, ub.h, uc.h);
2360
2361        if (unlikely(f64_is_inf(ur))) {
2362            float_raise(float_flag_overflow, s);
2363        } else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
2364            ua = ua_orig;
2365            uc = uc_orig;
2366            goto soft;
2367        }
2368    }
2369    if (flags & float_muladd_negate_result) {
2370        return float64_chs(ur.s);
2371    }
2372    return ur.s;
2373
2374 soft:
2375    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2376}
2377
2378float64 float64r32_muladd(float64 a, float64 b, float64 c,
2379                          int flags, float_status *status)
2380{
2381    FloatParts64 pa, pb, pc, *pr;
2382
2383    float64_unpack_canonical(&pa, a, status);
2384    float64_unpack_canonical(&pb, b, status);
2385    float64_unpack_canonical(&pc, c, status);
2386    pr = parts_muladd(&pa, &pb, &pc, flags, status);
2387
2388    return float64r32_round_pack_canonical(pr, status);
2389}
2390
2391bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2392                                      int flags, float_status *status)
2393{
2394    FloatParts64 pa, pb, pc, *pr;
2395
2396    bfloat16_unpack_canonical(&pa, a, status);
2397    bfloat16_unpack_canonical(&pb, b, status);
2398    bfloat16_unpack_canonical(&pc, c, status);
2399    pr = parts_muladd(&pa, &pb, &pc, flags, status);
2400
2401    return bfloat16_round_pack_canonical(pr, status);
2402}
2403
2404float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2405                                      int flags, float_status *status)
2406{
2407    FloatParts128 pa, pb, pc, *pr;
2408
2409    float128_unpack_canonical(&pa, a, status);
2410    float128_unpack_canonical(&pb, b, status);
2411    float128_unpack_canonical(&pc, c, status);
2412    pr = parts_muladd(&pa, &pb, &pc, flags, status);
2413
2414    return float128_round_pack_canonical(pr, status);
2415}
2416
2417/*
2418 * Division
2419 */
2420
2421float16 float16_div(float16 a, float16 b, float_status *status)
2422{
2423    FloatParts64 pa, pb, *pr;
2424
2425    float16_unpack_canonical(&pa, a, status);
2426    float16_unpack_canonical(&pb, b, status);
2427    pr = parts_div(&pa, &pb, status);
2428
2429    return float16_round_pack_canonical(pr, status);
2430}
2431
2432static float32 QEMU_SOFTFLOAT_ATTR
2433soft_f32_div(float32 a, float32 b, float_status *status)
2434{
2435    FloatParts64 pa, pb, *pr;
2436
2437    float32_unpack_canonical(&pa, a, status);
2438    float32_unpack_canonical(&pb, b, status);
2439    pr = parts_div(&pa, &pb, status);
2440
2441    return float32_round_pack_canonical(pr, status);
2442}
2443
2444static float64 QEMU_SOFTFLOAT_ATTR
2445soft_f64_div(float64 a, float64 b, float_status *status)
2446{
2447    FloatParts64 pa, pb, *pr;
2448
2449    float64_unpack_canonical(&pa, a, status);
2450    float64_unpack_canonical(&pb, b, status);
2451    pr = parts_div(&pa, &pb, status);
2452
2453    return float64_round_pack_canonical(pr, status);
2454}
2455
2456static float hard_f32_div(float a, float b)
2457{
2458    return a / b;
2459}
2460
2461static double hard_f64_div(double a, double b)
2462{
2463    return a / b;
2464}
2465
2466static bool f32_div_pre(union_float32 a, union_float32 b)
2467{
2468    if (QEMU_HARDFLOAT_2F32_USE_FP) {
2469        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2470               fpclassify(b.h) == FP_NORMAL;
2471    }
2472    return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2473}
2474
2475static bool f64_div_pre(union_float64 a, union_float64 b)
2476{
2477    if (QEMU_HARDFLOAT_2F64_USE_FP) {
2478        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2479               fpclassify(b.h) == FP_NORMAL;
2480    }
2481    return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2482}
2483
2484static bool f32_div_post(union_float32 a, union_float32 b)
2485{
2486    if (QEMU_HARDFLOAT_2F32_USE_FP) {
2487        return fpclassify(a.h) != FP_ZERO;
2488    }
2489    return !float32_is_zero(a.s);
2490}
2491
2492static bool f64_div_post(union_float64 a, union_float64 b)
2493{
2494    if (QEMU_HARDFLOAT_2F64_USE_FP) {
2495        return fpclassify(a.h) != FP_ZERO;
2496    }
2497    return !float64_is_zero(a.s);
2498}
2499
2500float32 QEMU_FLATTEN
2501float32_div(float32 a, float32 b, float_status *s)
2502{
2503    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2504                        f32_div_pre, f32_div_post);
2505}
2506
2507float64 QEMU_FLATTEN
2508float64_div(float64 a, float64 b, float_status *s)
2509{
2510    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2511                        f64_div_pre, f64_div_post);
2512}
2513
2514float64 float64r32_div(float64 a, float64 b, float_status *status)
2515{
2516    FloatParts64 pa, pb, *pr;
2517
2518    float64_unpack_canonical(&pa, a, status);
2519    float64_unpack_canonical(&pb, b, status);
2520    pr = parts_div(&pa, &pb, status);
2521
2522    return float64r32_round_pack_canonical(pr, status);
2523}
2524
2525bfloat16 QEMU_FLATTEN
2526bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2527{
2528    FloatParts64 pa, pb, *pr;
2529
2530    bfloat16_unpack_canonical(&pa, a, status);
2531    bfloat16_unpack_canonical(&pb, b, status);
2532    pr = parts_div(&pa, &pb, status);
2533
2534    return bfloat16_round_pack_canonical(pr, status);
2535}
2536
2537float128 QEMU_FLATTEN
2538float128_div(float128 a, float128 b, float_status *status)
2539{
2540    FloatParts128 pa, pb, *pr;
2541
2542    float128_unpack_canonical(&pa, a, status);
2543    float128_unpack_canonical(&pb, b, status);
2544    pr = parts_div(&pa, &pb, status);
2545
2546    return float128_round_pack_canonical(pr, status);
2547}
2548
2549floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2550{
2551    FloatParts128 pa, pb, *pr;
2552
2553    if (!floatx80_unpack_canonical(&pa, a, status) ||
2554        !floatx80_unpack_canonical(&pb, b, status)) {
2555        return floatx80_default_nan(status);
2556    }
2557
2558    pr = parts_div(&pa, &pb, status);
2559    return floatx80_round_pack_canonical(pr, status);
2560}
2561
2562/*
2563 * Remainder
2564 */
2565
2566float32 float32_rem(float32 a, float32 b, float_status *status)
2567{
2568    FloatParts64 pa, pb, *pr;
2569
2570    float32_unpack_canonical(&pa, a, status);
2571    float32_unpack_canonical(&pb, b, status);
2572    pr = parts_modrem(&pa, &pb, NULL, status);
2573
2574    return float32_round_pack_canonical(pr, status);
2575}
2576
2577float64 float64_rem(float64 a, float64 b, float_status *status)
2578{
2579    FloatParts64 pa, pb, *pr;
2580
2581    float64_unpack_canonical(&pa, a, status);
2582    float64_unpack_canonical(&pb, b, status);
2583    pr = parts_modrem(&pa, &pb, NULL, status);
2584
2585    return float64_round_pack_canonical(pr, status);
2586}
2587
2588float128 float128_rem(float128 a, float128 b, float_status *status)
2589{
2590    FloatParts128 pa, pb, *pr;
2591
2592    float128_unpack_canonical(&pa, a, status);
2593    float128_unpack_canonical(&pb, b, status);
2594    pr = parts_modrem(&pa, &pb, NULL, status);
2595
2596    return float128_round_pack_canonical(pr, status);
2597}
2598
2599/*
2600 * Returns the remainder of the extended double-precision floating-point value
2601 * `a' with respect to the corresponding value `b'.
2602 * If 'mod' is false, the operation is performed according to the IEC/IEEE
2603 * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, the
2604 * quotient is instead truncated toward zero, the remainder is taken with
2605 * respect to that quotient, and *quotient is set to the low 64 bits of the
2606 * absolute value of the integer quotient.
2607 */
2608floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2609                         uint64_t *quotient, float_status *status)
2610{
2611    FloatParts128 pa, pb, *pr;
2612
2613    *quotient = 0;
2614    if (!floatx80_unpack_canonical(&pa, a, status) ||
2615        !floatx80_unpack_canonical(&pb, b, status)) {
2616        return floatx80_default_nan(status);
2617    }
2618    pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2619
2620    return floatx80_round_pack_canonical(pr, status);
2621}
2622
2623floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2624{
2625    uint64_t quotient;
2626    return floatx80_modrem(a, b, false, &quotient, status);
2627}
2628
2629floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2630{
2631    uint64_t quotient;
2632    return floatx80_modrem(a, b, true, &quotient, status);
2633}
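
/*
 * As a usage note (describing callers, not this file): a target emulating
 * x87 FPREM wants the truncating form, floatx80_mod, or floatx80_modrem()
 * with mod=true when it also needs the low quotient bits for the C0/C1/C3
 * status flags, while FPREM1 follows the IEC/IEEE definition via
 * floatx80_rem.
 */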
2634
2635/*
2636 * Float to Float conversions
2637 *
2638 * Returns the result of converting one float format to another. The
2639 * conversion is performed according to the IEC/IEEE Standard for
2640 * Binary Floating-Point Arithmetic.
2641 *
2642 * Usually this only needs to take care of raising invalid exceptions
2643 * and handling the conversion of NaNs.
2644 */
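
/*
 * For the half-precision conversions below, the 'ieee' flag selects
 * between IEEE binary16 and Arm's alternative half-precision (AHP)
 * format, which repurposes the Inf/NaN encodings as additional normal
 * values; parts_float_to_ahp() implements the narrowing rules for the
 * latter.
 */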
2645
2646static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2647{
2648    switch (a->cls) {
2649    case float_class_snan:
2650        float_raise(float_flag_invalid_snan, s);
2651        /* fall through */
2652    case float_class_qnan:
2653        /*
2654         * There is no NaN in the destination format.  Raise Invalid
2655         * and return a zero with the sign of the input NaN.
2656         */
2657        float_raise(float_flag_invalid, s);
2658        a->cls = float_class_zero;
2659        break;
2660
2661    case float_class_inf:
2662        /*
2663         * There is no Inf in the destination format.  Raise Invalid
2664         * and return the maximum normal with the correct sign.
2665         */
2666        float_raise(float_flag_invalid, s);
2667        a->cls = float_class_normal;
2668        a->exp = float16_params_ahp.exp_max;
2669        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2670                                  float16_params_ahp.frac_size + 1);
2671        break;
2672
2673    case float_class_normal:
2674    case float_class_zero:
2675        break;
2676
2677    default:
2678        g_assert_not_reached();
2679    }
2680}
2681
2682static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2683{
2684    if (is_nan(a->cls)) {
2685        parts_return_nan(a, s);
2686    }
2687}
2688
2689static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2690{
2691    if (is_nan(a->cls)) {
2692        parts_return_nan(a, s);
2693    }
2694}
2695
2696#define parts_float_to_float(P, S) \
2697    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2698
2699static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2700                                        float_status *s)
2701{
2702    a->cls = b->cls;
2703    a->sign = b->sign;
2704    a->exp = b->exp;
2705
2706    if (a->cls == float_class_normal) {
2707        frac_truncjam(a, b);
2708    } else if (is_nan(a->cls)) {
2709        /* Discard the low bits of the NaN. */
2710        a->frac = b->frac_hi;
2711        parts_return_nan(a, s);
2712    }
2713}
2714
2715static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2716                                       float_status *s)
2717{
2718    a->cls = b->cls;
2719    a->sign = b->sign;
2720    a->exp = b->exp;
2721    frac_widen(a, b);
2722
2723    if (is_nan(a->cls)) {
2724        parts_return_nan(a, s);
2725    }
2726}
2727
2728float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2729{
2730    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2731    FloatParts64 p;
2732
2733    float16a_unpack_canonical(&p, a, s, fmt16);
2734    parts_float_to_float(&p, s);
2735    return float32_round_pack_canonical(&p, s);
2736}
2737
2738float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2739{
2740    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2741    FloatParts64 p;
2742
2743    float16a_unpack_canonical(&p, a, s, fmt16);
2744    parts_float_to_float(&p, s);
2745    return float64_round_pack_canonical(&p, s);
2746}
2747
2748float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2749{
2750    FloatParts64 p;
2751    const FloatFmt *fmt;
2752
2753    float32_unpack_canonical(&p, a, s);
2754    if (ieee) {
2755        parts_float_to_float(&p, s);
2756        fmt = &float16_params;
2757    } else {
2758        parts_float_to_ahp(&p, s);
2759        fmt = &float16_params_ahp;
2760    }
2761    return float16a_round_pack_canonical(&p, s, fmt);
2762}
2763
2764static float64 QEMU_SOFTFLOAT_ATTR
2765soft_float32_to_float64(float32 a, float_status *s)
2766{
2767    FloatParts64 p;
2768
2769    float32_unpack_canonical(&p, a, s);
2770    parts_float_to_float(&p, s);
2771    return float64_round_pack_canonical(&p, s);
2772}
2773
2774float64 float32_to_float64(float32 a, float_status *s)
2775{
2776    if (likely(float32_is_normal(a))) {
2777        /* Widening conversion can never produce inexact results.  */
2778        union_float32 uf;
2779        union_float64 ud;
2780        uf.s = a;
2781        ud.h = uf.h;
2782        return ud.s;
2783    } else if (float32_is_zero(a)) {
2784        return float64_set_sign(float64_zero, float32_is_neg(a));
2785    } else {
2786        return soft_float32_to_float64(a, s);
2787    }
2788}
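
/*
 * (The fast path above is exact because every normal float32 value fits
 * in float64: the 23-bit fraction embeds in the 52-bit fraction and the
 * 8-bit exponent range in the 11-bit range, so the host cast can raise
 * no exception and lose no information.)
 */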
2789
2790float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2791{
2792    FloatParts64 p;
2793    const FloatFmt *fmt;
2794
2795    float64_unpack_canonical(&p, a, s);
2796    if (ieee) {
2797        parts_float_to_float(&p, s);
2798        fmt = &float16_params;
2799    } else {
2800        parts_float_to_ahp(&p, s);
2801        fmt = &float16_params_ahp;
2802    }
2803    return float16a_round_pack_canonical(&p, s, fmt);
2804}
2805
2806float32 float64_to_float32(float64 a, float_status *s)
2807{
2808    FloatParts64 p;
2809
2810    float64_unpack_canonical(&p, a, s);
2811    parts_float_to_float(&p, s);
2812    return float32_round_pack_canonical(&p, s);
2813}
2814
2815float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2816{
2817    FloatParts64 p;
2818
2819    bfloat16_unpack_canonical(&p, a, s);
2820    parts_float_to_float(&p, s);
2821    return float32_round_pack_canonical(&p, s);
2822}
2823
2824float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2825{
2826    FloatParts64 p;
2827
2828    bfloat16_unpack_canonical(&p, a, s);
2829    parts_float_to_float(&p, s);
2830    return float64_round_pack_canonical(&p, s);
2831}
2832
2833bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2834{
2835    FloatParts64 p;
2836
2837    float32_unpack_canonical(&p, a, s);
2838    parts_float_to_float(&p, s);
2839    return bfloat16_round_pack_canonical(&p, s);
2840}
2841
2842bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2843{
2844    FloatParts64 p;
2845
2846    float64_unpack_canonical(&p, a, s);
2847    parts_float_to_float(&p, s);
2848    return bfloat16_round_pack_canonical(&p, s);
2849}
2850
2851float32 float128_to_float32(float128 a, float_status *s)
2852{
2853    FloatParts64 p64;
2854    FloatParts128 p128;
2855
2856    float128_unpack_canonical(&p128, a, s);
2857    parts_float_to_float_narrow(&p64, &p128, s);
2858    return float32_round_pack_canonical(&p64, s);
2859}
2860
2861float64 float128_to_float64(float128 a, float_status *s)
2862{
2863    FloatParts64 p64;
2864    FloatParts128 p128;
2865
2866    float128_unpack_canonical(&p128, a, s);
2867    parts_float_to_float_narrow(&p64, &p128, s);
2868    return float64_round_pack_canonical(&p64, s);
2869}
2870
2871float128 float32_to_float128(float32 a, float_status *s)
2872{
2873    FloatParts64 p64;
2874    FloatParts128 p128;
2875
2876    float32_unpack_canonical(&p64, a, s);
2877    parts_float_to_float_widen(&p128, &p64, s);
2878    return float128_round_pack_canonical(&p128, s);
2879}
2880
2881float128 float64_to_float128(float64 a, float_status *s)
2882{
2883    FloatParts64 p64;
2884    FloatParts128 p128;
2885
2886    float64_unpack_canonical(&p64, a, s);
2887    parts_float_to_float_widen(&p128, &p64, s);
2888    return float128_round_pack_canonical(&p128, s);
2889}
2890
2891float32 floatx80_to_float32(floatx80 a, float_status *s)
2892{
2893    FloatParts64 p64;
2894    FloatParts128 p128;
2895
2896    if (floatx80_unpack_canonical(&p128, a, s)) {
2897        parts_float_to_float_narrow(&p64, &p128, s);
2898    } else {
2899        parts_default_nan(&p64, s);
2900    }
2901    return float32_round_pack_canonical(&p64, s);
2902}
2903
2904float64 floatx80_to_float64(floatx80 a, float_status *s)
2905{
2906    FloatParts64 p64;
2907    FloatParts128 p128;
2908
2909    if (floatx80_unpack_canonical(&p128, a, s)) {
2910        parts_float_to_float_narrow(&p64, &p128, s);
2911    } else {
2912        parts_default_nan(&p64, s);
2913    }
2914    return float64_round_pack_canonical(&p64, s);
2915}
2916
2917float128 floatx80_to_float128(floatx80 a, float_status *s)
2918{
2919    FloatParts128 p;
2920
2921    if (floatx80_unpack_canonical(&p, a, s)) {
2922        parts_float_to_float(&p, s);
2923    } else {
2924        parts_default_nan(&p, s);
2925    }
2926    return float128_round_pack_canonical(&p, s);
2927}
2928
2929floatx80 float32_to_floatx80(float32 a, float_status *s)
2930{
2931    FloatParts64 p64;
2932    FloatParts128 p128;
2933
2934    float32_unpack_canonical(&p64, a, s);
2935    parts_float_to_float_widen(&p128, &p64, s);
2936    return floatx80_round_pack_canonical(&p128, s);
2937}
2938
2939floatx80 float64_to_floatx80(float64 a, float_status *s)
2940{
2941    FloatParts64 p64;
2942    FloatParts128 p128;
2943
2944    float64_unpack_canonical(&p64, a, s);
2945    parts_float_to_float_widen(&p128, &p64, s);
2946    return floatx80_round_pack_canonical(&p128, s);
2947}
2948
2949floatx80 float128_to_floatx80(float128 a, float_status *s)
2950{
2951    FloatParts128 p;
2952
2953    float128_unpack_canonical(&p, a, s);
2954    parts_float_to_float(&p, s);
2955    return floatx80_round_pack_canonical(&p, s);
2956}
2957
2958/*
2959 * Round to integral value
2960 */
2961
2962float16 float16_round_to_int(float16 a, float_status *s)
2963{
2964    FloatParts64 p;
2965
2966    float16_unpack_canonical(&p, a, s);
2967    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2968    return float16_round_pack_canonical(&p, s);
2969}
2970
2971float32 float32_round_to_int(float32 a, float_status *s)
2972{
2973    FloatParts64 p;
2974
2975    float32_unpack_canonical(&p, a, s);
2976    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2977    return float32_round_pack_canonical(&p, s);
2978}
2979
2980float64 float64_round_to_int(float64 a, float_status *s)
2981{
2982    FloatParts64 p;
2983
2984    float64_unpack_canonical(&p, a, s);
2985    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2986    return float64_round_pack_canonical(&p, s);
2987}
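
/*
 * For example, given an input of 2.5, float64_round_to_int() returns 2.0
 * under float_round_nearest_even (ties go to even), 3.0 under
 * float_round_up, and 2.0 under float_round_to_zero, raising
 * float_flag_inexact in each case because the value changed.
 */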
2988
2989bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2990{
2991    FloatParts64 p;
2992
2993    bfloat16_unpack_canonical(&p, a, s);
2994    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2995    return bfloat16_round_pack_canonical(&p, s);
2996}
2997
2998float128 float128_round_to_int(float128 a, float_status *s)
2999{
3000    FloatParts128 p;
3001
3002    float128_unpack_canonical(&p, a, s);
3003    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
3004    return float128_round_pack_canonical(&p, s);
3005}
3006
3007floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
3008{
3009    FloatParts128 p;
3010
3011    if (!floatx80_unpack_canonical(&p, a, status)) {
3012        return floatx80_default_nan(status);
3013    }
3014
3015    parts_round_to_int(&p, status->float_rounding_mode, 0, status,
3016                       &floatx80_params[status->floatx80_rounding_precision]);
3017    return floatx80_round_pack_canonical(&p, status);
3018}
3019
3020/*
3021 * Floating-point to signed integer conversions
3022 */
3023
3024int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3025                              float_status *s)
3026{
3027    FloatParts64 p;
3028
3029    float16_unpack_canonical(&p, a, s);
3030    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
3031}
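
/*
 * The 'scale' argument biases the exponent before rounding, so these
 * return the integer nearest a * 2**scale under 'rmode', saturated to
 * the given bounds.  A fixed-point sketch (example_float32_to_q16_16 is
 * a hypothetical helper, not part of this file):
 */
static inline int32_t example_float32_to_q16_16(float32 a, float_status *s)
{
    /* Round to Q16.16 fixed point, i.e. the nearest integer to a * 65536. */
    return float32_to_int32_scalbn(a, float_round_nearest_even, 16, s);
}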
3032
3033int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3034                                float_status *s)
3035{
3036    FloatParts64 p;
3037
3038    float16_unpack_canonical(&p, a, s);
3039    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3040}
3041
3042int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3043                                float_status *s)
3044{
3045    FloatParts64 p;
3046
3047    float16_unpack_canonical(&p, a, s);
3048    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3049}
3050
3051int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3052                                float_status *s)
3053{
3054    FloatParts64 p;
3055
3056    float16_unpack_canonical(&p, a, s);
3057    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3058}
3059
3060int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3061                                float_status *s)
3062{
3063    FloatParts64 p;
3064
3065    float32_unpack_canonical(&p, a, s);
3066    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3067}
3068
3069int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3070                                float_status *s)
3071{
3072    FloatParts64 p;
3073
3074    float32_unpack_canonical(&p, a, s);
3075    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3076}
3077
3078int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3079                                float_status *s)
3080{
3081    FloatParts64 p;
3082
3083    float32_unpack_canonical(&p, a, s);
3084    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3085}
3086
3087int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3088                                float_status *s)
3089{
3090    FloatParts64 p;
3091
3092    float64_unpack_canonical(&p, a, s);
3093    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3094}
3095
3096int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3097                                float_status *s)
3098{
3099    FloatParts64 p;
3100
3101    float64_unpack_canonical(&p, a, s);
3102    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3103}
3104
3105int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3106                                float_status *s)
3107{
3108    FloatParts64 p;
3109
3110    float64_unpack_canonical(&p, a, s);
3111    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3112}
3113
3114int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3115                                 float_status *s)
3116{
3117    FloatParts64 p;
3118
3119    bfloat16_unpack_canonical(&p, a, s);
3120    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3121}
3122
3123int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3124                                 float_status *s)
3125{
3126    FloatParts64 p;
3127
3128    bfloat16_unpack_canonical(&p, a, s);
3129    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3130}
3131
3132int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3133                                 float_status *s)
3134{
3135    FloatParts64 p;
3136
3137    bfloat16_unpack_canonical(&p, a, s);
3138    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3139}
3140
3141static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3142                                        int scale, float_status *s)
3143{
3144    FloatParts128 p;
3145
3146    float128_unpack_canonical(&p, a, s);
3147    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3148}
3149
3150static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3151                                        int scale, float_status *s)
3152{
3153    FloatParts128 p;
3154
3155    float128_unpack_canonical(&p, a, s);
3156    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3157}
3158
3159static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
3160                                        int scale, float_status *s)
3161{
3162    int flags = 0;
3163    Int128 r;
3164    FloatParts128 p;
3165
3166    float128_unpack_canonical(&p, a, s);
3167
3168    switch (p.cls) {
3169    case float_class_snan:
3170        flags |= float_flag_invalid_snan;
3171        /* fall through */
3172    case float_class_qnan:
3173        flags |= float_flag_invalid;
3174        r = UINT128_MAX;
3175        break;
3176
3177    case float_class_inf:
3178        flags = float_flag_invalid | float_flag_invalid_cvti;
3179        r = p.sign ? INT128_MIN : INT128_MAX;
3180        break;
3181
3182    case float_class_zero:
3183        return int128_zero();
3184
3185    case float_class_normal:
3186        if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3187            flags = float_flag_inexact;
3188        }
3189
3190        if (p.exp < 127) {
3191            int shift = 127 - p.exp;
3192            r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3193            if (p.sign) {
3194                r = int128_neg(r);
3195            }
3196        } else if (p.exp == 127 && p.sign && p.frac_lo == 0 &&
3197                   p.frac_hi == DECOMPOSED_IMPLICIT_BIT) {
3198            r = INT128_MIN;
3199        } else {
3200            flags = float_flag_invalid | float_flag_invalid_cvti;
3201            r = p.sign ? INT128_MIN : INT128_MAX;
3202        }
3203        break;
3204
3205    default:
3206        g_assert_not_reached();
3207    }
3208
3209    float_raise(flags, s);
3210    return r;
3211}
3212
3213static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3214                                        int scale, float_status *s)
3215{
3216    FloatParts128 p;
3217
3218    if (!floatx80_unpack_canonical(&p, a, s)) {
3219        parts_default_nan(&p, s);
3220    }
3221    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3222}
3223
3224static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3225                                        int scale, float_status *s)
3226{
3227    FloatParts128 p;
3228
3229    if (!floatx80_unpack_canonical(&p, a, s)) {
3230        parts_default_nan(&p, s);
3231    }
3232    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3233}
3234
3235int8_t float16_to_int8(float16 a, float_status *s)
3236{
3237    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3238}
3239
3240int16_t float16_to_int16(float16 a, float_status *s)
3241{
3242    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3243}
3244
3245int32_t float16_to_int32(float16 a, float_status *s)
3246{
3247    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3248}
3249
3250int64_t float16_to_int64(float16 a, float_status *s)
3251{
3252    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3253}
3254
3255int16_t float32_to_int16(float32 a, float_status *s)
3256{
3257    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3258}
3259
3260int32_t float32_to_int32(float32 a, float_status *s)
3261{
3262    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3263}
3264
3265int64_t float32_to_int64(float32 a, float_status *s)
3266{
3267    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3268}
3269
3270int16_t float64_to_int16(float64 a, float_status *s)
3271{
3272    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3273}
3274
3275int32_t float64_to_int32(float64 a, float_status *s)
3276{
3277    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3278}
3279
3280int64_t float64_to_int64(float64 a, float_status *s)
3281{
3282    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3283}
3284
3285int32_t float128_to_int32(float128 a, float_status *s)
3286{
3287    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3288}
3289
3290int64_t float128_to_int64(float128 a, float_status *s)
3291{
3292    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3293}
3294
3295Int128 float128_to_int128(float128 a, float_status *s)
3296{
3297    return float128_to_int128_scalbn(a, s->float_rounding_mode, 0, s);
3298}
3299
3300int32_t floatx80_to_int32(floatx80 a, float_status *s)
3301{
3302    return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3303}
3304
3305int64_t floatx80_to_int64(floatx80 a, float_status *s)
3306{
3307    return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3308}
3309
3310int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3311{
3312    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3313}
3314
3315int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3316{
3317    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3318}
3319
3320int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3321{
3322    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3323}
3324
3325int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3326{
3327    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3328}
3329
3330int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3331{
3332    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3333}
3334
3335int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3336{
3337    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3338}
3339
3340int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3341{
3342    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3343}
3344
3345int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3346{
3347    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3348}
3349
3350int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3351{
3352    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3353}
3354
3355int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3356{
3357    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3358}
3359
3360int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3361{
3362    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3363}
3364
3365Int128 float128_to_int128_round_to_zero(float128 a, float_status *s)
3366{
3367    return float128_to_int128_scalbn(a, float_round_to_zero, 0, s);
3368}
3369
3370int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3371{
3372    return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3373}
3374
3375int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3376{
3377    return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3378}
3379
3380int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3381{
3382    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3383}
3384
3385int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3386{
3387    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3388}
3389
3390int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3391{
3392    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3393}
3394
3395int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3396{
3397    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3398}
3399
3400int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3401{
3402    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3403}
3404
3405int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3406{
3407    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3408}
3409
3410/*
3411 * Floating-point to unsigned integer conversions
3412 */
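
/*
 * These mirror the signed conversions above, except that only an upper
 * bound is passed to parts_float_to_uint(): inputs that round to a value
 * below zero saturate at 0 and raise float_flag_invalid.
 */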
3413
3414uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3415                                float_status *s)
3416{
3417    FloatParts64 p;
3418
3419    float16_unpack_canonical(&p, a, s);
3420    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3421}
3422
3423uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3424                                  float_status *s)
3425{
3426    FloatParts64 p;
3427
3428    float16_unpack_canonical(&p, a, s);
3429    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3430}
3431
3432uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3433                                  float_status *s)
3434{
3435    FloatParts64 p;
3436
3437    float16_unpack_canonical(&p, a, s);
3438    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3439}
3440
3441uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3442                                  float_status *s)
3443{
3444    FloatParts64 p;
3445
3446    float16_unpack_canonical(&p, a, s);
3447    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3448}
3449
3450uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3451                                  float_status *s)
3452{
3453    FloatParts64 p;
3454
3455    float32_unpack_canonical(&p, a, s);
3456    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3457}
3458
3459uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3460                                  float_status *s)
3461{
3462    FloatParts64 p;
3463
3464    float32_unpack_canonical(&p, a, s);
3465    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3466}
3467
3468uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3469                                  float_status *s)
3470{
3471    FloatParts64 p;
3472
3473    float32_unpack_canonical(&p, a, s);
3474    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3475}
3476
3477uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3478                                  float_status *s)
3479{
3480    FloatParts64 p;
3481
3482    float64_unpack_canonical(&p, a, s);
3483    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3484}
3485
3486uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3487                                  float_status *s)
3488{
3489    FloatParts64 p;
3490
3491    float64_unpack_canonical(&p, a, s);
3492    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3493}
3494
3495uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3496                                  float_status *s)
3497{
3498    FloatParts64 p;
3499
3500    float64_unpack_canonical(&p, a, s);
3501    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3502}
3503
3504uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3505                                   int scale, float_status *s)
3506{
3507    FloatParts64 p;
3508
3509    bfloat16_unpack_canonical(&p, a, s);
3510    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3511}
3512
3513uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3514                                   int scale, float_status *s)
3515{
3516    FloatParts64 p;
3517
3518    bfloat16_unpack_canonical(&p, a, s);
3519    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3520}
3521
3522uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3523                                   int scale, float_status *s)
3524{
3525    FloatParts64 p;
3526
3527    bfloat16_unpack_canonical(&p, a, s);
3528    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3529}
3530
3531static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3532                                          int scale, float_status *s)
3533{
3534    FloatParts128 p;
3535
3536    float128_unpack_canonical(&p, a, s);
3537    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3538}
3539
3540static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3541                                          int scale, float_status *s)
3542{
3543    FloatParts128 p;
3544
3545    float128_unpack_canonical(&p, a, s);
3546    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3547}
3548
3549static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
3550                                         int scale, float_status *s)
3551{
3552    int flags = 0;
3553    Int128 r;
3554    FloatParts128 p;
3555
3556    float128_unpack_canonical(&p, a, s);
3557
3558    switch (p.cls) {
3559    case float_class_snan:
3560        flags |= float_flag_invalid_snan;
3561        /* fall through */
3562    case float_class_qnan:
3563        flags |= float_flag_invalid;
3564        r = UINT128_MAX;
3565        break;
3566
3567    case float_class_inf:
3568        flags = float_flag_invalid | float_flag_invalid_cvti;
3569        r = p.sign ? int128_zero() : UINT128_MAX;
3570        break;
3571
3572    case float_class_zero:
3573        return int128_zero();
3574
3575    case float_class_normal:
3576        if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
3577            flags = float_flag_inexact;
3578            if (p.cls == float_class_zero) {
3579                r = int128_zero();
3580                break;
3581            }
3582        }
3583
3584        if (p.sign) {
3585            flags = float_flag_invalid | float_flag_invalid_cvti;
3586            r = int128_zero();
3587        } else if (p.exp <= 127) {
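            /*
             * The canonical significand is left-justified in the 128-bit
             * fraction (bit 127 carries weight 2**p.exp), so shift right
             * by 127 - p.exp to bring the radix point down to bit 0.
             */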
3588            int shift = 127 - p.exp;
3589            r = int128_urshift(int128_make128(p.frac_lo, p.frac_hi), shift);
3590        } else {
3591            flags = float_flag_invalid | float_flag_invalid_cvti;
3592            r = UINT128_MAX;
3593        }
3594        break;
3595
3596    default:
3597        g_assert_not_reached();
3598    }
3599
3600    float_raise(flags, s);
3601    return r;
3602}
3603
3604uint8_t float16_to_uint8(float16 a, float_status *s)
3605{
3606    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3607}
3608
3609uint16_t float16_to_uint16(float16 a, float_status *s)
3610{
3611    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3612}
3613
3614uint32_t float16_to_uint32(float16 a, float_status *s)
3615{
3616    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3617}
3618
3619uint64_t float16_to_uint64(float16 a, float_status *s)
3620{
3621    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3622}
3623
3624uint16_t float32_to_uint16(float32 a, float_status *s)
3625{
3626    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3627}
3628
3629uint32_t float32_to_uint32(float32 a, float_status *s)
3630{
3631    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3632}
3633
3634uint64_t float32_to_uint64(float32 a, float_status *s)
3635{
3636    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3637}
3638
3639uint16_t float64_to_uint16(float64 a, float_status *s)
3640{
3641    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3642}
3643
3644uint32_t float64_to_uint32(float64 a, float_status *s)
3645{
3646    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3647}
3648
3649uint64_t float64_to_uint64(float64 a, float_status *s)
3650{
3651    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3652}
3653
3654uint32_t float128_to_uint32(float128 a, float_status *s)
3655{
3656    return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3657}
3658
3659uint64_t float128_to_uint64(float128 a, float_status *s)
3660{
3661    return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3662}
3663
3664Int128 float128_to_uint128(float128 a, float_status *s)
3665{
3666    return float128_to_uint128_scalbn(a, s->float_rounding_mode, 0, s);
3667}
3668
3669uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3670{
3671    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3672}
3673
3674uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3675{
3676    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3677}
3678
3679uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3680{
3681    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3682}
3683
3684uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3685{
3686    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3687}
3688
3689uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3690{
3691    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3692}
3693
3694uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3695{
3696    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3697}
3698
3699uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3700{
3701    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3702}
3703
3704uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3705{
3706    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3707}
3708
3709uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3710{
3711    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3712}
3713
3714uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3715{
3716    return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3717}
3718
3719uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3720{
3721    return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3722}
3723
3724Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s)
3725{
3726    return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s);
3727}
3728
3729uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3730{
3731    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3732}
3733
3734uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3735{
3736    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3737}
3738
3739uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3740{
3741    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3742}
3743
3744uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3745{
3746    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3747}
3748
3749uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3750{
3751    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3752}
3753
3754uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3755{
3756    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3757}
3758
3759/*
3760 * Signed integer to floating-point conversions
3761 */
3762
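/*
 * Here `scale` likewise scales the result by 2**scale, so a negative
 * scale converts from fixed point: int32_to_float32_scalbn(384, -8, s)
 * yields 1.5 (384 / 256).
 */
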
3763float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3764{
3765    FloatParts64 p;
3766
3767    parts_sint_to_float(&p, a, scale, status);
3768    return float16_round_pack_canonical(&p, status);
3769}
3770
3771float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3772{
3773    return int64_to_float16_scalbn(a, scale, status);
3774}
3775
3776float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3777{
3778    return int64_to_float16_scalbn(a, scale, status);
3779}
3780
3781float16 int64_to_float16(int64_t a, float_status *status)
3782{
3783    return int64_to_float16_scalbn(a, 0, status);
3784}
3785
3786float16 int32_to_float16(int32_t a, float_status *status)
3787{
3788    return int64_to_float16_scalbn(a, 0, status);
3789}
3790
3791float16 int16_to_float16(int16_t a, float_status *status)
3792{
3793    return int64_to_float16_scalbn(a, 0, status);
3794}
3795
3796float16 int8_to_float16(int8_t a, float_status *status)
3797{
3798    return int64_to_float16_scalbn(a, 0, status);
3799}
3800
3801float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3802{
3803    FloatParts64 p;
3804
3805    /* Without scaling, there are no overflow concerns. */
3806    if (likely(scale == 0) && can_use_fpu(status)) {
3807        union_float32 ur;
3808        ur.h = a;
3809        return ur.s;
3810    }
3811
3812    parts_sint_to_float(&p, a, scale, status);
3813    return float32_round_pack_canonical(&p, status);
3814}
3815
3816float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3817{
3818    return int64_to_float32_scalbn(a, scale, status);
3819}
3820
3821float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3822{
3823    return int64_to_float32_scalbn(a, scale, status);
3824}
3825
3826float32 int64_to_float32(int64_t a, float_status *status)
3827{
3828    return int64_to_float32_scalbn(a, 0, status);
3829}
3830
3831float32 int32_to_float32(int32_t a, float_status *status)
3832{
3833    return int64_to_float32_scalbn(a, 0, status);
3834}
3835
3836float32 int16_to_float32(int16_t a, float_status *status)
3837{
3838    return int64_to_float32_scalbn(a, 0, status);
3839}
3840
3841float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3842{
3843    FloatParts64 p;
3844
3845    /* Without scaling, there are no overflow concerns. */
3846    if (likely(scale == 0) && can_use_fpu(status)) {
3847        union_float64 ur;
3848        ur.h = a;
3849        return ur.s;
3850    }
3851
3852    parts_sint_to_float(&p, a, scale, status);
3853    return float64_round_pack_canonical(&p, status);
3854}
3855
3856float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3857{
3858    return int64_to_float64_scalbn(a, scale, status);
3859}
3860
3861float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3862{
3863    return int64_to_float64_scalbn(a, scale, status);
3864}
3865
3866float64 int64_to_float64(int64_t a, float_status *status)
3867{
3868    return int64_to_float64_scalbn(a, 0, status);
3869}
3870
3871float64 int32_to_float64(int32_t a, float_status *status)
3872{
3873    return int64_to_float64_scalbn(a, 0, status);
3874}
3875
3876float64 int16_to_float64(int16_t a, float_status *status)
3877{
3878    return int64_to_float64_scalbn(a, 0, status);
3879}
3880
3881bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3882{
3883    FloatParts64 p;
3884
3885    parts_sint_to_float(&p, a, scale, status);
3886    return bfloat16_round_pack_canonical(&p, status);
3887}
3888
3889bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3890{
3891    return int64_to_bfloat16_scalbn(a, scale, status);
3892}
3893
3894bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3895{
3896    return int64_to_bfloat16_scalbn(a, scale, status);
3897}
3898
3899bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3900{
3901    return int64_to_bfloat16_scalbn(a, 0, status);
3902}
3903
3904bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3905{
3906    return int64_to_bfloat16_scalbn(a, 0, status);
3907}
3908
3909bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3910{
3911    return int64_to_bfloat16_scalbn(a, 0, status);
3912}
3913
3914float128 int128_to_float128(Int128 a, float_status *status)
3915{
3916    FloatParts128 p = { };
3917    int shift;
3918
3919    if (int128_nz(a)) {
3920        p.cls = float_class_normal;
3921        if (!int128_nonneg(a)) {
3922            p.sign = true;
3923            a = int128_neg(a);
3924        }
3925
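        /* count the leading zeros of the full 128-bit magnitude */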
3926        shift = clz64(int128_gethi(a));
3927        if (shift == 64) {
3928            shift += clz64(int128_getlo(a));
3929        }
3930
3931        p.exp = 127 - shift;
3932        a = int128_lshift(a, shift);
3933
3934        p.frac_hi = int128_gethi(a);
3935        p.frac_lo = int128_getlo(a);
3936    } else {
3937        p.cls = float_class_zero;
3938    }
3939
3940    return float128_round_pack_canonical(&p, status);
3941}
3942
3943float128 int64_to_float128(int64_t a, float_status *status)
3944{
3945    FloatParts128 p;
3946
3947    parts_sint_to_float(&p, a, 0, status);
3948    return float128_round_pack_canonical(&p, status);
3949}
3950
3951float128 int32_to_float128(int32_t a, float_status *status)
3952{
3953    return int64_to_float128(a, status);
3954}
3955
3956floatx80 int64_to_floatx80(int64_t a, float_status *status)
3957{
3958    FloatParts128 p;
3959
3960    parts_sint_to_float(&p, a, 0, status);
3961    return floatx80_round_pack_canonical(&p, status);
3962}
3963
3964floatx80 int32_to_floatx80(int32_t a, float_status *status)
3965{
3966    return int64_to_floatx80(a, status);
3967}
3968
3969/*
3970 * Unsigned integer to floating-point conversions
3971 */
3972
3973float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3974{
3975    FloatParts64 p;
3976
3977    parts_uint_to_float(&p, a, scale, status);
3978    return float16_round_pack_canonical(&p, status);
3979}
3980
3981float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3982{
3983    return uint64_to_float16_scalbn(a, scale, status);
3984}
3985
3986float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3987{
3988    return uint64_to_float16_scalbn(a, scale, status);
3989}
3990
3991float16 uint64_to_float16(uint64_t a, float_status *status)
3992{
3993    return uint64_to_float16_scalbn(a, 0, status);
3994}
3995
3996float16 uint32_to_float16(uint32_t a, float_status *status)
3997{
3998    return uint64_to_float16_scalbn(a, 0, status);
3999}
4000
4001float16 uint16_to_float16(uint16_t a, float_status *status)
4002{
4003    return uint64_to_float16_scalbn(a, 0, status);
4004}
4005
4006float16 uint8_to_float16(uint8_t a, float_status *status)
4007{
4008    return uint64_to_float16_scalbn(a, 0, status);
4009}
4010
4011float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
4012{
4013    FloatParts64 p;
4014
4015    /* Without scaling, there are no overflow concerns. */
4016    if (likely(scale == 0) && can_use_fpu(status)) {
4017        union_float32 ur;
4018        ur.h = a;
4019        return ur.s;
4020    }
4021
4022    parts_uint_to_float(&p, a, scale, status);
4023    return float32_round_pack_canonical(&p, status);
4024}
4025
4026float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
4027{
4028    return uint64_to_float32_scalbn(a, scale, status);
4029}
4030
4031float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
4032{
4033    return uint64_to_float32_scalbn(a, scale, status);
4034}
4035
4036float32 uint64_to_float32(uint64_t a, float_status *status)
4037{
4038    return uint64_to_float32_scalbn(a, 0, status);
4039}
4040
4041float32 uint32_to_float32(uint32_t a, float_status *status)
4042{
4043    return uint64_to_float32_scalbn(a, 0, status);
4044}
4045
4046float32 uint16_to_float32(uint16_t a, float_status *status)
4047{
4048    return uint64_to_float32_scalbn(a, 0, status);
4049}
4050
4051float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
4052{
4053    FloatParts64 p;
4054
4055    /* Without scaling, there are no overflow concerns. */
4056    if (likely(scale == 0) && can_use_fpu(status)) {
4057        union_float64 ur;
4058        ur.h = a;
4059        return ur.s;
4060    }
4061
4062    parts_uint_to_float(&p, a, scale, status);
4063    return float64_round_pack_canonical(&p, status);
4064}
4065
4066float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
4067{
4068    return uint64_to_float64_scalbn(a, scale, status);
4069}
4070
4071float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
4072{
4073    return uint64_to_float64_scalbn(a, scale, status);
4074}
4075
4076float64 uint64_to_float64(uint64_t a, float_status *status)
4077{
4078    return uint64_to_float64_scalbn(a, 0, status);
4079}
4080
4081float64 uint32_to_float64(uint32_t a, float_status *status)
4082{
4083    return uint64_to_float64_scalbn(a, 0, status);
4084}
4085
4086float64 uint16_to_float64(uint16_t a, float_status *status)
4087{
4088    return uint64_to_float64_scalbn(a, 0, status);
4089}
4090
4091bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
4092{
4093    FloatParts64 p;
4094
4095    parts_uint_to_float(&p, a, scale, status);
4096    return bfloat16_round_pack_canonical(&p, status);
4097}
4098
4099bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
4100{
4101    return uint64_to_bfloat16_scalbn(a, scale, status);
4102}
4103
4104bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
4105{
4106    return uint64_to_bfloat16_scalbn(a, scale, status);
4107}
4108
4109bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
4110{
4111    return uint64_to_bfloat16_scalbn(a, 0, status);
4112}
4113
4114bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
4115{
4116    return uint64_to_bfloat16_scalbn(a, 0, status);
4117}
4118
4119bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
4120{
4121    return uint64_to_bfloat16_scalbn(a, 0, status);
4122}
4123
4124float128 uint64_to_float128(uint64_t a, float_status *status)
4125{
4126    FloatParts128 p;
4127
4128    parts_uint_to_float(&p, a, 0, status);
4129    return float128_round_pack_canonical(&p, status);
4130}
4131
4132float128 uint128_to_float128(Int128 a, float_status *status)
4133{
4134    FloatParts128 p = { };
4135    int shift;
4136
4137    if (int128_nz(a)) {
4138        p.cls = float_class_normal;
4139
4140        shift = clz64(int128_gethi(a));
4141        if (shift == 64) {
4142            shift += clz64(int128_getlo(a));
4143        }
4144
4145        p.exp = 127 - shift;
4146        a = int128_lshift(a, shift);
4147
4148        p.frac_hi = int128_gethi(a);
4149        p.frac_lo = int128_getlo(a);
4150    } else {
4151        p.cls = float_class_zero;
4152    }
4153
4154    return float128_round_pack_canonical(&p, status);
4155}
4156
4157/*
4158 * Minimum and maximum
4159 */
4160
4161static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
4162{
4163    FloatParts64 pa, pb, *pr;
4164
4165    float16_unpack_canonical(&pa, a, s);
4166    float16_unpack_canonical(&pb, b, s);
4167    pr = parts_minmax(&pa, &pb, s, flags);
4168
4169    return float16_round_pack_canonical(pr, s);
4170}
4171
4172static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
4173                                float_status *s, int flags)
4174{
4175    FloatParts64 pa, pb, *pr;
4176
4177    bfloat16_unpack_canonical(&pa, a, s);
4178    bfloat16_unpack_canonical(&pb, b, s);
4179    pr = parts_minmax(&pa, &pb, s, flags);
4180
4181    return bfloat16_round_pack_canonical(pr, s);
4182}
4183
4184static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
4185{
4186    FloatParts64 pa, pb, *pr;
4187
4188    float32_unpack_canonical(&pa, a, s);
4189    float32_unpack_canonical(&pb, b, s);
4190    pr = parts_minmax(&pa, &pb, s, flags);
4191
4192    return float32_round_pack_canonical(pr, s);
4193}
4194
4195static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
4196{
4197    FloatParts64 pa, pb, *pr;
4198
4199    float64_unpack_canonical(&pa, a, s);
4200    float64_unpack_canonical(&pb, b, s);
4201    pr = parts_minmax(&pa, &pb, s, flags);
4202
4203    return float64_round_pack_canonical(pr, s);
4204}
4205
4206static float128 float128_minmax(float128 a, float128 b,
4207                                float_status *s, int flags)
4208{
4209    FloatParts128 pa, pb, *pr;
4210
4211    float128_unpack_canonical(&pa, a, s);
4212    float128_unpack_canonical(&pb, b, s);
4213    pr = parts_minmax(&pa, &pb, s, flags);
4214
4215    return float128_round_pack_canonical(pr, s);
4216}
4217
4218#define MINMAX_1(type, name, flags) \
4219    type type##_##name(type a, type b, float_status *s) \
4220    { return type##_minmax(a, b, s, flags); }
4221
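/*
 * Each MINMAX_1 invocation expands to a one-line public wrapper, e.g.
 * MINMAX_1(float32, max, 0) becomes
 *
 *   float32 float32_max(float32 a, float32 b, float_status *s)
 *   { return float32_minmax(a, b, s, 0); }
 *
 * The flag bits select minimum vs maximum (minmax_ismin), magnitude
 * comparison (minmax_ismag), and the NaN handling of IEEE 754-2008
 * minNum/maxNum (minmax_isnum) or 754-2019 minimumNumber/maximumNumber
 * (minmax_isnumber).
 */
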
4222#define MINMAX_2(type) \
4223    MINMAX_1(type, max, 0)                                                \
4224    MINMAX_1(type, maxnum, minmax_isnum)                                  \
4225    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
4226    MINMAX_1(type, maximum_number, minmax_isnumber)                       \
4227    MINMAX_1(type, min, minmax_ismin)                                     \
4228    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
4229    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
4230    MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
4231
4232MINMAX_2(float16)
4233MINMAX_2(bfloat16)
4234MINMAX_2(float32)
4235MINMAX_2(float64)
4236MINMAX_2(float128)
4237
4238#undef MINMAX_1
4239#undef MINMAX_2
4240
4241/*
4242 * Floating point compare
4243 */
4244
4245static FloatRelation QEMU_FLATTEN
4246float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
4247{
4248    FloatParts64 pa, pb;
4249
4250    float16_unpack_canonical(&pa, a, s);
4251    float16_unpack_canonical(&pb, b, s);
4252    return parts_compare(&pa, &pb, s, is_quiet);
4253}
4254
4255FloatRelation float16_compare(float16 a, float16 b, float_status *s)
4256{
4257    return float16_do_compare(a, b, s, false);
4258}
4259
4260FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
4261{
4262    return float16_do_compare(a, b, s, true);
4263}
4264
4265static FloatRelation QEMU_SOFTFLOAT_ATTR
4266float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
4267{
4268    FloatParts64 pa, pb;
4269
4270    float32_unpack_canonical(&pa, a, s);
4271    float32_unpack_canonical(&pb, b, s);
4272    return parts_compare(&pa, &pb, s, is_quiet);
4273}
4274
4275static FloatRelation QEMU_FLATTEN
4276float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
4277{
4278    union_float32 ua, ub;
4279
4280    ua.s = xa;
4281    ub.s = xb;
4282
4283    if (QEMU_NO_HARDFLOAT) {
4284        goto soft;
4285    }
4286
4287    float32_input_flush2(&ua.s, &ub.s, s);
4288    if (isgreaterequal(ua.h, ub.h)) {
4289        if (isgreater(ua.h, ub.h)) {
4290            return float_relation_greater;
4291        }
4292        return float_relation_equal;
4293    }
4294    if (likely(isless(ua.h, ub.h))) {
4295        return float_relation_less;
4296    }
4297    /*
4298     * The only condition remaining is unordered.
4299     * Fall through to set flags.
4300     */
4301 soft:
4302    return float32_do_compare(ua.s, ub.s, s, is_quiet);
4303}
4304
4305FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4306{
4307    return float32_hs_compare(a, b, s, false);
4308}
4309
4310FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4311{
4312    return float32_hs_compare(a, b, s, true);
4313}
4314
4315static FloatRelation QEMU_SOFTFLOAT_ATTR
4316float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4317{
4318    FloatParts64 pa, pb;
4319
4320    float64_unpack_canonical(&pa, a, s);
4321    float64_unpack_canonical(&pb, b, s);
4322    return parts_compare(&pa, &pb, s, is_quiet);
4323}
4324
4325static FloatRelation QEMU_FLATTEN
4326float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4327{
4328    union_float64 ua, ub;
4329
4330    ua.s = xa;
4331    ub.s = xb;
4332
4333    if (QEMU_NO_HARDFLOAT) {
4334        goto soft;
4335    }
4336
4337    float64_input_flush2(&ua.s, &ub.s, s);
4338    if (isgreaterequal(ua.h, ub.h)) {
4339        if (isgreater(ua.h, ub.h)) {
4340            return float_relation_greater;
4341        }
4342        return float_relation_equal;
4343    }
4344    if (likely(isless(ua.h, ub.h))) {
4345        return float_relation_less;
4346    }
4347    /*
4348     * The only condition remaining is unordered.
4349     * Fall through to set flags.
4350     */
4351 soft:
4352    return float64_do_compare(ua.s, ub.s, s, is_quiet);
4353}
4354
4355FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4356{
4357    return float64_hs_compare(a, b, s, false);
4358}
4359
4360FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4361{
4362    return float64_hs_compare(a, b, s, true);
4363}
4364
4365static FloatRelation QEMU_FLATTEN
4366bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4367{
4368    FloatParts64 pa, pb;
4369
4370    bfloat16_unpack_canonical(&pa, a, s);
4371    bfloat16_unpack_canonical(&pb, b, s);
4372    return parts_compare(&pa, &pb, s, is_quiet);
4373}
4374
4375FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4376{
4377    return bfloat16_do_compare(a, b, s, false);
4378}
4379
4380FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4381{
4382    return bfloat16_do_compare(a, b, s, true);
4383}
4384
4385static FloatRelation QEMU_FLATTEN
4386float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4387{
4388    FloatParts128 pa, pb;
4389
4390    float128_unpack_canonical(&pa, a, s);
4391    float128_unpack_canonical(&pb, b, s);
4392    return parts_compare(&pa, &pb, s, is_quiet);
4393}
4394
4395FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4396{
4397    return float128_do_compare(a, b, s, false);
4398}
4399
4400FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4401{
4402    return float128_do_compare(a, b, s, true);
4403}
4404
4405static FloatRelation QEMU_FLATTEN
4406floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4407{
4408    FloatParts128 pa, pb;
4409
4410    if (!floatx80_unpack_canonical(&pa, a, s) ||
4411        !floatx80_unpack_canonical(&pb, b, s)) {
4412        return float_relation_unordered;
4413    }
4414    return parts_compare(&pa, &pb, s, is_quiet);
4415}
4416
4417FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4418{
4419    return floatx80_do_compare(a, b, s, false);
4420}
4421
4422FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4423{
4424    return floatx80_do_compare(a, b, s, true);
4425}
4426
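/*
 * A usage sketch (the helper is hypothetical; the FloatRelation values
 * come from softfloat-types.h): quiet comparisons return
 * float_relation_unordered for NaN operands, raising invalid only for
 * signalling NaNs.
 */
#if 0
static bool example_unordered(float32 a, float32 b, float_status *st)
{
    return float32_compare_quiet(a, b, st) == float_relation_unordered;
}
#endif
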
4427/*
4428 * Scale by 2**N
4429 */
4430
4431float16 float16_scalbn(float16 a, int n, float_status *status)
4432{
4433    FloatParts64 p;
4434
4435    float16_unpack_canonical(&p, a, status);
4436    parts_scalbn(&p, n, status);
4437    return float16_round_pack_canonical(&p, status);
4438}
4439
4440float32 float32_scalbn(float32 a, int n, float_status *status)
4441{
4442    FloatParts64 p;
4443
4444    float32_unpack_canonical(&p, a, status);
4445    parts_scalbn(&p, n, status);
4446    return float32_round_pack_canonical(&p, status);
4447}
4448
4449float64 float64_scalbn(float64 a, int n, float_status *status)
4450{
4451    FloatParts64 p;
4452
4453    float64_unpack_canonical(&p, a, status);
4454    parts_scalbn(&p, n, status);
4455    return float64_round_pack_canonical(&p, status);
4456}
4457
4458bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4459{
4460    FloatParts64 p;
4461
4462    bfloat16_unpack_canonical(&p, a, status);
4463    parts_scalbn(&p, n, status);
4464    return bfloat16_round_pack_canonical(&p, status);
4465}
4466
4467float128 float128_scalbn(float128 a, int n, float_status *status)
4468{
4469    FloatParts128 p;
4470
4471    float128_unpack_canonical(&p, a, status);
4472    parts_scalbn(&p, n, status);
4473    return float128_round_pack_canonical(&p, status);
4474}
4475
4476floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4477{
4478    FloatParts128 p;
4479
4480    if (!floatx80_unpack_canonical(&p, a, status)) {
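        /* invalid encodings (e.g. x87 unnormals) produce the default NaN */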
4481        return floatx80_default_nan(status);
4482    }
4483    parts_scalbn(&p, n, status);
4484    return floatx80_round_pack_canonical(&p, status);
4485}
4486
4487/*
4488 * Square Root
4489 */
4490
4491float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4492{
4493    FloatParts64 p;
4494
4495    float16_unpack_canonical(&p, a, status);
4496    parts_sqrt(&p, status, &float16_params);
4497    return float16_round_pack_canonical(&p, status);
4498}
4499
4500static float32 QEMU_SOFTFLOAT_ATTR
4501soft_f32_sqrt(float32 a, float_status *status)
4502{
4503    FloatParts64 p;
4504
4505    float32_unpack_canonical(&p, a, status);
4506    parts_sqrt(&p, status, &float32_params);
4507    return float32_round_pack_canonical(&p, status);
4508}
4509
4510static float64 QEMU_SOFTFLOAT_ATTR
4511soft_f64_sqrt(float64 a, float_status *status)
4512{
4513    FloatParts64 p;
4514
4515    float64_unpack_canonical(&p, a, status);
4516    parts_sqrt(&p, status, &float64_params);
4517    return float64_round_pack_canonical(&p, status);
4518}
4519
4520float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4521{
4522    union_float32 ua, ur;
4523
4524    ua.s = xa;
4525    if (unlikely(!can_use_fpu(s))) {
4526        goto soft;
4527    }
4528
4529    float32_input_flush1(&ua.s, s);
4530    if (QEMU_HARDFLOAT_1F32_USE_FP) {
4531        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4532                       fpclassify(ua.h) == FP_ZERO) ||
4533                     signbit(ua.h))) {
4534            goto soft;
4535        }
4536    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4537                        float32_is_neg(ua.s))) {
4538        goto soft;
4539    }
4540    ur.h = sqrtf(ua.h);
4541    return ur.s;
4542
4543 soft:
4544    return soft_f32_sqrt(ua.s, s);
4545}
4546
4547float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4548{
4549    union_float64 ua, ur;
4550
4551    ua.s = xa;
4552    if (unlikely(!can_use_fpu(s))) {
4553        goto soft;
4554    }
4555
4556    float64_input_flush1(&ua.s, s);
4557    if (QEMU_HARDFLOAT_1F64_USE_FP) {
4558        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4559                       fpclassify(ua.h) == FP_ZERO) ||
4560                     signbit(ua.h))) {
4561            goto soft;
4562        }
4563    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4564                        float64_is_neg(ua.s))) {
4565        goto soft;
4566    }
4567    ur.h = sqrt(ua.h);
4568    return ur.s;
4569
4570 soft:
4571    return soft_f64_sqrt(ua.s, s);
4572}
4573
4574float64 float64r32_sqrt(float64 a, float_status *status)
4575{
4576    FloatParts64 p;
4577
4578    float64_unpack_canonical(&p, a, status);
4579    parts_sqrt(&p, status, &float64_params);
4580    return float64r32_round_pack_canonical(&p, status);
4581}
4582
4583bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4584{
4585    FloatParts64 p;
4586
4587    bfloat16_unpack_canonical(&p, a, status);
4588    parts_sqrt(&p, status, &bfloat16_params);
4589    return bfloat16_round_pack_canonical(&p, status);
4590}
4591
4592float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4593{
4594    FloatParts128 p;
4595
4596    float128_unpack_canonical(&p, a, status);
4597    parts_sqrt(&p, status, &float128_params);
4598    return float128_round_pack_canonical(&p, status);
4599}
4600
4601floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4602{
4603    FloatParts128 p;
4604
4605    if (!floatx80_unpack_canonical(&p, a, s)) {
4606        return floatx80_default_nan(s);
4607    }
4608    parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4609    return floatx80_round_pack_canonical(&p, s);
4610}
4611
4612/*
4613 * log2
4614 */

4615float32 float32_log2(float32 a, float_status *status)
4616{
4617    FloatParts64 p;
4618
4619    float32_unpack_canonical(&p, a, status);
4620    parts_log2(&p, status, &float32_params);
4621    return float32_round_pack_canonical(&p, status);
4622}
4623
4624float64 float64_log2(float64 a, float_status *status)
4625{
4626    FloatParts64 p;
4627
4628    float64_unpack_canonical(&p, a, status);
4629    parts_log2(&p, status, &float64_params);
4630    return float64_round_pack_canonical(&p, status);
4631}
4632
4633/*----------------------------------------------------------------------------
4634| The pattern for a default generated NaN.
4635*----------------------------------------------------------------------------*/
4636
4637float16 float16_default_nan(float_status *status)
4638{
4639    FloatParts64 p;
4640
4641    parts_default_nan(&p, status);
4642    p.frac >>= float16_params.frac_shift;
4643    return float16_pack_raw(&p);
4644}
4645
4646float32 float32_default_nan(float_status *status)
4647{
4648    FloatParts64 p;
4649
4650    parts_default_nan(&p, status);
4651    p.frac >>= float32_params.frac_shift;
4652    return float32_pack_raw(&p);
4653}
4654
4655float64 float64_default_nan(float_status *status)
4656{
4657    FloatParts64 p;
4658
4659    parts_default_nan(&p, status);
4660    p.frac >>= float64_params.frac_shift;
4661    return float64_pack_raw(&p);
4662}
4663
4664float128 float128_default_nan(float_status *status)
4665{
4666    FloatParts128 p;
4667
4668    parts_default_nan(&p, status);
4669    frac_shr(&p, float128_params.frac_shift);
4670    return float128_pack_raw(&p);
4671}
4672
4673bfloat16 bfloat16_default_nan(float_status *status)
4674{
4675    FloatParts64 p;
4676
4677    parts_default_nan(&p, status);
4678    p.frac >>= bfloat16_params.frac_shift;
4679    return bfloat16_pack_raw(&p);
4680}
4681
4682/*----------------------------------------------------------------------------
4683| Returns a quiet NaN from a signalling NaN for the floating-point value `a'.
4684*----------------------------------------------------------------------------*/
4685
4686float16 float16_silence_nan(float16 a, float_status *status)
4687{
4688    FloatParts64 p;
4689
4690    float16_unpack_raw(&p, a);
4691    p.frac <<= float16_params.frac_shift;
4692    parts_silence_nan(&p, status);
4693    p.frac >>= float16_params.frac_shift;
4694    return float16_pack_raw(&p);
4695}
4696
4697float32 float32_silence_nan(float32 a, float_status *status)
4698{
4699    FloatParts64 p;
4700
4701    float32_unpack_raw(&p, a);
4702    p.frac <<= float32_params.frac_shift;
4703    parts_silence_nan(&p, status);
4704    p.frac >>= float32_params.frac_shift;
4705    return float32_pack_raw(&p);
4706}
4707
4708float64 float64_silence_nan(float64 a, float_status *status)
4709{
4710    FloatParts64 p;
4711
4712    float64_unpack_raw(&p, a);
4713    p.frac <<= float64_params.frac_shift;
4714    parts_silence_nan(&p, status);
4715    p.frac >>= float64_params.frac_shift;
4716    return float64_pack_raw(&p);
4717}
4718
4719bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4720{
4721    FloatParts64 p;
4722
4723    bfloat16_unpack_raw(&p, a);
4724    p.frac <<= bfloat16_params.frac_shift;
4725    parts_silence_nan(&p, status);
4726    p.frac >>= bfloat16_params.frac_shift;
4727    return bfloat16_pack_raw(&p);
4728}
4729
4730float128 float128_silence_nan(float128 a, float_status *status)
4731{
4732    FloatParts128 p;
4733
4734    float128_unpack_raw(&p, a);
4735    frac_shl(&p, float128_params.frac_shift);
4736    parts_silence_nan(&p, status);
4737    frac_shr(&p, float128_params.frac_shift);
4738    return float128_pack_raw(&p);
4739}
4740
4741/*----------------------------------------------------------------------------
4742| If `a' is denormal and we are in flush-to-zero mode then set the
4743| input-denormal exception and return zero. Otherwise just return the value.
4744*----------------------------------------------------------------------------*/
4745
4746static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4747{
4748    if (p.exp == 0 && p.frac != 0) {
4749        float_raise(float_flag_input_denormal, status);
4750        return true;
4751    }
4752
4753    return false;
4754}
4755
4756float16 float16_squash_input_denormal(float16 a, float_status *status)
4757{
4758    if (status->flush_inputs_to_zero) {
4759        FloatParts64 p;
4760
4761        float16_unpack_raw(&p, a);
4762        if (parts_squash_denormal(p, status)) {
4763            return float16_set_sign(float16_zero, p.sign);
4764        }
4765    }
4766    return a;
4767}
4768
4769float32 float32_squash_input_denormal(float32 a, float_status *status)
4770{
4771    if (status->flush_inputs_to_zero) {
4772        FloatParts64 p;
4773
4774        float32_unpack_raw(&p, a);
4775        if (parts_squash_denormal(p, status)) {
4776            return float32_set_sign(float32_zero, p.sign);
4777        }
4778    }
4779    return a;
4780}
4781
4782float64 float64_squash_input_denormal(float64 a, float_status *status)
4783{
4784    if (status->flush_inputs_to_zero) {
4785        FloatParts64 p;
4786
4787        float64_unpack_raw(&p, a);
4788        if (parts_squash_denormal(p, status)) {
4789            return float64_set_sign(float64_zero, p.sign);
4790        }
4791    }
4792    return a;
4793}
4794
4795bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4796{
4797    if (status->flush_inputs_to_zero) {
4798        FloatParts64 p;
4799
4800        bfloat16_unpack_raw(&p, a);
4801        if (parts_squash_denormal(p, status)) {
4802            return bfloat16_set_sign(bfloat16_zero, p.sign);
4803        }
4804    }
4805    return a;
4806}
4807
4808/*----------------------------------------------------------------------------
4809| Normalizes the subnormal extended double-precision floating-point value
4810| represented by the denormalized significand `aSig'.  The normalized exponent
4811| and significand are stored at the locations pointed to by `zExpPtr' and
4812| `zSigPtr', respectively.
4813*----------------------------------------------------------------------------*/
4814
4815void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4816                                uint64_t *zSigPtr)
4817{
4818    int8_t shiftCount;
4819
4820    shiftCount = clz64(aSig);
4821    *zSigPtr = aSig<<shiftCount;
4822    *zExpPtr = 1 - shiftCount;
4823}
4824
4825/*----------------------------------------------------------------------------
4826| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4827| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4828| and returns the proper extended double-precision floating-point value
4829| corresponding to the abstract input.  Ordinarily, the abstract value is
4830| rounded and packed into the extended double-precision format, with the
4831| inexact exception raised if the abstract input cannot be represented
4832| exactly.  However, if the abstract value is too large, the overflow and
4833| inexact exceptions are raised and an infinity or maximal finite value is
4834| returned.  If the abstract value is too small, the input value is rounded to
4835| a subnormal number, and the underflow and inexact exceptions are raised if
4836| the abstract input cannot be represented exactly as a subnormal extended
4837| double-precision floating-point number.
4838|     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4839| the result is rounded to the same number of bits as single or double
4840| precision, respectively.  Otherwise, the result is rounded to the full
4841| precision of the extended double-precision format.
4842|     The input significand must be normalized or smaller.  If the input
4843| significand is not normalized, `zExp' must be 0; in that case, the result
4844| returned is a subnormal number, and it must not require rounding.  The
4845| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4846| Floating-Point Arithmetic.
4847*----------------------------------------------------------------------------*/
4848
4849floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4850                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4851                              float_status *status)
4852{
4853    FloatRoundMode roundingMode;
4854    bool roundNearestEven, increment, isTiny;
4855    int64_t roundIncrement, roundMask, roundBits;
4856
4857    roundingMode = status->float_rounding_mode;
4858    roundNearestEven = ( roundingMode == float_round_nearest_even );
4859    switch (roundingPrecision) {
4860    case floatx80_precision_x:
4861        goto precision80;
4862    case floatx80_precision_d:
4863        roundIncrement = UINT64_C(0x0000000000000400);
4864        roundMask = UINT64_C(0x00000000000007FF);
4865        break;
4866    case floatx80_precision_s:
4867        roundIncrement = UINT64_C(0x0000008000000000);
4868        roundMask = UINT64_C(0x000000FFFFFFFFFF);
4869        break;
4870    default:
4871        g_assert_not_reached();
4872    }
4873    zSig0 |= ( zSig1 != 0 );
4874    switch (roundingMode) {
4875    case float_round_nearest_even:
4876    case float_round_ties_away:
4877        break;
4878    case float_round_to_zero:
4879        roundIncrement = 0;
4880        break;
4881    case float_round_up:
4882        roundIncrement = zSign ? 0 : roundMask;
4883        break;
4884    case float_round_down:
4885        roundIncrement = zSign ? roundMask : 0;
4886        break;
4887    default:
4888        abort();
4889    }
4890    roundBits = zSig0 & roundMask;
4891    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4892        if (    ( 0x7FFE < zExp )
4893             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4894           ) {
4895            goto overflow;
4896        }
4897        if ( zExp <= 0 ) {
4898            if (status->flush_to_zero) {
4899                float_raise(float_flag_output_denormal, status);
4900                return packFloatx80(zSign, 0, 0);
4901            }
4902            isTiny = status->tininess_before_rounding
4903                  || (zExp < 0 )
4904                  || (zSig0 <= zSig0 + roundIncrement);
4905            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4906            zExp = 0;
4907            roundBits = zSig0 & roundMask;
4908            if (isTiny && roundBits) {
4909                float_raise(float_flag_underflow, status);
4910            }
4911            if (roundBits) {
4912                float_raise(float_flag_inexact, status);
4913            }
4914            zSig0 += roundIncrement;
4915            if ( (int64_t) zSig0 < 0 ) zExp = 1;
4916            roundIncrement = roundMask + 1;
4917            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4918                roundMask |= roundIncrement;
4919            }
4920            zSig0 &= ~ roundMask;
4921            return packFloatx80( zSign, zExp, zSig0 );
4922        }
4923    }
4924    if (roundBits) {
4925        float_raise(float_flag_inexact, status);
4926    }
4927    zSig0 += roundIncrement;
4928    if ( zSig0 < roundIncrement ) {
4929        ++zExp;
4930        zSig0 = UINT64_C(0x8000000000000000);
4931    }
4932    roundIncrement = roundMask + 1;
4933    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4934        roundMask |= roundIncrement;
4935    }
4936    zSig0 &= ~ roundMask;
4937    if ( zSig0 == 0 ) zExp = 0;
4938    return packFloatx80( zSign, zExp, zSig0 );
4939 precision80:
4940    switch (roundingMode) {
4941    case float_round_nearest_even:
4942    case float_round_ties_away:
4943        increment = ((int64_t)zSig1 < 0);
4944        break;
4945    case float_round_to_zero:
4946        increment = 0;
4947        break;
4948    case float_round_up:
4949        increment = !zSign && zSig1;
4950        break;
4951    case float_round_down:
4952        increment = zSign && zSig1;
4953        break;
4954    default:
4955        abort();
4956    }
4957    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4958        if (    ( 0x7FFE < zExp )
4959             || (    ( zExp == 0x7FFE )
4960                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4961                  && increment
4962                )
4963           ) {
4964            roundMask = 0;
4965 overflow:
4966            float_raise(float_flag_overflow | float_flag_inexact, status);
4967            if (    ( roundingMode == float_round_to_zero )
4968                 || ( zSign && ( roundingMode == float_round_up ) )
4969                 || ( ! zSign && ( roundingMode == float_round_down ) )
4970               ) {
4971                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4972            }
4973            return packFloatx80(zSign,
4974                                floatx80_infinity_high,
4975                                floatx80_infinity_low);
4976        }
4977        if ( zExp <= 0 ) {
4978            isTiny = status->tininess_before_rounding
4979                  || (zExp < 0)
4980                  || !increment
4981                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4982            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4983            zExp = 0;
4984            if (isTiny && zSig1) {
4985                float_raise(float_flag_underflow, status);
4986            }
4987            if (zSig1) {
4988                float_raise(float_flag_inexact, status);
4989            }
4990            switch (roundingMode) {
4991            case float_round_nearest_even:
4992            case float_round_ties_away:
4993                increment = ((int64_t)zSig1 < 0);
4994                break;
4995            case float_round_to_zero:
4996                increment = 0;
4997                break;
4998            case float_round_up:
4999                increment = !zSign && zSig1;
5000                break;
5001            case float_round_down:
5002                increment = zSign && zSig1;
5003                break;
5004            default:
5005                abort();
5006            }
5007            if ( increment ) {
5008                ++zSig0;
5009                if (!(zSig1 << 1) && roundNearestEven) {
5010                    zSig0 &= ~1;
5011                }
5012                if ( (int64_t) zSig0 < 0 ) zExp = 1;
5013            }
5014            return packFloatx80( zSign, zExp, zSig0 );
5015        }
5016    }
5017    if (zSig1) {
5018        float_raise(float_flag_inexact, status);
5019    }
5020    if ( increment ) {
5021        ++zSig0;
5022        if ( zSig0 == 0 ) {
5023            ++zExp;
5024            zSig0 = UINT64_C(0x8000000000000000);
5025        }
5026        else {
5027            if (!(zSig1 << 1) && roundNearestEven) {
5028                zSig0 &= ~1;
5029            }
5030        }
5031    }
5032    else {
5033        if ( zSig0 == 0 ) zExp = 0;
5034    }
5035    return packFloatx80( zSign, zExp, zSig0 );
5036
5037}
5038
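/*
 * A usage sketch of the helper above (hypothetical caller): with bias
 * 0x3FFF, zExp = 0x3FFE and zSig0 = 0xC000000000000000 represent
 * 1.5 * 2**-1 = 0.75, which is exact at full precision, so this rounds
 * and packs without raising any flag.
 */
#if 0
static floatx80 example_pack_0_75(float_status *st)
{
    return roundAndPackFloatx80(floatx80_precision_x, false, 0x3FFE,
                                0xC000000000000000ULL, 0, st);
}
#endif
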
5039/*----------------------------------------------------------------------------
5040| Takes an abstract floating-point value having sign `zSign', exponent
5041| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
5042| and returns the proper extended double-precision floating-point value
5043| corresponding to the abstract input.  This routine is just like
5044| `roundAndPackFloatx80' except that the input significand does not have to be
5045| normalized.
5046*----------------------------------------------------------------------------*/
5047
5048floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
5049                                       bool zSign, int32_t zExp,
5050                                       uint64_t zSig0, uint64_t zSig1,
5051                                       float_status *status)
5052{
5053    int8_t shiftCount;
5054
5055    if ( zSig0 == 0 ) {
5056        zSig0 = zSig1;
5057        zSig1 = 0;
5058        zExp -= 64;
5059    }
5060    shiftCount = clz64(zSig0);
5061    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
5062    zExp -= shiftCount;
5063    return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
5064                                zSig0, zSig1, status);
5065
5066}
5067
5068/*----------------------------------------------------------------------------
5069| Returns the binary exponential of the single-precision floating-point value
5070| `a'. The operation is performed according to the IEC/IEEE Standard for
5071| Binary Floating-Point Arithmetic.
5072|
5073| Uses the following identities:
5074|
5075| 1. -------------------------------------------------------------------------
5076|      x    x*ln(2)
5077|     2  = e
5078|
5079| 2. -------------------------------------------------------------------------
5080|                      2     3     4     5           n
5081|      x        x     x     x     x     x           x
5082|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
5083|               1!    2!    3!    4!    5!          n!
5084*----------------------------------------------------------------------------*/
5085
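/* Taylor coefficients 1/n! for n = 1..15, as IEEE double bit patterns. */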
5086static const float64 float32_exp2_coefficients[15] =
5087{
5088    const_float64( 0x3ff0000000000000ll ), /*  1 */
5089    const_float64( 0x3fe0000000000000ll ), /*  2 */
5090    const_float64( 0x3fc5555555555555ll ), /*  3 */
5091    const_float64( 0x3fa5555555555555ll ), /*  4 */
5092    const_float64( 0x3f81111111111111ll ), /*  5 */
5093    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
5094    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
5095    const_float64( 0x3efa01a01a01a01all ), /*  8 */
5096    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
5097    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
5098    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
5099    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
5100    const_float64( 0x3de6124613a86d09ll ), /* 13 */
5101    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
5102    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
5103};
5104
5105float32 float32_exp2(float32 a, float_status *status)
5106{
5107    FloatParts64 xp, xnp, tp, rp;
5108    int i;
5109
5110    float32_unpack_canonical(&xp, a, status);
5111    if (unlikely(xp.cls != float_class_normal)) {
5112        switch (xp.cls) {
5113        case float_class_snan:
5114        case float_class_qnan:
5115            parts_return_nan(&xp, status);
5116            return float32_round_pack_canonical(&xp, status);
5117        case float_class_inf:
5118            return xp.sign ? float32_zero : a;
5119        case float_class_zero:
5120            return float32_one;
5121        default:
5122            break;
5123        }
5124        g_assert_not_reached();
5125    }
5126
5127    float_raise(float_flag_inexact, status);
5128
5129    float64_unpack_canonical(&tp, float64_ln2, status);
5130    xp = *parts_mul(&xp, &tp, status);
5131    xnp = xp;
5132
5133    float64_unpack_canonical(&rp, float64_one, status);
5134    for (i = 0 ; i < 15 ; i++) {
5135        float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
            /* accumulate x**(i+1) / (i+1)!; xnp tracks the rising power of x */
5136        rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
5137        xnp = *parts_mul(&xnp, &xp, status);
5138    }
5139
5140    return float32_round_pack_canonical(&rp, status);
5141}
5142
5143/*----------------------------------------------------------------------------
5144| Rounds the extended double-precision floating-point value `a'
5145| to the precision provided by floatx80_rounding_precision and returns the
5146| result as an extended double-precision floating-point value.
5147| The operation is performed according to the IEC/IEEE Standard for Binary
5148| Floating-Point Arithmetic.
5149*----------------------------------------------------------------------------*/
5150
5151floatx80 floatx80_round(floatx80 a, float_status *status)
5152{
5153    FloatParts128 p;
5154
5155    if (!floatx80_unpack_canonical(&p, a, status)) {
5156        return floatx80_default_nan(status);
5157    }
5158    return floatx80_round_pack_canonical(&p, status);
5159}
5160
5161static void __attribute__((constructor)) softfloat_init(void)
5162{
5163    union_float64 ua, ub, uc, ur;
5164
5165    if (QEMU_NO_HARDFLOAT) {
5166        return;
5167    }
5168    /*
5169     * Test that the host's FMA is not obviously broken. For example,
5170     * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
5171     *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
5172     */
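    /*
     * Concretely: a = 0x1.0000000000001p-1021, b = 0x1p-53 and
     * c = 0x1p-1021, so a*b slightly exceeds half an ulp of c; a fused
     * multiply-add rounds the sum up to 0x0020000000000001, while a
     * double-rounded multiply-then-add loses that bit and returns
     * 0x0020000000000000.
     */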
5173    ua.s = 0x0020000000000001ULL;
5174    ub.s = 0x3ca0000000000000ULL;
5175    uc.s = 0x0020000000000000ULL;
5176    ur.h = fma(ua.h, ub.h, uc.h);
5177    if (ur.s != 0x0020000000000001ULL) {
5178        force_soft_fma = true;
5179    }
5180}
5181