qemu/fpu/softfloat.c
   1/*
   2 * QEMU float support
   3 *
   4 * The code in this source file is derived from release 2a of the SoftFloat
   5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6 * some later contributions) are provided under that license, as detailed below.
   7 * It has subsequently been modified by contributors to the QEMU Project,
   8 * so some portions are provided under:
   9 *  the SoftFloat-2a license
  10 *  the BSD license
  11 *  GPL-v2-or-later
  12 *
  13 * Any future contributions to this file after December 1st 2014 will be
  14 * taken to be licensed under the Softfloat-2a license unless specifically
  15 * indicated otherwise.
  16 */
  17
  18/*
  19===============================================================================
  20This C source file is part of the SoftFloat IEC/IEEE Floating-point
  21Arithmetic Package, Release 2a.
  22
  23Written by John R. Hauser.  This work was made possible in part by the
  24International Computer Science Institute, located at Suite 600, 1947 Center
  25Street, Berkeley, California 94704.  Funding was partially provided by the
  26National Science Foundation under grant MIP-9311980.  The original version
  27of this code was written as part of a project to build a fixed-point vector
  28processor in collaboration with the University of California at Berkeley,
  29overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  31arithmetic/SoftFloat.html'.
  32
  33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  35TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  38
  39Derivative works are acceptable, even for commercial purposes, so long as
  40(1) they include prominent notice that the work is derivative, and (2) they
  41include prominent notice akin to these four paragraphs for those parts of
  42this code that are retained.
  43
  44===============================================================================
  45*/
  46
  47/* BSD licensing:
  48 * Copyright (c) 2006, Fabrice Bellard
  49 * All rights reserved.
  50 *
  51 * Redistribution and use in source and binary forms, with or without
  52 * modification, are permitted provided that the following conditions are met:
  53 *
  54 * 1. Redistributions of source code must retain the above copyright notice,
  55 * this list of conditions and the following disclaimer.
  56 *
  57 * 2. Redistributions in binary form must reproduce the above copyright notice,
  58 * this list of conditions and the following disclaimer in the documentation
  59 * and/or other materials provided with the distribution.
  60 *
  61 * 3. Neither the name of the copyright holder nor the names of its contributors
  62 * may be used to endorse or promote products derived from this software without
  63 * specific prior written permission.
  64 *
  65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  75 * THE POSSIBILITY OF SUCH DAMAGE.
  76 */
  77
  78/* Portions of this work are licensed under the terms of the GNU GPL,
  79 * version 2 or later. See the COPYING file in the top-level directory.
  80 */
  81
  82/* softfloat (and in particular the code in softfloat-specialize.c.inc) is
  83 * target-dependent and needs the TARGET_* macros.
  84 */
  85#include "qemu/osdep.h"
  86#include <math.h>
  87#include "qemu/bitops.h"
  88#include "fpu/softfloat.h"
  89
  91/* We only need stdlib for abort(); qemu/osdep.h already includes it. */
  91
  92/*----------------------------------------------------------------------------
  93| Primitive arithmetic functions, including multi-word arithmetic, and
  94| division and square root approximations.  (Can be specialized to target if
  95| desired.)
  96*----------------------------------------------------------------------------*/
  97#include "fpu/softfloat-macros.h"
  98
  99/*
 100 * Hardfloat
 101 *
 102 * Fast emulation of guest FP instructions is challenging for two reasons.
 103 * First, FP instruction semantics are similar but not identical, particularly
 104 * when handling NaNs. Second, emulating the guest FP exception flags at
 105 * reasonable speed is not trivial: reading the host's flags register with a
 106 * feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
 107 * and trapping on every FP exception is neither fast nor pleasant to work with.
 108 *
 109 * We address these challenges by leveraging the host FPU for a subset of the
 110 * operations. To do this we expand on the idea presented in this paper:
 111 *
 112 * Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
 113 * binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
 114 *
 115 * The idea is thus to leverage the host FPU to (1) compute FP operations
 116 * and (2) identify whether FP exceptions occurred while avoiding
 117 * expensive exception flag register accesses.
 118 *
 119 * An important optimization shown in the paper is that given that exception
 120 * flags are rarely cleared by the guest, we can avoid recomputing some flags.
 121 * This is particularly useful for the inexact flag, which is very frequently
 122 * raised in floating-point workloads.
 123 *
 124 * We optimize the code further by deferring to soft-fp whenever FP exception
 125 * detection might get hairy. Two examples: (1) when at least one operand is
 126 * denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
 127 * and the result is < the minimum normal.
 128 */
 129#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t)                          \
 130    static inline void name(soft_t *a, float_status *s)                 \
 131    {                                                                   \
 132        if (unlikely(soft_t ## _is_denormal(*a))) {                     \
 133            *a = soft_t ## _set_sign(soft_t ## _zero,                   \
 134                                     soft_t ## _is_neg(*a));            \
 135            float_raise(float_flag_input_denormal, s);                  \
 136        }                                                               \
 137    }
 138
 139GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
 140GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
 141#undef GEN_INPUT_FLUSH__NOCHECK
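/*
 * For reference, the float32 instantiation above expands (via ## token
 * pasting) into a helper equivalent to:
 *
 *   static inline void float32_input_flush__nocheck(float32 *a,
 *                                                   float_status *s)
 *   {
 *       if (unlikely(float32_is_denormal(*a))) {
 *           *a = float32_set_sign(float32_zero, float32_is_neg(*a));
 *           float_raise(float_flag_input_denormal, s);
 *       }
 *   }
 */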
 142
 143#define GEN_INPUT_FLUSH1(name, soft_t)                  \
 144    static inline void name(soft_t *a, float_status *s) \
 145    {                                                   \
 146        if (likely(!s->flush_inputs_to_zero)) {         \
 147            return;                                     \
 148        }                                               \
 149        soft_t ## _input_flush__nocheck(a, s);          \
 150    }
 151
 152GEN_INPUT_FLUSH1(float32_input_flush1, float32)
 153GEN_INPUT_FLUSH1(float64_input_flush1, float64)
 154#undef GEN_INPUT_FLUSH1
 155
 156#define GEN_INPUT_FLUSH2(name, soft_t)                                  \
 157    static inline void name(soft_t *a, soft_t *b, float_status *s)      \
 158    {                                                                   \
 159        if (likely(!s->flush_inputs_to_zero)) {                         \
 160            return;                                                     \
 161        }                                                               \
 162        soft_t ## _input_flush__nocheck(a, s);                          \
 163        soft_t ## _input_flush__nocheck(b, s);                          \
 164    }
 165
 166GEN_INPUT_FLUSH2(float32_input_flush2, float32)
 167GEN_INPUT_FLUSH2(float64_input_flush2, float64)
 168#undef GEN_INPUT_FLUSH2
 169
 170#define GEN_INPUT_FLUSH3(name, soft_t)                                  \
 171    static inline void name(soft_t *a, soft_t *b, soft_t *c, float_status *s) \
 172    {                                                                   \
 173        if (likely(!s->flush_inputs_to_zero)) {                         \
 174            return;                                                     \
 175        }                                                               \
 176        soft_t ## _input_flush__nocheck(a, s);                          \
 177        soft_t ## _input_flush__nocheck(b, s);                          \
 178        soft_t ## _input_flush__nocheck(c, s);                          \
 179    }
 180
 181GEN_INPUT_FLUSH3(float32_input_flush3, float32)
 182GEN_INPUT_FLUSH3(float64_input_flush3, float64)
 183#undef GEN_INPUT_FLUSH3
 184
 185/*
 186 * Choose whether to use fpclassify or float32/64_* primitives in the generated
 187 * hardfloat functions. Each combination of number of inputs and float size
 188 * gets its own value.
 189 */
 190#if defined(__x86_64__)
 191# define QEMU_HARDFLOAT_1F32_USE_FP 0
 192# define QEMU_HARDFLOAT_1F64_USE_FP 1
 193# define QEMU_HARDFLOAT_2F32_USE_FP 0
 194# define QEMU_HARDFLOAT_2F64_USE_FP 1
 195# define QEMU_HARDFLOAT_3F32_USE_FP 0
 196# define QEMU_HARDFLOAT_3F64_USE_FP 1
 197#else
 198# define QEMU_HARDFLOAT_1F32_USE_FP 0
 199# define QEMU_HARDFLOAT_1F64_USE_FP 0
 200# define QEMU_HARDFLOAT_2F32_USE_FP 0
 201# define QEMU_HARDFLOAT_2F64_USE_FP 0
 202# define QEMU_HARDFLOAT_3F32_USE_FP 0
 203# define QEMU_HARDFLOAT_3F64_USE_FP 0
 204#endif
 205
 206/*
 207 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 208 * float{32,64}_is_infinity when !USE_FP.
 209 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 210 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 211 */
 212#if defined(__x86_64__) || defined(__aarch64__)
 213# define QEMU_HARDFLOAT_USE_ISINF   1
 214#else
 215# define QEMU_HARDFLOAT_USE_ISINF   0
 216#endif
 217
 218/*
 219 * Some targets clear the FP flags before most FP operations. This prevents
 220 * the use of hardfloat, since hardfloat relies on the inexact flag being
 221 * already set.
 222 */
 223#if defined(TARGET_PPC) || defined(__FAST_MATH__)
 224# if defined(__FAST_MATH__)
 225#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
 226    IEEE implementation
 227# endif
 228# define QEMU_NO_HARDFLOAT 1
 229# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
 230#else
 231# define QEMU_NO_HARDFLOAT 0
 232# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
 233#endif
 234
 235static inline bool can_use_fpu(const float_status *s)
 236{
 237    if (QEMU_NO_HARDFLOAT) {
 238        return false;
 239    }
 240    return likely(s->float_exception_flags & float_flag_inexact &&
 241                  s->float_rounding_mode == float_round_nearest_even);
 242}
 243
 244/*
 245 * Hardfloat generation functions. Each operation can have two flavors:
 246 * either using softfloat primitives (e.g. float32_is_zero_or_normal) for
 247 * most condition checks, or native ones (e.g. fpclassify).
 248 *
 249 * The flavor is chosen by the callers. Instead of using macros, we rely on the
 250 * compiler to propagate constants and inline everything into the callers.
 251 *
 252 * We only generate functions for operations with two inputs, since only
 253 * these are common enough to justify consolidating them into common code.
 254 */
 255
 256typedef union {
 257    float32 s;
 258    float h;
 259} union_float32;
 260
 261typedef union {
 262    float64 s;
 263    double h;
 264} union_float64;
 265
 266typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
 267typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
 268
 269typedef float32 (*soft_f32_op2_fn)(float32 a, float32 b, float_status *s);
 270typedef float64 (*soft_f64_op2_fn)(float64 a, float64 b, float_status *s);
 271typedef float   (*hard_f32_op2_fn)(float a, float b);
 272typedef double  (*hard_f64_op2_fn)(double a, double b);
 273
 274/* 2-input is-zero-or-normal */
 275static inline bool f32_is_zon2(union_float32 a, union_float32 b)
 276{
 277    if (QEMU_HARDFLOAT_2F32_USE_FP) {
 278        /*
 279         * Not using a temp variable for consecutive fpclassify calls ends up
 280         * generating faster code.
 281         */
 282        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 283               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 284    }
 285    return float32_is_zero_or_normal(a.s) &&
 286           float32_is_zero_or_normal(b.s);
 287}
 288
 289static inline bool f64_is_zon2(union_float64 a, union_float64 b)
 290{
 291    if (QEMU_HARDFLOAT_2F64_USE_FP) {
 292        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 293               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO);
 294    }
 295    return float64_is_zero_or_normal(a.s) &&
 296           float64_is_zero_or_normal(b.s);
 297}
 298
 299/* 3-input is-zero-or-normal */
 300static inline
 301bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
 302{
 303    if (QEMU_HARDFLOAT_3F32_USE_FP) {
 304        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 305               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 306               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 307    }
 308    return float32_is_zero_or_normal(a.s) &&
 309           float32_is_zero_or_normal(b.s) &&
 310           float32_is_zero_or_normal(c.s);
 311}
 312
 313static inline
 314bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
 315{
 316    if (QEMU_HARDFLOAT_3F64_USE_FP) {
 317        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
 318               (fpclassify(b.h) == FP_NORMAL || fpclassify(b.h) == FP_ZERO) &&
 319               (fpclassify(c.h) == FP_NORMAL || fpclassify(c.h) == FP_ZERO);
 320    }
 321    return float64_is_zero_or_normal(a.s) &&
 322           float64_is_zero_or_normal(b.s) &&
 323           float64_is_zero_or_normal(c.s);
 324}
 325
 326static inline bool f32_is_inf(union_float32 a)
 327{
 328    if (QEMU_HARDFLOAT_USE_ISINF) {
 329        return isinf(a.h);
 330    }
 331    return float32_is_infinity(a.s);
 332}
 333
 334static inline bool f64_is_inf(union_float64 a)
 335{
 336    if (QEMU_HARDFLOAT_USE_ISINF) {
 337        return isinf(a.h);
 338    }
 339    return float64_is_infinity(a.s);
 340}
 341
 342static inline float32
 343float32_gen2(float32 xa, float32 xb, float_status *s,
 344             hard_f32_op2_fn hard, soft_f32_op2_fn soft,
 345             f32_check_fn pre, f32_check_fn post)
 346{
 347    union_float32 ua, ub, ur;
 348
 349    ua.s = xa;
 350    ub.s = xb;
 351
 352    if (unlikely(!can_use_fpu(s))) {
 353        goto soft;
 354    }
 355
 356    float32_input_flush2(&ua.s, &ub.s, s);
 357    if (unlikely(!pre(ua, ub))) {
 358        goto soft;
 359    }
 360
 361    ur.h = hard(ua.h, ub.h);
 362    if (unlikely(f32_is_inf(ur))) {
 363        float_raise(float_flag_overflow, s);
 364    } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
 365        goto soft;
 366    }
 367    return ur.s;
 368
 369 soft:
 370    return soft(ua.s, ub.s, s);
 371}
 372
 373static inline float64
 374float64_gen2(float64 xa, float64 xb, float_status *s,
 375             hard_f64_op2_fn hard, soft_f64_op2_fn soft,
 376             f64_check_fn pre, f64_check_fn post)
 377{
 378    union_float64 ua, ub, ur;
 379
 380    ua.s = xa;
 381    ub.s = xb;
 382
 383    if (unlikely(!can_use_fpu(s))) {
 384        goto soft;
 385    }
 386
 387    float64_input_flush2(&ua.s, &ub.s, s);
 388    if (unlikely(!pre(ua, ub))) {
 389        goto soft;
 390    }
 391
 392    ur.h = hard(ua.h, ub.h);
 393    if (unlikely(f64_is_inf(ur))) {
 394        float_raise(float_flag_overflow, s);
 395    } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
 396        goto soft;
 397    }
 398    return ur.s;
 399
 400 soft:
 401    return soft(ua.s, ub.s, s);
 402}
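/*
 * Sketch of how these generators are meant to be used (names here are
 * illustrative; the actual wrappers, softfloat fallbacks and pre/post
 * checks are defined later in this file): the host-FPU callback is a
 * one-liner, paired with the softfloat implementation of the same op.
 *
 *   static float hard_f32_add(float a, float b)
 *   {
 *       return a + b;
 *   }
 *
 *   float32 float32_add(float32 a, float32 b, float_status *s)
 *   {
 *       return float32_gen2(a, b, s, hard_f32_add, soft_f32_add,
 *                           f32_is_zon2, f32_addsub_post);
 *   }
 */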
 403
 404/*
 405 * Classify a floating point number. Everything above float_class_qnan
 406 * is a NaN, so cls >= float_class_qnan matches any NaN.
 407 */
 408
 409typedef enum __attribute__ ((__packed__)) {
 410    float_class_unclassified,
 411    float_class_zero,
 412    float_class_normal,
 413    float_class_inf,
 414    float_class_qnan,  /* all NaNs from here */
 415    float_class_snan,
 416} FloatClass;
 417
 418#define float_cmask(bit)  (1u << (bit))
 419
 420enum {
 421    float_cmask_zero    = float_cmask(float_class_zero),
 422    float_cmask_normal  = float_cmask(float_class_normal),
 423    float_cmask_inf     = float_cmask(float_class_inf),
 424    float_cmask_qnan    = float_cmask(float_class_qnan),
 425    float_cmask_snan    = float_cmask(float_class_snan),
 426
 427    float_cmask_infzero = float_cmask_zero | float_cmask_inf,
 428    float_cmask_anynan  = float_cmask_qnan | float_cmask_snan,
 429};
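/*
 * Illustrative use of the masks above: callers fold the classes of two
 * operands into a single bitset so that several classes can be tested
 * with one comparison, e.g.
 *
 *   int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 *
 *   if (unlikely(ab_mask & float_cmask_anynan)) {
 *       ... at least one operand is a NaN ...
 *   }
 *   if (ab_mask == float_cmask_infzero) {
 *       ... one operand is inf and the other is zero, as in 0 * inf ...
 *   }
 */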
 430
 431/* Flags for parts_minmax. */
 432enum {
 433    /* Set for minimum; clear for maximum. */
 434    minmax_ismin = 1,
 435    /* Set for the IEEE 754-2008 minNum() and maxNum() operations. */
 436    minmax_isnum = 2,
 437    /* Set for the IEEE 754-2008 minNumMag() and maxNumMag() operations. */
 438    minmax_ismag = 4,
 439    /*
 440     * Set for the IEEE 754-2019 minimumNumber() and maximumNumber()
 441     * operations.
 442     */
 443    minmax_isnumber = 8,
 444};
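/*
 * Illustrative combinations (the public wrappers are defined later in
 * this file): IEEE 754-2008 minNum() maps to
 * minmax_ismin | minmax_isnum, minNumMag() adds minmax_ismag to that,
 * and IEEE 754-2019 minimumNumber() maps to
 * minmax_ismin | minmax_isnumber.
 */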
 445
 446/* Simple helpers for checking if, or what kind of, NaN we have */
 447static inline __attribute__((unused)) bool is_nan(FloatClass c)
 448{
 449    return unlikely(c >= float_class_qnan);
 450}
 451
 452static inline __attribute__((unused)) bool is_snan(FloatClass c)
 453{
 454    return c == float_class_snan;
 455}
 456
 457static inline __attribute__((unused)) bool is_qnan(FloatClass c)
 458{
 459    return c == float_class_qnan;
 460}
 461
 462/*
 463 * Structure holding all of the decomposed parts of a float.
 464 * The exponent is unbiased and the fraction is normalized.
 465 *
 466 * The fraction words are stored in big-endian word ordering,
 467 * so that truncation from a larger format to a smaller format
 468 * can be done simply by ignoring subsequent elements.
 469 */
 470
 471typedef struct {
 472    FloatClass cls;
 473    bool sign;
 474    int32_t exp;
 475    union {
 476        /* Routines that know the structure may reference the singular name. */
 477        uint64_t frac;
 478        /*
 479         * Routines expanded with multiple structures reference "hi" and "lo"
 480         * depending on the operation.  In FloatParts64, "hi" and "lo" are
 481         * both the same word and aliased here.
 482         */
 483        uint64_t frac_hi;
 484        uint64_t frac_lo;
 485    };
 486} FloatParts64;
 487
 488typedef struct {
 489    FloatClass cls;
 490    bool sign;
 491    int32_t exp;
 492    uint64_t frac_hi;
 493    uint64_t frac_lo;
 494} FloatParts128;
 495
 496typedef struct {
 497    FloatClass cls;
 498    bool sign;
 499    int32_t exp;
 500    uint64_t frac_hi;
 501    uint64_t frac_hm;  /* high-middle */
 502    uint64_t frac_lm;  /* low-middle */
 503    uint64_t frac_lo;
 504} FloatParts256;
 505
 506/* These apply to the most significant word of each FloatPartsN. */
 507#define DECOMPOSED_BINARY_POINT    63
 508#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
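/*
 * For example, after canonicalization the value 2.0 is held as
 *
 *   cls = float_class_normal, sign = false, exp = 1,
 *   frac = DECOMPOSED_IMPLICIT_BIT    (only bit 63 set)
 *
 * i.e. frac * 2^-63 always lies in [1.0, 2.0) for a normal number, and
 * the unbiased exponent supplies the scaling.
 */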
 509
 510/* Structure holding all of the relevant parameters for a format.
 511 *   exp_size: the size of the exponent field
 512 *   exp_bias: the offset applied to the exponent field
 513 *   exp_max: the maximum normalised exponent
 514 *   frac_size: the size of the fraction field
 515 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 516 * The following are computed based on the size of the fraction:
 517 *   round_mask: bits below lsb which must be rounded
 518 * The following optional modifiers are available:
 519 *   arm_althp: handle ARM Alternative Half Precision
 520 */
 521typedef struct {
 522    int exp_size;
 523    int exp_bias;
 524    int exp_max;
 525    int frac_size;
 526    int frac_shift;
 527    bool arm_althp;
 528    uint64_t round_mask;
 529} FloatFmt;
 530
 531/* Expand fields based on the size of exponent and fraction */
 532#define FLOAT_PARAMS_(E)                                \
 533    .exp_size       = E,                                \
 534    .exp_bias       = ((1 << E) - 1) >> 1,              \
 535    .exp_max        = (1 << E) - 1
 536
 537#define FLOAT_PARAMS(E, F)                              \
 538    FLOAT_PARAMS_(E),                                   \
 539    .frac_size      = F,                                \
 540    .frac_shift     = (-F - 1) & 63,                    \
 541    .round_mask     = (1ull << ((-F - 1) & 63)) - 1
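/*
 * Worked expansion: FLOAT_PARAMS(8, 23), used for float32 below, yields
 *   .exp_size = 8, .exp_bias = 127, .exp_max = 255,
 *   .frac_size = 23, .frac_shift = (-24 & 63) = 40,
 *   .round_mask = (1ull << 40) - 1
 * so that frac_size + frac_shift == DECOMPOSED_BINARY_POINT, leaving
 * 40 rounding bits below the least significant fraction bit.
 */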
 542
 543static const FloatFmt float16_params = {
 544    FLOAT_PARAMS(5, 10)
 545};
 546
 547static const FloatFmt float16_params_ahp = {
 548    FLOAT_PARAMS(5, 10),
 549    .arm_althp = true
 550};
 551
 552static const FloatFmt bfloat16_params = {
 553    FLOAT_PARAMS(8, 7)
 554};
 555
 556static const FloatFmt float32_params = {
 557    FLOAT_PARAMS(8, 23)
 558};
 559
 560static const FloatFmt float64_params = {
 561    FLOAT_PARAMS(11, 52)
 562};
 563
 564static const FloatFmt float128_params = {
 565    FLOAT_PARAMS(15, 112)
 566};
 567
 568#define FLOATX80_PARAMS(R)              \
 569    FLOAT_PARAMS_(15),                  \
 570    .frac_size = R == 64 ? 63 : R,      \
 571    .frac_shift = 0,                    \
 572    .round_mask = R == 64 ? -1 : (1ull << ((-R - 1) & 63)) - 1
 573
 574static const FloatFmt floatx80_params[3] = {
 575    [floatx80_precision_s] = { FLOATX80_PARAMS(23) },
 576    [floatx80_precision_d] = { FLOATX80_PARAMS(52) },
 577    [floatx80_precision_x] = { FLOATX80_PARAMS(64) },
 578};
 579
 580/* Unpack a float to parts, but do not canonicalize.  */
 581static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
 582{
 583    const int f_size = fmt->frac_size;
 584    const int e_size = fmt->exp_size;
 585
 586    *r = (FloatParts64) {
 587        .cls = float_class_unclassified,
 588        .sign = extract64(raw, f_size + e_size, 1),
 589        .exp = extract64(raw, f_size, e_size),
 590        .frac = extract64(raw, 0, f_size)
 591    };
 592}
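/*
 * Example: unpacking the float32 encoding of 1.0f (0x3f800000) yields
 * sign = 0, exp = 0x7f, frac = 0; removing the exponent bias and
 * restoring the implicit bit are left to canonicalization.
 */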
 593
 594static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
 595{
 596    unpack_raw64(p, &float16_params, f);
 597}
 598
 599static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
 600{
 601    unpack_raw64(p, &bfloat16_params, f);
 602}
 603
 604static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
 605{
 606    unpack_raw64(p, &float32_params, f);
 607}
 608
 609static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
 610{
 611    unpack_raw64(p, &float64_params, f);
 612}
 613
 614static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
 615{
 616    *p = (FloatParts128) {
 617        .cls = float_class_unclassified,
 618        .sign = extract32(f.high, 15, 1),
 619        .exp = extract32(f.high, 0, 15),
 620        .frac_hi = f.low
 621    };
 622}
 623
 624static void float128_unpack_raw(FloatParts128 *p, float128 f)
 625{
 626    const int f_size = float128_params.frac_size - 64;
 627    const int e_size = float128_params.exp_size;
 628
 629    *p = (FloatParts128) {
 630        .cls = float_class_unclassified,
 631        .sign = extract64(f.high, f_size + e_size, 1),
 632        .exp = extract64(f.high, f_size, e_size),
 633        .frac_hi = extract64(f.high, 0, f_size),
 634        .frac_lo = f.low,
 635    };
 636}
 637
 638/* Pack a float from parts, but do not canonicalize.  */
 639static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
 640{
 641    const int f_size = fmt->frac_size;
 642    const int e_size = fmt->exp_size;
 643    uint64_t ret;
 644
 645    ret = (uint64_t)p->sign << (f_size + e_size);
 646    ret = deposit64(ret, f_size, e_size, p->exp);
 647    ret = deposit64(ret, 0, f_size, p->frac);
 648    return ret;
 649}
 650
 651static inline float16 float16_pack_raw(const FloatParts64 *p)
 652{
 653    return make_float16(pack_raw64(p, &float16_params));
 654}
 655
 656static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
 657{
 658    return pack_raw64(p, &bfloat16_params);
 659}
 660
 661static inline float32 float32_pack_raw(const FloatParts64 *p)
 662{
 663    return make_float32(pack_raw64(p, &float32_params));
 664}
 665
 666static inline float64 float64_pack_raw(const FloatParts64 *p)
 667{
 668    return make_float64(pack_raw64(p, &float64_params));
 669}
 670
 671static float128 float128_pack_raw(const FloatParts128 *p)
 672{
 673    const int f_size = float128_params.frac_size - 64;
 674    const int e_size = float128_params.exp_size;
 675    uint64_t hi;
 676
 677    hi = (uint64_t)p->sign << (f_size + e_size);
 678    hi = deposit64(hi, f_size, e_size, p->exp);
 679    hi = deposit64(hi, 0, f_size, p->frac_hi);
 680    return make_float128(hi, p->frac_lo);
 681}
 682
 683/*----------------------------------------------------------------------------
 684| Functions and definitions to determine:  (1) whether tininess for underflow
 685| is detected before or after rounding by default, (2) what (if anything)
 686| happens when exceptions are raised, (3) how signaling NaNs are distinguished
 687| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
 688| are propagated from function inputs to output.  These details are target-
 689| specific.
 690*----------------------------------------------------------------------------*/
 691#include "softfloat-specialize.c.inc"
 692
 693#define PARTS_GENERIC_64_128(NAME, P) \
 694    _Generic((P), FloatParts64 *: parts64_##NAME, \
 695                  FloatParts128 *: parts128_##NAME)
 696
 697#define PARTS_GENERIC_64_128_256(NAME, P) \
 698    _Generic((P), FloatParts64 *: parts64_##NAME, \
 699                  FloatParts128 *: parts128_##NAME, \
 700                  FloatParts256 *: parts256_##NAME)
 701
 702#define parts_default_nan(P, S)    PARTS_GENERIC_64_128(default_nan, P)(P, S)
 703#define parts_silence_nan(P, S)    PARTS_GENERIC_64_128(silence_nan, P)(P, S)
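/*
 * For example, given FloatParts128 *p, parts_default_nan(p, s) resolves
 * at compile time to parts128_default_nan(p, s); _Generic dispatches on
 * the pointer type with no run-time cost.
 */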
 704
 705static void parts64_return_nan(FloatParts64 *a, float_status *s);
 706static void parts128_return_nan(FloatParts128 *a, float_status *s);
 707
 708#define parts_return_nan(P, S)     PARTS_GENERIC_64_128(return_nan, P)(P, S)
 709
 710static FloatParts64 *parts64_pick_nan(FloatParts64 *a, FloatParts64 *b,
 711                                      float_status *s);
 712static FloatParts128 *parts128_pick_nan(FloatParts128 *a, FloatParts128 *b,
 713                                        float_status *s);
 714
 715#define parts_pick_nan(A, B, S)    PARTS_GENERIC_64_128(pick_nan, A)(A, B, S)
 716
 717static FloatParts64 *parts64_pick_nan_muladd(FloatParts64 *a, FloatParts64 *b,
 718                                             FloatParts64 *c, float_status *s,
 719                                             int ab_mask, int abc_mask);
 720static FloatParts128 *parts128_pick_nan_muladd(FloatParts128 *a,
 721                                               FloatParts128 *b,
 722                                               FloatParts128 *c,
 723                                               float_status *s,
 724                                               int ab_mask, int abc_mask);
 725
 726#define parts_pick_nan_muladd(A, B, C, S, ABM, ABCM) \
 727    PARTS_GENERIC_64_128(pick_nan_muladd, A)(A, B, C, S, ABM, ABCM)
 728
 729static void parts64_canonicalize(FloatParts64 *p, float_status *status,
 730                                 const FloatFmt *fmt);
 731static void parts128_canonicalize(FloatParts128 *p, float_status *status,
 732                                  const FloatFmt *fmt);
 733
 734#define parts_canonicalize(A, S, F) \
 735    PARTS_GENERIC_64_128(canonicalize, A)(A, S, F)
 736
 737static void parts64_uncanon_normal(FloatParts64 *p, float_status *status,
 738                                   const FloatFmt *fmt);
 739static void parts128_uncanon_normal(FloatParts128 *p, float_status *status,
 740                                    const FloatFmt *fmt);
 741
 742#define parts_uncanon_normal(A, S, F) \
 743    PARTS_GENERIC_64_128(uncanon_normal, A)(A, S, F)
 744
 745static void parts64_uncanon(FloatParts64 *p, float_status *status,
 746                            const FloatFmt *fmt);
 747static void parts128_uncanon(FloatParts128 *p, float_status *status,
 748                             const FloatFmt *fmt);
 749
 750#define parts_uncanon(A, S, F) \
 751    PARTS_GENERIC_64_128(uncanon, A)(A, S, F)
 752
 753static void parts64_add_normal(FloatParts64 *a, FloatParts64 *b);
 754static void parts128_add_normal(FloatParts128 *a, FloatParts128 *b);
 755static void parts256_add_normal(FloatParts256 *a, FloatParts256 *b);
 756
 757#define parts_add_normal(A, B) \
 758    PARTS_GENERIC_64_128_256(add_normal, A)(A, B)
 759
 760static bool parts64_sub_normal(FloatParts64 *a, FloatParts64 *b);
 761static bool parts128_sub_normal(FloatParts128 *a, FloatParts128 *b);
 762static bool parts256_sub_normal(FloatParts256 *a, FloatParts256 *b);
 763
 764#define parts_sub_normal(A, B) \
 765    PARTS_GENERIC_64_128_256(sub_normal, A)(A, B)
 766
 767static FloatParts64 *parts64_addsub(FloatParts64 *a, FloatParts64 *b,
 768                                    float_status *s, bool subtract);
 769static FloatParts128 *parts128_addsub(FloatParts128 *a, FloatParts128 *b,
 770                                      float_status *s, bool subtract);
 771
 772#define parts_addsub(A, B, S, Z) \
 773    PARTS_GENERIC_64_128(addsub, A)(A, B, S, Z)
 774
 775static FloatParts64 *parts64_mul(FloatParts64 *a, FloatParts64 *b,
 776                                 float_status *s);
 777static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
 778                                   float_status *s);
 779
 780#define parts_mul(A, B, S) \
 781    PARTS_GENERIC_64_128(mul, A)(A, B, S)
 782
 783static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
 784                                    FloatParts64 *c, int flags,
 785                                    float_status *s);
 786static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
 787                                      FloatParts128 *c, int flags,
 788                                      float_status *s);
 789
 790#define parts_muladd(A, B, C, Z, S) \
 791    PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
 792
 793static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
 794                                 float_status *s);
 795static FloatParts128 *parts128_div(FloatParts128 *a, FloatParts128 *b,
 796                                   float_status *s);
 797
 798#define parts_div(A, B, S) \
 799    PARTS_GENERIC_64_128(div, A)(A, B, S)
 800
 801static FloatParts64 *parts64_modrem(FloatParts64 *a, FloatParts64 *b,
 802                                    uint64_t *mod_quot, float_status *s);
 803static FloatParts128 *parts128_modrem(FloatParts128 *a, FloatParts128 *b,
 804                                      uint64_t *mod_quot, float_status *s);
 805
 806#define parts_modrem(A, B, Q, S) \
 807    PARTS_GENERIC_64_128(modrem, A)(A, B, Q, S)
 808
 809static void parts64_sqrt(FloatParts64 *a, float_status *s, const FloatFmt *f);
 810static void parts128_sqrt(FloatParts128 *a, float_status *s, const FloatFmt *f);
 811
 812#define parts_sqrt(A, S, F) \
 813    PARTS_GENERIC_64_128(sqrt, A)(A, S, F)
 814
 815static bool parts64_round_to_int_normal(FloatParts64 *a, FloatRoundMode rm,
 816                                        int scale, int frac_size);
 817static bool parts128_round_to_int_normal(FloatParts128 *a, FloatRoundMode r,
 818                                         int scale, int frac_size);
 819
 820#define parts_round_to_int_normal(A, R, C, F) \
 821    PARTS_GENERIC_64_128(round_to_int_normal, A)(A, R, C, F)
 822
 823static void parts64_round_to_int(FloatParts64 *a, FloatRoundMode rm,
 824                                 int scale, float_status *s,
 825                                 const FloatFmt *fmt);
 826static void parts128_round_to_int(FloatParts128 *a, FloatRoundMode r,
 827                                  int scale, float_status *s,
 828                                  const FloatFmt *fmt);
 829
 830#define parts_round_to_int(A, R, C, S, F) \
 831    PARTS_GENERIC_64_128(round_to_int, A)(A, R, C, S, F)
 832
 833static int64_t parts64_float_to_sint(FloatParts64 *p, FloatRoundMode rmode,
 834                                     int scale, int64_t min, int64_t max,
 835                                     float_status *s);
 836static int64_t parts128_float_to_sint(FloatParts128 *p, FloatRoundMode rmode,
 837                                     int scale, int64_t min, int64_t max,
 838                                     float_status *s);
 839
 840#define parts_float_to_sint(P, R, Z, MN, MX, S) \
 841    PARTS_GENERIC_64_128(float_to_sint, P)(P, R, Z, MN, MX, S)
 842
 843static uint64_t parts64_float_to_uint(FloatParts64 *p, FloatRoundMode rmode,
 844                                      int scale, uint64_t max,
 845                                      float_status *s);
 846static uint64_t parts128_float_to_uint(FloatParts128 *p, FloatRoundMode rmode,
 847                                       int scale, uint64_t max,
 848                                       float_status *s);
 849
 850#define parts_float_to_uint(P, R, Z, M, S) \
 851    PARTS_GENERIC_64_128(float_to_uint, P)(P, R, Z, M, S)
 852
 853static void parts64_sint_to_float(FloatParts64 *p, int64_t a,
 854                                  int scale, float_status *s);
 855static void parts128_sint_to_float(FloatParts128 *p, int64_t a,
 856                                   int scale, float_status *s);
 857
 858#define parts_sint_to_float(P, I, Z, S) \
 859    PARTS_GENERIC_64_128(sint_to_float, P)(P, I, Z, S)
 860
 861static void parts64_uint_to_float(FloatParts64 *p, uint64_t a,
 862                                  int scale, float_status *s);
 863static void parts128_uint_to_float(FloatParts128 *p, uint64_t a,
 864                                   int scale, float_status *s);
 865
 866#define parts_uint_to_float(P, I, Z, S) \
 867    PARTS_GENERIC_64_128(uint_to_float, P)(P, I, Z, S)
 868
 869static FloatParts64 *parts64_minmax(FloatParts64 *a, FloatParts64 *b,
 870                                    float_status *s, int flags);
 871static FloatParts128 *parts128_minmax(FloatParts128 *a, FloatParts128 *b,
 872                                      float_status *s, int flags);
 873
 874#define parts_minmax(A, B, S, F) \
 875    PARTS_GENERIC_64_128(minmax, A)(A, B, S, F)
 876
 877static int parts64_compare(FloatParts64 *a, FloatParts64 *b,
 878                           float_status *s, bool q);
 879static int parts128_compare(FloatParts128 *a, FloatParts128 *b,
 880                            float_status *s, bool q);
 881
 882#define parts_compare(A, B, S, Q) \
 883    PARTS_GENERIC_64_128(compare, A)(A, B, S, Q)
 884
 885static void parts64_scalbn(FloatParts64 *a, int n, float_status *s);
 886static void parts128_scalbn(FloatParts128 *a, int n, float_status *s);
 887
 888#define parts_scalbn(A, N, S) \
 889    PARTS_GENERIC_64_128(scalbn, A)(A, N, S)
 890
 891static void parts64_log2(FloatParts64 *a, float_status *s, const FloatFmt *f);
 892static void parts128_log2(FloatParts128 *a, float_status *s, const FloatFmt *f);
 893
 894#define parts_log2(A, S, F) \
 895    PARTS_GENERIC_64_128(log2, A)(A, S, F)
 896
 897/*
 898 * Helper functions for softfloat-parts.c.inc, per-size operations.
 899 */
 900
 901#define FRAC_GENERIC_64_128(NAME, P) \
 902    _Generic((P), FloatParts64 *: frac64_##NAME, \
 903                  FloatParts128 *: frac128_##NAME)
 904
 905#define FRAC_GENERIC_64_128_256(NAME, P) \
 906    _Generic((P), FloatParts64 *: frac64_##NAME, \
 907                  FloatParts128 *: frac128_##NAME, \
 908                  FloatParts256 *: frac256_##NAME)
 909
 910static bool frac64_add(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
 911{
 912    return uadd64_overflow(a->frac, b->frac, &r->frac);
 913}
 914
 915static bool frac128_add(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
 916{
 917    bool c = 0;
 918    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 919    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 920    return c;
 921}
 922
 923static bool frac256_add(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
 924{
 925    bool c = 0;
 926    r->frac_lo = uadd64_carry(a->frac_lo, b->frac_lo, &c);
 927    r->frac_lm = uadd64_carry(a->frac_lm, b->frac_lm, &c);
 928    r->frac_hm = uadd64_carry(a->frac_hm, b->frac_hm, &c);
 929    r->frac_hi = uadd64_carry(a->frac_hi, b->frac_hi, &c);
 930    return c;
 931}
 932
 933#define frac_add(R, A, B)  FRAC_GENERIC_64_128_256(add, R)(R, A, B)
 934
 935static bool frac64_addi(FloatParts64 *r, FloatParts64 *a, uint64_t c)
 936{
 937    return uadd64_overflow(a->frac, c, &r->frac);
 938}
 939
 940static bool frac128_addi(FloatParts128 *r, FloatParts128 *a, uint64_t c)
 941{
 942    c = uadd64_overflow(a->frac_lo, c, &r->frac_lo);
 943    return uadd64_overflow(a->frac_hi, c, &r->frac_hi);
 944}
 945
 946#define frac_addi(R, A, C)  FRAC_GENERIC_64_128(addi, R)(R, A, C)
 947
 948static void frac64_allones(FloatParts64 *a)
 949{
 950    a->frac = -1;
 951}
 952
 953static void frac128_allones(FloatParts128 *a)
 954{
 955    a->frac_hi = a->frac_lo = -1;
 956}
 957
 958#define frac_allones(A)  FRAC_GENERIC_64_128(allones, A)(A)
 959
 960static int frac64_cmp(FloatParts64 *a, FloatParts64 *b)
 961{
 962    return a->frac == b->frac ? 0 : a->frac < b->frac ? -1 : 1;
 963}
 964
 965static int frac128_cmp(FloatParts128 *a, FloatParts128 *b)
 966{
 967    uint64_t ta = a->frac_hi, tb = b->frac_hi;
 968    if (ta == tb) {
 969        ta = a->frac_lo, tb = b->frac_lo;
 970        if (ta == tb) {
 971            return 0;
 972        }
 973    }
 974    return ta < tb ? -1 : 1;
 975}
 976
 977#define frac_cmp(A, B)  FRAC_GENERIC_64_128(cmp, A)(A, B)
 978
 979static void frac64_clear(FloatParts64 *a)
 980{
 981    a->frac = 0;
 982}
 983
 984static void frac128_clear(FloatParts128 *a)
 985{
 986    a->frac_hi = a->frac_lo = 0;
 987}
 988
 989#define frac_clear(A)  FRAC_GENERIC_64_128(clear, A)(A)
 990
 991static bool frac64_div(FloatParts64 *a, FloatParts64 *b)
 992{
 993    uint64_t n1, n0, r, q;
 994    bool ret;
 995
 996    /*
 997     * We want a 2*N / N-bit division to produce exactly an N-bit
 998     * result, so that we do not lose any precision and so that we
 999     * do not have to renormalize afterward.  If A.frac < B.frac,
1000     * then division would produce an (N-1)-bit result; shift A left
1001     * by one to produce the an N-bit result, and return true to
1002     * decrement the exponent to match.
1003     *
1004     * The udiv_qrnnd algorithm that we're using requires normalization,
1005     * i.e. the msb of the denominator must be set, which is already true.
1006     */
1007    ret = a->frac < b->frac;
1008    if (ret) {
1009        n0 = a->frac;
1010        n1 = 0;
1011    } else {
1012        n0 = a->frac >> 1;
1013        n1 = a->frac << 63;
1014    }
1015    q = udiv_qrnnd(&r, n0, n1, b->frac);
1016
1017    /* Set lsb if there is a remainder, to set inexact. */
1018    a->frac = q | (r != 0);
1019
1020    return ret;
1021}
1022
1023static bool frac128_div(FloatParts128 *a, FloatParts128 *b)
1024{
1025    uint64_t q0, q1, a0, a1, b0, b1;
1026    uint64_t r0, r1, r2, r3, t0, t1, t2, t3;
1027    bool ret = false;
1028
1029    a0 = a->frac_hi, a1 = a->frac_lo;
1030    b0 = b->frac_hi, b1 = b->frac_lo;
1031
1032    ret = lt128(a0, a1, b0, b1);
1033    if (!ret) {
1034        a1 = shr_double(a0, a1, 1);
1035        a0 = a0 >> 1;
1036    }
1037
1038    /* Use 128/64 -> 64 division as an estimate for 192/128 -> 128 division. */
1039    q0 = estimateDiv128To64(a0, a1, b0);
1040
1041    /*
1042     * The estimate is high because B1 was not included (unless B1 == 0).
1043     * Reduce quotient and increase remainder until remainder is non-negative.
1044     * This loop will execute 0 to 2 times.
1045     */
1046    mul128By64To192(b0, b1, q0, &t0, &t1, &t2);
1047    sub192(a0, a1, 0, t0, t1, t2, &r0, &r1, &r2);
1048    while (r0 != 0) {
1049        q0--;
1050        add192(r0, r1, r2, 0, b0, b1, &r0, &r1, &r2);
1051    }
1052
1053    /* Repeat using the remainder, producing a second word of quotient. */
1054    q1 = estimateDiv128To64(r1, r2, b0);
1055    mul128By64To192(b0, b1, q1, &t1, &t2, &t3);
1056    sub192(r1, r2, 0, t1, t2, t3, &r1, &r2, &r3);
1057    while (r1 != 0) {
1058        q1--;
1059        add192(r1, r2, r3, 0, b0, b1, &r1, &r2, &r3);
1060    }
1061
1062    /* Any remainder indicates inexact; set sticky bit. */
1063    q1 |= (r2 | r3) != 0;
1064
1065    a->frac_hi = q0;
1066    a->frac_lo = q1;
1067    return ret;
1068}
1069
1070#define frac_div(A, B)  FRAC_GENERIC_64_128(div, A)(A, B)
1071
1072static bool frac64_eqz(FloatParts64 *a)
1073{
1074    return a->frac == 0;
1075}
1076
1077static bool frac128_eqz(FloatParts128 *a)
1078{
1079    return (a->frac_hi | a->frac_lo) == 0;
1080}
1081
1082#define frac_eqz(A)  FRAC_GENERIC_64_128(eqz, A)(A)
1083
1084static void frac64_mulw(FloatParts128 *r, FloatParts64 *a, FloatParts64 *b)
1085{
1086    mulu64(&r->frac_lo, &r->frac_hi, a->frac, b->frac);
1087}
1088
1089static void frac128_mulw(FloatParts256 *r, FloatParts128 *a, FloatParts128 *b)
1090{
1091    mul128To256(a->frac_hi, a->frac_lo, b->frac_hi, b->frac_lo,
1092                &r->frac_hi, &r->frac_hm, &r->frac_lm, &r->frac_lo);
1093}
1094
1095#define frac_mulw(R, A, B)  FRAC_GENERIC_64_128(mulw, A)(R, A, B)
1096
1097static void frac64_neg(FloatParts64 *a)
1098{
1099    a->frac = -a->frac;
1100}
1101
1102static void frac128_neg(FloatParts128 *a)
1103{
1104    bool c = 0;
1105    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1106    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1107}
1108
1109static void frac256_neg(FloatParts256 *a)
1110{
1111    bool c = 0;
1112    a->frac_lo = usub64_borrow(0, a->frac_lo, &c);
1113    a->frac_lm = usub64_borrow(0, a->frac_lm, &c);
1114    a->frac_hm = usub64_borrow(0, a->frac_hm, &c);
1115    a->frac_hi = usub64_borrow(0, a->frac_hi, &c);
1116}
1117
1118#define frac_neg(A)  FRAC_GENERIC_64_128_256(neg, A)(A)
1119
1120static int frac64_normalize(FloatParts64 *a)
1121{
1122    if (a->frac) {
1123        int shift = clz64(a->frac);
1124        a->frac <<= shift;
1125        return shift;
1126    }
1127    return 64;
1128}
1129
1130static int frac128_normalize(FloatParts128 *a)
1131{
1132    if (a->frac_hi) {
1133        int shl = clz64(a->frac_hi);
1134        a->frac_hi = shl_double(a->frac_hi, a->frac_lo, shl);
1135        a->frac_lo <<= shl;
1136        return shl;
1137    } else if (a->frac_lo) {
1138        int shl = clz64(a->frac_lo);
1139        a->frac_hi = a->frac_lo << shl;
1140        a->frac_lo = 0;
1141        return shl + 64;
1142    }
1143    return 128;
1144}
1145
1146static int frac256_normalize(FloatParts256 *a)
1147{
1148    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1149    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1150    int ret, shl;
1151
1152    if (likely(a0)) {
1153        shl = clz64(a0);
1154        if (shl == 0) {
1155            return 0;
1156        }
1157        ret = shl;
1158    } else {
1159        if (a1) {
1160            ret = 64;
1161            a0 = a1, a1 = a2, a2 = a3, a3 = 0;
1162        } else if (a2) {
1163            ret = 128;
1164            a0 = a2, a1 = a3, a2 = 0, a3 = 0;
1165        } else if (a3) {
1166            ret = 192;
1167            a0 = a3, a1 = 0, a2 = 0, a3 = 0;
1168        } else {
1169            ret = 256;
1170            a0 = 0, a1 = 0, a2 = 0, a3 = 0;
1171            goto done;
1172        }
1173        shl = clz64(a0);
1174        if (shl == 0) {
1175            goto done;
1176        }
1177        ret += shl;
1178    }
1179
1180    a0 = shl_double(a0, a1, shl);
1181    a1 = shl_double(a1, a2, shl);
1182    a2 = shl_double(a2, a3, shl);
1183    a3 <<= shl;
1184
1185 done:
1186    a->frac_hi = a0;
1187    a->frac_hm = a1;
1188    a->frac_lm = a2;
1189    a->frac_lo = a3;
1190    return ret;
1191}
1192
1193#define frac_normalize(A)  FRAC_GENERIC_64_128_256(normalize, A)(A)
1194
1195static void frac64_modrem(FloatParts64 *a, FloatParts64 *b, uint64_t *mod_quot)
1196{
1197    uint64_t a0, a1, b0, t0, t1, q, quot;
1198    int exp_diff = a->exp - b->exp;
1199    int shift;
1200
1201    a0 = a->frac;
1202    a1 = 0;
1203
1204    if (exp_diff < -1) {
1205        if (mod_quot) {
1206            *mod_quot = 0;
1207        }
1208        return;
1209    }
1210    if (exp_diff == -1) {
1211        a0 >>= 1;
1212        exp_diff = 0;
1213    }
1214
1215    b0 = b->frac;
1216    quot = q = b0 <= a0;
1217    if (q) {
1218        a0 -= b0;
1219    }
1220
1221    exp_diff -= 64;
1222    while (exp_diff > 0) {
1223        q = estimateDiv128To64(a0, a1, b0);
1224        q = q > 2 ? q - 2 : 0;
1225        mul64To128(b0, q, &t0, &t1);
1226        sub128(a0, a1, t0, t1, &a0, &a1);
1227        shortShift128Left(a0, a1, 62, &a0, &a1);
1228        exp_diff -= 62;
1229        quot = (quot << 62) + q;
1230    }
1231
1232    exp_diff += 64;
1233    if (exp_diff > 0) {
1234        q = estimateDiv128To64(a0, a1, b0);
1235        q = q > 2 ? (q - 2) >> (64 - exp_diff) : 0;
1236        mul64To128(b0, q << (64 - exp_diff), &t0, &t1);
1237        sub128(a0, a1, t0, t1, &a0, &a1);
1238        shortShift128Left(0, b0, 64 - exp_diff, &t0, &t1);
1239        while (le128(t0, t1, a0, a1)) {
1240            ++q;
1241            sub128(a0, a1, t0, t1, &a0, &a1);
1242        }
1243        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1244    } else {
1245        t0 = b0;
1246        t1 = 0;
1247    }
1248
1249    if (mod_quot) {
1250        *mod_quot = quot;
1251    } else {
1252        sub128(t0, t1, a0, a1, &t0, &t1);
1253        if (lt128(t0, t1, a0, a1) ||
1254            (eq128(t0, t1, a0, a1) && (q & 1))) {
1255            a0 = t0;
1256            a1 = t1;
1257            a->sign = !a->sign;
1258        }
1259    }
1260
1261    if (likely(a0)) {
1262        shift = clz64(a0);
1263        shortShift128Left(a0, a1, shift, &a0, &a1);
1264    } else if (likely(a1)) {
1265        shift = clz64(a1);
1266        a0 = a1 << shift;
1267        a1 = 0;
1268        shift += 64;
1269    } else {
1270        a->cls = float_class_zero;
1271        return;
1272    }
1273
1274    a->exp = b->exp + exp_diff - shift;
1275    a->frac = a0 | (a1 != 0);
1276}
1277
1278static void frac128_modrem(FloatParts128 *a, FloatParts128 *b,
1279                           uint64_t *mod_quot)
1280{
1281    uint64_t a0, a1, a2, b0, b1, t0, t1, t2, q, quot;
1282    int exp_diff = a->exp - b->exp;
1283    int shift;
1284
1285    a0 = a->frac_hi;
1286    a1 = a->frac_lo;
1287    a2 = 0;
1288
1289    if (exp_diff < -1) {
1290        if (mod_quot) {
1291            *mod_quot = 0;
1292        }
1293        return;
1294    }
1295    if (exp_diff == -1) {
1296        shift128Right(a0, a1, 1, &a0, &a1);
1297        exp_diff = 0;
1298    }
1299
1300    b0 = b->frac_hi;
1301    b1 = b->frac_lo;
1302
1303    quot = q = le128(b0, b1, a0, a1);
1304    if (q) {
1305        sub128(a0, a1, b0, b1, &a0, &a1);
1306    }
1307
1308    exp_diff -= 64;
1309    while (exp_diff > 0) {
1310        q = estimateDiv128To64(a0, a1, b0);
1311        q = q > 4 ? q - 4 : 0;
1312        mul128By64To192(b0, b1, q, &t0, &t1, &t2);
1313        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1314        shortShift192Left(a0, a1, a2, 61, &a0, &a1, &a2);
1315        exp_diff -= 61;
1316        quot = (quot << 61) + q;
1317    }
1318
1319    exp_diff += 64;
1320    if (exp_diff > 0) {
1321        q = estimateDiv128To64(a0, a1, b0);
1322        q = q > 4 ? (q - 4) >> (64 - exp_diff) : 0;
1323        mul128By64To192(b0, b1, q << (64 - exp_diff), &t0, &t1, &t2);
1324        sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1325        shortShift192Left(0, b0, b1, 64 - exp_diff, &t0, &t1, &t2);
1326        while (le192(t0, t1, t2, a0, a1, a2)) {
1327            ++q;
1328            sub192(a0, a1, a2, t0, t1, t2, &a0, &a1, &a2);
1329        }
1330        quot = (exp_diff < 64 ? quot << exp_diff : 0) + q;
1331    } else {
1332        t0 = b0;
1333        t1 = b1;
1334        t2 = 0;
1335    }
1336
1337    if (mod_quot) {
1338        *mod_quot = quot;
1339    } else {
1340        sub192(t0, t1, t2, a0, a1, a2, &t0, &t1, &t2);
1341        if (lt192(t0, t1, t2, a0, a1, a2) ||
1342            (eq192(t0, t1, t2, a0, a1, a2) && (q & 1))) {
1343            a0 = t0;
1344            a1 = t1;
1345            a2 = t2;
1346            a->sign = !a->sign;
1347        }
1348    }
1349
1350    if (likely(a0)) {
1351        shift = clz64(a0);
1352        shortShift192Left(a0, a1, a2, shift, &a0, &a1, &a2);
1353    } else if (likely(a1)) {
1354        shift = clz64(a1);
1355        shortShift128Left(a1, a2, shift, &a0, &a1);
1356        a2 = 0;
1357        shift += 64;
1358    } else if (likely(a2)) {
1359        shift = clz64(a2);
1360        a0 = a2 << shift;
1361        a1 = a2 = 0;
1362        shift += 128;
1363    } else {
1364        a->cls = float_class_zero;
1365        return;
1366    }
1367
1368    a->exp = b->exp + exp_diff - shift;
1369    a->frac_hi = a0;
1370    a->frac_lo = a1 | (a2 != 0);
1371}
1372
1373#define frac_modrem(A, B, Q)  FRAC_GENERIC_64_128(modrem, A)(A, B, Q)
1374
1375static void frac64_shl(FloatParts64 *a, int c)
1376{
1377    a->frac <<= c;
1378}
1379
1380static void frac128_shl(FloatParts128 *a, int c)
1381{
1382    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1383
1384    if (c & 64) {
1385        a0 = a1, a1 = 0;
1386    }
1387
1388    c &= 63;
1389    if (c) {
1390        a0 = shl_double(a0, a1, c);
1391        a1 = a1 << c;
1392    }
1393
1394    a->frac_hi = a0;
1395    a->frac_lo = a1;
1396}
1397
1398#define frac_shl(A, C)  FRAC_GENERIC_64_128(shl, A)(A, C)
1399
1400static void frac64_shr(FloatParts64 *a, int c)
1401{
1402    a->frac >>= c;
1403}
1404
1405static void frac128_shr(FloatParts128 *a, int c)
1406{
1407    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1408
1409    if (c & 64) {
1410        a1 = a0, a0 = 0;
1411    }
1412
1413    c &= 63;
1414    if (c) {
1415        a1 = shr_double(a0, a1, c);
1416        a0 = a0 >> c;
1417    }
1418
1419    a->frac_hi = a0;
1420    a->frac_lo = a1;
1421}
1422
1423#define frac_shr(A, C)  FRAC_GENERIC_64_128(shr, A)(A, C)
1424
1425static void frac64_shrjam(FloatParts64 *a, int c)
1426{
1427    uint64_t a0 = a->frac;
1428
1429    if (likely(c != 0)) {
1430        if (likely(c < 64)) {
1431            a0 = (a0 >> c) | (shr_double(a0, 0, c) != 0);
1432        } else {
1433            a0 = a0 != 0;
1434        }
1435        a->frac = a0;
1436    }
1437}
1438
1439static void frac128_shrjam(FloatParts128 *a, int c)
1440{
1441    uint64_t a0 = a->frac_hi, a1 = a->frac_lo;
1442    uint64_t sticky = 0;
1443
1444    if (unlikely(c == 0)) {
1445        return;
1446    } else if (likely(c < 64)) {
1447        /* nothing */
1448    } else if (likely(c < 128)) {
1449        sticky = a1;
1450        a1 = a0;
1451        a0 = 0;
1452        c &= 63;
1453        if (c == 0) {
1454            goto done;
1455        }
1456    } else {
1457        sticky = a0 | a1;
1458        a0 = a1 = 0;
1459        goto done;
1460    }
1461
1462    sticky |= shr_double(a1, 0, c);
1463    a1 = shr_double(a0, a1, c);
1464    a0 = a0 >> c;
1465
1466 done:
1467    a->frac_lo = a1 | (sticky != 0);
1468    a->frac_hi = a0;
1469}
1470
1471static void frac256_shrjam(FloatParts256 *a, int c)
1472{
1473    uint64_t a0 = a->frac_hi, a1 = a->frac_hm;
1474    uint64_t a2 = a->frac_lm, a3 = a->frac_lo;
1475    uint64_t sticky = 0;
1476
1477    if (unlikely(c == 0)) {
1478        return;
1479    } else if (likely(c < 64)) {
1480        /* nothing */
1481    } else if (likely(c < 256)) {
1482        if (unlikely(c & 128)) {
1483            sticky |= a2 | a3;
1484            a3 = a1, a2 = a0, a1 = 0, a0 = 0;
1485        }
1486        if (unlikely(c & 64)) {
1487            sticky |= a3;
1488            a3 = a2, a2 = a1, a1 = a0, a0 = 0;
1489        }
1490        c &= 63;
1491        if (c == 0) {
1492            goto done;
1493        }
1494    } else {
1495        sticky = a0 | a1 | a2 | a3;
1496        a0 = a1 = a2 = a3 = 0;
1497        goto done;
1498    }
1499
1500    sticky |= shr_double(a3, 0, c);
1501    a3 = shr_double(a2, a3, c);
1502    a2 = shr_double(a1, a2, c);
1503    a1 = shr_double(a0, a1, c);
1504    a0 = a0 >> c;
1505
1506 done:
1507    a->frac_lo = a3 | (sticky != 0);
1508    a->frac_lm = a2;
1509    a->frac_hm = a1;
1510    a->frac_hi = a0;
1511}
1512
1513#define frac_shrjam(A, C)  FRAC_GENERIC_64_128_256(shrjam, A)(A, C)
1514
1515static bool frac64_sub(FloatParts64 *r, FloatParts64 *a, FloatParts64 *b)
1516{
1517    return usub64_overflow(a->frac, b->frac, &r->frac);
1518}
1519
1520static bool frac128_sub(FloatParts128 *r, FloatParts128 *a, FloatParts128 *b)
1521{
1522    bool c = 0;
1523    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1524    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1525    return c;
1526}
1527
1528static bool frac256_sub(FloatParts256 *r, FloatParts256 *a, FloatParts256 *b)
1529{
1530    bool c = 0;
1531    r->frac_lo = usub64_borrow(a->frac_lo, b->frac_lo, &c);
1532    r->frac_lm = usub64_borrow(a->frac_lm, b->frac_lm, &c);
1533    r->frac_hm = usub64_borrow(a->frac_hm, b->frac_hm, &c);
1534    r->frac_hi = usub64_borrow(a->frac_hi, b->frac_hi, &c);
1535    return c;
1536}
1537
1538#define frac_sub(R, A, B)  FRAC_GENERIC_64_128_256(sub, R)(R, A, B)
1539
1540static void frac64_truncjam(FloatParts64 *r, FloatParts128 *a)
1541{
1542    r->frac = a->frac_hi | (a->frac_lo != 0);
1543}
1544
1545static void frac128_truncjam(FloatParts128 *r, FloatParts256 *a)
1546{
1547    r->frac_hi = a->frac_hi;
1548    r->frac_lo = a->frac_hm | ((a->frac_lm | a->frac_lo) != 0);
1549}
1550
1551#define frac_truncjam(R, A)  FRAC_GENERIC_64_128(truncjam, R)(R, A)
1552
1553static void frac64_widen(FloatParts128 *r, FloatParts64 *a)
1554{
1555    r->frac_hi = a->frac;
1556    r->frac_lo = 0;
1557}
1558
1559static void frac128_widen(FloatParts256 *r, FloatParts128 *a)
1560{
1561    r->frac_hi = a->frac_hi;
1562    r->frac_hm = a->frac_lo;
1563    r->frac_lm = 0;
1564    r->frac_lo = 0;
1565}
1566
1567#define frac_widen(A, B)  FRAC_GENERIC_64_128(widen, B)(A, B)
1568
1569/*
1570 * Reciprocal sqrt table.  1 bit of exponent, 6 bits of mantissa.
1571 * From https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt_data.c
1572 * and thus MIT licensed.
1573 */
1574static const uint16_t rsqrt_tab[128] = {
1575    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
1576    0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
1577    0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
1578    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
1579    0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
1580    0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
1581    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
1582    0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
1583    0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
1584    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
1585    0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
1586    0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
1587    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
1588    0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
1589    0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
1590    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
1591};
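/*
 * Indexing sketch (hedged: this follows the musl sqrt() convention the
 * table comes from; the actual consumers live in softfloat-parts.c.inc):
 * the 7-bit index is the exponent's low bit concatenated with the top six
 * fraction bits, and each 16-bit entry seeds an approximation of 1/sqrt(m)
 * for that mantissa range, e.g. for a raw IEEE double:
 *
 *     uint64_t ix = float64_val(x);
 *     uint32_t r0 = rsqrt_tab[(ix >> 46) % 128];  // ~9-bit-accurate seed
 *
 * which is then refined to full precision by Newton-Raphson steps.
 */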
1592
1593#define partsN(NAME)   glue(glue(glue(parts,N),_),NAME)
1594#define FloatPartsN    glue(FloatParts,N)
1595#define FloatPartsW    glue(FloatParts,W)
1596
1597#define N 64
1598#define W 128
1599
1600#include "softfloat-parts-addsub.c.inc"
1601#include "softfloat-parts.c.inc"
1602
1603#undef  N
1604#undef  W
1605#define N 128
1606#define W 256
1607
1608#include "softfloat-parts-addsub.c.inc"
1609#include "softfloat-parts.c.inc"
1610
1611#undef  N
1612#undef  W
1613#define N            256
1614
1615#include "softfloat-parts-addsub.c.inc"
1616
1617#undef  N
1618#undef  W
1619#undef  partsN
1620#undef  FloatPartsN
1621#undef  FloatPartsW
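/*
 * Worked expansion of the template machinery above: with N defined as 128,
 * a function written as  partsN(addsub)  inside softfloat-parts.c.inc
 * expands via  glue(glue(glue(parts,N),_),addsub)  to  parts128_addsub,
 * while FloatPartsN and FloatPartsW name FloatParts128 and FloatParts256.
 * Each pass over the .c.inc files therefore stamps out one complete set of
 * parts64_*, parts128_* and (addsub only) parts256_* helpers.
 */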
1622
1623/*
1624 * Pack/unpack routines with a specific FloatFmt.
1625 */
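/*
 * All public operations below follow the same three-step shape: unpack the
 * external format into canonical FloatPartsN form, run a format-independent
 * parts_* algorithm, then round and repack.  A minimal sketch (illustrative
 * only; the real entry points appear later in this file):
 *
 *     FloatParts64 p;
 *     float32_unpack_canonical(&p, in, status);       // decode, classify
 *     ...operate on p...
 *     out = float32_round_pack_canonical(&p, status); // round, encode
 */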
1626
1627static void float16a_unpack_canonical(FloatParts64 *p, float16 f,
1628                                      float_status *s, const FloatFmt *params)
1629{
1630    float16_unpack_raw(p, f);
1631    parts_canonicalize(p, s, params);
1632}
1633
1634static void float16_unpack_canonical(FloatParts64 *p, float16 f,
1635                                     float_status *s)
1636{
1637    float16a_unpack_canonical(p, f, s, &float16_params);
1638}
1639
1640static void bfloat16_unpack_canonical(FloatParts64 *p, bfloat16 f,
1641                                      float_status *s)
1642{
1643    bfloat16_unpack_raw(p, f);
1644    parts_canonicalize(p, s, &bfloat16_params);
1645}
1646
1647static float16 float16a_round_pack_canonical(FloatParts64 *p,
1648                                             float_status *s,
1649                                             const FloatFmt *params)
1650{
1651    parts_uncanon(p, s, params);
1652    return float16_pack_raw(p);
1653}
1654
1655static float16 float16_round_pack_canonical(FloatParts64 *p,
1656                                            float_status *s)
1657{
1658    return float16a_round_pack_canonical(p, s, &float16_params);
1659}
1660
1661static bfloat16 bfloat16_round_pack_canonical(FloatParts64 *p,
1662                                              float_status *s)
1663{
1664    parts_uncanon(p, s, &bfloat16_params);
1665    return bfloat16_pack_raw(p);
1666}
1667
1668static void float32_unpack_canonical(FloatParts64 *p, float32 f,
1669                                     float_status *s)
1670{
1671    float32_unpack_raw(p, f);
1672    parts_canonicalize(p, s, &float32_params);
1673}
1674
1675static float32 float32_round_pack_canonical(FloatParts64 *p,
1676                                            float_status *s)
1677{
1678    parts_uncanon(p, s, &float32_params);
1679    return float32_pack_raw(p);
1680}
1681
1682static void float64_unpack_canonical(FloatParts64 *p, float64 f,
1683                                     float_status *s)
1684{
1685    float64_unpack_raw(p, f);
1686    parts_canonicalize(p, s, &float64_params);
1687}
1688
1689static float64 float64_round_pack_canonical(FloatParts64 *p,
1690                                            float_status *s)
1691{
1692    parts_uncanon(p, s, &float64_params);
1693    return float64_pack_raw(p);
1694}
1695
1696static void float128_unpack_canonical(FloatParts128 *p, float128 f,
1697                                      float_status *s)
1698{
1699    float128_unpack_raw(p, f);
1700    parts_canonicalize(p, s, &float128_params);
1701}
1702
1703static float128 float128_round_pack_canonical(FloatParts128 *p,
1704                                              float_status *s)
1705{
1706    parts_uncanon(p, s, &float128_params);
1707    return float128_pack_raw(p);
1708}
1709
1710/* Returns false if the encoding is invalid. */
1711static bool floatx80_unpack_canonical(FloatParts128 *p, floatx80 f,
1712                                      float_status *s)
1713{
1714    /* Ensure rounding precision is set before beginning. */
1715    switch (s->floatx80_rounding_precision) {
1716    case floatx80_precision_x:
1717    case floatx80_precision_d:
1718    case floatx80_precision_s:
1719        break;
1720    default:
1721        g_assert_not_reached();
1722    }
1723
1724    if (unlikely(floatx80_invalid_encoding(f))) {
1725        float_raise(float_flag_invalid, s);
1726        return false;
1727    }
1728
1729    floatx80_unpack_raw(p, f);
1730
1731    if (likely(p->exp != floatx80_params[floatx80_precision_x].exp_max)) {
1732        parts_canonicalize(p, s, &floatx80_params[floatx80_precision_x]);
1733    } else {
1734        /* The explicit integer bit is ignored, after invalid checks. */
1735        p->frac_hi &= MAKE_64BIT_MASK(0, 63);
1736        p->cls = (p->frac_hi == 0 ? float_class_inf
1737                  : parts_is_snan_frac(p->frac_hi, s)
1738                  ? float_class_snan : float_class_qnan);
1739    }
1740    return true;
1741}
1742
1743static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
1744                                              float_status *s)
1745{
1746    const FloatFmt *fmt = &floatx80_params[s->floatx80_rounding_precision];
1747    uint64_t frac;
1748    int exp;
1749
1750    switch (p->cls) {
1751    case float_class_normal:
1752        if (s->floatx80_rounding_precision == floatx80_precision_x) {
1753            parts_uncanon_normal(p, s, fmt);
1754            frac = p->frac_hi;
1755            exp = p->exp;
1756        } else {
1757            FloatParts64 p64;
1758
1759            p64.sign = p->sign;
1760            p64.exp = p->exp;
1761            frac_truncjam(&p64, p);
1762            parts_uncanon_normal(&p64, s, fmt);
1763            frac = p64.frac;
1764            exp = p64.exp;
1765        }
1766        if (exp != fmt->exp_max) {
1767            break;
1768        }
1769        /* rounded to inf -- fall through to set frac correctly */
1770
1771    case float_class_inf:
1772        /* x86 and m68k differ in the setting of the integer bit. */
1773        frac = floatx80_infinity_low;
1774        exp = fmt->exp_max;
1775        break;
1776
1777    case float_class_zero:
1778        frac = 0;
1779        exp = 0;
1780        break;
1781
1782    case float_class_snan:
1783    case float_class_qnan:
1784        /* NaNs have the integer bit set. */
1785        frac = p->frac_hi | (1ull << 63);
1786        exp = fmt->exp_max;
1787        break;
1788
1789    default:
1790        g_assert_not_reached();
1791    }
1792
1793    return packFloatx80(p->sign, exp, frac);
1794}
1795
1796/*
1797 * Addition and subtraction
1798 */
1799
1800static float16 QEMU_FLATTEN
1801float16_addsub(float16 a, float16 b, float_status *status, bool subtract)
1802{
1803    FloatParts64 pa, pb, *pr;
1804
1805    float16_unpack_canonical(&pa, a, status);
1806    float16_unpack_canonical(&pb, b, status);
1807    pr = parts_addsub(&pa, &pb, status, subtract);
1808
1809    return float16_round_pack_canonical(pr, status);
1810}
1811
1812float16 float16_add(float16 a, float16 b, float_status *status)
1813{
1814    return float16_addsub(a, b, status, false);
1815}
1816
1817float16 float16_sub(float16 a, float16 b, float_status *status)
1818{
1819    return float16_addsub(a, b, status, true);
1820}
1821
1822static float32 QEMU_SOFTFLOAT_ATTR
1823soft_f32_addsub(float32 a, float32 b, float_status *status, bool subtract)
1824{
1825    FloatParts64 pa, pb, *pr;
1826
1827    float32_unpack_canonical(&pa, a, status);
1828    float32_unpack_canonical(&pb, b, status);
1829    pr = parts_addsub(&pa, &pb, status, subtract);
1830
1831    return float32_round_pack_canonical(pr, status);
1832}
1833
1834static float32 soft_f32_add(float32 a, float32 b, float_status *status)
1835{
1836    return soft_f32_addsub(a, b, status, false);
1837}
1838
1839static float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1840{
1841    return soft_f32_addsub(a, b, status, true);
1842}
1843
1844static float64 QEMU_SOFTFLOAT_ATTR
1845soft_f64_addsub(float64 a, float64 b, float_status *status, bool subtract)
1846{
1847    FloatParts64 pa, pb, *pr;
1848
1849    float64_unpack_canonical(&pa, a, status);
1850    float64_unpack_canonical(&pb, b, status);
1851    pr = parts_addsub(&pa, &pb, status, subtract);
1852
1853    return float64_round_pack_canonical(pr, status);
1854}
1855
1856static float64 soft_f64_add(float64 a, float64 b, float_status *status)
1857{
1858    return soft_f64_addsub(a, b, status, false);
1859}
1860
1861static float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1862{
1863    return soft_f64_addsub(a, b, status, true);
1864}
1865
1866static float hard_f32_add(float a, float b)
1867{
1868    return a + b;
1869}
1870
1871static float hard_f32_sub(float a, float b)
1872{
1873    return a - b;
1874}
1875
1876static double hard_f64_add(double a, double b)
1877{
1878    return a + b;
1879}
1880
1881static double hard_f64_sub(double a, double b)
1882{
1883    return a - b;
1884}
1885
1886static bool f32_addsubmul_post(union_float32 a, union_float32 b)
1887{
1888    if (QEMU_HARDFLOAT_2F32_USE_FP) {
1889        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1890    }
1891    return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1892}
1893
1894static bool f64_addsubmul_post(union_float64 a, union_float64 b)
1895{
1896    if (QEMU_HARDFLOAT_2F64_USE_FP) {
1897        return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1898    } else {
1899        return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1900    }
1901}
1902
1903static float32 float32_addsub(float32 a, float32 b, float_status *s,
1904                              hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1905{
1906    return float32_gen2(a, b, s, hard, soft,
1907                        f32_is_zon2, f32_addsubmul_post);
1908}
1909
1910static float64 float64_addsub(float64 a, float64 b, float_status *s,
1911                              hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1912{
1913    return float64_gen2(a, b, s, hard, soft,
1914                        f64_is_zon2, f64_addsubmul_post);
1915}
1916
1917float32 QEMU_FLATTEN
1918float32_add(float32 a, float32 b, float_status *s)
1919{
1920    return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1921}
1922
1923float32 QEMU_FLATTEN
1924float32_sub(float32 a, float32 b, float_status *s)
1925{
1926    return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1927}
1928
1929float64 QEMU_FLATTEN
1930float64_add(float64 a, float64 b, float_status *s)
1931{
1932    return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1933}
1934
1935float64 QEMU_FLATTEN
1936float64_sub(float64 a, float64 b, float_status *s)
1937{
1938    return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1939}
1940
1941static bfloat16 QEMU_FLATTEN
1942bfloat16_addsub(bfloat16 a, bfloat16 b, float_status *status, bool subtract)
1943{
1944    FloatParts64 pa, pb, *pr;
1945
1946    bfloat16_unpack_canonical(&pa, a, status);
1947    bfloat16_unpack_canonical(&pb, b, status);
1948    pr = parts_addsub(&pa, &pb, status, subtract);
1949
1950    return bfloat16_round_pack_canonical(pr, status);
1951}
1952
1953bfloat16 bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
1954{
1955    return bfloat16_addsub(a, b, status, false);
1956}
1957
1958bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
1959{
1960    return bfloat16_addsub(a, b, status, true);
1961}
1962
1963static float128 QEMU_FLATTEN
1964float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
1965{
1966    FloatParts128 pa, pb, *pr;
1967
1968    float128_unpack_canonical(&pa, a, status);
1969    float128_unpack_canonical(&pb, b, status);
1970    pr = parts_addsub(&pa, &pb, status, subtract);
1971
1972    return float128_round_pack_canonical(pr, status);
1973}
1974
1975float128 float128_add(float128 a, float128 b, float_status *status)
1976{
1977    return float128_addsub(a, b, status, false);
1978}
1979
1980float128 float128_sub(float128 a, float128 b, float_status *status)
1981{
1982    return float128_addsub(a, b, status, true);
1983}
1984
1985static floatx80 QEMU_FLATTEN
1986floatx80_addsub(floatx80 a, floatx80 b, float_status *status, bool subtract)
1987{
1988    FloatParts128 pa, pb, *pr;
1989
1990    if (!floatx80_unpack_canonical(&pa, a, status) ||
1991        !floatx80_unpack_canonical(&pb, b, status)) {
1992        return floatx80_default_nan(status);
1993    }
1994
1995    pr = parts_addsub(&pa, &pb, status, subtract);
1996    return floatx80_round_pack_canonical(pr, status);
1997}
1998
1999floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2000{
2001    return floatx80_addsub(a, b, status, false);
2002}
2003
2004floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2005{
2006    return floatx80_addsub(a, b, status, true);
2007}
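#ifdef SOFTFLOAT_EXAMPLES   /* hypothetical guard; usage sketch, not built */
/*
 * Minimal usage sketch for the addition entry points, assuming a
 * zero-initialized float_status selects round-to-nearest-even:
 */
static float32 example_f32_add(void)
{
    float_status st = { 0 };
    float32 one = make_float32(0x3f800000);   /* 1.0f */
    float32 two = make_float32(0x40000000);   /* 2.0f */
    return float32_add(one, two, &st);        /* 0x40400000, i.e. 3.0f */
}
#endif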
2008
2009/*
2010 * Multiplication
2011 */
2012
2013float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
2014{
2015    FloatParts64 pa, pb, *pr;
2016
2017    float16_unpack_canonical(&pa, a, status);
2018    float16_unpack_canonical(&pb, b, status);
2019    pr = parts_mul(&pa, &pb, status);
2020
2021    return float16_round_pack_canonical(pr, status);
2022}
2023
2024static float32 QEMU_SOFTFLOAT_ATTR
2025soft_f32_mul(float32 a, float32 b, float_status *status)
2026{
2027    FloatParts64 pa, pb, *pr;
2028
2029    float32_unpack_canonical(&pa, a, status);
2030    float32_unpack_canonical(&pb, b, status);
2031    pr = parts_mul(&pa, &pb, status);
2032
2033    return float32_round_pack_canonical(pr, status);
2034}
2035
2036static float64 QEMU_SOFTFLOAT_ATTR
2037soft_f64_mul(float64 a, float64 b, float_status *status)
2038{
2039    FloatParts64 pa, pb, *pr;
2040
2041    float64_unpack_canonical(&pa, a, status);
2042    float64_unpack_canonical(&pb, b, status);
2043    pr = parts_mul(&pa, &pb, status);
2044
2045    return float64_round_pack_canonical(pr, status);
2046}
2047
2048static float hard_f32_mul(float a, float b)
2049{
2050    return a * b;
2051}
2052
2053static double hard_f64_mul(double a, double b)
2054{
2055    return a * b;
2056}
2057
2058float32 QEMU_FLATTEN
2059float32_mul(float32 a, float32 b, float_status *s)
2060{
2061    return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
2062                        f32_is_zon2, f32_addsubmul_post);
2063}
2064
2065float64 QEMU_FLATTEN
2066float64_mul(float64 a, float64 b, float_status *s)
2067{
2068    return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
2069                        f64_is_zon2, f64_addsubmul_post);
2070}
2071
2072bfloat16 QEMU_FLATTEN
2073bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
2074{
2075    FloatParts64 pa, pb, *pr;
2076
2077    bfloat16_unpack_canonical(&pa, a, status);
2078    bfloat16_unpack_canonical(&pb, b, status);
2079    pr = parts_mul(&pa, &pb, status);
2080
2081    return bfloat16_round_pack_canonical(pr, status);
2082}
2083
2084float128 QEMU_FLATTEN
2085float128_mul(float128 a, float128 b, float_status *status)
2086{
2087    FloatParts128 pa, pb, *pr;
2088
2089    float128_unpack_canonical(&pa, a, status);
2090    float128_unpack_canonical(&pb, b, status);
2091    pr = parts_mul(&pa, &pb, status);
2092
2093    return float128_round_pack_canonical(pr, status);
2094}
2095
2096floatx80 QEMU_FLATTEN
2097floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2098{
2099    FloatParts128 pa, pb, *pr;
2100
2101    if (!floatx80_unpack_canonical(&pa, a, status) ||
2102        !floatx80_unpack_canonical(&pb, b, status)) {
2103        return floatx80_default_nan(status);
2104    }
2105
2106    pr = parts_mul(&pa, &pb, status);
2107    return floatx80_round_pack_canonical(pr, status);
2108}
2109
2110/*
2111 * Fused multiply-add
2112 */
2113
2114float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
2115                                    int flags, float_status *status)
2116{
2117    FloatParts64 pa, pb, pc, *pr;
2118
2119    float16_unpack_canonical(&pa, a, status);
2120    float16_unpack_canonical(&pb, b, status);
2121    float16_unpack_canonical(&pc, c, status);
2122    pr = parts_muladd(&pa, &pb, &pc, flags, status);
2123
2124    return float16_round_pack_canonical(pr, status);
2125}
2126
2127static float32 QEMU_SOFTFLOAT_ATTR
2128soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
2129                float_status *status)
2130{
2131    FloatParts64 pa, pb, pc, *pr;
2132
2133    float32_unpack_canonical(&pa, a, status);
2134    float32_unpack_canonical(&pb, b, status);
2135    float32_unpack_canonical(&pc, c, status);
2136    pr = parts_muladd(&pa, &pb, &pc, flags, status);
2137
2138    return float32_round_pack_canonical(pr, status);
2139}
2140
2141static float64 QEMU_SOFTFLOAT_ATTR
2142soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
2143                float_status *status)
2144{
2145    FloatParts64 pa, pb, pc, *pr;
2146
2147    float64_unpack_canonical(&pa, a, status);
2148    float64_unpack_canonical(&pb, b, status);
2149    float64_unpack_canonical(&pc, c, status);
2150    pr = parts_muladd(&pa, &pb, &pc, flags, status);
2151
2152    return float64_round_pack_canonical(pr, status);
2153}
2154
2155static bool force_soft_fma;
2156
2157float32 QEMU_FLATTEN
2158float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
2159{
2160    union_float32 ua, ub, uc, ur;
2161
2162    ua.s = xa;
2163    ub.s = xb;
2164    uc.s = xc;
2165
2166    if (unlikely(!can_use_fpu(s))) {
2167        goto soft;
2168    }
2169    if (unlikely(flags & float_muladd_halve_result)) {
2170        goto soft;
2171    }
2172
2173    float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
2174    if (unlikely(!f32_is_zon3(ua, ub, uc))) {
2175        goto soft;
2176    }
2177
2178    if (unlikely(force_soft_fma)) {
2179        goto soft;
2180    }
2181
2182    /*
2183     * When (a || b) == 0, there's no need to check for under/over flow,
2184     * since we know the addend is (normal || 0) and the product is 0.
2185     */
2186    if (float32_is_zero(ua.s) || float32_is_zero(ub.s)) {
2187        union_float32 up;
2188        bool prod_sign;
2189
2190        prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
2191        prod_sign ^= !!(flags & float_muladd_negate_product);
2192        up.s = float32_set_sign(float32_zero, prod_sign);
2193
2194        if (flags & float_muladd_negate_c) {
2195            uc.h = -uc.h;
2196        }
2197        ur.h = up.h + uc.h;
2198    } else {
2199        union_float32 ua_orig = ua;
2200        union_float32 uc_orig = uc;
2201
2202        if (flags & float_muladd_negate_product) {
2203            ua.h = -ua.h;
2204        }
2205        if (flags & float_muladd_negate_c) {
2206            uc.h = -uc.h;
2207        }
2208
2209        ur.h = fmaf(ua.h, ub.h, uc.h);
2210
2211        if (unlikely(f32_is_inf(ur))) {
2212            float_raise(float_flag_overflow, s);
2213        } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
2214            ua = ua_orig;
2215            uc = uc_orig;
2216            goto soft;
2217        }
2218    }
2219    if (flags & float_muladd_negate_result) {
2220        return float32_chs(ur.s);
2221    }
2222    return ur.s;
2223
2224 soft:
2225    return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
2226}
2227
2228float64 QEMU_FLATTEN
2229float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
2230{
2231    union_float64 ua, ub, uc, ur;
2232
2233    ua.s = xa;
2234    ub.s = xb;
2235    uc.s = xc;
2236
2237    if (unlikely(!can_use_fpu(s))) {
2238        goto soft;
2239    }
2240    if (unlikely(flags & float_muladd_halve_result)) {
2241        goto soft;
2242    }
2243
2244    float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
2245    if (unlikely(!f64_is_zon3(ua, ub, uc))) {
2246        goto soft;
2247    }
2248
2249    if (unlikely(force_soft_fma)) {
2250        goto soft;
2251    }
2252
2253    /*
2254     * When (a || b) == 0, there's no need to check for under/over flow,
2255     * since we know the addend is (normal || 0) and the product is 0.
2256     */
2257    if (float64_is_zero(ua.s) || float64_is_zero(ub.s)) {
2258        union_float64 up;
2259        bool prod_sign;
2260
2261        prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
2262        prod_sign ^= !!(flags & float_muladd_negate_product);
2263        up.s = float64_set_sign(float64_zero, prod_sign);
2264
2265        if (flags & float_muladd_negate_c) {
2266            uc.h = -uc.h;
2267        }
2268        ur.h = up.h + uc.h;
2269    } else {
2270        union_float64 ua_orig = ua;
2271        union_float64 uc_orig = uc;
2272
2273        if (flags & float_muladd_negate_product) {
2274            ua.h = -ua.h;
2275        }
2276        if (flags & float_muladd_negate_c) {
2277            uc.h = -uc.h;
2278        }
2279
2280        ur.h = fma(ua.h, ub.h, uc.h);
2281
2282        if (unlikely(f64_is_inf(ur))) {
2283            float_raise(float_flag_overflow, s);
2284        } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
2285            ua = ua_orig;
2286            uc = uc_orig;
2287            goto soft;
2288        }
2289    }
2290    if (flags & float_muladd_negate_result) {
2291        return float64_chs(ur.s);
2292    }
2293    return ur.s;
2294
2295 soft:
2296    return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
2297}
2298
2299bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
2300                                      int flags, float_status *status)
2301{
2302    FloatParts64 pa, pb, pc, *pr;
2303
2304    bfloat16_unpack_canonical(&pa, a, status);
2305    bfloat16_unpack_canonical(&pb, b, status);
2306    bfloat16_unpack_canonical(&pc, c, status);
2307    pr = parts_muladd(&pa, &pb, &pc, flags, status);
2308
2309    return bfloat16_round_pack_canonical(pr, status);
2310}
2311
2312float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
2313                                      int flags, float_status *status)
2314{
2315    FloatParts128 pa, pb, pc, *pr;
2316
2317    float128_unpack_canonical(&pa, a, status);
2318    float128_unpack_canonical(&pb, b, status);
2319    float128_unpack_canonical(&pc, c, status);
2320    pr = parts_muladd(&pa, &pb, &pc, flags, status);
2321
2322    return float128_round_pack_canonical(pr, status);
2323}
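#ifdef SOFTFLOAT_EXAMPLES   /* hypothetical guard; usage sketch, not built */
/*
 * Flag-handling sketch: float_muladd_negate_c turns the fused operation
 * into a*b - c, still with a single final rounding.  Here
 * 2.0 * 3.0 - 1.0 == 5.0:
 */
static float64 example_f64_fms(void)
{
    float_status st = { 0 };
    float64 a = make_float64(0x4000000000000000);   /* 2.0 */
    float64 b = make_float64(0x4008000000000000);   /* 3.0 */
    float64 c = make_float64(0x3ff0000000000000);   /* 1.0 */
    return float64_muladd(a, b, c, float_muladd_negate_c, &st);   /* 5.0 */
}
#endif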
2324
2325/*
2326 * Division
2327 */
2328
2329float16 float16_div(float16 a, float16 b, float_status *status)
2330{
2331    FloatParts64 pa, pb, *pr;
2332
2333    float16_unpack_canonical(&pa, a, status);
2334    float16_unpack_canonical(&pb, b, status);
2335    pr = parts_div(&pa, &pb, status);
2336
2337    return float16_round_pack_canonical(pr, status);
2338}
2339
2340static float32 QEMU_SOFTFLOAT_ATTR
2341soft_f32_div(float32 a, float32 b, float_status *status)
2342{
2343    FloatParts64 pa, pb, *pr;
2344
2345    float32_unpack_canonical(&pa, a, status);
2346    float32_unpack_canonical(&pb, b, status);
2347    pr = parts_div(&pa, &pb, status);
2348
2349    return float32_round_pack_canonical(pr, status);
2350}
2351
2352static float64 QEMU_SOFTFLOAT_ATTR
2353soft_f64_div(float64 a, float64 b, float_status *status)
2354{
2355    FloatParts64 pa, pb, *pr;
2356
2357    float64_unpack_canonical(&pa, a, status);
2358    float64_unpack_canonical(&pb, b, status);
2359    pr = parts_div(&pa, &pb, status);
2360
2361    return float64_round_pack_canonical(pr, status);
2362}
2363
2364static float hard_f32_div(float a, float b)
2365{
2366    return a / b;
2367}
2368
2369static double hard_f64_div(double a, double b)
2370{
2371    return a / b;
2372}
2373
2374static bool f32_div_pre(union_float32 a, union_float32 b)
2375{
2376    if (QEMU_HARDFLOAT_2F32_USE_FP) {
2377        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2378               fpclassify(b.h) == FP_NORMAL;
2379    }
2380    return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
2381}
2382
2383static bool f64_div_pre(union_float64 a, union_float64 b)
2384{
2385    if (QEMU_HARDFLOAT_2F64_USE_FP) {
2386        return (fpclassify(a.h) == FP_NORMAL || fpclassify(a.h) == FP_ZERO) &&
2387               fpclassify(b.h) == FP_NORMAL;
2388    }
2389    return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
2390}
2391
2392static bool f32_div_post(union_float32 a, union_float32 b)
2393{
2394    if (QEMU_HARDFLOAT_2F32_USE_FP) {
2395        return fpclassify(a.h) != FP_ZERO;
2396    }
2397    return !float32_is_zero(a.s);
2398}
2399
2400static bool f64_div_post(union_float64 a, union_float64 b)
2401{
2402    if (QEMU_HARDFLOAT_2F64_USE_FP) {
2403        return fpclassify(a.h) != FP_ZERO;
2404    }
2405    return !float64_is_zero(a.s);
2406}
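/*
 * Editorial note: judging by the hardfloat pattern visible in
 * float32_muladd above, the *_pre predicates gate entry to the host-FPU
 * path (divisor strictly normal, dividend zero or normal), while the
 * *_post predicates run only when the host result is suspiciously tiny
 * and decide whether the soft path must recompute it to get the underflow
 * and inexact flags right.  A zero dividend is the one tiny-result case
 * known to be exact, which appears to be why the div_post hooks return
 * false for it.
 */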
2407
2408float32 QEMU_FLATTEN
2409float32_div(float32 a, float32 b, float_status *s)
2410{
2411    return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
2412                        f32_div_pre, f32_div_post);
2413}
2414
2415float64 QEMU_FLATTEN
2416float64_div(float64 a, float64 b, float_status *s)
2417{
2418    return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
2419                        f64_div_pre, f64_div_post);
2420}
2421
2422bfloat16 QEMU_FLATTEN
2423bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
2424{
2425    FloatParts64 pa, pb, *pr;
2426
2427    bfloat16_unpack_canonical(&pa, a, status);
2428    bfloat16_unpack_canonical(&pb, b, status);
2429    pr = parts_div(&pa, &pb, status);
2430
2431    return bfloat16_round_pack_canonical(pr, status);
2432}
2433
2434float128 QEMU_FLATTEN
2435float128_div(float128 a, float128 b, float_status *status)
2436{
2437    FloatParts128 pa, pb, *pr;
2438
2439    float128_unpack_canonical(&pa, a, status);
2440    float128_unpack_canonical(&pb, b, status);
2441    pr = parts_div(&pa, &pb, status);
2442
2443    return float128_round_pack_canonical(pr, status);
2444}
2445
2446floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2447{
2448    FloatParts128 pa, pb, *pr;
2449
2450    if (!floatx80_unpack_canonical(&pa, a, status) ||
2451        !floatx80_unpack_canonical(&pb, b, status)) {
2452        return floatx80_default_nan(status);
2453    }
2454
2455    pr = parts_div(&pa, &pb, status);
2456    return floatx80_round_pack_canonical(pr, status);
2457}
2458
2459/*
2460 * Remainder
2461 */
2462
2463float32 float32_rem(float32 a, float32 b, float_status *status)
2464{
2465    FloatParts64 pa, pb, *pr;
2466
2467    float32_unpack_canonical(&pa, a, status);
2468    float32_unpack_canonical(&pb, b, status);
2469    pr = parts_modrem(&pa, &pb, NULL, status);
2470
2471    return float32_round_pack_canonical(pr, status);
2472}
2473
2474float64 float64_rem(float64 a, float64 b, float_status *status)
2475{
2476    FloatParts64 pa, pb, *pr;
2477
2478    float64_unpack_canonical(&pa, a, status);
2479    float64_unpack_canonical(&pb, b, status);
2480    pr = parts_modrem(&pa, &pb, NULL, status);
2481
2482    return float64_round_pack_canonical(pr, status);
2483}
2484
2485float128 float128_rem(float128 a, float128 b, float_status *status)
2486{
2487    FloatParts128 pa, pb, *pr;
2488
2489    float128_unpack_canonical(&pa, a, status);
2490    float128_unpack_canonical(&pb, b, status);
2491    pr = parts_modrem(&pa, &pb, NULL, status);
2492
2493    return float128_round_pack_canonical(pr, status);
2494}
2495
2496/*
2497 * Returns the remainder of the extended double-precision floating-point value
2498 * `a' with respect to the corresponding value `b'.
2499 * If 'mod' is false, the operation is performed according to the IEC/IEEE
2500 * Standard for Binary Floating-Point Arithmetic.  If 'mod' is true, return
2501 * the remainder based on truncating the quotient toward zero instead and
2502 * *quotient is set to the low 64 bits of the absolute value of the integer
2503 * quotient.
2504 */
2505floatx80 floatx80_modrem(floatx80 a, floatx80 b, bool mod,
2506                         uint64_t *quotient, float_status *status)
2507{
2508    FloatParts128 pa, pb, *pr;
2509
2510    *quotient = 0;
2511    if (!floatx80_unpack_canonical(&pa, a, status) ||
2512        !floatx80_unpack_canonical(&pb, b, status)) {
2513        return floatx80_default_nan(status);
2514    }
2515    pr = parts_modrem(&pa, &pb, mod ? quotient : NULL, status);
2516
2517    return floatx80_round_pack_canonical(pr, status);
2518}
2519
2520floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2521{
2522    uint64_t quotient;
2523    return floatx80_modrem(a, b, false, &quotient, status);
2524}
2525
2526floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
2527{
2528    uint64_t quotient;
2529    return floatx80_modrem(a, b, true, &quotient, status);
2530}
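/*
 * Worked example of rem vs. mod, with values chosen so they differ: for
 * a = 7.0 and b = 2.0 the exact quotient is 3.5.  IEEE remainder rounds
 * the quotient to the nearest (even) integer, 4, giving 7 - 4*2 = -1.0;
 * the truncating variant uses 3, giving 7 - 3*2 = 1.0 with *quotient = 3.
 */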
2531
2532/*
2533 * Float to Float conversions
2534 *
2535 * Returns the result of converting one float format to another. The
2536 * conversion is performed according to the IEC/IEEE Standard for
2537 * Binary Floating-Point Arithmetic.
2538 *
2539 * Usually this only needs to take care of raising invalid exceptions
2540 * and handling the conversion on NaNs.
2541 */
2542
2543static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
2544{
2545    switch (a->cls) {
2546    case float_class_qnan:
2547    case float_class_snan:
2548        /*
2549         * There is no NaN in the destination format.  Raise Invalid
2550         * and return a zero with the sign of the input NaN.
2551         */
2552        float_raise(float_flag_invalid, s);
2553        a->cls = float_class_zero;
2554        break;
2555
2556    case float_class_inf:
2557        /*
2558         * There is no Inf in the destination format.  Raise Invalid
2559         * and return the maximum normal with the correct sign.
2560         */
2561        float_raise(float_flag_invalid, s);
2562        a->cls = float_class_normal;
2563        a->exp = float16_params_ahp.exp_max;
2564        a->frac = MAKE_64BIT_MASK(float16_params_ahp.frac_shift,
2565                                  float16_params_ahp.frac_size + 1);
2566        break;
2567
2568    case float_class_normal:
2569    case float_class_zero:
2570        break;
2571
2572    default:
2573        g_assert_not_reached();
2574    }
2575}
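/*
 * Worked AHP example: a float32 +Inf input becomes the maximum AHP normal,
 * 0x7fff, whose value is (2 - 2^-10) * 2^16 = 131008; a NaN input becomes
 * a zero carrying the NaN's sign.  Both raise invalid, as above, since ARM
 * alternative half-precision encodes neither infinities nor NaNs.
 */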
2576
2577static void parts64_float_to_float(FloatParts64 *a, float_status *s)
2578{
2579    if (is_nan(a->cls)) {
2580        parts_return_nan(a, s);
2581    }
2582}
2583
2584static void parts128_float_to_float(FloatParts128 *a, float_status *s)
2585{
2586    if (is_nan(a->cls)) {
2587        parts_return_nan(a, s);
2588    }
2589}
2590
2591#define parts_float_to_float(P, S) \
2592    PARTS_GENERIC_64_128(float_to_float, P)(P, S)
2593
2594static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
2595                                        float_status *s)
2596{
2597    a->cls = b->cls;
2598    a->sign = b->sign;
2599    a->exp = b->exp;
2600
2601    if (a->cls == float_class_normal) {
2602        frac_truncjam(a, b);
2603    } else if (is_nan(a->cls)) {
2604        /* Discard the low bits of the NaN. */
2605        a->frac = b->frac_hi;
2606        parts_return_nan(a, s);
2607    }
2608}
2609
2610static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
2611                                       float_status *s)
2612{
2613    a->cls = b->cls;
2614    a->sign = b->sign;
2615    a->exp = b->exp;
2616    frac_widen(a, b);
2617
2618    if (is_nan(a->cls)) {
2619        parts_return_nan(a, s);
2620    }
2621}
2622
2623float32 float16_to_float32(float16 a, bool ieee, float_status *s)
2624{
2625    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2626    FloatParts64 p;
2627
2628    float16a_unpack_canonical(&p, a, s, fmt16);
2629    parts_float_to_float(&p, s);
2630    return float32_round_pack_canonical(&p, s);
2631}
2632
2633float64 float16_to_float64(float16 a, bool ieee, float_status *s)
2634{
2635    const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
2636    FloatParts64 p;
2637
2638    float16a_unpack_canonical(&p, a, s, fmt16);
2639    parts_float_to_float(&p, s);
2640    return float64_round_pack_canonical(&p, s);
2641}
2642
2643float16 float32_to_float16(float32 a, bool ieee, float_status *s)
2644{
2645    FloatParts64 p;
2646    const FloatFmt *fmt;
2647
2648    float32_unpack_canonical(&p, a, s);
2649    if (ieee) {
2650        parts_float_to_float(&p, s);
2651        fmt = &float16_params;
2652    } else {
2653        parts_float_to_ahp(&p, s);
2654        fmt = &float16_params_ahp;
2655    }
2656    return float16a_round_pack_canonical(&p, s, fmt);
2657}
2658
2659static float64 QEMU_SOFTFLOAT_ATTR
2660soft_float32_to_float64(float32 a, float_status *s)
2661{
2662    FloatParts64 p;
2663
2664    float32_unpack_canonical(&p, a, s);
2665    parts_float_to_float(&p, s);
2666    return float64_round_pack_canonical(&p, s);
2667}
2668
2669float64 float32_to_float64(float32 a, float_status *s)
2670{
2671    if (likely(float32_is_normal(a))) {
2672        /* Widening conversion can never produce inexact results.  */
2673        union_float32 uf;
2674        union_float64 ud;
2675        uf.s = a;
2676        ud.h = uf.h;
2677        return ud.s;
2678    } else if (float32_is_zero(a)) {
2679        return float64_set_sign(float64_zero, float32_is_neg(a));
2680    } else {
2681        return soft_float32_to_float64(a, s);
2682    }
2683}
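/*
 * Why the fast path above is safe: every float32 value is exactly
 * representable in float64 (wider exponent range, wider significand), so
 * the host widening can never round.  Bit-level example: 1.0f, 0x3f800000,
 * widens to 0x3ff0000000000000.
 */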
2684
2685float16 float64_to_float16(float64 a, bool ieee, float_status *s)
2686{
2687    FloatParts64 p;
2688    const FloatFmt *fmt;
2689
2690    float64_unpack_canonical(&p, a, s);
2691    if (ieee) {
2692        parts_float_to_float(&p, s);
2693        fmt = &float16_params;
2694    } else {
2695        parts_float_to_ahp(&p, s);
2696        fmt = &float16_params_ahp;
2697    }
2698    return float16a_round_pack_canonical(&p, s, fmt);
2699}
2700
2701float32 float64_to_float32(float64 a, float_status *s)
2702{
2703    FloatParts64 p;
2704
2705    float64_unpack_canonical(&p, a, s);
2706    parts_float_to_float(&p, s);
2707    return float32_round_pack_canonical(&p, s);
2708}
2709
2710float32 bfloat16_to_float32(bfloat16 a, float_status *s)
2711{
2712    FloatParts64 p;
2713
2714    bfloat16_unpack_canonical(&p, a, s);
2715    parts_float_to_float(&p, s);
2716    return float32_round_pack_canonical(&p, s);
2717}
2718
2719float64 bfloat16_to_float64(bfloat16 a, float_status *s)
2720{
2721    FloatParts64 p;
2722
2723    bfloat16_unpack_canonical(&p, a, s);
2724    parts_float_to_float(&p, s);
2725    return float64_round_pack_canonical(&p, s);
2726}
2727
2728bfloat16 float32_to_bfloat16(float32 a, float_status *s)
2729{
2730    FloatParts64 p;
2731
2732    float32_unpack_canonical(&p, a, s);
2733    parts_float_to_float(&p, s);
2734    return bfloat16_round_pack_canonical(&p, s);
2735}
2736
2737bfloat16 float64_to_bfloat16(float64 a, float_status *s)
2738{
2739    FloatParts64 p;
2740
2741    float64_unpack_canonical(&p, a, s);
2742    parts_float_to_float(&p, s);
2743    return bfloat16_round_pack_canonical(&p, s);
2744}
2745
2746float32 float128_to_float32(float128 a, float_status *s)
2747{
2748    FloatParts64 p64;
2749    FloatParts128 p128;
2750
2751    float128_unpack_canonical(&p128, a, s);
2752    parts_float_to_float_narrow(&p64, &p128, s);
2753    return float32_round_pack_canonical(&p64, s);
2754}
2755
2756float64 float128_to_float64(float128 a, float_status *s)
2757{
2758    FloatParts64 p64;
2759    FloatParts128 p128;
2760
2761    float128_unpack_canonical(&p128, a, s);
2762    parts_float_to_float_narrow(&p64, &p128, s);
2763    return float64_round_pack_canonical(&p64, s);
2764}
2765
2766float128 float32_to_float128(float32 a, float_status *s)
2767{
2768    FloatParts64 p64;
2769    FloatParts128 p128;
2770
2771    float32_unpack_canonical(&p64, a, s);
2772    parts_float_to_float_widen(&p128, &p64, s);
2773    return float128_round_pack_canonical(&p128, s);
2774}
2775
2776float128 float64_to_float128(float64 a, float_status *s)
2777{
2778    FloatParts64 p64;
2779    FloatParts128 p128;
2780
2781    float64_unpack_canonical(&p64, a, s);
2782    parts_float_to_float_widen(&p128, &p64, s);
2783    return float128_round_pack_canonical(&p128, s);
2784}
2785
2786float32 floatx80_to_float32(floatx80 a, float_status *s)
2787{
2788    FloatParts64 p64;
2789    FloatParts128 p128;
2790
2791    if (floatx80_unpack_canonical(&p128, a, s)) {
2792        parts_float_to_float_narrow(&p64, &p128, s);
2793    } else {
2794        parts_default_nan(&p64, s);
2795    }
2796    return float32_round_pack_canonical(&p64, s);
2797}
2798
2799float64 floatx80_to_float64(floatx80 a, float_status *s)
2800{
2801    FloatParts64 p64;
2802    FloatParts128 p128;
2803
2804    if (floatx80_unpack_canonical(&p128, a, s)) {
2805        parts_float_to_float_narrow(&p64, &p128, s);
2806    } else {
2807        parts_default_nan(&p64, s);
2808    }
2809    return float64_round_pack_canonical(&p64, s);
2810}
2811
2812float128 floatx80_to_float128(floatx80 a, float_status *s)
2813{
2814    FloatParts128 p;
2815
2816    if (floatx80_unpack_canonical(&p, a, s)) {
2817        parts_float_to_float(&p, s);
2818    } else {
2819        parts_default_nan(&p, s);
2820    }
2821    return float128_round_pack_canonical(&p, s);
2822}
2823
2824floatx80 float32_to_floatx80(float32 a, float_status *s)
2825{
2826    FloatParts64 p64;
2827    FloatParts128 p128;
2828
2829    float32_unpack_canonical(&p64, a, s);
2830    parts_float_to_float_widen(&p128, &p64, s);
2831    return floatx80_round_pack_canonical(&p128, s);
2832}
2833
2834floatx80 float64_to_floatx80(float64 a, float_status *s)
2835{
2836    FloatParts64 p64;
2837    FloatParts128 p128;
2838
2839    float64_unpack_canonical(&p64, a, s);
2840    parts_float_to_float_widen(&p128, &p64, s);
2841    return floatx80_round_pack_canonical(&p128, s);
2842}
2843
2844floatx80 float128_to_floatx80(float128 a, float_status *s)
2845{
2846    FloatParts128 p;
2847
2848    float128_unpack_canonical(&p, a, s);
2849    parts_float_to_float(&p, s);
2850    return floatx80_round_pack_canonical(&p, s);
2851}
2852
2853/*
2854 * Round to integral value
2855 */
2856
2857float16 float16_round_to_int(float16 a, float_status *s)
2858{
2859    FloatParts64 p;
2860
2861    float16_unpack_canonical(&p, a, s);
2862    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float16_params);
2863    return float16_round_pack_canonical(&p, s);
2864}
2865
2866float32 float32_round_to_int(float32 a, float_status *s)
2867{
2868    FloatParts64 p;
2869
2870    float32_unpack_canonical(&p, a, s);
2871    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float32_params);
2872    return float32_round_pack_canonical(&p, s);
2873}
2874
2875float64 float64_round_to_int(float64 a, float_status *s)
2876{
2877    FloatParts64 p;
2878
2879    float64_unpack_canonical(&p, a, s);
2880    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float64_params);
2881    return float64_round_pack_canonical(&p, s);
2882}
2883
2884bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
2885{
2886    FloatParts64 p;
2887
2888    bfloat16_unpack_canonical(&p, a, s);
2889    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &bfloat16_params);
2890    return bfloat16_round_pack_canonical(&p, s);
2891}
2892
2893float128 float128_round_to_int(float128 a, float_status *s)
2894{
2895    FloatParts128 p;
2896
2897    float128_unpack_canonical(&p, a, s);
2898    parts_round_to_int(&p, s->float_rounding_mode, 0, s, &float128_params);
2899    return float128_round_pack_canonical(&p, s);
2900}
2901
2902floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
2903{
2904    FloatParts128 p;
2905
2906    if (!floatx80_unpack_canonical(&p, a, status)) {
2907        return floatx80_default_nan(status);
2908    }
2909
2910    parts_round_to_int(&p, status->float_rounding_mode, 0, status,
2911                       &floatx80_params[status->floatx80_rounding_precision]);
2912    return floatx80_round_pack_canonical(&p, status);
2913}
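#ifdef SOFTFLOAT_EXAMPLES   /* hypothetical guard; usage sketch, not built */
/*
 * Rounding-mode sketch: 2.5 rounds to 2.0 under the default nearest-even
 * mode (ties go to the even integer), but would give 3.0 under
 * float_round_up.
 */
static float64 example_f64_rint(void)
{
    float_status st = { 0 };                         /* nearest-even */
    float64 x = make_float64(0x4004000000000000);    /* 2.5 */
    return float64_round_to_int(x, &st);             /* 2.0 */
}
#endif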
2914
2915/*
2916 * Floating-point to signed integer conversions
2917 */
2918
2919int8_t float16_to_int8_scalbn(float16 a, FloatRoundMode rmode, int scale,
2920                              float_status *s)
2921{
2922    FloatParts64 p;
2923
2924    float16_unpack_canonical(&p, a, s);
2925    return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s);
2926}
2927
2928int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
2929                                float_status *s)
2930{
2931    FloatParts64 p;
2932
2933    float16_unpack_canonical(&p, a, s);
2934    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2935}
2936
2937int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
2938                                float_status *s)
2939{
2940    FloatParts64 p;
2941
2942    float16_unpack_canonical(&p, a, s);
2943    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2944}
2945
2946int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
2947                                float_status *s)
2948{
2949    FloatParts64 p;
2950
2951    float16_unpack_canonical(&p, a, s);
2952    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2953}
2954
2955int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
2956                                float_status *s)
2957{
2958    FloatParts64 p;
2959
2960    float32_unpack_canonical(&p, a, s);
2961    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2962}
2963
2964int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
2965                                float_status *s)
2966{
2967    FloatParts64 p;
2968
2969    float32_unpack_canonical(&p, a, s);
2970    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2971}
2972
2973int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
2974                                float_status *s)
2975{
2976    FloatParts64 p;
2977
2978    float32_unpack_canonical(&p, a, s);
2979    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
2980}
2981
2982int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
2983                                float_status *s)
2984{
2985    FloatParts64 p;
2986
2987    float64_unpack_canonical(&p, a, s);
2988    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
2989}
2990
2991int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
2992                                float_status *s)
2993{
2994    FloatParts64 p;
2995
2996    float64_unpack_canonical(&p, a, s);
2997    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
2998}
2999
3000int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3001                                float_status *s)
3002{
3003    FloatParts64 p;
3004
3005    float64_unpack_canonical(&p, a, s);
3006    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3007}
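#ifdef SOFTFLOAT_EXAMPLES   /* hypothetical guard; usage sketch, not built */
/*
 * The scale argument acts as a binary exponent applied before rounding,
 * i.e. these helpers compute round(a * 2^scale), which is what makes the
 * *_scalbn forms usable for fixed-point conversions.  Sketch: 1.5 with
 * scale 4 converts to 24.
 */
static int32_t example_f64_to_fixed(void)
{
    float_status st = { 0 };
    float64 x = make_float64(0x3ff8000000000000);    /* 1.5 */
    return float64_to_int32_scalbn(x, float_round_nearest_even, 4, &st);
}
#endif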
3008
3009int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3010                                 float_status *s)
3011{
3012    FloatParts64 p;
3013
3014    bfloat16_unpack_canonical(&p, a, s);
3015    return parts_float_to_sint(&p, rmode, scale, INT16_MIN, INT16_MAX, s);
3016}
3017
3018int32_t bfloat16_to_int32_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3019                                 float_status *s)
3020{
3021    FloatParts64 p;
3022
3023    bfloat16_unpack_canonical(&p, a, s);
3024    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3025}
3026
3027int64_t bfloat16_to_int64_scalbn(bfloat16 a, FloatRoundMode rmode, int scale,
3028                                 float_status *s)
3029{
3030    FloatParts64 p;
3031
3032    bfloat16_unpack_canonical(&p, a, s);
3033    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3034}
3035
3036static int32_t float128_to_int32_scalbn(float128 a, FloatRoundMode rmode,
3037                                        int scale, float_status *s)
3038{
3039    FloatParts128 p;
3040
3041    float128_unpack_canonical(&p, a, s);
3042    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3043}
3044
3045static int64_t float128_to_int64_scalbn(float128 a, FloatRoundMode rmode,
3046                                        int scale, float_status *s)
3047{
3048    FloatParts128 p;
3049
3050    float128_unpack_canonical(&p, a, s);
3051    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3052}
3053
3054static int32_t floatx80_to_int32_scalbn(floatx80 a, FloatRoundMode rmode,
3055                                        int scale, float_status *s)
3056{
3057    FloatParts128 p;
3058
3059    if (!floatx80_unpack_canonical(&p, a, s)) {
3060        parts_default_nan(&p, s);
3061    }
3062    return parts_float_to_sint(&p, rmode, scale, INT32_MIN, INT32_MAX, s);
3063}
3064
3065static int64_t floatx80_to_int64_scalbn(floatx80 a, FloatRoundMode rmode,
3066                                        int scale, float_status *s)
3067{
3068    FloatParts128 p;
3069
3070    if (!floatx80_unpack_canonical(&p, a, s)) {
3071        parts_default_nan(&p, s);
3072    }
3073    return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s);
3074}
3075
3076int8_t float16_to_int8(float16 a, float_status *s)
3077{
3078    return float16_to_int8_scalbn(a, s->float_rounding_mode, 0, s);
3079}
3080
3081int16_t float16_to_int16(float16 a, float_status *s)
3082{
3083    return float16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3084}
3085
3086int32_t float16_to_int32(float16 a, float_status *s)
3087{
3088    return float16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3089}
3090
3091int64_t float16_to_int64(float16 a, float_status *s)
3092{
3093    return float16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3094}
3095
3096int16_t float32_to_int16(float32 a, float_status *s)
3097{
3098    return float32_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3099}
3100
3101int32_t float32_to_int32(float32 a, float_status *s)
3102{
3103    return float32_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3104}
3105
3106int64_t float32_to_int64(float32 a, float_status *s)
3107{
3108    return float32_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3109}
3110
3111int16_t float64_to_int16(float64 a, float_status *s)
3112{
3113    return float64_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3114}
3115
3116int32_t float64_to_int32(float64 a, float_status *s)
3117{
3118    return float64_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3119}
3120
3121int64_t float64_to_int64(float64 a, float_status *s)
3122{
3123    return float64_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3124}
3125
3126int32_t float128_to_int32(float128 a, float_status *s)
3127{
3128    return float128_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3129}
3130
3131int64_t float128_to_int64(float128 a, float_status *s)
3132{
3133    return float128_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3134}
3135
3136int32_t floatx80_to_int32(floatx80 a, float_status *s)
3137{
3138    return floatx80_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3139}
3140
3141int64_t floatx80_to_int64(floatx80 a, float_status *s)
3142{
3143    return floatx80_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3144}
3145
3146int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
3147{
3148    return float16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3149}
3150
3151int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
3152{
3153    return float16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3154}
3155
3156int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
3157{
3158    return float16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3159}
3160
3161int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
3162{
3163    return float32_to_int16_scalbn(a, float_round_to_zero, 0, s);
3164}
3165
3166int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
3167{
3168    return float32_to_int32_scalbn(a, float_round_to_zero, 0, s);
3169}
3170
3171int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
3172{
3173    return float32_to_int64_scalbn(a, float_round_to_zero, 0, s);
3174}
3175
3176int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
3177{
3178    return float64_to_int16_scalbn(a, float_round_to_zero, 0, s);
3179}
3180
3181int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
3182{
3183    return float64_to_int32_scalbn(a, float_round_to_zero, 0, s);
3184}
3185
3186int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
3187{
3188    return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
3189}
3190
3191int32_t float128_to_int32_round_to_zero(float128 a, float_status *s)
3192{
3193    return float128_to_int32_scalbn(a, float_round_to_zero, 0, s);
3194}
3195
3196int64_t float128_to_int64_round_to_zero(float128 a, float_status *s)
3197{
3198    return float128_to_int64_scalbn(a, float_round_to_zero, 0, s);
3199}
3200
3201int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *s)
3202{
3203    return floatx80_to_int32_scalbn(a, float_round_to_zero, 0, s);
3204}
3205
3206int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s)
3207{
3208    return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s);
3209}
3210
3211int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
3212{
3213    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
3214}
3215
3216int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
3217{
3218    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
3219}
3220
3221int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
3222{
3223    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
3224}
3225
3226int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
3227{
3228    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
3229}
3230
3231int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
3232{
3233    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
3234}
3235
3236int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
3237{
3238    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
3239}
3240
3241/*
3242 * Floating-point to unsigned integer conversions
3243 */
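/*
 * Note on out-of-range inputs: as with the signed conversions, results
 * saturate.  Anything that rounds below zero returns 0 and anything above
 * the stated maximum returns that maximum, raising float_flag_invalid in
 * either case; a small negative value that merely rounds to zero raises
 * only inexact (behaviour of parts_float_to_uint).
 */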
3244
3245uint8_t float16_to_uint8_scalbn(float16 a, FloatRoundMode rmode, int scale,
3246                                float_status *s)
3247{
3248    FloatParts64 p;
3249
3250    float16_unpack_canonical(&p, a, s);
3251    return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s);
3252}
3253
3254uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
3255                                  float_status *s)
3256{
3257    FloatParts64 p;
3258
3259    float16_unpack_canonical(&p, a, s);
3260    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3261}
3262
3263uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
3264                                  float_status *s)
3265{
3266    FloatParts64 p;
3267
3268    float16_unpack_canonical(&p, a, s);
3269    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3270}
3271
3272uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
3273                                  float_status *s)
3274{
3275    FloatParts64 p;
3276
3277    float16_unpack_canonical(&p, a, s);
3278    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3279}
3280
3281uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
3282                                  float_status *s)
3283{
3284    FloatParts64 p;
3285
3286    float32_unpack_canonical(&p, a, s);
3287    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3288}
3289
3290uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
3291                                  float_status *s)
3292{
3293    FloatParts64 p;
3294
3295    float32_unpack_canonical(&p, a, s);
3296    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3297}
3298
3299uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
3300                                  float_status *s)
3301{
3302    FloatParts64 p;
3303
3304    float32_unpack_canonical(&p, a, s);
3305    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3306}
3307
3308uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
3309                                  float_status *s)
3310{
3311    FloatParts64 p;
3312
3313    float64_unpack_canonical(&p, a, s);
3314    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3315}
3316
3317uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
3318                                  float_status *s)
3319{
3320    FloatParts64 p;
3321
3322    float64_unpack_canonical(&p, a, s);
3323    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3324}
3325
3326uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
3327                                  float_status *s)
3328{
3329    FloatParts64 p;
3330
3331    float64_unpack_canonical(&p, a, s);
3332    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3333}
3334
3335uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode,
3336                                   int scale, float_status *s)
3337{
3338    FloatParts64 p;
3339
3340    bfloat16_unpack_canonical(&p, a, s);
3341    return parts_float_to_uint(&p, rmode, scale, UINT16_MAX, s);
3342}
3343
3344uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode rmode,
3345                                   int scale, float_status *s)
3346{
3347    FloatParts64 p;
3348
3349    bfloat16_unpack_canonical(&p, a, s);
3350    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3351}
3352
3353uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode rmode,
3354                                   int scale, float_status *s)
3355{
3356    FloatParts64 p;
3357
3358    bfloat16_unpack_canonical(&p, a, s);
3359    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3360}
3361
3362static uint32_t float128_to_uint32_scalbn(float128 a, FloatRoundMode rmode,
3363                                          int scale, float_status *s)
3364{
3365    FloatParts128 p;
3366
3367    float128_unpack_canonical(&p, a, s);
3368    return parts_float_to_uint(&p, rmode, scale, UINT32_MAX, s);
3369}
3370
3371static uint64_t float128_to_uint64_scalbn(float128 a, FloatRoundMode rmode,
3372                                          int scale, float_status *s)
3373{
3374    FloatParts128 p;
3375
3376    float128_unpack_canonical(&p, a, s);
3377    return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s);
3378}
3379
3380uint8_t float16_to_uint8(float16 a, float_status *s)
3381{
3382    return float16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s);
3383}
3384
3385uint16_t float16_to_uint16(float16 a, float_status *s)
3386{
3387    return float16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3388}
3389
3390uint32_t float16_to_uint32(float16 a, float_status *s)
3391{
3392    return float16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3393}
3394
3395uint64_t float16_to_uint64(float16 a, float_status *s)
3396{
3397    return float16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3398}
3399
3400uint16_t float32_to_uint16(float32 a, float_status *s)
3401{
3402    return float32_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3403}
3404
3405uint32_t float32_to_uint32(float32 a, float_status *s)
3406{
3407    return float32_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3408}
3409
3410uint64_t float32_to_uint64(float32 a, float_status *s)
3411{
3412    return float32_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3413}
3414
3415uint16_t float64_to_uint16(float64 a, float_status *s)
3416{
3417    return float64_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3418}
3419
3420uint32_t float64_to_uint32(float64 a, float_status *s)
3421{
3422    return float64_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3423}
3424
3425uint64_t float64_to_uint64(float64 a, float_status *s)
3426{
3427    return float64_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3428}
3429
3430uint32_t float128_to_uint32(float128 a, float_status *s)
3431{
3432    return float128_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3433}
3434
3435uint64_t float128_to_uint64(float128 a, float_status *s)
3436{
3437    return float128_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3438}
3439
3440uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
3441{
3442    return float16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3443}
3444
3445uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
3446{
3447    return float16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3448}
3449
3450uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
3451{
3452    return float16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3453}
3454
3455uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
3456{
3457    return float32_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3458}
3459
3460uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
3461{
3462    return float32_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3463}
3464
3465uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
3466{
3467    return float32_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3468}
3469
3470uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
3471{
3472    return float64_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3473}
3474
3475uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
3476{
3477    return float64_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3478}
3479
3480uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
3481{
3482    return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3483}
3484
3485uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *s)
3486{
3487    return float128_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3488}
3489
3490uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *s)
3491{
3492    return float128_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3493}
3494
3495uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
3496{
3497    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
3498}
3499
3500uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
3501{
3502    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
3503}
3504
3505uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
3506{
3507    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
3508}
3509
3510uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
3511{
3512    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
3513}
3514
3515uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
3516{
3517    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
3518}
3519
3520uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
3521{
3522    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
3523}
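
/*
 * Illustrative note (editorial, not upstream documentation): the
 * conversions above all funnel into parts_float_to_uint(), which
 * saturates out-of-range results into [0, max] and raises the invalid
 * flag.  By my reading of that helper, with round-to-nearest-even:
 *
 *     float64_to_uint16(300.7, s)    -> 301
 *     float64_to_uint16(70000.0, s)  -> UINT16_MAX, invalid raised
 *     float64_to_uint16(-1.0, s)     -> 0, invalid raised
 *     float64_to_uint16(NaN, s)      -> UINT16_MAX, invalid raised
 *
 * The 'scale' argument multiplies the input by 2**scale before rounding,
 * and 'rmode' overrides the rounding mode in *s for this one operation.
 */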
3524
3525/*
3526 * Signed integer to floating-point conversions
3527 */
3528
3529float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
3530{
3531    FloatParts64 p;
3532
3533    parts_sint_to_float(&p, a, scale, status);
3534    return float16_round_pack_canonical(&p, status);
3535}
3536
3537float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
3538{
3539    return int64_to_float16_scalbn(a, scale, status);
3540}
3541
3542float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
3543{
3544    return int64_to_float16_scalbn(a, scale, status);
3545}
3546
3547float16 int64_to_float16(int64_t a, float_status *status)
3548{
3549    return int64_to_float16_scalbn(a, 0, status);
3550}
3551
3552float16 int32_to_float16(int32_t a, float_status *status)
3553{
3554    return int64_to_float16_scalbn(a, 0, status);
3555}
3556
3557float16 int16_to_float16(int16_t a, float_status *status)
3558{
3559    return int64_to_float16_scalbn(a, 0, status);
3560}
3561
3562float16 int8_to_float16(int8_t a, float_status *status)
3563{
3564    return int64_to_float16_scalbn(a, 0, status);
3565}
3566
3567float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
3568{
3569    FloatParts64 p;
3570
3571    /* Without scaling, there are no overflow concerns. */
3572    if (likely(scale == 0) && can_use_fpu(status)) {
3573        union_float32 ur;
3574        ur.h = a;
3575        return ur.s;
3576    }
3577
3578    parts_sint_to_float(&p, a, scale, status);
3579    return float32_round_pack_canonical(&p, status);
3580}
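
/*
 * Note on the fast path above (an editorial gloss, not upstream text):
 * 'ur.h = a' is an ordinary host integer-to-float conversion, not a bit
 * reinterpretation, so it runs on the host FPU.  can_use_fpu() only
 * permits this when the guest is rounding to nearest-even and the
 * inexact flag is already set, so a possibly-inexact host conversion of
 * a large int64_t can neither round differently from softfloat nor miss
 * raising a flag.
 */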
3581
3582float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
3583{
3584    return int64_to_float32_scalbn(a, scale, status);
3585}
3586
3587float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
3588{
3589    return int64_to_float32_scalbn(a, scale, status);
3590}
3591
3592float32 int64_to_float32(int64_t a, float_status *status)
3593{
3594    return int64_to_float32_scalbn(a, 0, status);
3595}
3596
3597float32 int32_to_float32(int32_t a, float_status *status)
3598{
3599    return int64_to_float32_scalbn(a, 0, status);
3600}
3601
3602float32 int16_to_float32(int16_t a, float_status *status)
3603{
3604    return int64_to_float32_scalbn(a, 0, status);
3605}
3606
3607float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
3608{
3609    FloatParts64 p;
3610
3611    /* Without scaling, there are no overflow concerns. */
3612    if (likely(scale == 0) && can_use_fpu(status)) {
3613        union_float64 ur;
3614        ur.h = a;
3615        return ur.s;
3616    }
3617
3618    parts_sint_to_float(&p, a, scale, status);
3619    return float64_round_pack_canonical(&p, status);
3620}
3621
3622float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
3623{
3624    return int64_to_float64_scalbn(a, scale, status);
3625}
3626
3627float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
3628{
3629    return int64_to_float64_scalbn(a, scale, status);
3630}
3631
3632float64 int64_to_float64(int64_t a, float_status *status)
3633{
3634    return int64_to_float64_scalbn(a, 0, status);
3635}
3636
3637float64 int32_to_float64(int32_t a, float_status *status)
3638{
3639    return int64_to_float64_scalbn(a, 0, status);
3640}
3641
3642float64 int16_to_float64(int16_t a, float_status *status)
3643{
3644    return int64_to_float64_scalbn(a, 0, status);
3645}
3646
3647bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
3648{
3649    FloatParts64 p;
3650
3651    parts_sint_to_float(&p, a, scale, status);
3652    return bfloat16_round_pack_canonical(&p, status);
3653}
3654
3655bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
3656{
3657    return int64_to_bfloat16_scalbn(a, scale, status);
3658}
3659
3660bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
3661{
3662    return int64_to_bfloat16_scalbn(a, scale, status);
3663}
3664
3665bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
3666{
3667    return int64_to_bfloat16_scalbn(a, 0, status);
3668}
3669
3670bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
3671{
3672    return int64_to_bfloat16_scalbn(a, 0, status);
3673}
3674
3675bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
3676{
3677    return int64_to_bfloat16_scalbn(a, 0, status);
3678}
3679
3680float128 int64_to_float128(int64_t a, float_status *status)
3681{
3682    FloatParts128 p;
3683
3684    parts_sint_to_float(&p, a, 0, status);
3685    return float128_round_pack_canonical(&p, status);
3686}
3687
3688float128 int32_to_float128(int32_t a, float_status *status)
3689{
3690    return int64_to_float128(a, status);
3691}
3692
3693floatx80 int64_to_floatx80(int64_t a, float_status *status)
3694{
3695    FloatParts128 p;
3696
3697    parts_sint_to_float(&p, a, 0, status);
3698    return floatx80_round_pack_canonical(&p, status);
3699}
3700
3701floatx80 int32_to_floatx80(int32_t a, float_status *status)
3702{
3703    return int64_to_floatx80(a, status);
3704}
3705
3706/*
3707 * Unsigned integer to floating-point conversions
3708 */
3709
3710float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
3711{
3712    FloatParts64 p;
3713
3714    parts_uint_to_float(&p, a, scale, status);
3715    return float16_round_pack_canonical(&p, status);
3716}
3717
3718float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
3719{
3720    return uint64_to_float16_scalbn(a, scale, status);
3721}
3722
3723float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
3724{
3725    return uint64_to_float16_scalbn(a, scale, status);
3726}
3727
3728float16 uint64_to_float16(uint64_t a, float_status *status)
3729{
3730    return uint64_to_float16_scalbn(a, 0, status);
3731}
3732
3733float16 uint32_to_float16(uint32_t a, float_status *status)
3734{
3735    return uint64_to_float16_scalbn(a, 0, status);
3736}
3737
3738float16 uint16_to_float16(uint16_t a, float_status *status)
3739{
3740    return uint64_to_float16_scalbn(a, 0, status);
3741}
3742
3743float16 uint8_to_float16(uint8_t a, float_status *status)
3744{
3745    return uint64_to_float16_scalbn(a, 0, status);
3746}
3747
3748float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
3749{
3750    FloatParts64 p;
3751
3752    /* Without scaling, there are no overflow concerns. */
3753    if (likely(scale == 0) && can_use_fpu(status)) {
3754        union_float32 ur;
3755        ur.h = a;
3756        return ur.s;
3757    }
3758
3759    parts_uint_to_float(&p, a, scale, status);
3760    return float32_round_pack_canonical(&p, status);
3761}
3762
3763float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
3764{
3765    return uint64_to_float32_scalbn(a, scale, status);
3766}
3767
3768float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
3769{
3770    return uint64_to_float32_scalbn(a, scale, status);
3771}
3772
3773float32 uint64_to_float32(uint64_t a, float_status *status)
3774{
3775    return uint64_to_float32_scalbn(a, 0, status);
3776}
3777
3778float32 uint32_to_float32(uint32_t a, float_status *status)
3779{
3780    return uint64_to_float32_scalbn(a, 0, status);
3781}
3782
3783float32 uint16_to_float32(uint16_t a, float_status *status)
3784{
3785    return uint64_to_float32_scalbn(a, 0, status);
3786}
3787
3788float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
3789{
3790    FloatParts64 p;
3791
3792    /* Without scaling, there are no overflow concerns. */
3793    if (likely(scale == 0) && can_use_fpu(status)) {
3794        union_float64 ur;
3795        ur.h = a;
3796        return ur.s;
3797    }
3798
3799    parts_uint_to_float(&p, a, scale, status);
3800    return float64_round_pack_canonical(&p, status);
3801}
3802
3803float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
3804{
3805    return uint64_to_float64_scalbn(a, scale, status);
3806}
3807
3808float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
3809{
3810    return uint64_to_float64_scalbn(a, scale, status);
3811}
3812
3813float64 uint64_to_float64(uint64_t a, float_status *status)
3814{
3815    return uint64_to_float64_scalbn(a, 0, status);
3816}
3817
3818float64 uint32_to_float64(uint32_t a, float_status *status)
3819{
3820    return uint64_to_float64_scalbn(a, 0, status);
3821}
3822
3823float64 uint16_to_float64(uint16_t a, float_status *status)
3824{
3825    return uint64_to_float64_scalbn(a, 0, status);
3826}
3827
3828bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
3829{
3830    FloatParts64 p;
3831
3832    parts_uint_to_float(&p, a, scale, status);
3833    return bfloat16_round_pack_canonical(&p, status);
3834}
3835
3836bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
3837{
3838    return uint64_to_bfloat16_scalbn(a, scale, status);
3839}
3840
3841bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
3842{
3843    return uint64_to_bfloat16_scalbn(a, scale, status);
3844}
3845
3846bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
3847{
3848    return uint64_to_bfloat16_scalbn(a, 0, status);
3849}
3850
3851bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
3852{
3853    return uint64_to_bfloat16_scalbn(a, 0, status);
3854}
3855
3856bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
3857{
3858    return uint64_to_bfloat16_scalbn(a, 0, status);
3859}
3860
3861float128 uint64_to_float128(uint64_t a, float_status *status)
3862{
3863    FloatParts128 p;
3864
3865    parts_uint_to_float(&p, a, 0, status);
3866    return float128_round_pack_canonical(&p, status);
3867}
3868
3869/*
3870 * Minimum and maximum
3871 */
3872
3873static float16 float16_minmax(float16 a, float16 b, float_status *s, int flags)
3874{
3875    FloatParts64 pa, pb, *pr;
3876
3877    float16_unpack_canonical(&pa, a, s);
3878    float16_unpack_canonical(&pb, b, s);
3879    pr = parts_minmax(&pa, &pb, s, flags);
3880
3881    return float16_round_pack_canonical(pr, s);
3882}
3883
3884static bfloat16 bfloat16_minmax(bfloat16 a, bfloat16 b,
3885                                float_status *s, int flags)
3886{
3887    FloatParts64 pa, pb, *pr;
3888
3889    bfloat16_unpack_canonical(&pa, a, s);
3890    bfloat16_unpack_canonical(&pb, b, s);
3891    pr = parts_minmax(&pa, &pb, s, flags);
3892
3893    return bfloat16_round_pack_canonical(pr, s);
3894}
3895
3896static float32 float32_minmax(float32 a, float32 b, float_status *s, int flags)
3897{
3898    FloatParts64 pa, pb, *pr;
3899
3900    float32_unpack_canonical(&pa, a, s);
3901    float32_unpack_canonical(&pb, b, s);
3902    pr = parts_minmax(&pa, &pb, s, flags);
3903
3904    return float32_round_pack_canonical(pr, s);
3905}
3906
3907static float64 float64_minmax(float64 a, float64 b, float_status *s, int flags)
3908{
3909    FloatParts64 pa, pb, *pr;
3910
3911    float64_unpack_canonical(&pa, a, s);
3912    float64_unpack_canonical(&pb, b, s);
3913    pr = parts_minmax(&pa, &pb, s, flags);
3914
3915    return float64_round_pack_canonical(pr, s);
3916}
3917
3918static float128 float128_minmax(float128 a, float128 b,
3919                                float_status *s, int flags)
3920{
3921    FloatParts128 pa, pb, *pr;
3922
3923    float128_unpack_canonical(&pa, a, s);
3924    float128_unpack_canonical(&pb, b, s);
3925    pr = parts_minmax(&pa, &pb, s, flags);
3926
3927    return float128_round_pack_canonical(pr, s);
3928}
3929
3930#define MINMAX_1(type, name, flags) \
3931    type type##_##name(type a, type b, float_status *s) \
3932    { return type##_minmax(a, b, s, flags); }
3933
3934#define MINMAX_2(type) \
3935    MINMAX_1(type, max, 0)                                                \
3936    MINMAX_1(type, maxnum, minmax_isnum)                                  \
3937    MINMAX_1(type, maxnummag, minmax_isnum | minmax_ismag)                \
3938    MINMAX_1(type, maximum_number, minmax_isnumber)                       \
3939    MINMAX_1(type, min, minmax_ismin)                                     \
3940    MINMAX_1(type, minnum, minmax_ismin | minmax_isnum)                   \
3941    MINMAX_1(type, minnummag, minmax_ismin | minmax_isnum | minmax_ismag) \
3942    MINMAX_1(type, minimum_number, minmax_ismin | minmax_isnumber)        \
3943
3944MINMAX_2(float16)
3945MINMAX_2(bfloat16)
3946MINMAX_2(float32)
3947MINMAX_2(float64)
3948MINMAX_2(float128)
3949
3950#undef MINMAX_1
3951#undef MINMAX_2
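
/*
 * For reference, MINMAX_1(float32, maxnum, minmax_isnum) above expands to:
 *
 *     float32 float32_maxnum(float32 a, float32 b, float_status *s)
 *     { return float32_minmax(a, b, s, minmax_isnum); }
 *
 * The flag sets select the IEEE flavour: plain min/max propagate NaNs,
 * minnum/maxnum implement 754-2008 minNum/maxNum (a quiet NaN operand is
 * treated as missing), minimum_number/maximum_number implement 754-2019
 * minimumNumber/maximumNumber, and the *mag variants compare magnitudes
 * before comparing values.
 */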
3952
3953/*
3954 * Floating point compare
3955 */
3956
3957static FloatRelation QEMU_FLATTEN
3958float16_do_compare(float16 a, float16 b, float_status *s, bool is_quiet)
3959{
3960    FloatParts64 pa, pb;
3961
3962    float16_unpack_canonical(&pa, a, s);
3963    float16_unpack_canonical(&pb, b, s);
3964    return parts_compare(&pa, &pb, s, is_quiet);
3965}
3966
3967FloatRelation float16_compare(float16 a, float16 b, float_status *s)
3968{
3969    return float16_do_compare(a, b, s, false);
3970}
3971
3972FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
3973{
3974    return float16_do_compare(a, b, s, true);
3975}
3976
3977static FloatRelation QEMU_SOFTFLOAT_ATTR
3978float32_do_compare(float32 a, float32 b, float_status *s, bool is_quiet)
3979{
3980    FloatParts64 pa, pb;
3981
3982    float32_unpack_canonical(&pa, a, s);
3983    float32_unpack_canonical(&pb, b, s);
3984    return parts_compare(&pa, &pb, s, is_quiet);
3985}
3986
3987static FloatRelation QEMU_FLATTEN
3988float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
3989{
3990    union_float32 ua, ub;
3991
3992    ua.s = xa;
3993    ub.s = xb;
3994
3995    if (QEMU_NO_HARDFLOAT) {
3996        goto soft;
3997    }
3998
3999    float32_input_flush2(&ua.s, &ub.s, s);
4000    if (isgreaterequal(ua.h, ub.h)) {
4001        if (isgreater(ua.h, ub.h)) {
4002            return float_relation_greater;
4003        }
4004        return float_relation_equal;
4005    }
4006    if (likely(isless(ua.h, ub.h))) {
4007        return float_relation_less;
4008    }
4009    /*
4010     * The only condition remaining is unordered.
4011     * Fall through to set flags.
4012     */
4013 soft:
4014    return float32_do_compare(ua.s, ub.s, s, is_quiet);
4015}
4016
4017FloatRelation float32_compare(float32 a, float32 b, float_status *s)
4018{
4019    return float32_hs_compare(a, b, s, false);
4020}
4021
4022FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
4023{
4024    return float32_hs_compare(a, b, s, true);
4025}
4026
4027static FloatRelation QEMU_SOFTFLOAT_ATTR
4028float64_do_compare(float64 a, float64 b, float_status *s, bool is_quiet)
4029{
4030    FloatParts64 pa, pb;
4031
4032    float64_unpack_canonical(&pa, a, s);
4033    float64_unpack_canonical(&pb, b, s);
4034    return parts_compare(&pa, &pb, s, is_quiet);
4035}
4036
4037static FloatRelation QEMU_FLATTEN
4038float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
4039{
4040    union_float64 ua, ub;
4041
4042    ua.s = xa;
4043    ub.s = xb;
4044
4045    if (QEMU_NO_HARDFLOAT) {
4046        goto soft;
4047    }
4048
4049    float64_input_flush2(&ua.s, &ub.s, s);
4050    if (isgreaterequal(ua.h, ub.h)) {
4051        if (isgreater(ua.h, ub.h)) {
4052            return float_relation_greater;
4053        }
4054        return float_relation_equal;
4055    }
4056    if (likely(isless(ua.h, ub.h))) {
4057        return float_relation_less;
4058    }
4059    /*
4060     * The only condition remaining is unordered.
4061     * Fall through to set flags.
4062     */
4063 soft:
4064    return float64_do_compare(ua.s, ub.s, s, is_quiet);
4065}
4066
4067FloatRelation float64_compare(float64 a, float64 b, float_status *s)
4068{
4069    return float64_hs_compare(a, b, s, false);
4070}
4071
4072FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
4073{
4074    return float64_hs_compare(a, b, s, true);
4075}
4076
4077static FloatRelation QEMU_FLATTEN
4078bfloat16_do_compare(bfloat16 a, bfloat16 b, float_status *s, bool is_quiet)
4079{
4080    FloatParts64 pa, pb;
4081
4082    bfloat16_unpack_canonical(&pa, a, s);
4083    bfloat16_unpack_canonical(&pb, b, s);
4084    return parts_compare(&pa, &pb, s, is_quiet);
4085}
4086
4087FloatRelation bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
4088{
4089    return bfloat16_do_compare(a, b, s, false);
4090}
4091
4092FloatRelation bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
4093{
4094    return bfloat16_do_compare(a, b, s, true);
4095}
4096
4097static FloatRelation QEMU_FLATTEN
4098float128_do_compare(float128 a, float128 b, float_status *s, bool is_quiet)
4099{
4100    FloatParts128 pa, pb;
4101
4102    float128_unpack_canonical(&pa, a, s);
4103    float128_unpack_canonical(&pb, b, s);
4104    return parts_compare(&pa, &pb, s, is_quiet);
4105}
4106
4107FloatRelation float128_compare(float128 a, float128 b, float_status *s)
4108{
4109    return float128_do_compare(a, b, s, false);
4110}
4111
4112FloatRelation float128_compare_quiet(float128 a, float128 b, float_status *s)
4113{
4114    return float128_do_compare(a, b, s, true);
4115}
4116
4117static FloatRelation QEMU_FLATTEN
4118floatx80_do_compare(floatx80 a, floatx80 b, float_status *s, bool is_quiet)
4119{
4120    FloatParts128 pa, pb;
4121
4122    if (!floatx80_unpack_canonical(&pa, a, s) ||
4123        !floatx80_unpack_canonical(&pb, b, s)) {
4124        return float_relation_unordered;
4125    }
4126    return parts_compare(&pa, &pb, s, is_quiet);
4127}
4128
4129FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *s)
4130{
4131    return floatx80_do_compare(a, b, s, false);
4132}
4133
4134FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *s)
4135{
4136    return floatx80_do_compare(a, b, s, true);
4137}
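
/*
 * Usage sketch (illustrative, not part of the file): FloatRelation is
 * -1/0/+1/2 for less/equal/greater/unordered.  The plain *_compare
 * functions are signaling (any NaN operand raises invalid, as for the
 * IEEE compareSignaling predicates); the *_compare_quiet variants raise
 * invalid only for signaling NaNs.
 *
 *     switch (float64_compare_quiet(a, b, s)) {
 *     case float_relation_unordered:
 *         ...
 *     case float_relation_less:
 *         ...
 *     }
 */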
4138
4139/*
4140 * Scale by 2**N
4141 */
4142
4143float16 float16_scalbn(float16 a, int n, float_status *status)
4144{
4145    FloatParts64 p;
4146
4147    float16_unpack_canonical(&p, a, status);
4148    parts_scalbn(&p, n, status);
4149    return float16_round_pack_canonical(&p, status);
4150}
4151
4152float32 float32_scalbn(float32 a, int n, float_status *status)
4153{
4154    FloatParts64 p;
4155
4156    float32_unpack_canonical(&p, a, status);
4157    parts_scalbn(&p, n, status);
4158    return float32_round_pack_canonical(&p, status);
4159}
4160
4161float64 float64_scalbn(float64 a, int n, float_status *status)
4162{
4163    FloatParts64 p;
4164
4165    float64_unpack_canonical(&p, a, status);
4166    parts_scalbn(&p, n, status);
4167    return float64_round_pack_canonical(&p, status);
4168}
4169
4170bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
4171{
4172    FloatParts64 p;
4173
4174    bfloat16_unpack_canonical(&p, a, status);
4175    parts_scalbn(&p, n, status);
4176    return bfloat16_round_pack_canonical(&p, status);
4177}
4178
4179float128 float128_scalbn(float128 a, int n, float_status *status)
4180{
4181    FloatParts128 p;
4182
4183    float128_unpack_canonical(&p, a, status);
4184    parts_scalbn(&p, n, status);
4185    return float128_round_pack_canonical(&p, status);
4186}
4187
4188floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
4189{
4190    FloatParts128 p;
4191
4192    if (!floatx80_unpack_canonical(&p, a, status)) {
4193        return floatx80_default_nan(status);
4194    }
4195    parts_scalbn(&p, n, status);
4196    return floatx80_round_pack_canonical(&p, status);
4197}
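
/*
 * Worked example (editorial): scalbn computes a * 2**n as a single
 * correctly-rounded operation, so float64_scalbn(1.5, 4) is exactly
 * 24.0, while a huge n overflows to infinity and a very negative n
 * underflows to zero or a subnormal, with the usual flags raised by the
 * round-and-pack step.
 */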
4198
4199/*
4200 * Square Root
4201 */
4202
4203float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
4204{
4205    FloatParts64 p;
4206
4207    float16_unpack_canonical(&p, a, status);
4208    parts_sqrt(&p, status, &float16_params);
4209    return float16_round_pack_canonical(&p, status);
4210}
4211
4212static float32 QEMU_SOFTFLOAT_ATTR
4213soft_f32_sqrt(float32 a, float_status *status)
4214{
4215    FloatParts64 p;
4216
4217    float32_unpack_canonical(&p, a, status);
4218    parts_sqrt(&p, status, &float32_params);
4219    return float32_round_pack_canonical(&p, status);
4220}
4221
4222static float64 QEMU_SOFTFLOAT_ATTR
4223soft_f64_sqrt(float64 a, float_status *status)
4224{
4225    FloatParts64 p;
4226
4227    float64_unpack_canonical(&p, a, status);
4228    parts_sqrt(&p, status, &float64_params);
4229    return float64_round_pack_canonical(&p, status);
4230}
4231
4232float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
4233{
4234    union_float32 ua, ur;
4235
4236    ua.s = xa;
4237    if (unlikely(!can_use_fpu(s))) {
4238        goto soft;
4239    }
4240
4241    float32_input_flush1(&ua.s, s);
4242    if (QEMU_HARDFLOAT_1F32_USE_FP) {
4243        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4244                       fpclassify(ua.h) == FP_ZERO) ||
4245                     signbit(ua.h))) {
4246            goto soft;
4247        }
4248    } else if (unlikely(!float32_is_zero_or_normal(ua.s) ||
4249                        float32_is_neg(ua.s))) {
4250        goto soft;
4251    }
4252    ur.h = sqrtf(ua.h);
4253    return ur.s;
4254
4255 soft:
4256    return soft_f32_sqrt(ua.s, s);
4257}
4258
4259float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
4260{
4261    union_float64 ua, ur;
4262
4263    ua.s = xa;
4264    if (unlikely(!can_use_fpu(s))) {
4265        goto soft;
4266    }
4267
4268    float64_input_flush1(&ua.s, s);
4269    if (QEMU_HARDFLOAT_1F64_USE_FP) {
4270        if (unlikely(!(fpclassify(ua.h) == FP_NORMAL ||
4271                       fpclassify(ua.h) == FP_ZERO) ||
4272                     signbit(ua.h))) {
4273            goto soft;
4274        }
4275    } else if (unlikely(!float64_is_zero_or_normal(ua.s) ||
4276                        float64_is_neg(ua.s))) {
4277        goto soft;
4278    }
4279    ur.h = sqrt(ua.h);
4280    return ur.s;
4281
4282 soft:
4283    return soft_f64_sqrt(ua.s, s);
4284}
4285
4286bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
4287{
4288    FloatParts64 p;
4289
4290    bfloat16_unpack_canonical(&p, a, status);
4291    parts_sqrt(&p, status, &bfloat16_params);
4292    return bfloat16_round_pack_canonical(&p, status);
4293}
4294
4295float128 QEMU_FLATTEN float128_sqrt(float128 a, float_status *status)
4296{
4297    FloatParts128 p;
4298
4299    float128_unpack_canonical(&p, a, status);
4300    parts_sqrt(&p, status, &float128_params);
4301    return float128_round_pack_canonical(&p, status);
4302}
4303
4304floatx80 floatx80_sqrt(floatx80 a, float_status *s)
4305{
4306    FloatParts128 p;
4307
4308    if (!floatx80_unpack_canonical(&p, a, s)) {
4309        return floatx80_default_nan(s);
4310    }
4311    parts_sqrt(&p, s, &floatx80_params[s->floatx80_rounding_precision]);
4312    return floatx80_round_pack_canonical(&p, s);
4313}
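
/*
 * Editorial note: IEEE 754 requires a correctly-rounded square root, so
 * when can_use_fpu() holds, float32_sqrt/float64_sqrt above can defer to
 * the host's sqrtf()/sqrt() and obtain bit-identical results.  Inputs
 * that are negative, NaN or denormal fall back to the soft
 * implementation so that flag and NaN semantics stay under QEMU's
 * control.
 */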
4314
4315/*
4316 * log2
4317 */
4318float32 float32_log2(float32 a, float_status *status)
4319{
4320    FloatParts64 p;
4321
4322    float32_unpack_canonical(&p, a, status);
4323    parts_log2(&p, status, &float32_params);
4324    return float32_round_pack_canonical(&p, status);
4325}
4326
4327float64 float64_log2(float64 a, float_status *status)
4328{
4329    FloatParts64 p;
4330
4331    float64_unpack_canonical(&p, a, status);
4332    parts_log2(&p, status, &float64_params);
4333    return float64_round_pack_canonical(&p, status);
4334}
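
/*
 * Editorial note: for an exact power of two the significand is 1.0 and
 * parts_log2 should reduce to returning the unbiased exponent, so e.g.
 * float32_log2(8.0) is exactly 3.0; other inputs are evaluated in extra
 * precision and rounded on the way out.
 */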
4335
4336/*----------------------------------------------------------------------------
4337| The pattern for a default generated NaN.
4338*----------------------------------------------------------------------------*/
4339
4340float16 float16_default_nan(float_status *status)
4341{
4342    FloatParts64 p;
4343
4344    parts_default_nan(&p, status);
4345    p.frac >>= float16_params.frac_shift;
4346    return float16_pack_raw(&p);
4347}
4348
4349float32 float32_default_nan(float_status *status)
4350{
4351    FloatParts64 p;
4352
4353    parts_default_nan(&p, status);
4354    p.frac >>= float32_params.frac_shift;
4355    return float32_pack_raw(&p);
4356}
4357
4358float64 float64_default_nan(float_status *status)
4359{
4360    FloatParts64 p;
4361
4362    parts_default_nan(&p, status);
4363    p.frac >>= float64_params.frac_shift;
4364    return float64_pack_raw(&p);
4365}
4366
4367float128 float128_default_nan(float_status *status)
4368{
4369    FloatParts128 p;
4370
4371    parts_default_nan(&p, status);
4372    frac_shr(&p, float128_params.frac_shift);
4373    return float128_pack_raw(&p);
4374}
4375
4376bfloat16 bfloat16_default_nan(float_status *status)
4377{
4378    FloatParts64 p;
4379
4380    parts_default_nan(&p, status);
4381    p.frac >>= bfloat16_params.frac_shift;
4382    return bfloat16_pack_raw(&p);
4383}
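
/*
 * Illustrative bit patterns (target-dependent; parts_default_nan applies
 * the target conventions recorded in 'status'): on targets using the
 * common "sign clear, most-significant fraction bit set" quiet-NaN
 * convention, these return 0x7e00 (float16), 0x7fc00000 (float32),
 * 0x7ff8000000000000 (float64) and 0x7fc0 (bfloat16).
 */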
4384
4385/*----------------------------------------------------------------------------
4386| Returns a quiet NaN from a signalling NaN for the floating-point value `a'.
4387*----------------------------------------------------------------------------*/
4388
4389float16 float16_silence_nan(float16 a, float_status *status)
4390{
4391    FloatParts64 p;
4392
4393    float16_unpack_raw(&p, a);
4394    p.frac <<= float16_params.frac_shift;
4395    parts_silence_nan(&p, status);
4396    p.frac >>= float16_params.frac_shift;
4397    return float16_pack_raw(&p);
4398}
4399
4400float32 float32_silence_nan(float32 a, float_status *status)
4401{
4402    FloatParts64 p;
4403
4404    float32_unpack_raw(&p, a);
4405    p.frac <<= float32_params.frac_shift;
4406    parts_silence_nan(&p, status);
4407    p.frac >>= float32_params.frac_shift;
4408    return float32_pack_raw(&p);
4409}
4410
4411float64 float64_silence_nan(float64 a, float_status *status)
4412{
4413    FloatParts64 p;
4414
4415    float64_unpack_raw(&p, a);
4416    p.frac <<= float64_params.frac_shift;
4417    parts_silence_nan(&p, status);
4418    p.frac >>= float64_params.frac_shift;
4419    return float64_pack_raw(&p);
4420}
4421
4422bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
4423{
4424    FloatParts64 p;
4425
4426    bfloat16_unpack_raw(&p, a);
4427    p.frac <<= bfloat16_params.frac_shift;
4428    parts_silence_nan(&p, status);
4429    p.frac >>= bfloat16_params.frac_shift;
4430    return bfloat16_pack_raw(&p);
4431}
4432
4433float128 float128_silence_nan(float128 a, float_status *status)
4434{
4435    FloatParts128 p;
4436
4437    float128_unpack_raw(&p, a);
4438    frac_shl(&p, float128_params.frac_shift);
4439    parts_silence_nan(&p, status);
4440    frac_shr(&p, float128_params.frac_shift);
4441    return float128_pack_raw(&p);
4442}
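
/*
 * Editorial example: on targets where the quiet bit is the top fraction
 * bit, silencing turns sNaN 0x7f800001 into qNaN 0x7fc00001 (float32).
 * The frac shifts above move the fraction into the canonical FloatParts
 * position so parts_silence_nan can work format-independently; targets
 * with the inverted convention (snan_bit_is_one) are handled inside that
 * helper.
 */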
4443
4444/*----------------------------------------------------------------------------
4445| If `a' is denormal and we are in flush-to-zero mode then set the
4446| input-denormal exception and return zero. Otherwise just return the value.
4447*----------------------------------------------------------------------------*/
4448
4449static bool parts_squash_denormal(FloatParts64 p, float_status *status)
4450{
4451    if (p.exp == 0 && p.frac != 0) {
4452        float_raise(float_flag_input_denormal, status);
4453        return true;
4454    }
4455
4456    return false;
4457}
4458
4459float16 float16_squash_input_denormal(float16 a, float_status *status)
4460{
4461    if (status->flush_inputs_to_zero) {
4462        FloatParts64 p;
4463
4464        float16_unpack_raw(&p, a);
4465        if (parts_squash_denormal(p, status)) {
4466            return float16_set_sign(float16_zero, p.sign);
4467        }
4468    }
4469    return a;
4470}
4471
4472float32 float32_squash_input_denormal(float32 a, float_status *status)
4473{
4474    if (status->flush_inputs_to_zero) {
4475        FloatParts64 p;
4476
4477        float32_unpack_raw(&p, a);
4478        if (parts_squash_denormal(p, status)) {
4479            return float32_set_sign(float32_zero, p.sign);
4480        }
4481    }
4482    return a;
4483}
4484
4485float64 float64_squash_input_denormal(float64 a, float_status *status)
4486{
4487    if (status->flush_inputs_to_zero) {
4488        FloatParts64 p;
4489
4490        float64_unpack_raw(&p, a);
4491        if (parts_squash_denormal(p, status)) {
4492            return float64_set_sign(float64_zero, p.sign);
4493        }
4494    }
4495    return a;
4496}
4497
4498bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
4499{
4500    if (status->flush_inputs_to_zero) {
4501        FloatParts64 p;
4502
4503        bfloat16_unpack_raw(&p, a);
4504        if (parts_squash_denormal(p, status)) {
4505            return bfloat16_set_sign(bfloat16_zero, p.sign);
4506        }
4507    }
4508    return a;
4509}
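
/*
 * Worked example (editorial): with flush_inputs_to_zero set,
 * float32_squash_input_denormal(0x00000001) returns +0.0 (0x00000000)
 * and raises float_flag_input_denormal; the sign is preserved, so
 * 0x80000001 becomes -0.0 (0x80000000).  With the flag clear, the value
 * passes through untouched.
 */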
4510
4511/*----------------------------------------------------------------------------
4512| Normalizes the subnormal extended double-precision floating-point value
4513| represented by the denormalized significand `aSig'.  The normalized exponent
4514| and significand are stored at the locations pointed to by `zExpPtr' and
4515| `zSigPtr', respectively.
4516*----------------------------------------------------------------------------*/
4517
4518void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
4519                                uint64_t *zSigPtr)
4520{
4521    int8_t shiftCount;
4522
4523    shiftCount = clz64(aSig);
4524    *zSigPtr = aSig<<shiftCount;
4525    *zExpPtr = 1 - shiftCount;
4526}
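
/*
 * Worked example (editorial): for aSig = 1, clz64 gives 63, so
 * *zSigPtr = 0x8000000000000000 and *zExpPtr = 1 - 63 = -62.  aSig must
 * be nonzero: clz64(0) is 64 and a shift of a 64-bit value by 64 would
 * be undefined, so callers only pass true denormal significands here.
 */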
4527
4528/*----------------------------------------------------------------------------
4529| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4530| and extended significand formed by the concatenation of `zSig0' and `zSig1',
4531| and returns the proper extended double-precision floating-point value
4532| corresponding to the abstract input.  Ordinarily, the abstract value is
4533| rounded and packed into the extended double-precision format, with the
4534| inexact exception raised if the abstract input cannot be represented
4535| exactly.  However, if the abstract value is too large, the overflow and
4536| inexact exceptions are raised and an infinity or maximal finite value is
4537| returned.  If the abstract value is too small, the input value is rounded to
4538| a subnormal number, and the underflow and inexact exceptions are raised if
4539| the abstract input cannot be represented exactly as a subnormal extended
4540| double-precision floating-point number.
4541|     If `roundingPrecision' is floatx80_precision_s or floatx80_precision_d,
4542| the result is rounded to the same number of bits as single or double
4543| precision, respectively.  Otherwise, the result is rounded to the full
4544| precision of the extended double-precision format.
4545|     The input significand must be normalized or smaller.  If the input
4546| significand is not normalized, `zExp' must be 0; in that case, the result
4547| returned is a subnormal number, and it must not require rounding.  The
4548| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
4549| Floating-Point Arithmetic.
4550*----------------------------------------------------------------------------*/
4551
4552floatx80 roundAndPackFloatx80(FloatX80RoundPrec roundingPrecision, bool zSign,
4553                              int32_t zExp, uint64_t zSig0, uint64_t zSig1,
4554                              float_status *status)
4555{
4556    FloatRoundMode roundingMode;
4557    bool roundNearestEven, increment, isTiny;
4558    int64_t roundIncrement, roundMask, roundBits;
4559
4560    roundingMode = status->float_rounding_mode;
4561    roundNearestEven = ( roundingMode == float_round_nearest_even );
4562    switch (roundingPrecision) {
4563    case floatx80_precision_x:
4564        goto precision80;
4565    case floatx80_precision_d:
4566        roundIncrement = UINT64_C(0x0000000000000400);
4567        roundMask = UINT64_C(0x00000000000007FF);
4568        break;
4569    case floatx80_precision_s:
4570        roundIncrement = UINT64_C(0x0000008000000000);
4571        roundMask = UINT64_C(0x000000FFFFFFFFFF);
4572        break;
4573    default:
4574        g_assert_not_reached();
4575    }
4576    zSig0 |= ( zSig1 != 0 );
4577    switch (roundingMode) {
4578    case float_round_nearest_even:
4579    case float_round_ties_away:
4580        break;
4581    case float_round_to_zero:
4582        roundIncrement = 0;
4583        break;
4584    case float_round_up:
4585        roundIncrement = zSign ? 0 : roundMask;
4586        break;
4587    case float_round_down:
4588        roundIncrement = zSign ? roundMask : 0;
4589        break;
4590    default:
4591        abort();
4592    }
4593    roundBits = zSig0 & roundMask;
4594    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4595        if (    ( 0x7FFE < zExp )
4596             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
4597           ) {
4598            goto overflow;
4599        }
4600        if ( zExp <= 0 ) {
4601            if (status->flush_to_zero) {
4602                float_raise(float_flag_output_denormal, status);
4603                return packFloatx80(zSign, 0, 0);
4604            }
4605            isTiny = status->tininess_before_rounding
4606                  || (zExp < 0 )
4607                  || (zSig0 <= zSig0 + roundIncrement);
4608            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
4609            zExp = 0;
4610            roundBits = zSig0 & roundMask;
4611            if (isTiny && roundBits) {
4612                float_raise(float_flag_underflow, status);
4613            }
4614            if (roundBits) {
4615                float_raise(float_flag_inexact, status);
4616            }
4617            zSig0 += roundIncrement;
4618            if ( (int64_t) zSig0 < 0 ) zExp = 1;
4619            roundIncrement = roundMask + 1;
4620            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4621                roundMask |= roundIncrement;
4622            }
4623            zSig0 &= ~ roundMask;
4624            return packFloatx80( zSign, zExp, zSig0 );
4625        }
4626    }
4627    if (roundBits) {
4628        float_raise(float_flag_inexact, status);
4629    }
4630    zSig0 += roundIncrement;
4631    if ( zSig0 < roundIncrement ) {
4632        ++zExp;
4633        zSig0 = UINT64_C(0x8000000000000000);
4634    }
4635    roundIncrement = roundMask + 1;
4636    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
4637        roundMask |= roundIncrement;
4638    }
4639    zSig0 &= ~ roundMask;
4640    if ( zSig0 == 0 ) zExp = 0;
4641    return packFloatx80( zSign, zExp, zSig0 );
4642 precision80:
4643    switch (roundingMode) {
4644    case float_round_nearest_even:
4645    case float_round_ties_away:
4646        increment = ((int64_t)zSig1 < 0);
4647        break;
4648    case float_round_to_zero:
4649        increment = 0;
4650        break;
4651    case float_round_up:
4652        increment = !zSign && zSig1;
4653        break;
4654    case float_round_down:
4655        increment = zSign && zSig1;
4656        break;
4657    default:
4658        abort();
4659    }
4660    if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
4661        if (    ( 0x7FFE < zExp )
4662             || (    ( zExp == 0x7FFE )
4663                  && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
4664                  && increment
4665                )
4666           ) {
4667            roundMask = 0;
4668 overflow:
4669            float_raise(float_flag_overflow | float_flag_inexact, status);
4670            if (    ( roundingMode == float_round_to_zero )
4671                 || ( zSign && ( roundingMode == float_round_up ) )
4672                 || ( ! zSign && ( roundingMode == float_round_down ) )
4673               ) {
4674                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
4675            }
4676            return packFloatx80(zSign,
4677                                floatx80_infinity_high,
4678                                floatx80_infinity_low);
4679        }
4680        if ( zExp <= 0 ) {
4681            isTiny = status->tininess_before_rounding
4682                  || (zExp < 0)
4683                  || !increment
4684                  || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
4685            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
4686            zExp = 0;
4687            if (isTiny && zSig1) {
4688                float_raise(float_flag_underflow, status);
4689            }
4690            if (zSig1) {
4691                float_raise(float_flag_inexact, status);
4692            }
4693            switch (roundingMode) {
4694            case float_round_nearest_even:
4695            case float_round_ties_away:
4696                increment = ((int64_t)zSig1 < 0);
4697                break;
4698            case float_round_to_zero:
4699                increment = 0;
4700                break;
4701            case float_round_up:
4702                increment = !zSign && zSig1;
4703                break;
4704            case float_round_down:
4705                increment = zSign && zSig1;
4706                break;
4707            default:
4708                abort();
4709            }
4710            if ( increment ) {
4711                ++zSig0;
4712                if (!(zSig1 << 1) && roundNearestEven) {
4713                    zSig0 &= ~1;
4714                }
4715                if ( (int64_t) zSig0 < 0 ) zExp = 1;
4716            }
4717            return packFloatx80( zSign, zExp, zSig0 );
4718        }
4719    }
4720    if (zSig1) {
4721        float_raise(float_flag_inexact, status);
4722    }
4723    if ( increment ) {
4724        ++zSig0;
4725        if ( zSig0 == 0 ) {
4726            ++zExp;
4727            zSig0 = UINT64_C(0x8000000000000000);
4728        }
4729        else {
4730            if (!(zSig1 << 1) && roundNearestEven) {
4731                zSig0 &= ~1;
4732            }
4733        }
4734    }
4735    else {
4736        if ( zSig0 == 0 ) zExp = 0;
4737    }
4738    return packFloatx80( zSign, zExp, zSig0 );
4739
4740}
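
/*
 * Editorial note on the reduced-precision path above: for
 * floatx80_precision_d the 64-bit significand keeps its top 53 bits, so
 * roundMask (0x7ff) covers the 11 discarded bits and roundIncrement
 * (0x400) is exactly half an ULP for round-to-nearest.  zSig1 is folded
 * into zSig0 as a sticky bit ('zSig0 |= (zSig1 != 0)') so that halfway
 * detection via 'roundBits << 1 == roundIncrement' and the nearest-even
 * tie-break remain correct.
 */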
4741
4742/*----------------------------------------------------------------------------
4743| Takes an abstract floating-point value having sign `zSign', exponent
4744| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4745| and returns the proper extended double-precision floating-point value
4746| corresponding to the abstract input.  This routine is just like
4747| `roundAndPackFloatx80' except that the input significand does not have to be
4748| normalized.
4749*----------------------------------------------------------------------------*/
4750
4751floatx80 normalizeRoundAndPackFloatx80(FloatX80RoundPrec roundingPrecision,
4752                                       bool zSign, int32_t zExp,
4753                                       uint64_t zSig0, uint64_t zSig1,
4754                                       float_status *status)
4755{
4756    int8_t shiftCount;
4757
4758    if ( zSig0 == 0 ) {
4759        zSig0 = zSig1;
4760        zSig1 = 0;
4761        zExp -= 64;
4762    }
4763    shiftCount = clz64(zSig0);
4764    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4765    zExp -= shiftCount;
4766    return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4767                                zSig0, zSig1, status);
4768
4769}
4770
4771/*----------------------------------------------------------------------------
4772| Returns the binary exponential of the single-precision floating-point value
4773| `a'. The operation is performed according to the IEC/IEEE Standard for
4774| Binary Floating-Point Arithmetic.
4775|
4776| Uses the following identities:
4777|
4778| 1. -------------------------------------------------------------------------
4779|      x    x*ln(2)
4780|     2  = e
4781|
4782| 2. -------------------------------------------------------------------------
4783|                      2     3     4     5           n
4784|      x        x     x     x     x     x           x
4785|     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
4786|               1!    2!    3!    4!    5!          n!
4787*----------------------------------------------------------------------------*/
4788
4789static const float64 float32_exp2_coefficients[15] =
4790{
4791    const_float64( 0x3ff0000000000000ll ), /*  1 */
4792    const_float64( 0x3fe0000000000000ll ), /*  2 */
4793    const_float64( 0x3fc5555555555555ll ), /*  3 */
4794    const_float64( 0x3fa5555555555555ll ), /*  4 */
4795    const_float64( 0x3f81111111111111ll ), /*  5 */
4796    const_float64( 0x3f56c16c16c16c17ll ), /*  6 */
4797    const_float64( 0x3f2a01a01a01a01all ), /*  7 */
4798    const_float64( 0x3efa01a01a01a01all ), /*  8 */
4799    const_float64( 0x3ec71de3a556c734ll ), /*  9 */
4800    const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
4801    const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
4802    const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
4803    const_float64( 0x3de6124613a86d09ll ), /* 13 */
4804    const_float64( 0x3da93974a8c07c9dll ), /* 14 */
4805    const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
4806};
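
/*
 * Editorial note: entry i of the table is the double nearest 1/i!, e.g.
 * 0x3fc5555555555555 ~= 1/6.  The loop below therefore sums the
 * Maclaurin series r = sum(x**i / i!) term by term, with xp holding
 * x = a * ln(2) and xnp the running power x**i, so that the final result
 * approximates e**(a*ln2) = 2**a.
 */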
4807
4808float32 float32_exp2(float32 a, float_status *status)
4809{
4810    FloatParts64 xp, xnp, tp, rp;
4811    int i;
4812
4813    float32_unpack_canonical(&xp, a, status);
4814    if (unlikely(xp.cls != float_class_normal)) {
4815        switch (xp.cls) {
4816        case float_class_snan:
4817        case float_class_qnan:
4818            parts_return_nan(&xp, status);
4819            return float32_round_pack_canonical(&xp, status);
4820        case float_class_inf:
4821            return xp.sign ? float32_zero : a;
4822        case float_class_zero:
4823            return float32_one;
4824        default:
4825            break;
4826        }
4827        g_assert_not_reached();
4828    }
4829
4830    float_raise(float_flag_inexact, status);
4831
4832    float64_unpack_canonical(&tp, float64_ln2, status);
4833    xp = *parts_mul(&xp, &tp, status);
4834    xnp = xp;
4835
4836    float64_unpack_canonical(&rp, float64_one, status);
4837    for (i = 0 ; i < 15 ; i++) {
4838        float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
4839        rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
4840        xnp = *parts_mul(&xnp, &xp, status);
4841    }
4842
4843    return float32_round_pack_canonical(&rp, status);
4844}
4845
4846/*----------------------------------------------------------------------------
4847| Rounds the extended double-precision floating-point value `a'
4848| to the precision provided by floatx80_rounding_precision and returns the
4849| result as an extended double-precision floating-point value.
4850| The operation is performed according to the IEC/IEEE Standard for Binary
4851| Floating-Point Arithmetic.
4852*----------------------------------------------------------------------------*/
4853
4854floatx80 floatx80_round(floatx80 a, float_status *status)
4855{
4856    FloatParts128 p;
4857
4858    if (!floatx80_unpack_canonical(&p, a, status)) {
4859        return floatx80_default_nan(status);
4860    }
4861    return floatx80_round_pack_canonical(&p, status);
4862}
4863
4864static void __attribute__((constructor)) softfloat_init(void)
4865{
4866    union_float64 ua, ub, uc, ur;
4867
4868    if (QEMU_NO_HARDFLOAT) {
4869        return;
4870    }
4871    /*
4872     * Test that the host's FMA is not obviously broken. For example,
4873     * glibc < 2.23 can perform an incorrect FMA on certain hosts; see
4874     *   https://sourceware.org/bugzilla/show_bug.cgi?id=13304
4875     */
4876    ua.s = 0x0020000000000001ULL;
4877    ub.s = 0x3ca0000000000000ULL;
4878    uc.s = 0x0020000000000000ULL;
4879    ur.h = fma(ua.h, ub.h, uc.h);
4880    if (ur.s != 0x0020000000000001ULL) {
4881        force_soft_fma = true;
4882    }
4883}
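
/*
 * Editorial arithmetic for the test above: ua = 2**-1021 * (1 + 2**-52),
 * ub = 2**-53 and uc = 2**-1021, so the exact a*b+c is
 * 2**-1021 + 2**-1074 + 2**-1126, a hair over half an ULP (2**-1073)
 * above uc.  A correct fused multiply-add rounds up to
 * 0x0020000000000001; an implementation that rounds a*b to double first
 * sees an exact halfway case and ties-to-even back down to
 * 0x0020000000000000, which is the double-rounding failure being
 * detected.
 */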
4884