qemu/fpu/softfloat-parts.c.inc
/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */

static void partsN(return_nan)(FloatPartsN *a, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
        float_raise(float_flag_invalid, s);
        if (s->default_nan_mode) {
            parts_default_nan(a, s);
        } else {
            parts_silence_nan(a, s);
        }
        break;
    case float_class_qnan:
        if (s->default_nan_mode) {
            parts_default_nan(a, s);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s)
{
    if (is_snan(a->cls) || is_snan(b->cls)) {
        float_raise(float_flag_invalid, s);
    }

    if (s->default_nan_mode) {
        parts_default_nan(a, s);
    } else {
        int cmp = frac_cmp(a, b);
        if (cmp == 0) {
            cmp = a->sign < b->sign;
        }

        if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
            a = b;
        }
        if (is_snan(a->cls)) {
            parts_silence_nan(a, s);
        }
    }
    return a;
}

static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
                                            FloatPartsN *c, float_status *s,
                                            int ab_mask, int abc_mask)
{
    int which;

    if (unlikely(abc_mask & float_cmask_snan)) {
        float_raise(float_flag_invalid, s);
    }

    which = pickNaNMulAdd(a->cls, b->cls, c->cls,
                          ab_mask == float_cmask_infzero, s);

    if (s->default_nan_mode || which == 3) {
        /*
         * Note that this check is after pickNaNMulAdd so that function
         * has an opportunity to set the Invalid flag for infzero.
         */
        parts_default_nan(a, s);
        return a;
    }

    switch (which) {
    case 0:
        break;
    case 1:
        a = b;
        break;
    case 2:
        a = c;
        break;
    default:
        g_assert_not_reached();
    }
    if (is_snan(a->cls)) {
        parts_silence_nan(a, s);
    }
    return a;
}

/*
 * Canonicalize the FloatParts structure.  Determine the class,
 * unbias the exponent, and normalize the fraction.
 */
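/*
 * After canonicalization, a float_class_normal value keeps its implicit
 * integer bit at DECOMPOSED_BINARY_POINT of frac_hi, i.e. it represents
 * (-1)^sign * f * 2^exp with f in [1, 2) held as a fixed-point fraction;
 * the arithmetic below depends on this form.
 */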
static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
                                 const FloatFmt *fmt)
{
    if (unlikely(p->exp == 0)) {
        if (likely(frac_eqz(p))) {
            p->cls = float_class_zero;
        } else if (status->flush_inputs_to_zero) {
            float_raise(float_flag_input_denormal, status);
            p->cls = float_class_zero;
            frac_clear(p);
        } else {
            int shift = frac_normalize(p);
            p->cls = float_class_normal;
            p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
        }
    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
        p->cls = float_class_normal;
        p->exp -= fmt->exp_bias;
        frac_shl(p, fmt->frac_shift);
        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
    } else if (likely(frac_eqz(p))) {
        p->cls = float_class_inf;
    } else {
        frac_shl(p, fmt->frac_shift);
        p->cls = (parts_is_snan_frac(p->frac_hi, status)
                  ? float_class_snan : float_class_qnan);
    }
}

/*
 * Round and uncanonicalize a floating-point number by parts. There
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 * fraction; these bits will be removed. The exponent will be biased
 * by EXP_BIAS and must be bounded by [0, EXP_MAX-1].
 */
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
                                   const FloatFmt *fmt)
{
    const int exp_max = fmt->exp_max;
    const int frac_shift = fmt->frac_shift;
    const uint64_t round_mask = fmt->round_mask;
    const uint64_t frac_lsb = round_mask + 1;
    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
    const uint64_t roundeven_mask = round_mask | frac_lsb;
    uint64_t inc;
    bool overflow_norm = false;
    int exp, flags = 0;

    switch (s->float_rounding_mode) {
    case float_round_nearest_even:
        if (N > 64 && frac_lsb == 0) {
            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        } else {
            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        }
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        overflow_norm = true;
        inc = 0;
        break;
    case float_round_up:
        inc = p->sign ? 0 : round_mask;
        overflow_norm = p->sign;
        break;
    case float_round_down:
        inc = p->sign ? round_mask : 0;
        overflow_norm = !p->sign;
        break;
    case float_round_to_odd:
        overflow_norm = true;
        /* fall through */
    case float_round_to_odd_inf:
        if (N > 64 && frac_lsb == 0) {
            inc = p->frac_hi & 1 ? 0 : round_mask;
        } else {
            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
        }
        break;
    default:
        g_assert_not_reached();
    }
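    /*
     * Illustration, not format-specific: with round_mask == 0xff we get
     * frac_lsb == 0x100, frac_lsbm1 == 0x80 and roundeven_mask == 0x1ff.
     * For nearest-even, inc is then 0x80 unless the low nine bits of
     * frac_lo are exactly 0x080 (a tie with an even lsb), in which case
     * inc is 0 and the tie resolves toward the even value.
     */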

    exp = p->exp + fmt->exp_bias;
    if (likely(exp > 0)) {
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }

        if (fmt->arm_althp) {
            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
            if (unlikely(exp > exp_max)) {
                /* Overflow.  Return the maximum normal.  */
                flags = float_flag_invalid;
                exp = exp_max;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            }
        } else if (unlikely(exp >= exp_max)) {
            flags |= float_flag_overflow | float_flag_inexact;
            if (overflow_norm) {
                exp = exp_max - 1;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            } else {
                p->cls = float_class_inf;
                exp = exp_max;
                frac_clear(p);
            }
        }
        frac_shr(p, frac_shift);
    } else if (s->flush_to_zero) {
        flags |= float_flag_output_denormal;
        p->cls = float_class_zero;
        exp = 0;
        frac_clear(p);
    } else {
        bool is_tiny = s->tininess_before_rounding || exp < 0;

        if (!is_tiny) {
            FloatPartsN discard;
            is_tiny = !frac_addi(&discard, p, inc);
        }

        frac_shrjam(p, 1 - exp);

        if (p->frac_lo & round_mask) {
            /* Need to recompute round-to-even/round-to-odd. */
            switch (s->float_rounding_mode) {
            case float_round_nearest_even:
                if (N > 64 && frac_lsb == 0) {
                    inc = ((p->frac_hi & 1) ||
                           (p->frac_lo & round_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                } else {
                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                }
                break;
            case float_round_to_odd:
            case float_round_to_odd_inf:
                if (N > 64 && frac_lsb == 0) {
                    inc = p->frac_hi & 1 ? 0 : round_mask;
                } else {
                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
                }
                break;
            default:
                break;
            }
            flags |= float_flag_inexact;
            frac_addi(p, p, inc);
            p->frac_lo &= ~round_mask;
        }

        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
        frac_shr(p, frac_shift);

        if (is_tiny && (flags & float_flag_inexact)) {
            flags |= float_flag_underflow;
        }
        if (exp == 0 && frac_eqz(p)) {
            p->cls = float_class_zero;
        }
    }
    p->exp = exp;
    float_raise(flags, s);
}

static void partsN(uncanon)(FloatPartsN *p, float_status *s,
                            const FloatFmt *fmt)
{
    if (likely(p->cls == float_class_normal)) {
        parts_uncanon_normal(p, s, fmt);
    } else {
        switch (p->cls) {
        case float_class_zero:
            p->exp = 0;
            frac_clear(p);
            return;
        case float_class_inf:
            g_assert(!fmt->arm_althp);
            p->exp = fmt->exp_max;
            frac_clear(p);
            return;
        case float_class_qnan:
        case float_class_snan:
            g_assert(!fmt->arm_althp);
            p->exp = fmt->exp_max;
            frac_shr(p, fmt->frac_shift);
            return;
        default:
            break;
        }
        g_assert_not_reached();
    }
}

/*
 * Returns the result of adding or subtracting the floating-point
 * values `a' and `b'. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, bool subtract)
{
    bool b_sign = b->sign ^ subtract;
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (a->sign != b_sign) {
        /* Subtraction */
        if (likely(ab_mask == float_cmask_normal)) {
            if (parts_sub_normal(a, b)) {
                return a;
            }
            /* Subtract was exact, fall through to set sign. */
            ab_mask = float_cmask_zero;
        }

        if (ab_mask == float_cmask_zero) {
            a->sign = s->float_rounding_mode == float_round_down;
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (a->cls != float_class_inf) {
                /* N - Inf */
                goto return_b;
            }
            if (b->cls != float_class_inf) {
                /* Inf - N */
                return a;
            }
            /* Inf - Inf */
            float_raise(float_flag_invalid, s);
            parts_default_nan(a, s);
            return a;
        }
    } else {
        /* Addition */
        if (likely(ab_mask == float_cmask_normal)) {
            parts_add_normal(a, b);
            return a;
        }

        if (ab_mask == float_cmask_zero) {
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            a->cls = float_class_inf;
            return a;
        }
    }

    if (b->cls == float_class_zero) {
        g_assert(a->cls == float_class_normal);
        return a;
    }

    g_assert(a->cls == float_class_zero);
    g_assert(b->cls == float_class_normal);
 return_b:
    b->sign = b_sign;
    return b;

 p_nan:
    return parts_pick_nan(a, b, s);
}

/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b'. The operation is performed according to the IEC/IEEE Standard
 * for Binary Floating-Point Arithmetic.
 */
static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
                                float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    bool sign = a->sign ^ b->sign;

    if (likely(ab_mask == float_cmask_normal)) {
        FloatPartsW tmp;

        frac_mulw(&tmp, a, b);
        frac_truncjam(a, &tmp);

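        /*
         * The product of two fractions in [1, 2) lies in [1, 4), so
         * account for one extra exponent bit here and renormalize
         * below if the implicit bit landed one position low.
         */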
        a->exp += b->exp + 1;
        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
            frac_add(a, a, a);
            a->exp -= 1;
        }

        a->sign = sign;
        return a;
    }

    /* Inf * Zero == NaN */
    if (unlikely(ab_mask == float_cmask_infzero)) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(a, s);
        return a;
    }

    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    /* Multiply by 0 or Inf */
    if (ab_mask & float_cmask_inf) {
        a->cls = float_class_inf;
        a->sign = sign;
        return a;
    }

    g_assert(ab_mask & float_cmask_zero);
    a->cls = float_class_zero;
    a->sign = sign;
    return a;
}

/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b' then adding 'c', with no intermediate rounding step after the
 * multiplication. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 * The flags argument allows the caller to select negation of the
 * addend, the intermediate product, or the final result. (The
 * difference between this and having the caller do a separate
 * negation is that negating externally will flip the sign bit on NaNs.)
 *
 * Requires A and C extracted into a double-sized structure to provide the
 * extra space for the widening multiply.
 */
static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
                                   FloatPartsN *c, int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;

    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    abc_mask = float_cmask(c->cls) | ab_mask;

    /*
     * It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
    }

    if (flags & float_muladd_negate_c) {
        c->sign ^= 1;
    }

    /* Compute the sign of the product into A. */
    a->sign ^= b->sign;
    if (flags & float_muladd_negate_product) {
        a->sign ^= 1;
    }

    if (unlikely(ab_mask != float_cmask_normal)) {
        if (unlikely(ab_mask == float_cmask_infzero)) {
            goto d_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (c->cls == float_class_inf && a->sign != c->sign) {
                goto d_nan;
            }
            goto return_inf;
        }

        g_assert(ab_mask & float_cmask_zero);
        if (c->cls == float_class_normal) {
            *a = *c;
            goto return_normal;
        }
        if (c->cls == float_class_zero) {
            if (a->sign != c->sign) {
                goto return_sub_zero;
            }
            goto return_zero;
        }
        g_assert(c->cls == float_class_inf);
    }

    if (unlikely(c->cls == float_class_inf)) {
        a->sign = c->sign;
        goto return_inf;
    }

    /* Perform the multiplication step. */
    p_widen.sign = a->sign;
    p_widen.exp = a->exp + b->exp + 1;
    frac_mulw(&p_widen, a, b);
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(&p_widen, &p_widen, &p_widen);
        p_widen.exp -= 1;
    }

    /* Perform the addition step. */
    if (c->cls != float_class_zero) {
        /* Zero-extend C to less significant bits. */
        frac_widen(&c_widen, c);
        c_widen.exp = c->exp;

        if (a->sign == c->sign) {
            parts_add_normal(&p_widen, &c_widen);
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
            goto return_sub_zero;
        }
    }

    /* Narrow with sticky bit, for proper rounding later. */
    frac_truncjam(a, &p_widen);
    a->sign = p_widen.sign;
    a->exp = p_widen.exp;

 return_normal:
    if (flags & float_muladd_halve_result) {
        a->exp -= 1;
    }
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
    }
    return a;

 return_sub_zero:
    a->sign = s->float_rounding_mode == float_round_down;
 return_zero:
    a->cls = float_class_zero;
    goto finish_sign;

 return_inf:
    a->cls = float_class_inf;
    goto finish_sign;

 d_nan:
    float_raise(float_flag_invalid, s);
    parts_default_nan(a, s);
    return a;
}

/*
 * Returns the result of dividing the floating-point value `a' by the
 * corresponding value `b'. The operation is performed according to
 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
                                float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    bool sign = a->sign ^ b->sign;

    if (likely(ab_mask == float_cmask_normal)) {
        a->sign = sign;
        a->exp -= b->exp + frac_div(a, b);
        return a;
    }

    /* 0/0 or Inf/Inf => NaN */
    if (unlikely(ab_mask == float_cmask_zero) ||
        unlikely(ab_mask == float_cmask_inf)) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(a, s);
        return a;
    }

    /* All the NaN cases */
    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    a->sign = sign;

    /* Inf / X */
    if (a->cls == float_class_inf) {
        return a;
    }

    /* 0 / X */
    if (a->cls == float_class_zero) {
        return a;
    }

    /* X / Inf */
    if (b->cls == float_class_inf) {
        a->cls = float_class_zero;
        return a;
    }

    /* X / 0 => Inf */
    g_assert(b->cls == float_class_zero);
    float_raise(float_flag_divbyzero, s);
    a->cls = float_class_inf;
    return a;
}

/*
 * Floating point remainder, per IEC/IEEE, or modulus.
 */
static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
                                   uint64_t *mod_quot, float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (likely(ab_mask == float_cmask_normal)) {
        frac_modrem(a, b, mod_quot);
        return a;
    }

    if (mod_quot) {
        *mod_quot = 0;
    }

    /* All the NaN cases */
    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    /* Inf % N; N % 0 */
    if (a->cls == float_class_inf || b->cls == float_class_zero) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(a, s);
        return a;
    }

    /* N % Inf; 0 % N */
    g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
    return a;
}

/*
 * Square Root
 *
 * The base algorithm is lifted from
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
 * and is thus MIT licensed.
 */
static void partsN(sqrt)(FloatPartsN *a, float_status *status,
                         const FloatFmt *fmt)
{
    const uint32_t three32 = 3u << 30;
    const uint64_t three64 = 3ull << 62;
    uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
    uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
    uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
    uint64_t discard;
    bool exp_odd;
    size_t index;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, status);
            return;
        case float_class_zero:
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            g_assert_not_reached();
        }
    }

    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /*
     * Argument reduction.
     * x = 4^e frac; with integer e, and frac in [1, 4)
     * m = frac fixed point at bit 62, since we're in base 4.
     * If base-2 exponent is odd, exchange that for multiply by 2,
     * which results in no shift.
     */
    exp_odd = a->exp & 1;
    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
    if (!exp_odd) {
        frac_shr(a, 1);
    }

    /*
     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
     *
     * Initial estimate:
     * 7-bit lookup table (1-bit exponent and 6-bit significand).
     *
     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
     * a table lookup is faster and needs one less iteration.
     * The 7-bit table gives |e| < 0x1.fdp-9.
     *
     * A Newton-Raphson iteration for r is
     *   s = m*r
     *   d = s*r
     *   u = 3 - d
     *   r = r*u/2
     *
     * Fixed point representations:
     *   m, s, d, u, three are all 2.30; r is 0.32
     */
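    /*
     * (The update r' = r*(3 - m*r*r)/2 is the Newton-Raphson step for
     * f(r) = 1/(r*r) - m, so the relative error of r roughly squares
     * with each pass; each iteration below therefore about doubles the
     * number of correct bits.)
     */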
    m64 = a->frac_hi;
    m32 = m64 >> 32;

    r32 = rsqrt_tab[index] << 16;
    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */

    s32 = ((uint64_t)m32 * r32) >> 32;
    d32 = ((uint64_t)s32 * r32) >> 32;
    u32 = three32 - d32;

    if (N == 64) {
        /* float64 or smaller */

        r32 = ((uint64_t)r32 * u32) >> 31;
        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */

        s32 = ((uint64_t)m32 * r32) >> 32;
        d32 = ((uint64_t)s32 * r32) >> 32;
        u32 = three32 - d32;

        if (fmt->frac_size <= 23) {
            /* float32 or smaller */

            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
            s32 = (s32 - 1) >> 6;               /* 9.23 */
            /* s < sqrt(m) < s + 0x1.08p-23 */

            /* compute nearest rounded result to 2.23 bits */
            uint32_t d0 = (m32 << 16) - s32 * s32;
            uint32_t d1 = s32 - d0;
            uint32_t d2 = d1 + s32 + 1;
            s32 += d1 >> 31;
            a->frac_hi = (uint64_t)s32 << (64 - 25);

            /* increment or decrement for inexact */
            if (d2 != 0) {
                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
            }
            goto done;
        }

        /* float64 */

        r64 = (uint64_t)r32 * u32 * 2;
        /* |r*sqrt(m) - 1| < 0x1.37p-29; convert to 64-bit arithmetic */
        mul64To128(m64, r64, &s64, &discard);
        mul64To128(s64, r64, &d64, &discard);
        u64 = three64 - d64;

        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
        s64 = (s64 - 2) >> 9;                  /* 12.52 */

        /* Compute nearest rounded result */
        uint64_t d0 = (m64 << 42) - s64 * s64;
        uint64_t d1 = s64 - d0;
        uint64_t d2 = d1 + s64 + 1;
        s64 += d1 >> 63;
        a->frac_hi = s64 << (64 - 54);

        /* increment or decrement for inexact */
        if (d2 != 0) {
            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
        }
        goto done;
    }

    r64 = (uint64_t)r32 * u32 * 2;
    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &r64, &discard);
    r64 <<= 1;
    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &rh, &rl);
    add128(rh, rl, rh, rl, &rh, &rl);
    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */

    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
    sub128(three64, 0, dh, dl, &uh, &ul);
    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */

    sub128(sh, sl, 0, 4, &sh, &sl);
    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
    /* s < sqrt(m) < s + 1ulp */

    /* Compute nearest rounded result */
    mul64To128(sl, sl, &d0h, &d0l);
    d0h += 2 * sh * sl;
    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
    add128(sh, sl, 0, 1, &d2h, &d2l);
    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
    shift128Left(sh, sl, 128 - 114, &sh, &sl);

    /* increment or decrement for inexact */
    if (d2h | d2l) {
        if ((int64_t)(d1h ^ d2h) < 0) {
            sub128(sh, sl, 0, 1, &sh, &sl);
        } else {
            add128(sh, sl, 0, 1, &sh, &sl);
        }
    }
    a->frac_lo = sl;
    a->frac_hi = sh;

 done:
    /* Convert back from base 4 to base 2. */
    a->exp >>= 1;
    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(a, a, a);
    } else {
        a->exp += 1;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid, status);
    parts_default_nan(a, status);
}

/*
 * Rounds the floating-point value `a' to an integer, and returns the
 * result as a floating-point value. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 *
 * parts_round_to_int_normal is an internal helper function for
 * normal numbers only, returning true for inexact but not directly
 * raising float_flag_inexact.
 */
static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
                                        int scale, int frac_size)
{
    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
    int shift_adj;

    scale = MIN(MAX(scale, -0x10000), 0x10000);
    a->exp += scale;

    if (a->exp < 0) {
        bool one;

        /* All fractional */
        switch (rmode) {
        case float_round_nearest_even:
            one = false;
            if (a->exp == -1) {
                FloatPartsN tmp;
                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
                frac_add(&tmp, a, a);
                /* Anything remaining means frac > 0.5. */
                one = !frac_eqz(&tmp);
            }
            break;
        case float_round_ties_away:
            one = a->exp == -1;
            break;
        case float_round_to_zero:
            one = false;
            break;
        case float_round_up:
            one = !a->sign;
            break;
        case float_round_down:
            one = a->sign;
            break;
        case float_round_to_odd:
            one = true;
            break;
        default:
            g_assert_not_reached();
        }

        frac_clear(a);
        a->exp = 0;
        if (one) {
            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
        } else {
            a->cls = float_class_zero;
        }
        return true;
    }

    if (a->exp >= frac_size) {
        /* All integral */
        return false;
    }

    if (N > 64 && a->exp < N - 64) {
        /*
         * Rounding is not in the low word -- shift lsb to bit 2,
         * which leaves room for sticky and rounding bit.
         */
        shift_adj = (N - 1) - (a->exp + 2);
        frac_shrjam(a, shift_adj);
        frac_lsb = 1 << 2;
    } else {
        shift_adj = 0;
        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
    }
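    /*
     * E.g. with a->exp == 2 the value lies in [4, 8); frac_lsb is then
     * DECOMPOSED_IMPLICIT_BIT >> 2, the fixed-point weight of 1, and
     * rnd_mask below covers the purely fractional bits.
     */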

    frac_lsbm1 = frac_lsb >> 1;
    rnd_mask = frac_lsb - 1;
    rnd_even_mask = rnd_mask | frac_lsb;

    if (!(a->frac_lo & rnd_mask)) {
        /* Fractional bits already clear, undo the shift above. */
        frac_shl(a, shift_adj);
        return false;
    }

    switch (rmode) {
    case float_round_nearest_even:
        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        inc = 0;
        break;
    case float_round_up:
        inc = a->sign ? 0 : rnd_mask;
        break;
    case float_round_down:
        inc = a->sign ? rnd_mask : 0;
        break;
    case float_round_to_odd:
        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
        break;
    default:
        g_assert_not_reached();
    }

    if (shift_adj == 0) {
        if (frac_addi(a, a, inc)) {
            frac_shr(a, 1);
            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
            a->exp++;
        }
        a->frac_lo &= ~rnd_mask;
    } else {
        frac_addi(a, a, inc);
        a->frac_lo &= ~rnd_mask;
        /* Be careful shifting back, not to overflow */
        frac_shl(a, shift_adj - 1);
        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
            a->exp++;
        } else {
            frac_add(a, a, a);
        }
    }
    return true;
}

static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
                                 int scale, float_status *s,
                                 const FloatFmt *fmt)
{
    switch (a->cls) {
    case float_class_qnan:
    case float_class_snan:
        parts_return_nan(a, s);
        break;
    case float_class_zero:
    case float_class_inf:
        break;
    case float_class_normal:
        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
            float_raise(float_flag_inexact, s);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Returns the result of converting the floating-point value `a' to
 * the two's complement integer format. The conversion is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest positive integer is returned. Otherwise, if the
 * conversion overflows, the largest integer with the same sign as `a'
 * is returned.
 */
static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
    case float_class_qnan:
        flags = float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid;
        r = p->sign ? min : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
        }

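        /*
         * With the fraction fixed at DECOMPOSED_BINARY_POINT, shifting
         * right by (DECOMPOSED_BINARY_POINT - exp) yields the integer
         * value directly; e.g. exp == 3 with frac == 1.25 gives 10.
         */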
        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
        } else {
            r = UINT64_MAX;
        }
        if (p->sign) {
            if (r <= -(uint64_t)min) {
                r = -r;
            } else {
                flags = float_flag_invalid;
                r = min;
            }
        } else if (r > max) {
            flags = float_flag_invalid;
            r = max;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}

/*
 *  Returns the result of converting the floating-point value `a' to
 *  the unsigned integer format. The conversion is performed according
 *  to the IEC/IEEE Standard for Binary Floating-Point
 *  Arithmetic---which means in particular that the conversion is
 *  rounded according to the current rounding mode. If `a' is a NaN,
 *  the largest unsigned integer is returned. Otherwise, if the
 *  conversion overflows, the largest unsigned integer is returned. If
 *  `a' is negative, the result is rounded and zero is returned;
 *  values that do not round to zero will raise the inexact exception
 *  flag.
 */
static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
                                      int scale, uint64_t max, float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
    case float_class_qnan:
        flags = float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid;
        r = p->sign ? 0 : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
            if (p->cls == float_class_zero) {
                r = 0;
                break;
            }
        }

        if (p->sign) {
            flags = float_flag_invalid;
            r = 0;
        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
            flags = float_flag_invalid;
            r = max;
        } else {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
            if (r > max) {
                flags = float_flag_invalid;
                r = max;
            }
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}

/*
 * Integer to float conversions
 *
 * Returns the result of converting the two's complement integer `a'
 * to the floating-point format. The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
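/*
 * E.g. with scale == 0, a == 6 gives f == 6, shift == clz64(6) == 61,
 * frac_hi == 6 << 61 == 0xc000000000000000 (1.5 in the fixed-point
 * fraction) and exp == 2, i.e. 1.5 * 2^2 == 6.
 */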
static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
                                  int scale, float_status *s)
{
    uint64_t f = a;
    int shift;

    memset(p, 0, sizeof(*p));

    if (a == 0) {
        p->cls = float_class_zero;
        return;
    }

    p->cls = float_class_normal;
    if (a < 0) {
        f = -f;
        p->sign = true;
    }
    shift = clz64(f);
    scale = MIN(MAX(scale, -0x10000), 0x10000);

    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
    p->frac_hi = f << shift;
}

/*
 * Unsigned Integer to float conversions
 *
 * Returns the result of converting the unsigned integer `a' to the
 * floating-point format. The conversion is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
                                  int scale, float_status *status)
{
    memset(p, 0, sizeof(*p));

    if (a == 0) {
        p->cls = float_class_zero;
    } else {
        int shift = clz64(a);
        scale = MIN(MAX(scale, -0x10000), 0x10000);
        p->cls = float_class_normal;
        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
        p->frac_hi = a << shift;
    }
}

/*
 * Float min/max.
 */
static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, int flags)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    int a_exp, b_exp, cmp;

    if (unlikely(ab_mask & float_cmask_anynan)) {
        /*
         * For minNum/maxNum (IEEE 754-2008)
         * or minimumNumber/maximumNumber (IEEE 754-2019),
         * if one operand is a QNaN, and the other
         * operand is numerical, then return numerical argument.
         */
        if ((flags & (minmax_isnum | minmax_isnumber))
            && !(ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_qnan)) {
            return is_nan(a->cls) ? b : a;
        }

        /*
         * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
         * are removed and replaced with minimum, minimumNumber, maximum
         * and maximumNumber.
         * minimumNumber/maximumNumber behavior for SNaN is changed to:
         *   If both operands are NaNs, a QNaN is returned.
         *   If either operand is a SNaN,
         *   an invalid operation exception is signaled,
         *   but unless both operands are NaNs,
         *   the SNaN is otherwise ignored and not converted to a QNaN.
         */
        if ((flags & minmax_isnumber)
            && (ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_anynan)) {
            float_raise(float_flag_invalid, s);
            return is_nan(a->cls) ? b : a;
        }

        return parts_pick_nan(a, b, s);
    }

    a_exp = a->exp;
    b_exp = b->exp;

    if (unlikely(ab_mask != float_cmask_normal)) {
        switch (a->cls) {
        case float_class_normal:
            break;
        case float_class_inf:
            a_exp = INT16_MAX;
            break;
        case float_class_zero:
            a_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }
        switch (b->cls) {
        case float_class_normal:
            break;
        case float_class_inf:
            b_exp = INT16_MAX;
            break;
        case float_class_zero:
            b_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
            break;
        }
    }

    /* Compare magnitudes. */
    cmp = a_exp - b_exp;
    if (cmp == 0) {
        cmp = frac_cmp(a, b);
    }

    /*
     * Take the sign into account.
     * For ismag, only do this if the magnitudes are equal.
     */
    if (!(flags & minmax_ismag) || cmp == 0) {
        if (a->sign != b->sign) {
            /* For differing signs, the negative operand is less. */
            cmp = a->sign ? -1 : 1;
        } else if (a->sign) {
            /* For two negative operands, invert the magnitude comparison. */
            cmp = -cmp;
        }
    }

    if (flags & minmax_ismin) {
        cmp = -cmp;
    }
    return cmp < 0 ? b : a;
}

/*
 * Floating point compare
 */
static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s, bool is_quiet)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    int cmp;

    if (likely(ab_mask == float_cmask_normal)) {
        if (a->sign != b->sign) {
            goto a_sign;
        }
        if (a->exp != b->exp) {
            cmp = a->exp < b->exp ? -1 : 1;
        } else {
            cmp = frac_cmp(a, b);
        }
        if (a->sign) {
            cmp = -cmp;
        }
        return cmp;
    }

    if (unlikely(ab_mask & float_cmask_anynan)) {
        if (!is_quiet || (ab_mask & float_cmask_snan)) {
            float_raise(float_flag_invalid, s);
        }
        return float_relation_unordered;
    }

    if (ab_mask & float_cmask_zero) {
        if (ab_mask == float_cmask_zero) {
            return float_relation_equal;
        } else if (a->cls == float_class_zero) {
            goto b_sign;
        } else {
            goto a_sign;
        }
    }

    if (ab_mask == float_cmask_inf) {
        if (a->sign == b->sign) {
            return float_relation_equal;
        }
    } else if (b->cls == float_class_inf) {
        goto b_sign;
    } else {
        g_assert(a->cls == float_class_inf);
    }

 a_sign:
    return a->sign ? float_relation_less : float_relation_greater;
 b_sign:
    return b->sign ? float_relation_greater : float_relation_less;
}

/*
 * Multiply A by 2 raised to the power N.
 */
static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
{
    switch (a->cls) {
    case float_class_snan:
    case float_class_qnan:
        parts_return_nan(a, s);
        break;
    case float_class_zero:
    case float_class_inf:
        break;
    case float_class_normal:
        a->exp += MIN(MAX(n, -0x10000), 0x10000);
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Return log2(A)
 */
static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
{
    uint64_t a0, a1, r, t, ign;
    FloatPartsN f;
    int i, n, a_exp, f_exp;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, s);
            return;
        case float_class_zero:
            /* log2(0) = -inf */
            a->cls = float_class_inf;
            a->sign = 1;
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            break;
        }
        g_assert_not_reached();
    }
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /* TODO: This algorithm loses bits too quickly for float128. */
    g_assert(N == 64);

    a_exp = a->exp;
    f_exp = -1;

    r = 0;
    t = DECOMPOSED_IMPLICIT_BIT;
    a0 = a->frac_hi;
    a1 = 0;

    n = fmt->frac_size + 2;
    if (unlikely(a_exp == -1)) {
        /*
         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
         * When the value is very close to 1.0, there are lots of 1's in
         * the msb parts of the fraction.  At the end, when we subtract
         * this value from -1.0, we can see a catastrophic loss of precision,
         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
         * bits of y in the final result.  To minimize this, compute as many
         * digits as we can.
         * ??? This case needs another algorithm to avoid this.
         */
        n = fmt->frac_size * 2 + 2;
        /* Don't compute a value overlapping the sticky bit */
        n = MIN(n, 62);
    }
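    /*
     * Square-and-compare digit recurrence: a0:a1 holds the current
     * fraction in [1, 2) as a fixed-point value.  Squaring it yields a
     * value in [1, 4); if the square is >= 2 (implicit bit set after the
     * fixed-point shift) the next bit of log2 is 1 and the value is
     * already halved back into [1, 2), otherwise the bit is 0 and the
     * value is doubled.  t marks the bit being produced and r accumulates
     * the fraction bits of the result.
     */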

    for (i = 0; i < n; i++) {
        if (a1) {
            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
        } else if (a0 & 0xffffffffull) {
            mul64To128(a0, a0, &a0, &a1);
        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
            a0 >>= 32;
            a0 *= a0;
        } else {
            goto exact;
        }

        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
            if (unlikely(a_exp == 0 && r == 0)) {
                /*
                 * When a_exp == 0, we're computing the log2 of a value
                 * [1.0,2.0).  When the value is very close to 1.0, there
                 * are lots of 0's in the msb parts of the fraction.
                 * We need to compute more digits to produce a correct
                 * result -- restart at the top of the fraction.
                 * ??? This is likely to lose precision quickly, as for
                 * float128; we may need another method.
                 */
                f_exp -= i;
                t = r = DECOMPOSED_IMPLICIT_BIT;
                i = 0;
            } else {
                r |= t;
            }
        } else {
            add128(a0, a1, a0, a1, &a0, &a1);
        }
        t >>= 1;
    }

    /* Set sticky for inexact. */
    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);

 exact:
    parts_sint_to_float(a, a_exp, 0, s);
    if (r == 0) {
        return;
    }

    memset(&f, 0, sizeof(f));
    f.cls = float_class_normal;
    f.frac_hi = r;
    f.exp = f_exp - frac_normalize(&f);

    if (a_exp < 0) {
        parts_sub_normal(a, &f);
    } else if (a_exp > 0) {
        parts_add_normal(a, &f);
    } else {
        *a = f;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid, s);
    parts_default_nan(a, s);
}