/* qemu/fpu/softfloat-parts.c.inc */
   1/*
   2 * QEMU float support
   3 *
   4 * The code in this source file is derived from release 2a of the SoftFloat
   5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
   6 * some later contributions) are provided under that license, as detailed below.
   7 * It has subsequently been modified by contributors to the QEMU Project,
   8 * so some portions are provided under:
   9 *  the SoftFloat-2a license
  10 *  the BSD license
  11 *  GPL-v2-or-later
  12 *
  13 * Any future contributions to this file after December 1st 2014 will be
  14 * taken to be licensed under the Softfloat-2a license unless specifically
  15 * indicated otherwise.
  16 */
  17
  18static void partsN(return_nan)(FloatPartsN *a, float_status *s)
  19{
  20    switch (a->cls) {
  21    case float_class_snan:
  22        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
  23        if (s->default_nan_mode) {
  24            parts_default_nan(a, s);
  25        } else {
  26            parts_silence_nan(a, s);
  27        }
  28        break;
  29    case float_class_qnan:
  30        if (s->default_nan_mode) {
  31            parts_default_nan(a, s);
  32        }
  33        break;
  34    default:
  35        g_assert_not_reached();
  36    }
  37}
  38
  39static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
  40                                     float_status *s)
  41{
  42    if (is_snan(a->cls) || is_snan(b->cls)) {
  43        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
  44    }
  45
  46    if (s->default_nan_mode) {
  47        parts_default_nan(a, s);
  48    } else {
  49        int cmp = frac_cmp(a, b);
  50        if (cmp == 0) {
  51            cmp = a->sign < b->sign;
  52        }
  53
  54        if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
  55            a = b;
  56        }
  57        if (is_snan(a->cls)) {
  58            parts_silence_nan(a, s);
  59        }
  60    }
  61    return a;
  62}
  63
  64static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
  65                                            FloatPartsN *c, float_status *s,
  66                                            int ab_mask, int abc_mask)
  67{
  68    int which;
  69
  70    if (unlikely(abc_mask & float_cmask_snan)) {
  71        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
  72    }
  73
  74    which = pickNaNMulAdd(a->cls, b->cls, c->cls,
  75                          ab_mask == float_cmask_infzero, s);
  76
  77    if (s->default_nan_mode || which == 3) {
  78        /*
  79         * Note that this check is after pickNaNMulAdd so that function
  80         * has an opportunity to set the Invalid flag for infzero.
  81         */
  82        parts_default_nan(a, s);
  83        return a;
  84    }
  85
  86    switch (which) {
  87    case 0:
  88        break;
  89    case 1:
  90        a = b;
  91        break;
  92    case 2:
  93        a = c;
  94        break;
  95    default:
  96        g_assert_not_reached();
  97    }
  98    if (is_snan(a->cls)) {
  99        parts_silence_nan(a, s);
 100    }
 101    return a;
 102}
 103
/*
 * Canonicalize the FloatParts structure.  Determine the class,
 * unbias the exponent, and normalize the fraction.
 */
static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
                                 const FloatFmt *fmt)
{
    if (unlikely(p->exp == 0)) {
        if (likely(frac_eqz(p))) {
            /* Zero exponent and zero fraction: (signed) zero. */
            p->cls = float_class_zero;
        } else if (status->flush_inputs_to_zero) {
            /* Denormal input flushed to zero; raise input-denormal. */
            float_raise(float_flag_input_denormal, status);
            p->cls = float_class_zero;
            frac_clear(p);
        } else {
            /*
             * Denormal: shift the msb up to the implicit-bit position
             * and compensate in the unbiased exponent.
             */
            int shift = frac_normalize(p);
            p->cls = float_class_normal;
            p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
        }
    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
        /*
         * Normal number.  ARM alternative half-precision has no Inf
         * or NaN encodings, so its maximum exponent is normal too.
         */
        p->cls = float_class_normal;
        p->exp -= fmt->exp_bias;
        frac_shl(p, fmt->frac_shift);
        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
    } else if (likely(frac_eqz(p))) {
        /* Maximum exponent, zero fraction: infinity. */
        p->cls = float_class_inf;
    } else {
        /* Maximum exponent, non-zero fraction: NaN. */
        frac_shl(p, fmt->frac_shift);
        p->cls = (parts_is_snan_frac(p->frac_hi, status)
                  ? float_class_snan : float_class_qnan);
    }
}
 136
/*
 * Round and uncanonicalize a floating-point number by parts. There
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 * fraction; these bits will be removed. The exponent will be biased
 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
 */
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
                                   const FloatFmt *fmt)
{
    const int exp_max = fmt->exp_max;
    const int frac_shift = fmt->frac_shift;
    const uint64_t round_mask = fmt->round_mask;
    /* Unit in the last place of the result, within the wide fraction. */
    const uint64_t frac_lsb = round_mask + 1;
    /* Half-ulp: the most significant bit of round_mask. */
    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
    const uint64_t roundeven_mask = round_mask | frac_lsb;
    uint64_t inc;
    bool overflow_norm = false;
    int exp, flags = 0;

    /*
     * Select the rounding increment INC for the current mode.  When
     * N > 64 and frac_lsb == 0, the result's lsb is bit 0 of frac_hi
     * rather than a bit of frac_lo, so ties are detected there.
     */
    switch (s->float_rounding_mode) {
    case float_round_nearest_even:
        if (N > 64 && frac_lsb == 0) {
            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        } else {
            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        }
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        overflow_norm = true;
        inc = 0;
        break;
    case float_round_up:
        inc = p->sign ? 0 : round_mask;
        overflow_norm = p->sign;
        break;
    case float_round_down:
        inc = p->sign ? round_mask : 0;
        overflow_norm = !p->sign;
        break;
    case float_round_to_odd:
        overflow_norm = true;
        /* fall through */
    case float_round_to_odd_inf:
        if (N > 64 && frac_lsb == 0) {
            inc = p->frac_hi & 1 ? 0 : round_mask;
        } else {
            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
        }
        break;
    default:
        g_assert_not_reached();
    }

    exp = p->exp + fmt->exp_bias;
    if (likely(exp > 0)) {
        /* Result is a normal number (before overflow checks). */
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                /* Rounding carried out of the fraction: renormalize. */
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }

        if (fmt->arm_althp) {
            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
            if (unlikely(exp > exp_max)) {
                /* Overflow.  Return the maximum normal.  */
                flags = float_flag_invalid;
                exp = exp_max;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            }
        } else if (unlikely(exp >= exp_max)) {
            flags |= float_flag_overflow | float_flag_inexact;
            if (overflow_norm) {
                /* Overflow rounds toward the largest finite value. */
                exp = exp_max - 1;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            } else {
                /* Overflow rounds toward infinity. */
                p->cls = float_class_inf;
                exp = exp_max;
                frac_clear(p);
            }
        }
        frac_shr(p, frac_shift);
    } else if (s->flush_to_zero) {
        /* Denormal result flushed to zero; raise output-denormal. */
        flags |= float_flag_output_denormal;
        p->cls = float_class_zero;
        exp = 0;
        frac_clear(p);
    } else {
        /*
         * Denormal result.  For tininess-after-rounding, the value is
         * tiny iff rounding as if the exponent range were unbounded
         * would not carry out of the fraction.
         */
        bool is_tiny = s->tininess_before_rounding || exp < 0;

        if (!is_tiny) {
            FloatPartsN discard;
            is_tiny = !frac_addi(&discard, p, inc);
        }

        /* Shift right to denormalize, jamming lost bits as sticky. */
        frac_shrjam(p, 1 - exp);

        if (p->frac_lo & round_mask) {
            /* Need to recompute round-to-even/round-to-odd. */
            switch (s->float_rounding_mode) {
            case float_round_nearest_even:
                if (N > 64 && frac_lsb == 0) {
                    inc = ((p->frac_hi & 1) ||
                           (p->frac_lo & round_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                } else {
                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                }
                break;
            case float_round_to_odd:
            case float_round_to_odd_inf:
                if (N > 64 && frac_lsb == 0) {
                    inc = p->frac_hi & 1 ? 0 : round_mask;
                } else {
                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
                }
                break;
            default:
                break;
            }
            flags |= float_flag_inexact;
            frac_addi(p, p, inc);
            p->frac_lo &= ~round_mask;
        }

        /* Rounding may have produced the smallest normal: exp becomes 1. */
        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
        frac_shr(p, frac_shift);

        if (is_tiny && (flags & float_flag_inexact)) {
            flags |= float_flag_underflow;
        }
        if (exp == 0 && frac_eqz(p)) {
            p->cls = float_class_zero;
        }
    }
    p->exp = exp;
    float_raise(flags, s);
}
 286
 287static void partsN(uncanon)(FloatPartsN *p, float_status *s,
 288                            const FloatFmt *fmt)
 289{
 290    if (likely(p->cls == float_class_normal)) {
 291        parts_uncanon_normal(p, s, fmt);
 292    } else {
 293        switch (p->cls) {
 294        case float_class_zero:
 295            p->exp = 0;
 296            frac_clear(p);
 297            return;
 298        case float_class_inf:
 299            g_assert(!fmt->arm_althp);
 300            p->exp = fmt->exp_max;
 301            frac_clear(p);
 302            return;
 303        case float_class_qnan:
 304        case float_class_snan:
 305            g_assert(!fmt->arm_althp);
 306            p->exp = fmt->exp_max;
 307            frac_shr(p, fmt->frac_shift);
 308            return;
 309        default:
 310            break;
 311        }
 312        g_assert_not_reached();
 313    }
 314}
 315
/*
 * Returns the result of adding or subtracting the values of the
 * floating-point values `a' and `b'. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 */
static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, bool subtract)
{
    /* Fold the subtract flag into B's sign, reducing to addition. */
    bool b_sign = b->sign ^ subtract;
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (a->sign != b_sign) {
        /* Subtraction */
        if (likely(ab_mask == float_cmask_normal)) {
            if (parts_sub_normal(a, b)) {
                return a;
            }
            /* Subtract was exact, fall through to set sign. */
            ab_mask = float_cmask_zero;
        }

        if (ab_mask == float_cmask_zero) {
            /* An exact zero difference is -0 only when rounding down. */
            a->sign = s->float_rounding_mode == float_round_down;
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (a->cls != float_class_inf) {
                /* N - Inf */
                goto return_b;
            }
            if (b->cls != float_class_inf) {
                /* Inf - N */
                return a;
            }
            /* Inf - Inf */
            float_raise(float_flag_invalid | float_flag_invalid_isi, s);
            parts_default_nan(a, s);
            return a;
        }
    } else {
        /* Addition */
        if (likely(ab_mask == float_cmask_normal)) {
            parts_add_normal(a, b);
            return a;
        }

        if (ab_mask == float_cmask_zero) {
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            a->cls = float_class_inf;
            return a;
        }
    }

    /* Here exactly one of A, B is zero and the other is normal. */
    if (b->cls == float_class_zero) {
        g_assert(a->cls == float_class_normal);
        return a;
    }

    g_assert(a->cls == float_class_zero);
    g_assert(b->cls == float_class_normal);
 return_b:
    /* Return B, carrying the subtract-adjusted sign computed above. */
    b->sign = b_sign;
    return b;

 p_nan:
    return parts_pick_nan(a, b, s);
}
 396
 397/*
 398 * Returns the result of multiplying the floating-point values `a' and
 399 * `b'. The operation is performed according to the IEC/IEEE Standard
 400 * for Binary Floating-Point Arithmetic.
 401 */
 402static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
 403                                float_status *s)
 404{
 405    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 406    bool sign = a->sign ^ b->sign;
 407
 408    if (likely(ab_mask == float_cmask_normal)) {
 409        FloatPartsW tmp;
 410
 411        frac_mulw(&tmp, a, b);
 412        frac_truncjam(a, &tmp);
 413
 414        a->exp += b->exp + 1;
 415        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
 416            frac_add(a, a, a);
 417            a->exp -= 1;
 418        }
 419
 420        a->sign = sign;
 421        return a;
 422    }
 423
 424    /* Inf * Zero == NaN */
 425    if (unlikely(ab_mask == float_cmask_infzero)) {
 426        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
 427        parts_default_nan(a, s);
 428        return a;
 429    }
 430
 431    if (unlikely(ab_mask & float_cmask_anynan)) {
 432        return parts_pick_nan(a, b, s);
 433    }
 434
 435    /* Multiply by 0 or Inf */
 436    if (ab_mask & float_cmask_inf) {
 437        a->cls = float_class_inf;
 438        a->sign = sign;
 439        return a;
 440    }
 441
 442    g_assert(ab_mask & float_cmask_zero);
 443    a->cls = float_class_zero;
 444    a->sign = sign;
 445    return a;
 446}
 447
/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b' then adding 'c', with no intermediate rounding step after the
 * multiplication. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 * The flags argument allows the caller to select negation of the
 * addend, the intermediate product, or the final result. (The
 * difference between this and having the caller do a separate
 * negation is that negating externally will flip the sign bit on NaNs.)
 *
 * Requires A and C extracted into a double-sized structure to provide the
 * extra space for the widening multiply.
 */
static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
                                   FloatPartsN *c, int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;

    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    abc_mask = float_cmask(c->cls) | ab_mask;

    /*
     * It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
    }

    if (flags & float_muladd_negate_c) {
        c->sign ^= 1;
    }

    /* Compute the sign of the product into A. */
    a->sign ^= b->sign;
    if (flags & float_muladd_negate_product) {
        a->sign ^= 1;
    }

    /* Dispose of any non-normal operand of the multiply. */
    if (unlikely(ab_mask != float_cmask_normal)) {
        if (unlikely(ab_mask == float_cmask_infzero)) {
            /* Inf * 0 is invalid regardless of C. */
            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
            goto d_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (c->cls == float_class_inf && a->sign != c->sign) {
                /* (+/-Inf * N) plus opposite-signed Inf is invalid. */
                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
                goto d_nan;
            }
            goto return_inf;
        }

        g_assert(ab_mask & float_cmask_zero);
        /* The product is zero; the result reduces to (+/-0) + C. */
        if (c->cls == float_class_normal) {
            *a = *c;
            goto return_normal;
        }
        if (c->cls == float_class_zero) {
            if (a->sign != c->sign) {
                goto return_sub_zero;
            }
            goto return_zero;
        }
        g_assert(c->cls == float_class_inf);
    }

    if (unlikely(c->cls == float_class_inf)) {
        /* normal * normal + Inf => Inf with C's sign. */
        a->sign = c->sign;
        goto return_inf;
    }

    /* Perform the multiplication step. */
    p_widen.sign = a->sign;
    p_widen.exp = a->exp + b->exp + 1;
    frac_mulw(&p_widen, a, b);
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        /* Renormalize the product with one left shift. */
        frac_add(&p_widen, &p_widen, &p_widen);
        p_widen.exp -= 1;
    }

    /* Perform the addition step. */
    if (c->cls != float_class_zero) {
        /* Zero-extend C to less significant bits. */
        frac_widen(&c_widen, c);
        c_widen.exp = c->exp;

        if (a->sign == c->sign) {
            parts_add_normal(&p_widen, &c_widen);
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
            goto return_sub_zero;
        }
    }

    /* Narrow with sticky bit, for proper rounding later. */
    frac_truncjam(a, &p_widen);
    a->sign = p_widen.sign;
    a->exp = p_widen.exp;

 return_normal:
    if (flags & float_muladd_halve_result) {
        a->exp -= 1;
    }
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
    }
    return a;

 return_sub_zero:
    /* An exact zero difference is -0 only when rounding down. */
    a->sign = s->float_rounding_mode == float_round_down;
 return_zero:
    a->cls = float_class_zero;
    goto finish_sign;

 return_inf:
    a->cls = float_class_inf;
    goto finish_sign;

 d_nan:
    parts_default_nan(a, s);
    return a;
}
 574
 575/*
 576 * Returns the result of dividing the floating-point value `a' by the
 577 * corresponding value `b'. The operation is performed according to
 578 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 579 */
 580static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
 581                                float_status *s)
 582{
 583    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 584    bool sign = a->sign ^ b->sign;
 585
 586    if (likely(ab_mask == float_cmask_normal)) {
 587        a->sign = sign;
 588        a->exp -= b->exp + frac_div(a, b);
 589        return a;
 590    }
 591
 592    /* 0/0 or Inf/Inf => NaN */
 593    if (unlikely(ab_mask == float_cmask_zero)) {
 594        float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
 595        goto d_nan;
 596    }
 597    if (unlikely(ab_mask == float_cmask_inf)) {
 598        float_raise(float_flag_invalid | float_flag_invalid_idi, s);
 599        goto d_nan;
 600    }
 601
 602    /* All the NaN cases */
 603    if (unlikely(ab_mask & float_cmask_anynan)) {
 604        return parts_pick_nan(a, b, s);
 605    }
 606
 607    a->sign = sign;
 608
 609    /* Inf / X */
 610    if (a->cls == float_class_inf) {
 611        return a;
 612    }
 613
 614    /* 0 / X */
 615    if (a->cls == float_class_zero) {
 616        return a;
 617    }
 618
 619    /* X / Inf */
 620    if (b->cls == float_class_inf) {
 621        a->cls = float_class_zero;
 622        return a;
 623    }
 624
 625    /* X / 0 => Inf */
 626    g_assert(b->cls == float_class_zero);
 627    float_raise(float_flag_divbyzero, s);
 628    a->cls = float_class_inf;
 629    return a;
 630
 631 d_nan:
 632    parts_default_nan(a, s);
 633    return a;
 634}
 635
 636/*
 637 * Floating point remainder, per IEC/IEEE, or modulus.
 638 */
 639static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
 640                                   uint64_t *mod_quot, float_status *s)
 641{
 642    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
 643
 644    if (likely(ab_mask == float_cmask_normal)) {
 645        frac_modrem(a, b, mod_quot);
 646        return a;
 647    }
 648
 649    if (mod_quot) {
 650        *mod_quot = 0;
 651    }
 652
 653    /* All the NaN cases */
 654    if (unlikely(ab_mask & float_cmask_anynan)) {
 655        return parts_pick_nan(a, b, s);
 656    }
 657
 658    /* Inf % N; N % 0 */
 659    if (a->cls == float_class_inf || b->cls == float_class_zero) {
 660        float_raise(float_flag_invalid, s);
 661        parts_default_nan(a, s);
 662        return a;
 663    }
 664
 665    /* N % Inf; 0 % N */
 666    g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
 667    return a;
 668}
 669
/*
 * Square Root
 *
 * The base algorithm is lifted from
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
 * and is thus MIT licenced.
 */
static void partsN(sqrt)(FloatPartsN *a, float_status *status,
                         const FloatFmt *fmt)
{
    const uint32_t three32 = 3u << 30;
    const uint64_t three64 = 3ull << 62;
    uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
    uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
    uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
    uint64_t discard;
    bool exp_odd;
    size_t index;

    /* Special cases: NaN propagates, +/-0 and +Inf pass through. */
    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, status);
            return;
        case float_class_zero:
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                /* sqrt(-Inf) is invalid. */
                goto d_nan;
            }
            return;
        default:
            g_assert_not_reached();
        }
    }

    /* sqrt of a negative normal is invalid. */
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /*
     * Argument reduction.
     * x = 4^e frac; with integer e, and frac in [1, 4)
     * m = frac fixed point at bit 62, since we're in base 4.
     * If base-2 exponent is odd, exchange that for multiply by 2,
     * which results in no shift.
     */
    exp_odd = a->exp & 1;
    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
    if (!exp_odd) {
        frac_shr(a, 1);
    }

    /*
     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
     *
     * Initial estimate:
     * 7-bit lookup table (1-bit exponent and 6-bit significand).
     *
     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
     * a table lookup is faster and needs one less iteration.
     * The 7-bit table gives |e| < 0x1.fdp-9.
     *
     * A Newton-Raphson iteration for r is
     *   s = m*r
     *   d = s*r
     *   u = 3 - d
     *   r = r*u/2
     *
     * Fixed point representations:
     *   m, s, d, u, three are all 2.30; r is 0.32
     */
    m64 = a->frac_hi;
    m32 = m64 >> 32;

    r32 = rsqrt_tab[index] << 16;
    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */

    /* First Newton-Raphson step, in 32-bit arithmetic. */
    s32 = ((uint64_t)m32 * r32) >> 32;
    d32 = ((uint64_t)s32 * r32) >> 32;
    u32 = three32 - d32;

    if (N == 64) {
        /* float64 or smaller */

        r32 = ((uint64_t)r32 * u32) >> 31;
        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */

        /* Second step refines s to the precision needed below. */
        s32 = ((uint64_t)m32 * r32) >> 32;
        d32 = ((uint64_t)s32 * r32) >> 32;
        u32 = three32 - d32;

        if (fmt->frac_size <= 23) {
            /* float32 or smaller */

            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
            s32 = (s32 - 1) >> 6;               /* 9.23 */
            /* s < sqrt(m) < s + 0x1.08p-23 */

            /* compute nearest rounded result to 2.23 bits */
            uint32_t d0 = (m32 << 16) - s32 * s32;
            uint32_t d1 = s32 - d0;
            uint32_t d2 = d1 + s32 + 1;
            s32 += d1 >> 31;
            a->frac_hi = (uint64_t)s32 << (64 - 25);

            /* increment or decrement for inexact */
            if (d2 != 0) {
                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
            }
            goto done;
        }

        /* float64 */

        r64 = (uint64_t)r32 * u32 * 2;
        /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
        mul64To128(m64, r64, &s64, &discard);
        mul64To128(s64, r64, &d64, &discard);
        u64 = three64 - d64;

        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
        s64 = (s64 - 2) >> 9;                  /* 12.52 */

        /* Compute nearest rounded result */
        uint64_t d0 = (m64 << 42) - s64 * s64;
        uint64_t d1 = s64 - d0;
        uint64_t d2 = d1 + s64 + 1;
        s64 += d1 >> 63;
        a->frac_hi = s64 << (64 - 54);

        /* increment or decrement for inexact */
        if (d2 != 0) {
            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
        }
        goto done;
    }

    /* Larger formats: more iterations, widening to 128 bits. */
    r64 = (uint64_t)r32 * u32 * 2;
    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &r64, &discard);
    r64 <<= 1;
    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &rh, &rl);
    add128(rh, rl, rh, rl, &rh, &rl);
    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */

    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
    sub128(three64, 0, dh, dl, &uh, &ul);
    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */

    sub128(sh, sl, 0, 4, &sh, &sl);
    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
    /* s < sqrt(m) < s + 1ulp */

    /* Compute nearest rounded result */
    mul64To128(sl, sl, &d0h, &d0l);
    d0h += 2 * sh * sl;
    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
    add128(sh, sl, 0, 1, &d2h, &d2l);
    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
    shift128Left(sh, sl, 128 - 114, &sh, &sl);

    /* increment or decrement for inexact */
    if (d2h | d2l) {
        if ((int64_t)(d1h ^ d2h) < 0) {
            sub128(sh, sl, 0, 1, &sh, &sl);
        } else {
            add128(sh, sl, 0, 1, &sh, &sl);
        }
    }
    a->frac_lo = sl;
    a->frac_hi = sh;

 done:
    /* Convert back from base 4 to base 2. */
    a->exp >>= 1;
    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(a, a, a);
    } else {
        a->exp += 1;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
    parts_default_nan(a, status);
}
 875
/*
 * Rounds the floating-point value `a' to an integer, and returns the
 * result as a floating-point value. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 *
 * parts_round_to_int_normal is an internal helper function for
 * normal numbers only, returning true for inexact but not directly
 * raising float_flag_inexact.
 */
static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
                                        int scale, int frac_size)
{
    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
    int shift_adj;

    /* Clamp the scale so the exponent adjustment cannot overflow. */
    scale = MIN(MAX(scale, -0x10000), 0x10000);
    a->exp += scale;

    if (a->exp < 0) {
        bool one;

        /* All fractional */
        switch (rmode) {
        case float_round_nearest_even:
            one = false;
            if (a->exp == -1) {
                FloatPartsN tmp;
                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
                frac_add(&tmp, a, a);
                /* Anything remaining means frac > 0.5. */
                one = !frac_eqz(&tmp);
            }
            break;
        case float_round_ties_away:
            one = a->exp == -1;
            break;
        case float_round_to_zero:
            one = false;
            break;
        case float_round_up:
            one = !a->sign;
            break;
        case float_round_down:
            one = a->sign;
            break;
        case float_round_to_odd:
            one = true;
            break;
        default:
            g_assert_not_reached();
        }

        /* Result is either +/-1 or +/-0, per the flag computed above. */
        frac_clear(a);
        a->exp = 0;
        if (one) {
            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
        } else {
            a->cls = float_class_zero;
        }
        return true;
    }

    if (a->exp >= frac_size) {
        /* All integral */
        return false;
    }

    if (N > 64 && a->exp < N - 64) {
        /*
         * Rounding is not in the low word -- shift lsb to bit 2,
         * which leaves room for sticky and rounding bit.
         */
        shift_adj = (N - 1) - (a->exp + 2);
        frac_shrjam(a, shift_adj);
        frac_lsb = 1 << 2;
    } else {
        shift_adj = 0;
        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
    }

    frac_lsbm1 = frac_lsb >> 1;
    rnd_mask = frac_lsb - 1;
    rnd_even_mask = rnd_mask | frac_lsb;

    if (!(a->frac_lo & rnd_mask)) {
        /* Fractional bits already clear, undo the shift above. */
        frac_shl(a, shift_adj);
        return false;
    }

    /* Select the rounding increment for the current mode. */
    switch (rmode) {
    case float_round_nearest_even:
        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        inc = 0;
        break;
    case float_round_up:
        inc = a->sign ? 0 : rnd_mask;
        break;
    case float_round_down:
        inc = a->sign ? rnd_mask : 0;
        break;
    case float_round_to_odd:
        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
        break;
    default:
        g_assert_not_reached();
    }

    if (shift_adj == 0) {
        if (frac_addi(a, a, inc)) {
            /* Rounding carried out of the fraction: renormalize. */
            frac_shr(a, 1);
            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
            a->exp++;
        }
        a->frac_lo &= ~rnd_mask;
    } else {
        frac_addi(a, a, inc);
        a->frac_lo &= ~rnd_mask;
        /* Be careful shifting back, not to overflow */
        frac_shl(a, shift_adj - 1);
        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
            /* The rounding carry became the implicit bit: bump exp. */
            a->exp++;
        } else {
            /* No carry: complete the shift with one doubling. */
            frac_add(a, a, a);
        }
    }
    return true;
}
1010
1011static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
1012                                 int scale, float_status *s,
1013                                 const FloatFmt *fmt)
1014{
1015    switch (a->cls) {
1016    case float_class_qnan:
1017    case float_class_snan:
1018        parts_return_nan(a, s);
1019        break;
1020    case float_class_zero:
1021    case float_class_inf:
1022        break;
1023    case float_class_normal:
1024        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
1025            float_raise(float_flag_inexact, s);
1026        }
1027        break;
1028    default:
1029        g_assert_not_reached();
1030    }
1031}
1032
1033/*
1034 * Returns the result of converting the floating-point value `a' to
1035 * the two's complement integer format. The conversion is performed
1036 * according to the IEC/IEEE Standard for Binary Floating-Point
1037 * Arithmetic---which means in particular that the conversion is
1038 * rounded according to the current rounding mode. If `a' is a NaN,
1039 * the largest positive integer is returned. Otherwise, if the
1040 * conversion overflows, the largest integer with the same sign as `a'
1041 * is returned.
1042 */
1043static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
1044                                     int scale, int64_t min, int64_t max,
1045                                     float_status *s)
1046{
1047    int flags = 0;
1048    uint64_t r;
1049
1050    switch (p->cls) {
1051    case float_class_snan:
1052        flags |= float_flag_invalid_snan;
1053        /* fall through */
1054    case float_class_qnan:
1055        flags |= float_flag_invalid;
1056        r = max;
1057        break;
1058
1059    case float_class_inf:
1060        flags = float_flag_invalid | float_flag_invalid_cvti;
1061        r = p->sign ? min : max;
1062        break;
1063
1064    case float_class_zero:
1065        return 0;
1066
1067    case float_class_normal:
1068        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1069        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1070            flags = float_flag_inexact;
1071        }
1072
1073        if (p->exp <= DECOMPOSED_BINARY_POINT) {
1074            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1075        } else {
1076            r = UINT64_MAX;
1077        }
1078        if (p->sign) {
1079            if (r <= -(uint64_t)min) {
1080                r = -r;
1081            } else {
1082                flags = float_flag_invalid | float_flag_invalid_cvti;
1083                r = min;
1084            }
1085        } else if (r > max) {
1086            flags = float_flag_invalid | float_flag_invalid_cvti;
1087            r = max;
1088        }
1089        break;
1090
1091    default:
1092        g_assert_not_reached();
1093    }
1094
1095    float_raise(flags, s);
1096    return r;
1097}
1098
1099/*
1100 *  Returns the result of converting the floating-point value `a' to
1101 *  the unsigned integer format. The conversion is performed according
1102 *  to the IEC/IEEE Standard for Binary Floating-Point
1103 *  Arithmetic---which means in particular that the conversion is
1104 *  rounded according to the current rounding mode. If `a' is a NaN,
1105 *  the largest unsigned integer is returned. Otherwise, if the
1106 *  conversion overflows, the largest unsigned integer is returned. If
1107 *  the 'a' is negative, the result is rounded and zero is returned;
1108 *  values that do not round to zero will raise the inexact exception
1109 *  flag.
1110 */
1111static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
1112                                      int scale, uint64_t max, float_status *s)
1113{
1114    int flags = 0;
1115    uint64_t r;
1116
1117    switch (p->cls) {
1118    case float_class_snan:
1119        flags |= float_flag_invalid_snan;
1120        /* fall through */
1121    case float_class_qnan:
1122        flags |= float_flag_invalid;
1123        r = max;
1124        break;
1125
1126    case float_class_inf:
1127        flags = float_flag_invalid | float_flag_invalid_cvti;
1128        r = p->sign ? 0 : max;
1129        break;
1130
1131    case float_class_zero:
1132        return 0;
1133
1134    case float_class_normal:
1135        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1136        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1137            flags = float_flag_inexact;
1138            if (p->cls == float_class_zero) {
1139                r = 0;
1140                break;
1141            }
1142        }
1143
1144        if (p->sign) {
1145            flags = float_flag_invalid | float_flag_invalid_cvti;
1146            r = 0;
1147        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
1148            flags = float_flag_invalid | float_flag_invalid_cvti;
1149            r = max;
1150        } else {
1151            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1152            if (r > max) {
1153                flags = float_flag_invalid | float_flag_invalid_cvti;
1154                r = max;
1155            }
1156        }
1157        break;
1158
1159    default:
1160        g_assert_not_reached();
1161    }
1162
1163    float_raise(flags, s);
1164    return r;
1165}
1166
1167/*
1168 * Integer to float conversions
1169 *
1170 * Returns the result of converting the two's complement integer `a'
1171 * to the floating-point format. The conversion is performed according
1172 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1173 */
1174static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
1175                                  int scale, float_status *s)
1176{
1177    uint64_t f = a;
1178    int shift;
1179
1180    memset(p, 0, sizeof(*p));
1181
1182    if (a == 0) {
1183        p->cls = float_class_zero;
1184        return;
1185    }
1186
1187    p->cls = float_class_normal;
1188    if (a < 0) {
1189        f = -f;
1190        p->sign = true;
1191    }
1192    shift = clz64(f);
1193    scale = MIN(MAX(scale, -0x10000), 0x10000);
1194
1195    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1196    p->frac_hi = f << shift;
1197}
1198
1199/*
1200 * Unsigned Integer to float conversions
1201 *
1202 * Returns the result of converting the unsigned integer `a' to the
1203 * floating-point format. The conversion is performed according to the
1204 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1205 */
1206static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
1207                                  int scale, float_status *status)
1208{
1209    memset(p, 0, sizeof(*p));
1210
1211    if (a == 0) {
1212        p->cls = float_class_zero;
1213    } else {
1214        int shift = clz64(a);
1215        scale = MIN(MAX(scale, -0x10000), 0x10000);
1216        p->cls = float_class_normal;
1217        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1218        p->frac_hi = a << shift;
1219    }
1220}
1221
1222/*
1223 * Float min/max.
1224 */
1225static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
1226                                   float_status *s, int flags)
1227{
1228    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1229    int a_exp, b_exp, cmp;
1230
1231    if (unlikely(ab_mask & float_cmask_anynan)) {
1232        /*
1233         * For minNum/maxNum (IEEE 754-2008)
1234         * or minimumNumber/maximumNumber (IEEE 754-2019),
1235         * if one operand is a QNaN, and the other
1236         * operand is numerical, then return numerical argument.
1237         */
1238        if ((flags & (minmax_isnum | minmax_isnumber))
1239            && !(ab_mask & float_cmask_snan)
1240            && (ab_mask & ~float_cmask_qnan)) {
1241            return is_nan(a->cls) ? b : a;
1242        }
1243
1244        /*
1245         * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
1246         * are removed and replaced with minimum, minimumNumber, maximum
1247         * and maximumNumber.
1248         * minimumNumber/maximumNumber behavior for SNaN is changed to:
1249         *   If both operands are NaNs, a QNaN is returned.
1250         *   If either operand is a SNaN,
1251         *   an invalid operation exception is signaled,
1252         *   but unless both operands are NaNs,
1253         *   the SNaN is otherwise ignored and not converted to a QNaN.
1254         */
1255        if ((flags & minmax_isnumber)
1256            && (ab_mask & float_cmask_snan)
1257            && (ab_mask & ~float_cmask_anynan)) {
1258            float_raise(float_flag_invalid, s);
1259            return is_nan(a->cls) ? b : a;
1260        }
1261
1262        return parts_pick_nan(a, b, s);
1263    }
1264
1265    a_exp = a->exp;
1266    b_exp = b->exp;
1267
1268    if (unlikely(ab_mask != float_cmask_normal)) {
1269        switch (a->cls) {
1270        case float_class_normal:
1271            break;
1272        case float_class_inf:
1273            a_exp = INT16_MAX;
1274            break;
1275        case float_class_zero:
1276            a_exp = INT16_MIN;
1277            break;
1278        default:
1279            g_assert_not_reached();
1280            break;
1281        }
1282        switch (b->cls) {
1283        case float_class_normal:
1284            break;
1285        case float_class_inf:
1286            b_exp = INT16_MAX;
1287            break;
1288        case float_class_zero:
1289            b_exp = INT16_MIN;
1290            break;
1291        default:
1292            g_assert_not_reached();
1293            break;
1294        }
1295    }
1296
1297    /* Compare magnitudes. */
1298    cmp = a_exp - b_exp;
1299    if (cmp == 0) {
1300        cmp = frac_cmp(a, b);
1301    }
1302
1303    /*
1304     * Take the sign into account.
1305     * For ismag, only do this if the magnitudes are equal.
1306     */
1307    if (!(flags & minmax_ismag) || cmp == 0) {
1308        if (a->sign != b->sign) {
1309            /* For differing signs, the negative operand is less. */
1310            cmp = a->sign ? -1 : 1;
1311        } else if (a->sign) {
1312            /* For two negative operands, invert the magnitude comparison. */
1313            cmp = -cmp;
1314        }
1315    }
1316
1317    if (flags & minmax_ismin) {
1318        cmp = -cmp;
1319    }
1320    return cmp < 0 ? b : a;
1321}
1322
1323/*
1324 * Floating point compare
1325 */
1326static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
1327                                     float_status *s, bool is_quiet)
1328{
1329    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1330    int cmp;
1331
1332    if (likely(ab_mask == float_cmask_normal)) {
1333        if (a->sign != b->sign) {
1334            goto a_sign;
1335        }
1336        if (a->exp != b->exp) {
1337            cmp = a->exp < b->exp ? -1 : 1;
1338        } else {
1339            cmp = frac_cmp(a, b);
1340        }
1341        if (a->sign) {
1342            cmp = -cmp;
1343        }
1344        return cmp;
1345    }
1346
1347    if (unlikely(ab_mask & float_cmask_anynan)) {
1348        if (ab_mask & float_cmask_snan) {
1349            float_raise(float_flag_invalid | float_flag_invalid_snan, s);
1350        } else if (!is_quiet) {
1351            float_raise(float_flag_invalid, s);
1352        }
1353        return float_relation_unordered;
1354    }
1355
1356    if (ab_mask & float_cmask_zero) {
1357        if (ab_mask == float_cmask_zero) {
1358            return float_relation_equal;
1359        } else if (a->cls == float_class_zero) {
1360            goto b_sign;
1361        } else {
1362            goto a_sign;
1363        }
1364    }
1365
1366    if (ab_mask == float_cmask_inf) {
1367        if (a->sign == b->sign) {
1368            return float_relation_equal;
1369        }
1370    } else if (b->cls == float_class_inf) {
1371        goto b_sign;
1372    } else {
1373        g_assert(a->cls == float_class_inf);
1374    }
1375
1376 a_sign:
1377    return a->sign ? float_relation_less : float_relation_greater;
1378 b_sign:
1379    return b->sign ? float_relation_greater : float_relation_less;
1380}
1381
1382/*
1383 * Multiply A by 2 raised to the power N.
1384 */
1385static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
1386{
1387    switch (a->cls) {
1388    case float_class_snan:
1389    case float_class_qnan:
1390        parts_return_nan(a, s);
1391        break;
1392    case float_class_zero:
1393    case float_class_inf:
1394        break;
1395    case float_class_normal:
1396        a->exp += MIN(MAX(n, -0x10000), 0x10000);
1397        break;
1398    default:
1399        g_assert_not_reached();
1400    }
1401}
1402
1403/*
1404 * Return log2(A)
1405 */
1406static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
1407{
1408    uint64_t a0, a1, r, t, ign;
1409    FloatPartsN f;
1410    int i, n, a_exp, f_exp;
1411
1412    if (unlikely(a->cls != float_class_normal)) {
1413        switch (a->cls) {
1414        case float_class_snan:
1415        case float_class_qnan:
1416            parts_return_nan(a, s);
1417            return;
1418        case float_class_zero:
1419            /* log2(0) = -inf */
1420            a->cls = float_class_inf;
1421            a->sign = 1;
1422            return;
1423        case float_class_inf:
1424            if (unlikely(a->sign)) {
1425                goto d_nan;
1426            }
1427            return;
1428        default:
1429            break;
1430        }
1431        g_assert_not_reached();
1432    }
1433    if (unlikely(a->sign)) {
1434        goto d_nan;
1435    }
1436
1437    /* TODO: This algorithm looses bits too quickly for float128. */
1438    g_assert(N == 64);
1439
1440    a_exp = a->exp;
1441    f_exp = -1;
1442
1443    r = 0;
1444    t = DECOMPOSED_IMPLICIT_BIT;
1445    a0 = a->frac_hi;
1446    a1 = 0;
1447
1448    n = fmt->frac_size + 2;
1449    if (unlikely(a_exp == -1)) {
1450        /*
1451         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
1452         * When the value is very close to 1.0, there are lots of 1's in
1453         * the msb parts of the fraction.  At the end, when we subtract
1454         * this value from -1.0, we can see a catastrophic loss of precision,
1455         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
1456         * bits of y in the final result.  To minimize this, compute as many
1457         * digits as we can.
1458         * ??? This case needs another algorithm to avoid this.
1459         */
1460        n = fmt->frac_size * 2 + 2;
1461        /* Don't compute a value overlapping the sticky bit */
1462        n = MIN(n, 62);
1463    }
1464
1465    for (i = 0; i < n; i++) {
1466        if (a1) {
1467            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
1468        } else if (a0 & 0xffffffffull) {
1469            mul64To128(a0, a0, &a0, &a1);
1470        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
1471            a0 >>= 32;
1472            a0 *= a0;
1473        } else {
1474            goto exact;
1475        }
1476
1477        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
1478            if (unlikely(a_exp == 0 && r == 0)) {
1479                /*
1480                 * When a_exp == 0, we're computing the log2 of a value
1481                 * [1.0,2.0).  When the value is very close to 1.0, there
1482                 * are lots of 0's in the msb parts of the fraction.
1483                 * We need to compute more digits to produce a correct
1484                 * result -- restart at the top of the fraction.
1485                 * ??? This is likely to lose precision quickly, as for
1486                 * float128; we may need another method.
1487                 */
1488                f_exp -= i;
1489                t = r = DECOMPOSED_IMPLICIT_BIT;
1490                i = 0;
1491            } else {
1492                r |= t;
1493            }
1494        } else {
1495            add128(a0, a1, a0, a1, &a0, &a1);
1496        }
1497        t >>= 1;
1498    }
1499
1500    /* Set sticky for inexact. */
1501    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);
1502
1503 exact:
1504    parts_sint_to_float(a, a_exp, 0, s);
1505    if (r == 0) {
1506        return;
1507    }
1508
1509    memset(&f, 0, sizeof(f));
1510    f.cls = float_class_normal;
1511    f.frac_hi = r;
1512    f.exp = f_exp - frac_normalize(&f);
1513
1514    if (a_exp < 0) {
1515        parts_sub_normal(a, &f);
1516    } else if (a_exp > 0) {
1517        parts_add_normal(a, &f);
1518    } else {
1519        *a = f;
1520    }
1521    return;
1522
1523 d_nan:
1524    float_raise(float_flag_invalid, s);
1525    parts_default_nan(a, s);
1526}
1527