qemu/target/i386/tcg/fpu_helper.c
<<
>>
Prefs
   1/*
   2 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include <math.h>
  22#include "cpu.h"
  23#include "exec/helper-proto.h"
  24#include "qemu/host-utils.h"
  25#include "exec/exec-all.h"
  26#include "exec/cpu_ldst.h"
  27#include "fpu/softfloat.h"
  28#include "fpu/softfloat-macros.h"
  29#include "helper-tcg.h"
  30
  31#ifdef CONFIG_SOFTMMU
  32#include "hw/irq.h"
  33#endif
  34
/*
 * float macros
 *
 * All of these expand to expressions on a variable named 'env', which must
 * be in scope at the point of use.
 */
/* FT0: the env->ft0 operand slot. */
#define FT0    (env->ft0)
/* ST0: the register at the current top-of-stack index env->fpstt. */
#define ST0    (env->fpregs[env->fpstt].d)
/* ST(n): the register n slots away from the top, wrapping modulo 8. */
#define ST(n)  (env->fpregs[(env->fpstt + (n)) & 7].d)
#define ST1    ST(1)
  40
  41#define FPU_RC_MASK         0xc00
  42#define FPU_RC_NEAR         0x000
  43#define FPU_RC_DOWN         0x400
  44#define FPU_RC_UP           0x800
  45#define FPU_RC_CHOP         0xc00
  46
  47#define MAXTAN 9223372036854775808.0
  48
  49/* the following deal with x86 long double-precision numbers */
  50#define MAXEXPD 0x7fff
  51#define EXPBIAS 16383
  52#define EXPD(fp)        (fp.l.upper & 0x7fff)
  53#define SIGND(fp)       ((fp.l.upper) & 0x8000)
  54#define MANTD(fp)       (fp.l.lower)
  55#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
  56
  57#define FPUS_IE (1 << 0)
  58#define FPUS_DE (1 << 1)
  59#define FPUS_ZE (1 << 2)
  60#define FPUS_OE (1 << 3)
  61#define FPUS_UE (1 << 4)
  62#define FPUS_PE (1 << 5)
  63#define FPUS_SF (1 << 6)
  64#define FPUS_SE (1 << 7)
  65#define FPUS_B  (1 << 15)
  66
  67#define FPUC_EM 0x3f
  68
/*
 * floatx80 constants loaded by the helper_fld*_ST0 functions.  The "_d"
 * and "_u" variants have a significand one unit lower / higher than the
 * plain constant and are selected according to the rounding-control field
 * of the control word.
 * NOTE(review): names suggest log10(2), log2(e), log2(10), ln(2) and pi --
 * confirm against the architecture manual.
 */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
  77
  78#if !defined(CONFIG_USER_ONLY)
/* Interrupt line used to report FPU errors; set by x86_register_ferr_irq(). */
static qemu_irq ferr_irq;

/*
 * Record 'irq' as the line that fpu_raise_exception() raises and
 * cpu_set_ignne() lowers.
 */
void x86_register_ferr_irq(qemu_irq irq)
{
    ferr_irq = irq;
}
  85
  86static void cpu_clear_ignne(void)
  87{
  88    CPUX86State *env = &X86_CPU(first_cpu)->env;
  89    env->hflags2 &= ~HF2_IGNNE_MASK;
  90}
  91
  92void cpu_set_ignne(void)
  93{
  94    CPUX86State *env = &X86_CPU(first_cpu)->env;
  95    env->hflags2 |= HF2_IGNNE_MASK;
  96    /*
  97     * We get here in response to a write to port F0h.  The chipset should
  98     * deassert FP_IRQ and FERR# instead should stay signaled until FPSW_SE is
  99     * cleared, because FERR# and FP_IRQ are two separate pins on real
 100     * hardware.  However, we don't model FERR# as a qemu_irq, so we just
 101     * do directly what the chipset would do, i.e. deassert FP_IRQ.
 102     */
 103    qemu_irq_lower(ferr_irq);
 104}
 105#endif
 106
 107
 108static inline void fpush(CPUX86State *env)
 109{
 110    env->fpstt = (env->fpstt - 1) & 7;
 111    env->fptags[env->fpstt] = 0; /* validate stack entry */
 112}
 113
 114static inline void fpop(CPUX86State *env)
 115{
 116    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
 117    env->fpstt = (env->fpstt + 1) & 7;
 118}
 119
 120static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
 121                                   uintptr_t retaddr)
 122{
 123    CPU_LDoubleU temp;
 124
 125    temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
 126    temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
 127    return temp.d;
 128}
 129
 130static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
 131                               uintptr_t retaddr)
 132{
 133    CPU_LDoubleU temp;
 134
 135    temp.d = f;
 136    cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
 137    cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
 138}
 139
 140/* x87 FPU helpers */
 141
 142static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
 143{
 144    union {
 145        float64 f64;
 146        double d;
 147    } u;
 148
 149    u.f64 = floatx80_to_float64(a, &env->fp_status);
 150    return u.d;
 151}
 152
 153static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
 154{
 155    union {
 156        float64 f64;
 157        double d;
 158    } u;
 159
 160    u.d = a;
 161    return float64_to_floatx80(u.f64, &env->fp_status);
 162}
 163
 164static void fpu_set_exception(CPUX86State *env, int mask)
 165{
 166    env->fpus |= mask;
 167    if (env->fpus & (~env->fpuc & FPUC_EM)) {
 168        env->fpus |= FPUS_SE | FPUS_B;
 169    }
 170}
 171
 172static inline uint8_t save_exception_flags(CPUX86State *env)
 173{
 174    uint8_t old_flags = get_float_exception_flags(&env->fp_status);
 175    set_float_exception_flags(0, &env->fp_status);
 176    return old_flags;
 177}
 178
 179static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
 180{
 181    uint8_t new_flags = get_float_exception_flags(&env->fp_status);
 182    float_raise(old_flags, &env->fp_status);
 183    fpu_set_exception(env,
 184                      ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
 185                       (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
 186                       (new_flags & float_flag_overflow ? FPUS_OE : 0) |
 187                       (new_flags & float_flag_underflow ? FPUS_UE : 0) |
 188                       (new_flags & float_flag_inexact ? FPUS_PE : 0) |
 189                       (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
 190}
 191
 192static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
 193{
 194    uint8_t old_flags = save_exception_flags(env);
 195    floatx80 ret = floatx80_div(a, b, &env->fp_status);
 196    merge_exception_flags(env, old_flags);
 197    return ret;
 198}
 199
 200static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
 201{
 202    if (env->cr[0] & CR0_NE_MASK) {
 203        raise_exception_ra(env, EXCP10_COPR, retaddr);
 204    }
 205#if !defined(CONFIG_USER_ONLY)
 206    else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) {
 207        qemu_irq_raise(ferr_irq);
 208    }
 209#endif
 210}
 211
 212void helper_flds_FT0(CPUX86State *env, uint32_t val)
 213{
 214    uint8_t old_flags = save_exception_flags(env);
 215    union {
 216        float32 f;
 217        uint32_t i;
 218    } u;
 219
 220    u.i = val;
 221    FT0 = float32_to_floatx80(u.f, &env->fp_status);
 222    merge_exception_flags(env, old_flags);
 223}
 224
 225void helper_fldl_FT0(CPUX86State *env, uint64_t val)
 226{
 227    uint8_t old_flags = save_exception_flags(env);
 228    union {
 229        float64 f;
 230        uint64_t i;
 231    } u;
 232
 233    u.i = val;
 234    FT0 = float64_to_floatx80(u.f, &env->fp_status);
 235    merge_exception_flags(env, old_flags);
 236}
 237
/*
 * Set FT0 to the 32-bit signed integer 'val' converted to floatx80.
 * No exception-flag save/merge is performed around the conversion.
 */
void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FT0 = int32_to_floatx80(val, &env->fp_status);
}
 242
 243void helper_flds_ST0(CPUX86State *env, uint32_t val)
 244{
 245    uint8_t old_flags = save_exception_flags(env);
 246    int new_fpstt;
 247    union {
 248        float32 f;
 249        uint32_t i;
 250    } u;
 251
 252    new_fpstt = (env->fpstt - 1) & 7;
 253    u.i = val;
 254    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
 255    env->fpstt = new_fpstt;
 256    env->fptags[new_fpstt] = 0; /* validate stack entry */
 257    merge_exception_flags(env, old_flags);
 258}
 259
 260void helper_fldl_ST0(CPUX86State *env, uint64_t val)
 261{
 262    uint8_t old_flags = save_exception_flags(env);
 263    int new_fpstt;
 264    union {
 265        float64 f;
 266        uint64_t i;
 267    } u;
 268
 269    new_fpstt = (env->fpstt - 1) & 7;
 270    u.i = val;
 271    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
 272    env->fpstt = new_fpstt;
 273    env->fptags[new_fpstt] = 0; /* validate stack entry */
 274    merge_exception_flags(env, old_flags);
 275}
 276
 277void helper_fildl_ST0(CPUX86State *env, int32_t val)
 278{
 279    int new_fpstt;
 280
 281    new_fpstt = (env->fpstt - 1) & 7;
 282    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
 283    env->fpstt = new_fpstt;
 284    env->fptags[new_fpstt] = 0; /* validate stack entry */
 285}
 286
 287void helper_fildll_ST0(CPUX86State *env, int64_t val)
 288{
 289    int new_fpstt;
 290
 291    new_fpstt = (env->fpstt - 1) & 7;
 292    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
 293    env->fpstt = new_fpstt;
 294    env->fptags[new_fpstt] = 0; /* validate stack entry */
 295}
 296
 297uint32_t helper_fsts_ST0(CPUX86State *env)
 298{
 299    uint8_t old_flags = save_exception_flags(env);
 300    union {
 301        float32 f;
 302        uint32_t i;
 303    } u;
 304
 305    u.f = floatx80_to_float32(ST0, &env->fp_status);
 306    merge_exception_flags(env, old_flags);
 307    return u.i;
 308}
 309
 310uint64_t helper_fstl_ST0(CPUX86State *env)
 311{
 312    uint8_t old_flags = save_exception_flags(env);
 313    union {
 314        float64 f;
 315        uint64_t i;
 316    } u;
 317
 318    u.f = floatx80_to_float64(ST0, &env->fp_status);
 319    merge_exception_flags(env, old_flags);
 320    return u.i;
 321}
 322
 323int32_t helper_fist_ST0(CPUX86State *env)
 324{
 325    uint8_t old_flags = save_exception_flags(env);
 326    int32_t val;
 327
 328    val = floatx80_to_int32(ST0, &env->fp_status);
 329    if (val != (int16_t)val) {
 330        set_float_exception_flags(float_flag_invalid, &env->fp_status);
 331        val = -32768;
 332    }
 333    merge_exception_flags(env, old_flags);
 334    return val;
 335}
 336
 337int32_t helper_fistl_ST0(CPUX86State *env)
 338{
 339    uint8_t old_flags = save_exception_flags(env);
 340    int32_t val;
 341
 342    val = floatx80_to_int32(ST0, &env->fp_status);
 343    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 344        val = 0x80000000;
 345    }
 346    merge_exception_flags(env, old_flags);
 347    return val;
 348}
 349
 350int64_t helper_fistll_ST0(CPUX86State *env)
 351{
 352    uint8_t old_flags = save_exception_flags(env);
 353    int64_t val;
 354
 355    val = floatx80_to_int64(ST0, &env->fp_status);
 356    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 357        val = 0x8000000000000000ULL;
 358    }
 359    merge_exception_flags(env, old_flags);
 360    return val;
 361}
 362
 363int32_t helper_fistt_ST0(CPUX86State *env)
 364{
 365    uint8_t old_flags = save_exception_flags(env);
 366    int32_t val;
 367
 368    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 369    if (val != (int16_t)val) {
 370        set_float_exception_flags(float_flag_invalid, &env->fp_status);
 371        val = -32768;
 372    }
 373    merge_exception_flags(env, old_flags);
 374    return val;
 375}
 376
 377int32_t helper_fisttl_ST0(CPUX86State *env)
 378{
 379    uint8_t old_flags = save_exception_flags(env);
 380    int32_t val;
 381
 382    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 383    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 384        val = 0x80000000;
 385    }
 386    merge_exception_flags(env, old_flags);
 387    return val;
 388}
 389
 390int64_t helper_fisttll_ST0(CPUX86State *env)
 391{
 392    uint8_t old_flags = save_exception_flags(env);
 393    int64_t val;
 394
 395    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
 396    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 397        val = 0x8000000000000000ULL;
 398    }
 399    merge_exception_flags(env, old_flags);
 400    return val;
 401}
 402
 403void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
 404{
 405    int new_fpstt;
 406
 407    new_fpstt = (env->fpstt - 1) & 7;
 408    env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
 409    env->fpstt = new_fpstt;
 410    env->fptags[new_fpstt] = 0; /* validate stack entry */
 411}
 412
/* Store ST0 as an 80-bit value at guest address 'ptr' (see helper_fstt()). */
void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    helper_fstt(env, ST0, ptr, GETPC());
}
 417
/* Externally visible wrapper around fpush(). */
void helper_fpush(CPUX86State *env)
{
    fpush(env);
}
 422
/* Externally visible wrapper around fpop(). */
void helper_fpop(CPUX86State *env)
{
    fpop(env);
}
 427
 428void helper_fdecstp(CPUX86State *env)
 429{
 430    env->fpstt = (env->fpstt - 1) & 7;
 431    env->fpus &= ~0x4700;
 432}
 433
 434void helper_fincstp(CPUX86State *env)
 435{
 436    env->fpstt = (env->fpstt + 1) & 7;
 437    env->fpus &= ~0x4700;
 438}
 439
 440/* FPU move */
 441
 442void helper_ffree_STN(CPUX86State *env, int st_index)
 443{
 444    env->fptags[(env->fpstt + st_index) & 7] = 1;
 445}
 446
/* Copy FT0 into ST0. */
void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}
 451
/* Copy ST(st_index) into FT0. */
void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}
 456
/* Copy ST(st_index) into ST0. */
void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}
 461
/* Copy ST0 into ST(st_index). */
void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}
 466
 467void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
 468{
 469    floatx80 tmp;
 470
 471    tmp = ST(st_index);
 472    ST(st_index) = ST0;
 473    ST0 = tmp;
 474}
 475
 476/* FPU operations */
 477
/*
 * Status-word bits (within mask 0x4500) for each comparison outcome,
 * indexed by the FloatRelation result plus one.
 * NOTE(review): presumably ordered less / equal / greater / unordered --
 * confirm against the FloatRelation definition.
 */
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
 479
 480void helper_fcom_ST0_FT0(CPUX86State *env)
 481{
 482    uint8_t old_flags = save_exception_flags(env);
 483    FloatRelation ret;
 484
 485    ret = floatx80_compare(ST0, FT0, &env->fp_status);
 486    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 487    merge_exception_flags(env, old_flags);
 488}
 489
 490void helper_fucom_ST0_FT0(CPUX86State *env)
 491{
 492    uint8_t old_flags = save_exception_flags(env);
 493    FloatRelation ret;
 494
 495    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 496    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 497    merge_exception_flags(env, old_flags);
 498}
 499
/*
 * EFLAGS bits (CC_Z / CC_P / CC_C) for each comparison outcome, indexed
 * by the FloatRelation result plus one.
 * NOTE(review): presumably ordered less / equal / greater / unordered --
 * confirm against the FloatRelation definition.
 */
static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
 501
 502void helper_fcomi_ST0_FT0(CPUX86State *env)
 503{
 504    uint8_t old_flags = save_exception_flags(env);
 505    int eflags;
 506    FloatRelation ret;
 507
 508    ret = floatx80_compare(ST0, FT0, &env->fp_status);
 509    eflags = cpu_cc_compute_all(env, CC_OP);
 510    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 511    CC_SRC = eflags;
 512    merge_exception_flags(env, old_flags);
 513}
 514
 515void helper_fucomi_ST0_FT0(CPUX86State *env)
 516{
 517    uint8_t old_flags = save_exception_flags(env);
 518    int eflags;
 519    FloatRelation ret;
 520
 521    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 522    eflags = cpu_cc_compute_all(env, CC_OP);
 523    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 524    CC_SRC = eflags;
 525    merge_exception_flags(env, old_flags);
 526}
 527
 528void helper_fadd_ST0_FT0(CPUX86State *env)
 529{
 530    uint8_t old_flags = save_exception_flags(env);
 531    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
 532    merge_exception_flags(env, old_flags);
 533}
 534
 535void helper_fmul_ST0_FT0(CPUX86State *env)
 536{
 537    uint8_t old_flags = save_exception_flags(env);
 538    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
 539    merge_exception_flags(env, old_flags);
 540}
 541
 542void helper_fsub_ST0_FT0(CPUX86State *env)
 543{
 544    uint8_t old_flags = save_exception_flags(env);
 545    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
 546    merge_exception_flags(env, old_flags);
 547}
 548
 549void helper_fsubr_ST0_FT0(CPUX86State *env)
 550{
 551    uint8_t old_flags = save_exception_flags(env);
 552    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
 553    merge_exception_flags(env, old_flags);
 554}
 555
 556void helper_fdiv_ST0_FT0(CPUX86State *env)
 557{
 558    ST0 = helper_fdiv(env, ST0, FT0);
 559}
 560
 561void helper_fdivr_ST0_FT0(CPUX86State *env)
 562{
 563    ST0 = helper_fdiv(env, FT0, ST0);
 564}
 565
 566/* fp operations between STN and ST0 */
 567
 568void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
 569{
 570    uint8_t old_flags = save_exception_flags(env);
 571    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
 572    merge_exception_flags(env, old_flags);
 573}
 574
 575void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
 576{
 577    uint8_t old_flags = save_exception_flags(env);
 578    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
 579    merge_exception_flags(env, old_flags);
 580}
 581
 582void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
 583{
 584    uint8_t old_flags = save_exception_flags(env);
 585    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
 586    merge_exception_flags(env, old_flags);
 587}
 588
 589void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
 590{
 591    uint8_t old_flags = save_exception_flags(env);
 592    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
 593    merge_exception_flags(env, old_flags);
 594}
 595
 596void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
 597{
 598    floatx80 *p;
 599
 600    p = &ST(st_index);
 601    *p = helper_fdiv(env, *p, ST0);
 602}
 603
 604void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
 605{
 606    floatx80 *p;
 607
 608    p = &ST(st_index);
 609    *p = helper_fdiv(env, ST0, *p);
 610}
 611
 612/* misc FPU operations */
 613void helper_fchs_ST0(CPUX86State *env)
 614{
 615    ST0 = floatx80_chs(ST0);
 616}
 617
 618void helper_fabs_ST0(CPUX86State *env)
 619{
 620    ST0 = floatx80_abs(ST0);
 621}
 622
/*
 * Overwrite ST0 with floatx80_one.  The stack top is not moved here.
 */
void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}
 627
 628void helper_fldl2t_ST0(CPUX86State *env)
 629{
 630    switch (env->fpuc & FPU_RC_MASK) {
 631    case FPU_RC_UP:
 632        ST0 = floatx80_l2t_u;
 633        break;
 634    default:
 635        ST0 = floatx80_l2t;
 636        break;
 637    }
 638}
 639
 640void helper_fldl2e_ST0(CPUX86State *env)
 641{
 642    switch (env->fpuc & FPU_RC_MASK) {
 643    case FPU_RC_DOWN:
 644    case FPU_RC_CHOP:
 645        ST0 = floatx80_l2e_d;
 646        break;
 647    default:
 648        ST0 = floatx80_l2e;
 649        break;
 650    }
 651}
 652
 653void helper_fldpi_ST0(CPUX86State *env)
 654{
 655    switch (env->fpuc & FPU_RC_MASK) {
 656    case FPU_RC_DOWN:
 657    case FPU_RC_CHOP:
 658        ST0 = floatx80_pi_d;
 659        break;
 660    default:
 661        ST0 = floatx80_pi;
 662        break;
 663    }
 664}
 665
 666void helper_fldlg2_ST0(CPUX86State *env)
 667{
 668    switch (env->fpuc & FPU_RC_MASK) {
 669    case FPU_RC_DOWN:
 670    case FPU_RC_CHOP:
 671        ST0 = floatx80_lg2_d;
 672        break;
 673    default:
 674        ST0 = floatx80_lg2;
 675        break;
 676    }
 677}
 678
 679void helper_fldln2_ST0(CPUX86State *env)
 680{
 681    switch (env->fpuc & FPU_RC_MASK) {
 682    case FPU_RC_DOWN:
 683    case FPU_RC_CHOP:
 684        ST0 = floatx80_ln2_d;
 685        break;
 686    default:
 687        ST0 = floatx80_ln2;
 688        break;
 689    }
 690}
 691
/* Overwrite ST0 with floatx80_zero.  The stack top is not moved here. */
void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}
 696
/* Set FT0 to floatx80_zero. */
void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}
 701
 702uint32_t helper_fnstsw(CPUX86State *env)
 703{
 704    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 705}
 706
/* Return the FPU control word (env->fpuc). */
uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}
 711
 712void update_fp_status(CPUX86State *env)
 713{
 714    int rnd_type;
 715
 716    /* set rounding mode */
 717    switch (env->fpuc & FPU_RC_MASK) {
 718    default:
 719    case FPU_RC_NEAR:
 720        rnd_type = float_round_nearest_even;
 721        break;
 722    case FPU_RC_DOWN:
 723        rnd_type = float_round_down;
 724        break;
 725    case FPU_RC_UP:
 726        rnd_type = float_round_up;
 727        break;
 728    case FPU_RC_CHOP:
 729        rnd_type = float_round_to_zero;
 730        break;
 731    }
 732    set_float_rounding_mode(rnd_type, &env->fp_status);
 733    switch ((env->fpuc >> 8) & 3) {
 734    case 0:
 735        rnd_type = 32;
 736        break;
 737    case 2:
 738        rnd_type = 64;
 739        break;
 740    case 3:
 741    default:
 742        rnd_type = 80;
 743        break;
 744    }
 745    set_floatx80_rounding_precision(rnd_type, &env->fp_status);
 746}
 747
/* Load a new control word by delegating to cpu_set_fpuc(). */
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}
 752
 753void helper_fclex(CPUX86State *env)
 754{
 755    env->fpus &= 0x7f00;
 756}
 757
 758void helper_fwait(CPUX86State *env)
 759{
 760    if (env->fpus & FPUS_SE) {
 761        fpu_raise_exception(env, GETPC());
 762    }
 763}
 764
 765void helper_fninit(CPUX86State *env)
 766{
 767    env->fpus = 0;
 768    env->fpstt = 0;
 769    cpu_set_fpuc(env, 0x37f);
 770    env->fptags[0] = 1;
 771    env->fptags[1] = 1;
 772    env->fptags[2] = 1;
 773    env->fptags[3] = 1;
 774    env->fptags[4] = 1;
 775    env->fptags[5] = 1;
 776    env->fptags[6] = 1;
 777    env->fptags[7] = 1;
 778}
 779
 780/* BCD ops */
 781
 782void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
 783{
 784    floatx80 tmp;
 785    uint64_t val;
 786    unsigned int v;
 787    int i;
 788
 789    val = 0;
 790    for (i = 8; i >= 0; i--) {
 791        v = cpu_ldub_data_ra(env, ptr + i, GETPC());
 792        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
 793    }
 794    tmp = int64_to_floatx80(val, &env->fp_status);
 795    if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
 796        tmp = floatx80_chs(tmp);
 797    }
 798    fpush(env);
 799    ST0 = tmp;
 800}
 801
 802void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
 803{
 804    uint8_t old_flags = save_exception_flags(env);
 805    int v;
 806    target_ulong mem_ref, mem_end;
 807    int64_t val;
 808    CPU_LDoubleU temp;
 809
 810    temp.d = ST0;
 811
 812    val = floatx80_to_int64(ST0, &env->fp_status);
 813    mem_ref = ptr;
 814    if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
 815        set_float_exception_flags(float_flag_invalid, &env->fp_status);
 816        while (mem_ref < ptr + 7) {
 817            cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
 818        }
 819        cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
 820        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
 821        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
 822        merge_exception_flags(env, old_flags);
 823        return;
 824    }
 825    mem_end = mem_ref + 9;
 826    if (SIGND(temp)) {
 827        cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
 828        val = -val;
 829    } else {
 830        cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
 831    }
 832    while (mem_ref < mem_end) {
 833        if (val == 0) {
 834            break;
 835        }
 836        v = val % 100;
 837        val = val / 100;
 838        v = ((v / 10) << 4) | (v % 10);
 839        cpu_stb_data_ra(env, mem_ref++, v, GETPC());
 840    }
 841    while (mem_ref < mem_end) {
 842        cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
 843    }
 844    merge_exception_flags(env, old_flags);
 845}
 846
/*
 * 128-bit significand of log(2), split into two 64-bit halves.
 * ln2_sig_high equals the significand of floatx80_ln2_d.
 */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL
 850
/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].
 *
 * f2xm1_coeff_0 has the exponent of floatx80_ln2_d and a significand one
 * unit above it.
 * NOTE(review): f2xm1_coeff_0_low is presumably a low-order correction
 * term paired with f2xm1_coeff_0 -- confirm at the point of use.
 */
#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
 864
/* One entry of f2xm1_table: a reference point t with 2^t and 2^t - 1. */
struct f2xm1_data {
    /*
     * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
     * are very close to exact floatx80 values.
     */
    floatx80 t;
    /* The value of 2^t.  */
    floatx80 exp2;
    /* The value of 2^t - 1.  */
    floatx80 exp2m1;
};
 876
 877static const struct f2xm1_data f2xm1_table[65] = {
 878    { make_floatx80_init(0xbfff, 0x8000000000000000ULL),
 879      make_floatx80_init(0x3ffe, 0x8000000000000000ULL),
 880      make_floatx80_init(0xbffe, 0x8000000000000000ULL) },
 881    { make_floatx80_init(0xbffe, 0xf800000000002e7eULL),
 882      make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL),
 883      make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) },
 884    { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL),
 885      make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL),
 886      make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) },
 887    { make_floatx80_init(0xbffe, 0xe800000000006f10ULL),
 888      make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL),
 889      make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) },
 890    { make_floatx80_init(0xbffe, 0xe000000000008a45ULL),
 891      make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
 892      make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) },
 893    { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL),
 894      make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL),
 895      make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) },
 896    { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL),
 897      make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL),
 898      make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) },
 899    { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL),
 900      make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL),
 901      make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) },
 902    { make_floatx80_init(0xbffe, 0xc000000000006530ULL),
 903      make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL),
 904      make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) },
 905    { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL),
 906      make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL),
 907      make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) },
 908    { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL),
 909      make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL),
 910      make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) },
 911    { make_floatx80_init(0xbffe, 0xa800000000006f8aULL),
 912      make_floatx80_init(0x3ffe, 0xa27043030c49370aULL),
 913      make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) },
 914    { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL),
 915      make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL),
 916      make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) },
 917    { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL),
 918      make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
 919      make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) },
 920    { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL),
 921      make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL),
 922      make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) },
 923    { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL),
 924      make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL),
 925      make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) },
 926    { make_floatx80_init(0xbffe, 0x800000000000227dULL),
 927      make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL),
 928      make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) },
 929    { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL),
 930      make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
 931      make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) },
 932    { make_floatx80_init(0xbffd, 0xe00000000000df81ULL),
 933      make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL),
 934      make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) },
 935    { make_floatx80_init(0xbffd, 0xd00000000000bccfULL),
 936      make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL),
 937      make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) },
 938    { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL),
 939      make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL),
 940      make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) },
 941    { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL),
 942      make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL),
 943      make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) },
 944    { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL),
 945      make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL),
 946      make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) },
 947    { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL),
 948      make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL),
 949      make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) },
 950    { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL),
 951      make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL),
 952      make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
 953    { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL),
 954      make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL),
 955      make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) },
 956    { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL),
 957      make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL),
 958      make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) },
 959    { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL),
 960      make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL),
 961      make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) },
 962    { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL),
 963      make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL),
 964      make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) },
 965    { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL),
 966      make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL),
 967      make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) },
 968    { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL),
 969      make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL),
 970      make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) },
 971    { make_floatx80_init(0xbff9, 0xffffffffffff11feULL),
 972      make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL),
 973      make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) },
 974    { floatx80_zero_init,
 975      make_floatx80_init(0x3fff, 0x8000000000000000ULL),
 976      floatx80_zero_init },
 977    { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL),
 978      make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL),
 979      make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
 980    { make_floatx80_init(0x3ffb, 0x800000000000b500ULL),
 981      make_floatx80_init(0x3fff, 0x85aac367cc488345ULL),
 982      make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) },
 983    { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL),
 984      make_floatx80_init(0x3fff, 0x88980e8092da7cceULL),
 985      make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) },
 986    { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL),
 987      make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL),
 988      make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
 989    { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL),
 990      make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL),
 991      make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) },
 992    { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL),
 993      make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL),
 994      make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
 995    { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL),
 996      make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL),
 997      make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) },
 998    { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL),
 999      make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL),
1000      make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) },
1001    { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL),
1002      make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL),
1003      make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
1004    { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL),
1005      make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL),
1006      make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) },
1007    { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL),
1008      make_floatx80_init(0x3fff, 0xa27043030c49370aULL),
1009      make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) },
1010    { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL),
1011      make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL),
1012      make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) },
1013    { make_floatx80_init(0x3ffd, 0xd0000000000093beULL),
1014      make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL),
1015      make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) },
1016    { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL),
1017      make_floatx80_init(0x3fff, 0xad583eea42a17876ULL),
1018      make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) },
1019    { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL),
1020      make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL),
1021      make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) },
1022    { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL),
1023      make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL),
1024      make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) },
1025    { make_floatx80_init(0x3ffe, 0x8800000000006344ULL),
1026      make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL),
1027      make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) },
1028    { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL),
1029      make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL),
1030      make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) },
1031    { make_floatx80_init(0x3ffe, 0x9800000000009127ULL),
1032      make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL),
1033      make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) },
1034    { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL),
1035      make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL),
1036      make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) },
1037    { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL),
1038      make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL),
1039      make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) },
1040    { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL),
1041      make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL),
1042      make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) },
1043    { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL),
1044      make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL),
1045      make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) },
1046    { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL),
1047      make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL),
1048      make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) },
1049    { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL),
1050      make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL),
1051      make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
1052    { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL),
1053      make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
1054      make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
1055    { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
1056      make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
1057      make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
1058    { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
1059      make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
1060      make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
1061    { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
1062      make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
1063      make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
1064    { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
1065      make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
1066      make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
1067    { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
1068      make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
1069      make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
1070    { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
1071      make_floatx80_init(0x4000, 0x8000000000000000ULL),
1072      make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
1073};
1074
1075void helper_f2xm1(CPUX86State *env)
1076{
1077    uint8_t old_flags = save_exception_flags(env);
1078    uint64_t sig = extractFloatx80Frac(ST0);
1079    int32_t exp = extractFloatx80Exp(ST0);
1080    bool sign = extractFloatx80Sign(ST0);
1081
1082    if (floatx80_invalid_encoding(ST0)) {
1083        float_raise(float_flag_invalid, &env->fp_status);
1084        ST0 = floatx80_default_nan(&env->fp_status);
1085    } else if (floatx80_is_any_nan(ST0)) {
1086        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1087            float_raise(float_flag_invalid, &env->fp_status);
1088            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1089        }
1090    } else if (exp > 0x3fff ||
1091               (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
1092        /* Out of range for the instruction, treat as invalid.  */
1093        float_raise(float_flag_invalid, &env->fp_status);
1094        ST0 = floatx80_default_nan(&env->fp_status);
1095    } else if (exp == 0x3fff) {
1096        /* Argument 1 or -1, exact result 1 or -0.5.  */
1097        if (sign) {
1098            ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
1099        }
1100    } else if (exp < 0x3fb0) {
1101        if (!floatx80_is_zero(ST0)) {
1102            /*
1103             * Multiplying the argument by an extra-precision version
1104             * of log(2) is sufficiently precise.  Zero arguments are
1105             * returned unchanged.
1106             */
1107            uint64_t sig0, sig1, sig2;
1108            if (exp == 0) {
1109                normalizeFloatx80Subnormal(sig, &exp, &sig);
1110            }
1111            mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
1112                            &sig2);
1113            /* This result is inexact.  */
1114            sig1 |= 1;
1115            ST0 = normalizeRoundAndPackFloatx80(80, sign, exp, sig0, sig1,
1116                                                &env->fp_status);
1117        }
1118    } else {
1119        floatx80 tmp, y, accum;
1120        bool asign, bsign;
1121        int32_t n, aexp, bexp;
1122        uint64_t asig0, asig1, asig2, bsig0, bsig1;
1123        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1124        signed char save_prec = env->fp_status.floatx80_rounding_precision;
1125        env->fp_status.float_rounding_mode = float_round_nearest_even;
1126        env->fp_status.floatx80_rounding_precision = 80;
1127
1128        /* Find the nearest multiple of 1/32 to the argument.  */
1129        tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
1130        n = 32 + floatx80_to_int32(tmp, &env->fp_status);
1131        y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);
1132
1133        if (floatx80_is_zero(y)) {
1134            /*
1135             * Use the value of 2^t - 1 from the table, to avoid
1136             * needing to special-case zero as a result of
1137             * multiplication below.
1138             */
1139            ST0 = f2xm1_table[n].t;
1140            set_float_exception_flags(float_flag_inexact, &env->fp_status);
1141            env->fp_status.float_rounding_mode = save_mode;
1142        } else {
1143            /*
1144             * Compute the lower parts of a polynomial expansion for
1145             * (2^y - 1) / y.
1146             */
1147            accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
1148            accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
1149            accum = floatx80_mul(accum, y, &env->fp_status);
1150            accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
1151            accum = floatx80_mul(accum, y, &env->fp_status);
1152            accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
1153            accum = floatx80_mul(accum, y, &env->fp_status);
1154            accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
1155            accum = floatx80_mul(accum, y, &env->fp_status);
1156            accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
1157            accum = floatx80_mul(accum, y, &env->fp_status);
1158            accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
1159            accum = floatx80_mul(accum, y, &env->fp_status);
1160            accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);
1161
1162            /*
1163             * The full polynomial expansion is f2xm1_coeff_0 + accum
1164             * (where accum has much lower magnitude, and so, in
1165             * particular, carry out of the addition is not possible).
1166             * (This expansion is only accurate to about 70 bits, not
1167             * 128 bits.)
1168             */
1169            aexp = extractFloatx80Exp(f2xm1_coeff_0);
1170            asign = extractFloatx80Sign(f2xm1_coeff_0);
1171            shift128RightJamming(extractFloatx80Frac(accum), 0,
1172                                 aexp - extractFloatx80Exp(accum),
1173                                 &asig0, &asig1);
1174            bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
1175            bsig1 = 0;
1176            if (asign == extractFloatx80Sign(accum)) {
1177                add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1178            } else {
1179                sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1180            }
1181            /* And thus compute an approximation to 2^y - 1.  */
1182            mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
1183                            &asig0, &asig1, &asig2);
1184            aexp += extractFloatx80Exp(y) - 0x3ffe;
1185            asign ^= extractFloatx80Sign(y);
1186            if (n != 32) {
1187                /*
1188                 * Multiply this by the precomputed value of 2^t and
1189                 * add that of 2^t - 1.
1190                 */
1191                mul128By64To192(asig0, asig1,
1192                                extractFloatx80Frac(f2xm1_table[n].exp2),
1193                                &asig0, &asig1, &asig2);
1194                aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
1195                bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
1196                bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
1197                bsig1 = 0;
1198                if (bexp < aexp) {
1199                    shift128RightJamming(bsig0, bsig1, aexp - bexp,
1200                                         &bsig0, &bsig1);
1201                } else if (aexp < bexp) {
1202                    shift128RightJamming(asig0, asig1, bexp - aexp,
1203                                         &asig0, &asig1);
1204                    aexp = bexp;
1205                }
1206                /* The sign of 2^t - 1 is always that of the result.  */
1207                bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
1208                if (asign == bsign) {
1209                    /* Avoid possible carry out of the addition.  */
1210                    shift128RightJamming(asig0, asig1, 1,
1211                                         &asig0, &asig1);
1212                    shift128RightJamming(bsig0, bsig1, 1,
1213                                         &bsig0, &bsig1);
1214                    ++aexp;
1215                    add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
1216                } else {
1217                    sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1218                    asign = bsign;
1219                }
1220            }
1221            env->fp_status.float_rounding_mode = save_mode;
1222            /* This result is inexact.  */
1223            asig1 |= 1;
1224            ST0 = normalizeRoundAndPackFloatx80(80, asign, aexp, asig0, asig1,
1225                                                &env->fp_status);
1226        }
1227
1228        env->fp_status.floatx80_rounding_precision = save_prec;
1229    }
1230    merge_exception_flags(env, old_flags);
1231}
1232
1233void helper_fptan(CPUX86State *env)
1234{
1235    double fptemp = floatx80_to_double(env, ST0);
1236
1237    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1238        env->fpus |= 0x400;
1239    } else {
1240        fptemp = tan(fptemp);
1241        ST0 = double_to_floatx80(env, fptemp);
1242        fpush(env);
1243        ST0 = floatx80_one;
1244        env->fpus &= ~0x400; /* C2 <-- 0 */
1245        /* the above code is for |arg| < 2**52 only */
1246    }
1247}
1248
/*
 * 128-bit-precision values of pi/4, pi/2, 3*pi/4 and pi.  Each constant
 * is described by a biased exponent plus the high and low 64-bit halves
 * of its significand.  pi/4, pi/2 and pi share the same significand and
 * differ only in their exponent.
 */

/* pi/4 */
#define pi_4_exp        0x3ffe
#define pi_4_sig_high   0xc90fdaa22168c234ULL
#define pi_4_sig_low    0xc4c6628b80dc1cd1ULL

/* pi/2 */
#define pi_2_exp        0x3fff
#define pi_2_sig_high   0xc90fdaa22168c234ULL
#define pi_2_sig_low    0xc4c6628b80dc1cd1ULL

/* 3*pi/4 */
#define pi_34_exp       0x4000
#define pi_34_sig_high  0x96cbe3f9990e91a7ULL
#define pi_34_sig_low   0x9394c9e8a0a5159dULL

/* pi */
#define pi_exp          0x4000
#define pi_sig_high     0xc90fdaa22168c234ULL
#define pi_sig_low      0xc4c6628b80dc1cd1ULL
1262
/*
 * Coefficients of a minimax polynomial approximating atan(x) for x in
 * [-1/16, 1/16].  Only odd powers of x occur, so fpatan_coeff_k is the
 * coefficient of x^(2k+1).  Other approximations in this file carry a
 * separate low part for their leading coefficient; none is needed here
 * for a sufficiently accurate result, because the leading coefficient
 * of this approximation is very close to exactly 1.
 */
#define fpatan_coeff_0  make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1  make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2  make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3  make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4  make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5  make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6  make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)
1278
1279struct fpatan_data {
1280    /* High and low parts of atan(x).  */
1281    floatx80 atan_high, atan_low;
1282};
1283
1284static const struct fpatan_data fpatan_table[9] = {
1285    { floatx80_zero_init,
1286      floatx80_zero_init },
1287    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
1288      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
1289    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
1290      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
1291    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
1292      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
1293    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
1294      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
1295    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
1296      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
1297    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
1298      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
1299    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
1300      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
1301    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
1302      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
1303};
1304
1305void helper_fpatan(CPUX86State *env)
1306{
1307    uint8_t old_flags = save_exception_flags(env);
1308    uint64_t arg0_sig = extractFloatx80Frac(ST0);
1309    int32_t arg0_exp = extractFloatx80Exp(ST0);
1310    bool arg0_sign = extractFloatx80Sign(ST0);
1311    uint64_t arg1_sig = extractFloatx80Frac(ST1);
1312    int32_t arg1_exp = extractFloatx80Exp(ST1);
1313    bool arg1_sign = extractFloatx80Sign(ST1);
1314
1315    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1316        float_raise(float_flag_invalid, &env->fp_status);
1317        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
1318    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
1319        float_raise(float_flag_invalid, &env->fp_status);
1320        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
1321    } else if (floatx80_invalid_encoding(ST0) ||
1322               floatx80_invalid_encoding(ST1)) {
1323        float_raise(float_flag_invalid, &env->fp_status);
1324        ST1 = floatx80_default_nan(&env->fp_status);
1325    } else if (floatx80_is_any_nan(ST0)) {
1326        ST1 = ST0;
1327    } else if (floatx80_is_any_nan(ST1)) {
1328        /* Pass this NaN through.  */
1329    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
1330        /* Pass this zero through.  */
1331    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
1332                 arg0_exp - arg1_exp >= 80) &&
1333               !arg0_sign) {
1334        /*
1335         * Dividing ST1 by ST0 gives the correct result up to
1336         * rounding, and avoids spurious underflow exceptions that
1337         * might result from passing some small values through the
1338         * polynomial approximation, but if a finite nonzero result of
1339         * division is exact, the result of fpatan is still inexact
1340         * (and underflowing where appropriate).
1341         */
1342        signed char save_prec = env->fp_status.floatx80_rounding_precision;
1343        env->fp_status.floatx80_rounding_precision = 80;
1344        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
1345        env->fp_status.floatx80_rounding_precision = save_prec;
1346        if (!floatx80_is_zero(ST1) &&
1347            !(get_float_exception_flags(&env->fp_status) &
1348              float_flag_inexact)) {
1349            /*
1350             * The mathematical result is very slightly closer to zero
1351             * than this exact result.  Round a value with the
1352             * significand adjusted accordingly to get the correct
1353             * exceptions, and possibly an adjusted result depending
1354             * on the rounding mode.
1355             */
1356            uint64_t sig = extractFloatx80Frac(ST1);
1357            int32_t exp = extractFloatx80Exp(ST1);
1358            bool sign = extractFloatx80Sign(ST1);
1359            if (exp == 0) {
1360                normalizeFloatx80Subnormal(sig, &exp, &sig);
1361            }
1362            ST1 = normalizeRoundAndPackFloatx80(80, sign, exp, sig - 1,
1363                                                -1, &env->fp_status);
1364        }
1365    } else {
1366        /* The result is inexact.  */
1367        bool rsign = arg1_sign;
1368        int32_t rexp;
1369        uint64_t rsig0, rsig1;
1370        if (floatx80_is_zero(ST1)) {
1371            /*
1372             * ST0 is negative.  The result is pi with the sign of
1373             * ST1.
1374             */
1375            rexp = pi_exp;
1376            rsig0 = pi_sig_high;
1377            rsig1 = pi_sig_low;
1378        } else if (floatx80_is_infinity(ST1)) {
1379            if (floatx80_is_infinity(ST0)) {
1380                if (arg0_sign) {
1381                    rexp = pi_34_exp;
1382                    rsig0 = pi_34_sig_high;
1383                    rsig1 = pi_34_sig_low;
1384                } else {
1385                    rexp = pi_4_exp;
1386                    rsig0 = pi_4_sig_high;
1387                    rsig1 = pi_4_sig_low;
1388                }
1389            } else {
1390                rexp = pi_2_exp;
1391                rsig0 = pi_2_sig_high;
1392                rsig1 = pi_2_sig_low;
1393            }
1394        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
1395            rexp = pi_2_exp;
1396            rsig0 = pi_2_sig_high;
1397            rsig1 = pi_2_sig_low;
1398        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
1399            /* ST0 is negative.  */
1400            rexp = pi_exp;
1401            rsig0 = pi_sig_high;
1402            rsig1 = pi_sig_low;
1403        } else {
1404            /*
1405             * ST0 and ST1 are finite, nonzero and with exponents not
1406             * too far apart.
1407             */
1408            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
1409            int32_t azexp, axexp;
1410            bool adj_sub, ysign, zsign;
1411            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
1412            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
1413            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
1414            uint64_t azsig0, azsig1;
1415            uint64_t azsig2, azsig3, axsig0, axsig1;
1416            floatx80 x8;
1417            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1418            signed char save_prec = env->fp_status.floatx80_rounding_precision;
1419            env->fp_status.float_rounding_mode = float_round_nearest_even;
1420            env->fp_status.floatx80_rounding_precision = 80;
1421
1422            if (arg0_exp == 0) {
1423                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
1424            }
1425            if (arg1_exp == 0) {
1426                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
1427            }
1428            if (arg0_exp > arg1_exp ||
1429                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
1430                /* Work with abs(ST1) / abs(ST0).  */
1431                num_exp = arg1_exp;
1432                num_sig = arg1_sig;
1433                den_exp = arg0_exp;
1434                den_sig = arg0_sig;
1435                if (arg0_sign) {
1436                    /* The result is subtracted from pi.  */
1437                    adj_exp = pi_exp;
1438                    adj_sig0 = pi_sig_high;
1439                    adj_sig1 = pi_sig_low;
1440                    adj_sub = true;
1441                } else {
1442                    /* The result is used as-is.  */
1443                    adj_exp = 0;
1444                    adj_sig0 = 0;
1445                    adj_sig1 = 0;
1446                    adj_sub = false;
1447                }
1448            } else {
1449                /* Work with abs(ST0) / abs(ST1).  */
1450                num_exp = arg0_exp;
1451                num_sig = arg0_sig;
1452                den_exp = arg1_exp;
1453                den_sig = arg1_sig;
1454                /* The result is added to or subtracted from pi/2.  */
1455                adj_exp = pi_2_exp;
1456                adj_sig0 = pi_2_sig_high;
1457                adj_sig1 = pi_2_sig_low;
1458                adj_sub = !arg0_sign;
1459            }
1460
1461            /*
1462             * Compute x = num/den, where 0 < x <= 1 and x is not too
1463             * small.
1464             */
1465            xexp = num_exp - den_exp + 0x3ffe;
1466            remsig0 = num_sig;
1467            remsig1 = 0;
1468            if (den_sig <= remsig0) {
1469                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
1470                ++xexp;
1471            }
1472            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
1473            mul64To128(den_sig, xsig0, &msig0, &msig1);
1474            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
1475            while ((int64_t) remsig0 < 0) {
1476                --xsig0;
1477                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
1478            }
1479            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
1480            /*
1481             * No need to correct any estimation error in xsig1; even
1482             * with such error, it is accurate enough.
1483             */
1484
1485            /*
1486             * Split x as x = t + y, where t = n/8 is the nearest
1487             * multiple of 1/8 to x.
1488             */
1489            x8 = normalizeRoundAndPackFloatx80(80, false, xexp + 3, xsig0,
1490                                               xsig1, &env->fp_status);
1491            n = floatx80_to_int32(x8, &env->fp_status);
1492            if (n == 0) {
1493                ysign = false;
1494                yexp = xexp;
1495                ysig0 = xsig0;
1496                ysig1 = xsig1;
1497                texp = 0;
1498                tsig = 0;
1499            } else {
1500                int shift = clz32(n) + 32;
1501                texp = 0x403b - shift;
1502                tsig = n;
1503                tsig <<= shift;
1504                if (texp == xexp) {
1505                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
1506                    if ((int64_t) ysig0 >= 0) {
1507                        ysign = false;
1508                        if (ysig0 == 0) {
1509                            if (ysig1 == 0) {
1510                                yexp = 0;
1511                            } else {
1512                                shift = clz64(ysig1) + 64;
1513                                yexp = xexp - shift;
1514                                shift128Left(ysig0, ysig1, shift,
1515                                             &ysig0, &ysig1);
1516                            }
1517                        } else {
1518                            shift = clz64(ysig0);
1519                            yexp = xexp - shift;
1520                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1521                        }
1522                    } else {
1523                        ysign = true;
1524                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
1525                        if (ysig0 == 0) {
1526                            shift = clz64(ysig1) + 64;
1527                        } else {
1528                            shift = clz64(ysig0);
1529                        }
1530                        yexp = xexp - shift;
1531                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1532                    }
1533                } else {
1534                    /*
1535                     * t's exponent must be greater than x's because t
1536                     * is positive and the nearest multiple of 1/8 to
1537                     * x, and if x has a greater exponent, the power
1538                     * of 2 with that exponent is also a multiple of
1539                     * 1/8.
1540                     */
1541                    uint64_t usig0, usig1;
1542                    shift128RightJamming(xsig0, xsig1, texp - xexp,
1543                                         &usig0, &usig1);
1544                    ysign = true;
1545                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
1546                    if (ysig0 == 0) {
1547                        shift = clz64(ysig1) + 64;
1548                    } else {
1549                        shift = clz64(ysig0);
1550                    }
1551                    yexp = texp - shift;
1552                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1553                }
1554            }
1555
1556            /*
1557             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
1558             * arctan(z).
1559             */
1560            zsign = ysign;
1561            if (texp == 0 || yexp == 0) {
1562                zexp = yexp;
1563                zsig0 = ysig0;
1564                zsig1 = ysig1;
1565            } else {
1566                /*
1567                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
1568                 */
1569                int32_t dexp = texp + xexp - 0x3ffe;
1570                uint64_t dsig0, dsig1, dsig2;
1571                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
1572                /*
1573                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
1574                 * bit).  Add 1 to produce the denominator 1+tx.
1575                 */
1576                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
1577                                     &dsig0, &dsig1);
1578                dsig0 |= 0x8000000000000000ULL;
1579                zexp = yexp - 1;
1580                remsig0 = ysig0;
1581                remsig1 = ysig1;
1582                remsig2 = 0;
1583                if (dsig0 <= remsig0) {
1584                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
1585                    ++zexp;
1586                }
1587                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
1588                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
1589                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
1590                       &remsig0, &remsig1, &remsig2);
1591                while ((int64_t) remsig0 < 0) {
1592                    --zsig0;
1593                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
1594                           &remsig0, &remsig1, &remsig2);
1595                }
1596                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
1597                /* No need to correct any estimation error in zsig1.  */
1598            }
1599
1600            if (zexp == 0) {
1601                azexp = 0;
1602                azsig0 = 0;
1603                azsig1 = 0;
1604            } else {
1605                floatx80 z2, accum;
1606                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
1607                /* Compute z^2.  */
1608                mul128To256(zsig0, zsig1, zsig0, zsig1,
1609                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
1610                z2 = normalizeRoundAndPackFloatx80(80, false,
1611                                                   zexp + zexp - 0x3ffe,
1612                                                   z2sig0, z2sig1,
1613                                                   &env->fp_status);
1614
1615                /* Compute the lower parts of the polynomial expansion.  */
1616                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
1617                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
1618                accum = floatx80_mul(accum, z2, &env->fp_status);
1619                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
1620                accum = floatx80_mul(accum, z2, &env->fp_status);
1621                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
1622                accum = floatx80_mul(accum, z2, &env->fp_status);
1623                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
1624                accum = floatx80_mul(accum, z2, &env->fp_status);
1625                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
1626                accum = floatx80_mul(accum, z2, &env->fp_status);
1627
1628                /*
1629                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
1630                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
1631                 */
1632                aexp = extractFloatx80Exp(fpatan_coeff_0);
1633                shift128RightJamming(extractFloatx80Frac(accum), 0,
1634                                     aexp - extractFloatx80Exp(accum),
1635                                     &asig0, &asig1);
1636                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
1637                       &asig0, &asig1);
1638                /* Multiply by z to compute arctan(z).  */
1639                azexp = aexp + zexp - 0x3ffe;
1640                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
1641                            &azsig2, &azsig3);
1642            }
1643
1644            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign).  */
1645            if (texp == 0) {
1646                /* z is positive.  */
1647                axexp = azexp;
1648                axsig0 = azsig0;
1649                axsig1 = azsig1;
1650            } else {
1651                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
1652                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
1653                uint64_t low_sig0 =
1654                    extractFloatx80Frac(fpatan_table[n].atan_low);
1655                uint64_t low_sig1 = 0;
1656                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
1657                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
1658                axsig1 = 0;
1659                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
1660                                     &low_sig0, &low_sig1);
1661                if (low_sign) {
1662                    sub128(axsig0, axsig1, low_sig0, low_sig1,
1663                           &axsig0, &axsig1);
1664                } else {
1665                    add128(axsig0, axsig1, low_sig0, low_sig1,
1666                           &axsig0, &axsig1);
1667                }
1668                if (azexp >= axexp) {
1669                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
1670                                         &axsig0, &axsig1);
1671                    axexp = azexp + 1;
1672                    shift128RightJamming(azsig0, azsig1, 1,
1673                                         &azsig0, &azsig1);
1674                } else {
1675                    shift128RightJamming(axsig0, axsig1, 1,
1676                                         &axsig0, &axsig1);
1677                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
1678                                         &azsig0, &azsig1);
1679                    ++axexp;
1680                }
1681                if (zsign) {
1682                    sub128(axsig0, axsig1, azsig0, azsig1,
1683                           &axsig0, &axsig1);
1684                } else {
1685                    add128(axsig0, axsig1, azsig0, azsig1,
1686                           &axsig0, &axsig1);
1687                }
1688            }
1689
1690            if (adj_exp == 0) {
1691                rexp = axexp;
1692                rsig0 = axsig0;
1693                rsig1 = axsig1;
1694            } else {
1695                /*
1696                 * Add or subtract arctan(x) (exponent axexp,
1697                 * significand axsig0 and axsig1, positive, not
1698                 * necessarily normalized) to the number given by
1699                 * adj_exp, adj_sig0 and adj_sig1, according to
1700                 * adj_sub.
1701                 */
1702                if (adj_exp >= axexp) {
1703                    shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
1704                                         &axsig0, &axsig1);
1705                    rexp = adj_exp + 1;
1706                    shift128RightJamming(adj_sig0, adj_sig1, 1,
1707                                         &adj_sig0, &adj_sig1);
1708                } else {
1709                    shift128RightJamming(axsig0, axsig1, 1,
1710                                         &axsig0, &axsig1);
1711                    shift128RightJamming(adj_sig0, adj_sig1,
1712                                         axexp - adj_exp + 1,
1713                                         &adj_sig0, &adj_sig1);
1714                    rexp = axexp + 1;
1715                }
1716                if (adj_sub) {
1717                    sub128(adj_sig0, adj_sig1, axsig0, axsig1,
1718                           &rsig0, &rsig1);
1719                } else {
1720                    add128(adj_sig0, adj_sig1, axsig0, axsig1,
1721                           &rsig0, &rsig1);
1722                }
1723            }
1724
1725            env->fp_status.float_rounding_mode = save_mode;
1726            env->fp_status.floatx80_rounding_precision = save_prec;
1727        }
1728        /* This result is inexact.  */
1729        rsig1 |= 1;
1730        ST1 = normalizeRoundAndPackFloatx80(80, rsign, rexp,
1731                                            rsig0, rsig1, &env->fp_status);
1732    }
1733
1734    fpop(env);
1735    merge_exception_flags(env, old_flags);
1736}
1737
1738void helper_fxtract(CPUX86State *env)
1739{
1740    uint8_t old_flags = save_exception_flags(env);
1741    CPU_LDoubleU temp;
1742
1743    temp.d = ST0;
1744
1745    if (floatx80_is_zero(ST0)) {
1746        /* Easy way to generate -inf and raising division by 0 exception */
1747        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
1748                           &env->fp_status);
1749        fpush(env);
1750        ST0 = temp.d;
1751    } else if (floatx80_invalid_encoding(ST0)) {
1752        float_raise(float_flag_invalid, &env->fp_status);
1753        ST0 = floatx80_default_nan(&env->fp_status);
1754        fpush(env);
1755        ST0 = ST1;
1756    } else if (floatx80_is_any_nan(ST0)) {
1757        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1758            float_raise(float_flag_invalid, &env->fp_status);
1759            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1760        }
1761        fpush(env);
1762        ST0 = ST1;
1763    } else if (floatx80_is_infinity(ST0)) {
1764        fpush(env);
1765        ST0 = ST1;
1766        ST1 = floatx80_infinity;
1767    } else {
1768        int expdif;
1769
1770        if (EXPD(temp) == 0) {
1771            int shift = clz64(temp.l.lower);
1772            temp.l.lower <<= shift;
1773            expdif = 1 - EXPBIAS - shift;
1774            float_raise(float_flag_input_denormal, &env->fp_status);
1775        } else {
1776            expdif = EXPD(temp) - EXPBIAS;
1777        }
1778        /* DP exponent bias */
1779        ST0 = int32_to_floatx80(expdif, &env->fp_status);
1780        fpush(env);
1781        BIASEXPONENT(temp);
1782        ST0 = temp.d;
1783    }
1784    merge_exception_flags(env, old_flags);
1785}
1786
1787static void helper_fprem_common(CPUX86State *env, bool mod)
1788{
1789    uint8_t old_flags = save_exception_flags(env);
1790    uint64_t quotient;
1791    CPU_LDoubleU temp0, temp1;
1792    int exp0, exp1, expdiff;
1793
1794    temp0.d = ST0;
1795    temp1.d = ST1;
1796    exp0 = EXPD(temp0);
1797    exp1 = EXPD(temp1);
1798
1799    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1800    if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
1801        exp0 == 0x7fff || exp1 == 0x7fff ||
1802        floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
1803        ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
1804    } else {
1805        if (exp0 == 0) {
1806            exp0 = 1 - clz64(temp0.l.lower);
1807        }
1808        if (exp1 == 0) {
1809            exp1 = 1 - clz64(temp1.l.lower);
1810        }
1811        expdiff = exp0 - exp1;
1812        if (expdiff < 64) {
1813            ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
1814            env->fpus |= (quotient & 0x4) << (8 - 2);  /* (C0) <-- q2 */
1815            env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
1816            env->fpus |= (quotient & 0x1) << (9 - 0);  /* (C1) <-- q0 */
1817        } else {
1818            /*
1819             * Partial remainder.  This choice of how many bits to
1820             * process at once is specified in AMD instruction set
1821             * manuals, and empirically is followed by Intel
1822             * processors as well; it ensures that the final remainder
1823             * operation in a loop does produce the correct low three
1824             * bits of the quotient.  AMD manuals specify that the
1825             * flags other than C2 are cleared, and empirically Intel
1826             * processors clear them as well.
1827             */
1828            int n = 32 + (expdiff % 32);
1829            temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
1830            ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
1831            env->fpus |= 0x400;  /* C2 <-- 1 */
1832        }
1833    }
1834    merge_exception_flags(env, old_flags);
1835}
1836
1837void helper_fprem1(CPUX86State *env)
1838{
1839    helper_fprem_common(env, false);
1840}
1841
1842void helper_fprem(CPUX86State *env)
1843{
1844    helper_fprem_common(env, true);
1845}
1846
/*
 * log2(e) as a 128-bit significand, split into its high and low
 * 64-bit halves.
 */
#define log2_e_sig_high     0xb8aa3b295c17f0bbULL
#define log2_e_sig_low      0xbe87fed0691d3e89ULL

/*
 * Coefficients of a polynomial approximating log2((1+x)/(1-x)) in odd
 * powers of x only.  The approximation is meant for x in
 * [2*sqrt(2)-3, 3-2*sqrt(2)], i.e. for logarithms of numbers in
 * [sqrt(2)/2, sqrt(2)].
 *
 * fyl2x_coeff_0_low is a separate low-order term that is added to the
 * sum of the higher terms before fyl2x_coeff_0 itself is applied.
 */
#define fyl2x_coeff_0       make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low   make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1       make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2       make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3       make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4       make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5       make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6       make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7       make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8       make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9       make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
1868
1869/*
1870 * Compute an approximation of log2(1+arg), where 1+arg is in the
1871 * interval [sqrt(2)/2, sqrt(2)].  It is assumed that when this
1872 * function is called, rounding precision is set to 80 and the
1873 * round-to-nearest mode is in effect.  arg must not be exactly zero,
1874 * and must not be so close to zero that underflow might occur.
1875 */
1876static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
1877                                uint64_t *sig0, uint64_t *sig1)
1878{
1879    uint64_t arg0_sig = extractFloatx80Frac(arg);
1880    int32_t arg0_exp = extractFloatx80Exp(arg);
1881    bool arg0_sign = extractFloatx80Sign(arg);
1882    bool asign;
1883    int32_t dexp, texp, aexp;
1884    uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
1885    uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
1886    uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
1887    floatx80 t2, accum;
1888
1889    /*
1890     * Compute an approximation of arg/(2+arg), with extra precision,
1891     * as the argument to a polynomial approximation.  The extra
1892     * precision is only needed for the first term of the
1893     * approximation, with subsequent terms being significantly
1894     * smaller; the approximation only uses odd exponents, and the
1895     * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
1896     */
1897    if (arg0_sign) {
1898        dexp = 0x3fff;
1899        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1900        sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
1901    } else {
1902        dexp = 0x4000;
1903        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1904        dsig0 |= 0x8000000000000000ULL;
1905    }
1906    texp = arg0_exp - dexp + 0x3ffe;
1907    rsig0 = arg0_sig;
1908    rsig1 = 0;
1909    rsig2 = 0;
1910    if (dsig0 <= rsig0) {
1911        shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
1912        ++texp;
1913    }
1914    tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
1915    mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
1916    sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
1917           &rsig0, &rsig1, &rsig2);
1918    while ((int64_t) rsig0 < 0) {
1919        --tsig0;
1920        add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
1921               &rsig0, &rsig1, &rsig2);
1922    }
1923    tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
1924    /*
1925     * No need to correct any estimation error in tsig1; even with
1926     * such error, it is accurate enough.  Now compute the square of
1927     * that approximation.
1928     */
1929    mul128To256(tsig0, tsig1, tsig0, tsig1,
1930                &t2sig0, &t2sig1, &t2sig2, &t2sig3);
1931    t2 = normalizeRoundAndPackFloatx80(80, false, texp + texp - 0x3ffe,
1932                                       t2sig0, t2sig1, &env->fp_status);
1933
1934    /* Compute the lower parts of the polynomial expansion.  */
1935    accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
1936    accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
1937    accum = floatx80_mul(accum, t2, &env->fp_status);
1938    accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
1939    accum = floatx80_mul(accum, t2, &env->fp_status);
1940    accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
1941    accum = floatx80_mul(accum, t2, &env->fp_status);
1942    accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
1943    accum = floatx80_mul(accum, t2, &env->fp_status);
1944    accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
1945    accum = floatx80_mul(accum, t2, &env->fp_status);
1946    accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
1947    accum = floatx80_mul(accum, t2, &env->fp_status);
1948    accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
1949    accum = floatx80_mul(accum, t2, &env->fp_status);
1950    accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
1951    accum = floatx80_mul(accum, t2, &env->fp_status);
1952    accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);
1953
1954    /*
1955     * The full polynomial expansion is fyl2x_coeff_0 + accum (where
1956     * accum has much lower magnitude, and so, in particular, carry
1957     * out of the addition is not possible), multiplied by t.  (This
1958     * expansion is only accurate to about 70 bits, not 128 bits.)
1959     */
1960    aexp = extractFloatx80Exp(fyl2x_coeff_0);
1961    asign = extractFloatx80Sign(fyl2x_coeff_0);
1962    shift128RightJamming(extractFloatx80Frac(accum), 0,
1963                         aexp - extractFloatx80Exp(accum),
1964                         &asig0, &asig1);
1965    bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
1966    bsig1 = 0;
1967    if (asign == extractFloatx80Sign(accum)) {
1968        add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1969    } else {
1970        sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1971    }
1972    /* Multiply by t to compute the required result.  */
1973    mul128To256(asig0, asig1, tsig0, tsig1,
1974                &asig0, &asig1, &asig2, &asig3);
1975    aexp += texp - 0x3ffe;
1976    *exp = aexp;
1977    *sig0 = asig0;
1978    *sig1 = asig1;
1979}
1980
1981void helper_fyl2xp1(CPUX86State *env)
1982{
1983    uint8_t old_flags = save_exception_flags(env);
1984    uint64_t arg0_sig = extractFloatx80Frac(ST0);
1985    int32_t arg0_exp = extractFloatx80Exp(ST0);
1986    bool arg0_sign = extractFloatx80Sign(ST0);
1987    uint64_t arg1_sig = extractFloatx80Frac(ST1);
1988    int32_t arg1_exp = extractFloatx80Exp(ST1);
1989    bool arg1_sign = extractFloatx80Sign(ST1);
1990
1991    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1992        float_raise(float_flag_invalid, &env->fp_status);
1993        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
1994    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
1995        float_raise(float_flag_invalid, &env->fp_status);
1996        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
1997    } else if (floatx80_invalid_encoding(ST0) ||
1998               floatx80_invalid_encoding(ST1)) {
1999        float_raise(float_flag_invalid, &env->fp_status);
2000        ST1 = floatx80_default_nan(&env->fp_status);
2001    } else if (floatx80_is_any_nan(ST0)) {
2002        ST1 = ST0;
2003    } else if (floatx80_is_any_nan(ST1)) {
2004        /* Pass this NaN through.  */
2005    } else if (arg0_exp > 0x3ffd ||
2006               (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
2007                                                  0x95f619980c4336f7ULL :
2008                                                  0xd413cccfe7799211ULL))) {
2009        /*
2010         * Out of range for the instruction (ST0 must have absolute
2011         * value less than 1 - sqrt(2)/2 = 0.292..., according to
2012         * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
2013         * to sqrt(2) - 1, which we allow here), treat as invalid.
2014         */
2015        float_raise(float_flag_invalid, &env->fp_status);
2016        ST1 = floatx80_default_nan(&env->fp_status);
2017    } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
2018               arg1_exp == 0x7fff) {
2019        /*
2020         * One argument is zero, or multiplying by infinity; correct
2021         * result is exact and can be obtained by multiplying the
2022         * arguments.
2023         */
2024        ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
2025    } else if (arg0_exp < 0x3fb0) {
2026        /*
2027         * Multiplying both arguments and an extra-precision version
2028         * of log2(e) is sufficiently precise.
2029         */
2030        uint64_t sig0, sig1, sig2;
2031        int32_t exp;
2032        if (arg0_exp == 0) {
2033            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
2034        }
2035        if (arg1_exp == 0) {
2036            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2037        }
2038        mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
2039                        &sig0, &sig1, &sig2);
2040        exp = arg0_exp + 1;
2041        mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
2042        exp += arg1_exp - 0x3ffe;
2043        /* This result is inexact.  */
2044        sig1 |= 1;
2045        ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, exp,
2046                                            sig0, sig1, &env->fp_status);
2047    } else {
2048        int32_t aexp;
2049        uint64_t asig0, asig1, asig2;
2050        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
2051        signed char save_prec = env->fp_status.floatx80_rounding_precision;
2052        env->fp_status.float_rounding_mode = float_round_nearest_even;
2053        env->fp_status.floatx80_rounding_precision = 80;
2054
2055        helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
2056        /*
2057         * Multiply by the second argument to compute the required
2058         * result.
2059         */
2060        if (arg1_exp == 0) {
2061            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2062        }
2063        mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
2064        aexp += arg1_exp - 0x3ffe;
2065        /* This result is inexact.  */
2066        asig1 |= 1;
2067        env->fp_status.float_rounding_mode = save_mode;
2068        ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, aexp,
2069                                            asig0, asig1, &env->fp_status);
2070        env->fp_status.floatx80_rounding_precision = save_prec;
2071    }
2072    fpop(env);
2073    merge_exception_flags(env, old_flags);
2074}
2075
2076void helper_fyl2x(CPUX86State *env)
2077{
2078    uint8_t old_flags = save_exception_flags(env);
2079    uint64_t arg0_sig = extractFloatx80Frac(ST0);
2080    int32_t arg0_exp = extractFloatx80Exp(ST0);
2081    bool arg0_sign = extractFloatx80Sign(ST0);
2082    uint64_t arg1_sig = extractFloatx80Frac(ST1);
2083    int32_t arg1_exp = extractFloatx80Exp(ST1);
2084    bool arg1_sign = extractFloatx80Sign(ST1);
2085
2086    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2087        float_raise(float_flag_invalid, &env->fp_status);
2088        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
2089    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
2090        float_raise(float_flag_invalid, &env->fp_status);
2091        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
2092    } else if (floatx80_invalid_encoding(ST0) ||
2093               floatx80_invalid_encoding(ST1)) {
2094        float_raise(float_flag_invalid, &env->fp_status);
2095        ST1 = floatx80_default_nan(&env->fp_status);
2096    } else if (floatx80_is_any_nan(ST0)) {
2097        ST1 = ST0;
2098    } else if (floatx80_is_any_nan(ST1)) {
2099        /* Pass this NaN through.  */
2100    } else if (arg0_sign && !floatx80_is_zero(ST0)) {
2101        float_raise(float_flag_invalid, &env->fp_status);
2102        ST1 = floatx80_default_nan(&env->fp_status);
2103    } else if (floatx80_is_infinity(ST1)) {
2104        FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
2105                                             &env->fp_status);
2106        switch (cmp) {
2107        case float_relation_less:
2108            ST1 = floatx80_chs(ST1);
2109            break;
2110        case float_relation_greater:
2111            /* Result is infinity of the same sign as ST1.  */
2112            break;
2113        default:
2114            float_raise(float_flag_invalid, &env->fp_status);
2115            ST1 = floatx80_default_nan(&env->fp_status);
2116            break;
2117        }
2118    } else if (floatx80_is_infinity(ST0)) {
2119        if (floatx80_is_zero(ST1)) {
2120            float_raise(float_flag_invalid, &env->fp_status);
2121            ST1 = floatx80_default_nan(&env->fp_status);
2122        } else if (arg1_sign) {
2123            ST1 = floatx80_chs(ST0);
2124        } else {
2125            ST1 = ST0;
2126        }
2127    } else if (floatx80_is_zero(ST0)) {
2128        if (floatx80_is_zero(ST1)) {
2129            float_raise(float_flag_invalid, &env->fp_status);
2130            ST1 = floatx80_default_nan(&env->fp_status);
2131        } else {
2132            /* Result is infinity with opposite sign to ST1.  */
2133            float_raise(float_flag_divbyzero, &env->fp_status);
2134            ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
2135                                0x8000000000000000ULL);
2136        }
2137    } else if (floatx80_is_zero(ST1)) {
2138        if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
2139            ST1 = floatx80_chs(ST1);
2140        }
2141        /* Otherwise, ST1 is already the correct result.  */
2142    } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
2143        if (arg1_sign) {
2144            ST1 = floatx80_chs(floatx80_zero);
2145        } else {
2146            ST1 = floatx80_zero;
2147        }
2148    } else {
2149        int32_t int_exp;
2150        floatx80 arg0_m1;
2151        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
2152        signed char save_prec = env->fp_status.floatx80_rounding_precision;
2153        env->fp_status.float_rounding_mode = float_round_nearest_even;
2154        env->fp_status.floatx80_rounding_precision = 80;
2155
2156        if (arg0_exp == 0) {
2157            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
2158        }
2159        if (arg1_exp == 0) {
2160            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2161        }
2162        int_exp = arg0_exp - 0x3fff;
2163        if (arg0_sig > 0xb504f333f9de6484ULL) {
2164            ++int_exp;
2165        }
2166        arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
2167                                               &env->fp_status),
2168                               floatx80_one, &env->fp_status);
2169        if (floatx80_is_zero(arg0_m1)) {
2170            /* Exact power of 2; multiply by ST1.  */
2171            env->fp_status.float_rounding_mode = save_mode;
2172            ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
2173                               ST1, &env->fp_status);
2174        } else {
2175            bool asign = extractFloatx80Sign(arg0_m1);
2176            int32_t aexp;
2177            uint64_t asig0, asig1, asig2;
2178            helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
2179            if (int_exp != 0) {
2180                bool isign = (int_exp < 0);
2181                int32_t iexp;
2182                uint64_t isig;
2183                int shift;
2184                int_exp = isign ? -int_exp : int_exp;
2185                shift = clz32(int_exp) + 32;
2186                isig = int_exp;
2187                isig <<= shift;
2188                iexp = 0x403e - shift;
2189                shift128RightJamming(asig0, asig1, iexp - aexp,
2190                                     &asig0, &asig1);
2191                if (asign == isign) {
2192                    add128(isig, 0, asig0, asig1, &asig0, &asig1);
2193                } else {
2194                    sub128(isig, 0, asig0, asig1, &asig0, &asig1);
2195                }
2196                aexp = iexp;
2197                asign = isign;
2198            }
2199            /*
2200             * Multiply by the second argument to compute the required
2201             * result.
2202             */
2203            if (arg1_exp == 0) {
2204                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2205            }
2206            mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
2207            aexp += arg1_exp - 0x3ffe;
2208            /* This result is inexact.  */
2209            asig1 |= 1;
2210            env->fp_status.float_rounding_mode = save_mode;
2211            ST1 = normalizeRoundAndPackFloatx80(80, asign ^ arg1_sign, aexp,
2212                                                asig0, asig1, &env->fp_status);
2213        }
2214
2215        env->fp_status.floatx80_rounding_precision = save_prec;
2216    }
2217    fpop(env);
2218    merge_exception_flags(env, old_flags);
2219}
2220
2221void helper_fsqrt(CPUX86State *env)
2222{
2223    uint8_t old_flags = save_exception_flags(env);
2224    if (floatx80_is_neg(ST0)) {
2225        env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
2226        env->fpus |= 0x400;
2227    }
2228    ST0 = floatx80_sqrt(ST0, &env->fp_status);
2229    merge_exception_flags(env, old_flags);
2230}
2231
2232void helper_fsincos(CPUX86State *env)
2233{
2234    double fptemp = floatx80_to_double(env, ST0);
2235
2236    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2237        env->fpus |= 0x400;
2238    } else {
2239        ST0 = double_to_floatx80(env, sin(fptemp));
2240        fpush(env);
2241        ST0 = double_to_floatx80(env, cos(fptemp));
2242        env->fpus &= ~0x400;  /* C2 <-- 0 */
2243        /* the above code is for |arg| < 2**63 only */
2244    }
2245}
2246
2247void helper_frndint(CPUX86State *env)
2248{
2249    uint8_t old_flags = save_exception_flags(env);
2250    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
2251    merge_exception_flags(env, old_flags);
2252}
2253
2254void helper_fscale(CPUX86State *env)
2255{
2256    uint8_t old_flags = save_exception_flags(env);
2257    if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
2258        float_raise(float_flag_invalid, &env->fp_status);
2259        ST0 = floatx80_default_nan(&env->fp_status);
2260    } else if (floatx80_is_any_nan(ST1)) {
2261        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2262            float_raise(float_flag_invalid, &env->fp_status);
2263        }
2264        ST0 = ST1;
2265        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2266            float_raise(float_flag_invalid, &env->fp_status);
2267            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
2268        }
2269    } else if (floatx80_is_infinity(ST1) &&
2270               !floatx80_invalid_encoding(ST0) &&
2271               !floatx80_is_any_nan(ST0)) {
2272        if (floatx80_is_neg(ST1)) {
2273            if (floatx80_is_infinity(ST0)) {
2274                float_raise(float_flag_invalid, &env->fp_status);
2275                ST0 = floatx80_default_nan(&env->fp_status);
2276            } else {
2277                ST0 = (floatx80_is_neg(ST0) ?
2278                       floatx80_chs(floatx80_zero) :
2279                       floatx80_zero);
2280            }
2281        } else {
2282            if (floatx80_is_zero(ST0)) {
2283                float_raise(float_flag_invalid, &env->fp_status);
2284                ST0 = floatx80_default_nan(&env->fp_status);
2285            } else {
2286                ST0 = (floatx80_is_neg(ST0) ?
2287                       floatx80_chs(floatx80_infinity) :
2288                       floatx80_infinity);
2289            }
2290        }
2291    } else {
2292        int n;
2293        signed char save = env->fp_status.floatx80_rounding_precision;
2294        uint8_t save_flags = get_float_exception_flags(&env->fp_status);
2295        set_float_exception_flags(0, &env->fp_status);
2296        n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
2297        set_float_exception_flags(save_flags, &env->fp_status);
2298        env->fp_status.floatx80_rounding_precision = 80;
2299        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
2300        env->fp_status.floatx80_rounding_precision = save;
2301    }
2302    merge_exception_flags(env, old_flags);
2303}
2304
2305void helper_fsin(CPUX86State *env)
2306{
2307    double fptemp = floatx80_to_double(env, ST0);
2308
2309    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2310        env->fpus |= 0x400;
2311    } else {
2312        ST0 = double_to_floatx80(env, sin(fptemp));
2313        env->fpus &= ~0x400;  /* C2 <-- 0 */
2314        /* the above code is for |arg| < 2**53 only */
2315    }
2316}
2317
2318void helper_fcos(CPUX86State *env)
2319{
2320    double fptemp = floatx80_to_double(env, ST0);
2321
2322    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2323        env->fpus |= 0x400;
2324    } else {
2325        ST0 = double_to_floatx80(env, cos(fptemp));
2326        env->fpus &= ~0x400;  /* C2 <-- 0 */
2327        /* the above code is for |arg| < 2**63 only */
2328    }
2329}
2330
2331void helper_fxam_ST0(CPUX86State *env)
2332{
2333    CPU_LDoubleU temp;
2334    int expdif;
2335
2336    temp.d = ST0;
2337
2338    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
2339    if (SIGND(temp)) {
2340        env->fpus |= 0x200; /* C1 <-- 1 */
2341    }
2342
2343    if (env->fptags[env->fpstt]) {
2344        env->fpus |= 0x4100; /* Empty */
2345        return;
2346    }
2347
2348    expdif = EXPD(temp);
2349    if (expdif == MAXEXPD) {
2350        if (MANTD(temp) == 0x8000000000000000ULL) {
2351            env->fpus |= 0x500; /* Infinity */
2352        } else if (MANTD(temp) & 0x8000000000000000ULL) {
2353            env->fpus |= 0x100; /* NaN */
2354        }
2355    } else if (expdif == 0) {
2356        if (MANTD(temp) == 0) {
2357            env->fpus |=  0x4000; /* Zero */
2358        } else {
2359            env->fpus |= 0x4400; /* Denormal */
2360        }
2361    } else if (MANTD(temp) & 0x8000000000000000ULL) {
2362        env->fpus |= 0x400;
2363    }
2364}
2365
2366static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
2367                      uintptr_t retaddr)
2368{
2369    int fpus, fptag, exp, i;
2370    uint64_t mant;
2371    CPU_LDoubleU tmp;
2372
2373    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
2374    fptag = 0;
2375    for (i = 7; i >= 0; i--) {
2376        fptag <<= 2;
2377        if (env->fptags[i]) {
2378            fptag |= 3;
2379        } else {
2380            tmp.d = env->fpregs[i].d;
2381            exp = EXPD(tmp);
2382            mant = MANTD(tmp);
2383            if (exp == 0 && mant == 0) {
2384                /* zero */
2385                fptag |= 1;
2386            } else if (exp == 0 || exp == MAXEXPD
2387                       || (mant & (1LL << 63)) == 0) {
2388                /* NaNs, infinity, denormal */
2389                fptag |= 2;
2390            }
2391        }
2392    }
2393    if (data32) {
2394        /* 32 bit */
2395        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
2396        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
2397        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
2398        cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
2399        cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
2400        cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
2401        cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
2402    } else {
2403        /* 16 bit */
2404        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
2405        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
2406        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
2407        cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
2408        cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
2409        cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
2410        cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
2411    }
2412}
2413
2414void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
2415{
2416    do_fstenv(env, ptr, data32, GETPC());
2417}
2418
2419static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
2420{
2421    env->fpstt = (fpus >> 11) & 7;
2422    env->fpus = fpus & ~0x3800 & ~FPUS_B;
2423    env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
2424#if !defined(CONFIG_USER_ONLY)
2425    if (!(env->fpus & FPUS_SE)) {
2426        /*
2427         * Here the processor deasserts FERR#; in response, the chipset deasserts
2428         * IGNNE#.
2429         */
2430        cpu_clear_ignne();
2431    }
2432#endif
2433}
2434
2435static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
2436                      uintptr_t retaddr)
2437{
2438    int i, fpus, fptag;
2439
2440    if (data32) {
2441        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
2442        fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
2443        fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
2444    } else {
2445        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
2446        fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
2447        fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
2448    }
2449    cpu_set_fpus(env, fpus);
2450    for (i = 0; i < 8; i++) {
2451        env->fptags[i] = ((fptag & 3) == 3);
2452        fptag >>= 2;
2453    }
2454}
2455
2456void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
2457{
2458    do_fldenv(env, ptr, data32, GETPC());
2459}
2460
2461void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
2462{
2463    floatx80 tmp;
2464    int i;
2465
2466    do_fstenv(env, ptr, data32, GETPC());
2467
2468    ptr += (14 << data32);
2469    for (i = 0; i < 8; i++) {
2470        tmp = ST(i);
2471        helper_fstt(env, tmp, ptr, GETPC());
2472        ptr += 10;
2473    }
2474
2475    /* fninit */
2476    env->fpus = 0;
2477    env->fpstt = 0;
2478    cpu_set_fpuc(env, 0x37f);
2479    env->fptags[0] = 1;
2480    env->fptags[1] = 1;
2481    env->fptags[2] = 1;
2482    env->fptags[3] = 1;
2483    env->fptags[4] = 1;
2484    env->fptags[5] = 1;
2485    env->fptags[6] = 1;
2486    env->fptags[7] = 1;
2487}
2488
2489void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
2490{
2491    floatx80 tmp;
2492    int i;
2493
2494    do_fldenv(env, ptr, data32, GETPC());
2495    ptr += (14 << data32);
2496
2497    for (i = 0; i < 8; i++) {
2498        tmp = helper_fldt(env, ptr, GETPC());
2499        ST(i) = tmp;
2500        ptr += 10;
2501    }
2502}
2503
#if defined(CONFIG_USER_ONLY)
/*
 * Non-helper entry points, built only when CONFIG_USER_ONLY is defined.
 * Each simply forwards to the corresponding helper with the same arguments.
 */

/* Forward to helper_fsave(): store the x87 state at @ptr and reset the FPU. */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}

/* Forward to helper_frstor(): reload the x87 state from @ptr. */
void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
#endif
2515
/* Byte offset of member X within the X86XSaveArea layout. */
#define XO(X)  offsetof(X86XSaveArea, X)
2517
2518static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2519{
2520    int fpus, fptag, i;
2521    target_ulong addr;
2522
2523    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
2524    fptag = 0;
2525    for (i = 0; i < 8; i++) {
2526        fptag |= (env->fptags[i] << i);
2527    }
2528
2529    cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
2530    cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
2531    cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
2532
2533    /* In 32-bit mode this is eip, sel, dp, sel.
2534       In 64-bit mode this is rip, rdp.
2535       But in either case we don't write actual data, just zeros.  */
2536    cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
2537    cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
2538
2539    addr = ptr + XO(legacy.fpregs);
2540    for (i = 0; i < 8; i++) {
2541        floatx80 tmp = ST(i);
2542        helper_fstt(env, tmp, addr, ra);
2543        addr += 16;
2544    }
2545}
2546
2547static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2548{
2549    update_mxcsr_from_sse_status(env);
2550    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
2551    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
2552}
2553
2554static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2555{
2556    int i, nb_xmm_regs;
2557    target_ulong addr;
2558
2559    if (env->hflags & HF_CS64_MASK) {
2560        nb_xmm_regs = 16;
2561    } else {
2562        nb_xmm_regs = 8;
2563    }
2564
2565    addr = ptr + XO(legacy.xmm_regs);
2566    for (i = 0; i < nb_xmm_regs; i++) {
2567        cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
2568        cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
2569        addr += 16;
2570    }
2571}
2572
2573static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2574{
2575    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
2576    int i;
2577
2578    for (i = 0; i < 4; i++, addr += 16) {
2579        cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
2580        cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
2581    }
2582}
2583
2584static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2585{
2586    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
2587                    env->bndcs_regs.cfgu, ra);
2588    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
2589                    env->bndcs_regs.sts, ra);
2590}
2591
2592static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2593{
2594    cpu_stq_data_ra(env, ptr, env->pkru, ra);
2595}
2596
2597void helper_fxsave(CPUX86State *env, target_ulong ptr)
2598{
2599    uintptr_t ra = GETPC();
2600
2601    /* The operand must be 16 byte aligned */
2602    if (ptr & 0xf) {
2603        raise_exception_ra(env, EXCP0D_GPF, ra);
2604    }
2605
2606    do_xsave_fpu(env, ptr, ra);
2607
2608    if (env->cr[4] & CR4_OSFXSR_MASK) {
2609        do_xsave_mxcsr(env, ptr, ra);
2610        /* Fast FXSAVE leaves out the XMM registers */
2611        if (!(env->efer & MSR_EFER_FFXSR)
2612            || (env->hflags & HF_CPL_MASK)
2613            || !(env->hflags & HF_LMA_MASK)) {
2614            do_xsave_sse(env, ptr, ra);
2615        }
2616    }
2617}
2618
2619static uint64_t get_xinuse(CPUX86State *env)
2620{
2621    uint64_t inuse = -1;
2622
2623    /* For the most part, we don't track XINUSE.  We could calculate it
2624       here for all components, but it's probably less work to simply
2625       indicate in use.  That said, the state of BNDREGS is important
2626       enough to track in HFLAGS, so we might as well use that here.  */
2627    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
2628       inuse &= ~XSTATE_BNDREGS_MASK;
2629    }
2630    return inuse;
2631}
2632
2633static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
2634                     uint64_t inuse, uint64_t opt, uintptr_t ra)
2635{
2636    uint64_t old_bv, new_bv;
2637
2638    /* The OS must have enabled XSAVE.  */
2639    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2640        raise_exception_ra(env, EXCP06_ILLOP, ra);
2641    }
2642
2643    /* The operand must be 64 byte aligned.  */
2644    if (ptr & 63) {
2645        raise_exception_ra(env, EXCP0D_GPF, ra);
2646    }
2647
2648    /* Never save anything not enabled by XCR0.  */
2649    rfbm &= env->xcr0;
2650    opt &= rfbm;
2651
2652    if (opt & XSTATE_FP_MASK) {
2653        do_xsave_fpu(env, ptr, ra);
2654    }
2655    if (rfbm & XSTATE_SSE_MASK) {
2656        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
2657        do_xsave_mxcsr(env, ptr, ra);
2658    }
2659    if (opt & XSTATE_SSE_MASK) {
2660        do_xsave_sse(env, ptr, ra);
2661    }
2662    if (opt & XSTATE_BNDREGS_MASK) {
2663        do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
2664    }
2665    if (opt & XSTATE_BNDCSR_MASK) {
2666        do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
2667    }
2668    if (opt & XSTATE_PKRU_MASK) {
2669        do_xsave_pkru(env, ptr + XO(pkru_state), ra);
2670    }
2671
2672    /* Update the XSTATE_BV field.  */
2673    old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
2674    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
2675    cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
2676}
2677
2678void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2679{
2680    do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
2681}
2682
2683void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2684{
2685    uint64_t inuse = get_xinuse(env);
2686    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
2687}
2688
2689static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2690{
2691    int i, fpuc, fpus, fptag;
2692    target_ulong addr;
2693
2694    fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
2695    fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
2696    fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
2697    cpu_set_fpuc(env, fpuc);
2698    cpu_set_fpus(env, fpus);
2699    fptag ^= 0xff;
2700    for (i = 0; i < 8; i++) {
2701        env->fptags[i] = ((fptag >> i) & 1);
2702    }
2703
2704    addr = ptr + XO(legacy.fpregs);
2705    for (i = 0; i < 8; i++) {
2706        floatx80 tmp = helper_fldt(env, addr, ra);
2707        ST(i) = tmp;
2708        addr += 16;
2709    }
2710}
2711
2712static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2713{
2714    cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
2715}
2716
2717static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2718{
2719    int i, nb_xmm_regs;
2720    target_ulong addr;
2721
2722    if (env->hflags & HF_CS64_MASK) {
2723        nb_xmm_regs = 16;
2724    } else {
2725        nb_xmm_regs = 8;
2726    }
2727
2728    addr = ptr + XO(legacy.xmm_regs);
2729    for (i = 0; i < nb_xmm_regs; i++) {
2730        env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
2731        env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
2732        addr += 16;
2733    }
2734}
2735
2736static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2737{
2738    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
2739    int i;
2740
2741    for (i = 0; i < 4; i++, addr += 16) {
2742        env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
2743        env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
2744    }
2745}
2746
2747static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2748{
2749    /* FIXME: Extend highest implemented bit of linear address.  */
2750    env->bndcs_regs.cfgu
2751        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
2752    env->bndcs_regs.sts
2753        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
2754}
2755
2756static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2757{
2758    env->pkru = cpu_ldq_data_ra(env, ptr, ra);
2759}
2760
2761void helper_fxrstor(CPUX86State *env, target_ulong ptr)
2762{
2763    uintptr_t ra = GETPC();
2764
2765    /* The operand must be 16 byte aligned */
2766    if (ptr & 0xf) {
2767        raise_exception_ra(env, EXCP0D_GPF, ra);
2768    }
2769
2770    do_xrstor_fpu(env, ptr, ra);
2771
2772    if (env->cr[4] & CR4_OSFXSR_MASK) {
2773        do_xrstor_mxcsr(env, ptr, ra);
2774        /* Fast FXRSTOR leaves out the XMM registers */
2775        if (!(env->efer & MSR_EFER_FFXSR)
2776            || (env->hflags & HF_CPL_MASK)
2777            || !(env->hflags & HF_LMA_MASK)) {
2778            do_xrstor_sse(env, ptr, ra);
2779        }
2780    }
2781}
2782
#if defined(CONFIG_USER_ONLY)
/*
 * Non-helper entry points, built only when CONFIG_USER_ONLY is defined.
 * Each simply forwards to the corresponding helper with the same arguments.
 */

/* Forward to helper_fxsave(): write the legacy x87/SSE image at @ptr. */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    helper_fxsave(env, ptr);
}

/* Forward to helper_fxrstor(): load the legacy x87/SSE image from @ptr. */
void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    helper_fxrstor(env, ptr);
}
#endif
2794
/*
 * XRSTOR helper (standard form only): restore the components selected by
 * @rfbm (masked with XCR0) from the XSAVE area at @ptr.
 *
 * Raises EXCP06_ILLOP when CR4.OSXSAVE is clear.  Raises EXCP0D_GPF when
 * @ptr is not 64-byte aligned, when bit 63 of XSTATE_BV is set, when
 * XSTATE_BV names a component not enabled in XCR0, or when XCOMP_BV or the
 * following reserved quadword is non-zero.  All header checks complete
 * before any register state is modified.
 *
 * For each selected component, a set XSTATE_BV bit loads the component
 * from memory and a clear bit resets it to its initial state.
 */
void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uintptr_t ra = GETPC();
    uint64_t xstate_bv, xcomp_bv, reserve0;

    rfbm &= env->xcr0;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned.  */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);

    if ((int64_t)xstate_bv < 0) {
        /* FIXME: Compact form.  */
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Standard form.  */

    /* The XSTATE_BV field must not set bits not present in XCR0.  */
    if (xstate_bv & ~env->xcr0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* The XCOMP_BV field must be zero.  Note that, as of the April 2016
       revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
       describes only XCOMP_BV, but the description of the standard form
       of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
       includes the next 64-bit field.  */
    xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
    reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
    if (xcomp_bv || reserve0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    if (rfbm & XSTATE_FP_MASK) {
        if (xstate_bv & XSTATE_FP_MASK) {
            do_xrstor_fpu(env, ptr, ra);
        } else {
            /* Initial x87 state: FNINIT plus all-zero register contents. */
            helper_fninit(env);
            memset(env->fpregs, 0, sizeof(env->fpregs));
        }
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that the standard form of XRSTOR loads MXCSR from memory
           whether or not the XSTATE_BV bit is set.  */
        do_xrstor_mxcsr(env, ptr, ra);
        if (xstate_bv & XSTATE_SSE_MASK) {
            do_xrstor_sse(env, ptr, ra);
        } else {
            /* ??? When AVX is implemented, we may have to be more
               selective in the clearing.  */
            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
        }
    }
    if (rfbm & XSTATE_BNDREGS_MASK) {
        /* HF_MPX_IU_MASK tracks whether BNDREGS is in use (see get_xinuse). */
        if (xstate_bv & XSTATE_BNDREGS_MASK) {
            do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
            env->hflags |= HF_MPX_IU_MASK;
        } else {
            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
            env->hflags &= ~HF_MPX_IU_MASK;
        }
    }
    if (rfbm & XSTATE_BNDCSR_MASK) {
        if (xstate_bv & XSTATE_BNDCSR_MASK) {
            do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
        } else {
            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
        }
        cpu_sync_bndcs_hflags(env);
    }
    if (rfbm & XSTATE_PKRU_MASK) {
        uint64_t old_pkru = env->pkru;
        if (xstate_bv & XSTATE_PKRU_MASK) {
            do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
        } else {
            env->pkru = 0;
        }
        /* The TLB is flushed only when the PKRU value actually changed. */
        if (env->pkru != old_pkru) {
            CPUState *cs = env_cpu(env);
            tlb_flush(cs);
        }
    }
}
2887
2888#undef XO
2889
2890uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
2891{
2892    /* The OS must have enabled XSAVE.  */
2893    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2894        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
2895    }
2896
2897    switch (ecx) {
2898    case 0:
2899        return env->xcr0;
2900    case 1:
2901        if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
2902            return env->xcr0 & get_xinuse(env);
2903        }
2904        break;
2905    }
2906    raise_exception_ra(env, EXCP0D_GPF, GETPC());
2907}
2908
2909void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
2910{
2911    uint32_t dummy, ena_lo, ena_hi;
2912    uint64_t ena;
2913
2914    /* The OS must have enabled XSAVE.  */
2915    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2916        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
2917    }
2918
2919    /* Only XCR0 is defined at present; the FPU may not be disabled.  */
2920    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
2921        goto do_gpf;
2922    }
2923
2924    /* Disallow enabling unimplemented features.  */
2925    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
2926    ena = ((uint64_t)ena_hi << 32) | ena_lo;
2927    if (mask & ~ena) {
2928        goto do_gpf;
2929    }
2930
2931    /* Disallow enabling only half of MPX.  */
2932    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
2933        & XSTATE_BNDCSR_MASK) {
2934        goto do_gpf;
2935    }
2936
2937    env->xcr0 = mask;
2938    cpu_sync_bndcs_hflags(env);
2939    return;
2940
2941 do_gpf:
2942    raise_exception_ra(env, EXCP0D_GPF, GETPC());
2943}
2944
2945/* MMX/SSE */
2946/* XXX: optimize by storing fptt and fptags in the static cpu state */
2947
/* MXCSR control bits decoded by update_mxcsr_status(). */
#define SSE_DAZ             0x0040  /* denormals are zero */
#define SSE_RC_MASK         0x6000  /* rounding-control field */
#define SSE_RC_NEAR         0x0000  /* round to nearest even */
#define SSE_RC_DOWN         0x2000  /* round down */
#define SSE_RC_UP           0x4000  /* round up */
#define SSE_RC_CHOP         0x6000  /* round toward zero */
#define SSE_FZ              0x8000  /* flush to zero */
2955
2956void update_mxcsr_status(CPUX86State *env)
2957{
2958    uint32_t mxcsr = env->mxcsr;
2959    int rnd_type;
2960
2961    /* set rounding mode */
2962    switch (mxcsr & SSE_RC_MASK) {
2963    default:
2964    case SSE_RC_NEAR:
2965        rnd_type = float_round_nearest_even;
2966        break;
2967    case SSE_RC_DOWN:
2968        rnd_type = float_round_down;
2969        break;
2970    case SSE_RC_UP:
2971        rnd_type = float_round_up;
2972        break;
2973    case SSE_RC_CHOP:
2974        rnd_type = float_round_to_zero;
2975        break;
2976    }
2977    set_float_rounding_mode(rnd_type, &env->sse_status);
2978
2979    /* Set exception flags.  */
2980    set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
2981                              (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
2982                              (mxcsr & FPUS_OE ? float_flag_overflow : 0) |
2983                              (mxcsr & FPUS_UE ? float_flag_underflow : 0) |
2984                              (mxcsr & FPUS_PE ? float_flag_inexact : 0),
2985                              &env->sse_status);
2986
2987    /* set denormals are zero */
2988    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
2989
2990    /* set flush to zero */
2991    set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
2992}
2993
2994void update_mxcsr_from_sse_status(CPUX86State *env)
2995{
2996    uint8_t flags = get_float_exception_flags(&env->sse_status);
2997    /*
2998     * The MXCSR denormal flag has opposite semantics to
2999     * float_flag_input_denormal (the softfloat code sets that flag
3000     * only when flushing input denormals to zero, but SSE sets it
3001     * only when not flushing them to zero), so is not converted
3002     * here.
3003     */
3004    env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
3005                   (flags & float_flag_divbyzero ? FPUS_ZE : 0) |
3006                   (flags & float_flag_overflow ? FPUS_OE : 0) |
3007                   (flags & float_flag_underflow ? FPUS_UE : 0) |
3008                   (flags & float_flag_inexact ? FPUS_PE : 0) |
3009                   (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE :
3010                    0));
3011}
3012
/*
 * Helper entry point that folds the pending softfloat exception flags into
 * env->mxcsr by calling update_mxcsr_from_sse_status().
 */
void helper_update_mxcsr(CPUX86State *env)
{
    update_mxcsr_from_sse_status(env);
}
3017
/* LDMXCSR helper: install @val as the new MXCSR via cpu_set_mxcsr(). */
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}
3022
3023void helper_enter_mmx(CPUX86State *env)
3024{
3025    env->fpstt = 0;
3026    *(uint32_t *)(env->fptags) = 0;
3027    *(uint32_t *)(env->fptags + 4) = 0;
3028}
3029
3030void helper_emms(CPUX86State *env)
3031{
3032    /* set to empty state */
3033    *(uint32_t *)(env->fptags) = 0x01010101;
3034    *(uint32_t *)(env->fptags + 4) = 0x01010101;
3035}
3036
3037/* XXX: suppress */
3038void helper_movq(CPUX86State *env, void *d, void *s)
3039{
3040    *(uint64_t *)d = *(uint64_t *)s;
3041}
3042
/*
 * ops_sse.h is included twice, once per SHIFT value.
 * NOTE(review): presumably SHIFT 0 selects the MMX variants and SHIFT 1
 * the SSE variants, and the header #undefs SHIFT before returning (no
 * #undef appears between the two inclusions here) - confirm against
 * ops_sse.h.
 */
#define SHIFT 0
#include "ops_sse.h"

#define SHIFT 1
#include "ops_sse.h"
3048