qemu/target/i386/tcg/fpu_helper.c
<<
>>
Prefs
   1/*
   2 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include <math.h>
  22#include "cpu.h"
  23#include "tcg-cpu.h"
  24#include "exec/helper-proto.h"
  25#include "fpu/softfloat.h"
  26#include "fpu/softfloat-macros.h"
  27#include "helper-tcg.h"
  28
  29/* float macros */
  30#define FT0    (env->ft0)
  31#define ST0    (env->fpregs[env->fpstt].d)
  32#define ST(n)  (env->fpregs[(env->fpstt + (n)) & 7].d)
  33#define ST1    ST(1)
  34
  35#define FPU_RC_MASK         0xc00
  36#define FPU_RC_NEAR         0x000
  37#define FPU_RC_DOWN         0x400
  38#define FPU_RC_UP           0x800
  39#define FPU_RC_CHOP         0xc00
  40
  41#define MAXTAN 9223372036854775808.0
  42
  43/* the following deal with x86 long double-precision numbers */
  44#define MAXEXPD 0x7fff
  45#define EXPBIAS 16383
  46#define EXPD(fp)        (fp.l.upper & 0x7fff)
  47#define SIGND(fp)       ((fp.l.upper) & 0x8000)
  48#define MANTD(fp)       (fp.l.lower)
  49#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
  50
  51#define FPUS_IE (1 << 0)
  52#define FPUS_DE (1 << 1)
  53#define FPUS_ZE (1 << 2)
  54#define FPUS_OE (1 << 3)
  55#define FPUS_UE (1 << 4)
  56#define FPUS_PE (1 << 5)
  57#define FPUS_SF (1 << 6)
  58#define FPUS_SE (1 << 7)
  59#define FPUS_B  (1 << 15)
  60
  61#define FPUC_EM 0x3f
  62
  63#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
  64#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
  65#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
  66#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
  67#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
  68#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
  69#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
  70#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
  71
  72static inline void fpush(CPUX86State *env)
  73{
  74    env->fpstt = (env->fpstt - 1) & 7;
  75    env->fptags[env->fpstt] = 0; /* validate stack entry */
  76}
  77
  78static inline void fpop(CPUX86State *env)
  79{
  80    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
  81    env->fpstt = (env->fpstt + 1) & 7;
  82}
  83
  84static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr)
  85{
  86    CPU_LDoubleU temp;
  87
  88    temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
  89    temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
  90    return temp.d;
  91}
  92
  93static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
  94                    uintptr_t retaddr)
  95{
  96    CPU_LDoubleU temp;
  97
  98    temp.d = f;
  99    cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
 100    cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
 101}
 102
 103/* x87 FPU helpers */
 104
 105static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
 106{
 107    union {
 108        float64 f64;
 109        double d;
 110    } u;
 111
 112    u.f64 = floatx80_to_float64(a, &env->fp_status);
 113    return u.d;
 114}
 115
 116static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
 117{
 118    union {
 119        float64 f64;
 120        double d;
 121    } u;
 122
 123    u.d = a;
 124    return float64_to_floatx80(u.f64, &env->fp_status);
 125}
 126
 127static void fpu_set_exception(CPUX86State *env, int mask)
 128{
 129    env->fpus |= mask;
 130    if (env->fpus & (~env->fpuc & FPUC_EM)) {
 131        env->fpus |= FPUS_SE | FPUS_B;
 132    }
 133}
 134
 135static inline uint8_t save_exception_flags(CPUX86State *env)
 136{
 137    uint8_t old_flags = get_float_exception_flags(&env->fp_status);
 138    set_float_exception_flags(0, &env->fp_status);
 139    return old_flags;
 140}
 141
 142static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
 143{
 144    uint8_t new_flags = get_float_exception_flags(&env->fp_status);
 145    float_raise(old_flags, &env->fp_status);
 146    fpu_set_exception(env,
 147                      ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
 148                       (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
 149                       (new_flags & float_flag_overflow ? FPUS_OE : 0) |
 150                       (new_flags & float_flag_underflow ? FPUS_UE : 0) |
 151                       (new_flags & float_flag_inexact ? FPUS_PE : 0) |
 152                       (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
 153}
 154
 155static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
 156{
 157    uint8_t old_flags = save_exception_flags(env);
 158    floatx80 ret = floatx80_div(a, b, &env->fp_status);
 159    merge_exception_flags(env, old_flags);
 160    return ret;
 161}
 162
 163static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
 164{
 165    if (env->cr[0] & CR0_NE_MASK) {
 166        raise_exception_ra(env, EXCP10_COPR, retaddr);
 167    }
 168#if !defined(CONFIG_USER_ONLY)
 169    else {
 170        fpu_check_raise_ferr_irq(env);
 171    }
 172#endif
 173}
 174
 175void helper_flds_FT0(CPUX86State *env, uint32_t val)
 176{
 177    uint8_t old_flags = save_exception_flags(env);
 178    union {
 179        float32 f;
 180        uint32_t i;
 181    } u;
 182
 183    u.i = val;
 184    FT0 = float32_to_floatx80(u.f, &env->fp_status);
 185    merge_exception_flags(env, old_flags);
 186}
 187
 188void helper_fldl_FT0(CPUX86State *env, uint64_t val)
 189{
 190    uint8_t old_flags = save_exception_flags(env);
 191    union {
 192        float64 f;
 193        uint64_t i;
 194    } u;
 195
 196    u.i = val;
 197    FT0 = float64_to_floatx80(u.f, &env->fp_status);
 198    merge_exception_flags(env, old_flags);
 199}
 200
 201void helper_fildl_FT0(CPUX86State *env, int32_t val)
 202{
 203    FT0 = int32_to_floatx80(val, &env->fp_status);
 204}
 205
 206void helper_flds_ST0(CPUX86State *env, uint32_t val)
 207{
 208    uint8_t old_flags = save_exception_flags(env);
 209    int new_fpstt;
 210    union {
 211        float32 f;
 212        uint32_t i;
 213    } u;
 214
 215    new_fpstt = (env->fpstt - 1) & 7;
 216    u.i = val;
 217    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
 218    env->fpstt = new_fpstt;
 219    env->fptags[new_fpstt] = 0; /* validate stack entry */
 220    merge_exception_flags(env, old_flags);
 221}
 222
 223void helper_fldl_ST0(CPUX86State *env, uint64_t val)
 224{
 225    uint8_t old_flags = save_exception_flags(env);
 226    int new_fpstt;
 227    union {
 228        float64 f;
 229        uint64_t i;
 230    } u;
 231
 232    new_fpstt = (env->fpstt - 1) & 7;
 233    u.i = val;
 234    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
 235    env->fpstt = new_fpstt;
 236    env->fptags[new_fpstt] = 0; /* validate stack entry */
 237    merge_exception_flags(env, old_flags);
 238}
 239
 240static FloatX80RoundPrec tmp_maximise_precision(float_status *st)
 241{
 242    FloatX80RoundPrec old = get_floatx80_rounding_precision(st);
 243    set_floatx80_rounding_precision(floatx80_precision_x, st);
 244    return old;
 245}
 246
 247void helper_fildl_ST0(CPUX86State *env, int32_t val)
 248{
 249    int new_fpstt;
 250    FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);
 251
 252    new_fpstt = (env->fpstt - 1) & 7;
 253    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
 254    env->fpstt = new_fpstt;
 255    env->fptags[new_fpstt] = 0; /* validate stack entry */
 256
 257    set_floatx80_rounding_precision(old, &env->fp_status);
 258}
 259
 260void helper_fildll_ST0(CPUX86State *env, int64_t val)
 261{
 262    int new_fpstt;
 263    FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);
 264
 265    new_fpstt = (env->fpstt - 1) & 7;
 266    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
 267    env->fpstt = new_fpstt;
 268    env->fptags[new_fpstt] = 0; /* validate stack entry */
 269
 270    set_floatx80_rounding_precision(old, &env->fp_status);
 271}
 272
 273uint32_t helper_fsts_ST0(CPUX86State *env)
 274{
 275    uint8_t old_flags = save_exception_flags(env);
 276    union {
 277        float32 f;
 278        uint32_t i;
 279    } u;
 280
 281    u.f = floatx80_to_float32(ST0, &env->fp_status);
 282    merge_exception_flags(env, old_flags);
 283    return u.i;
 284}
 285
 286uint64_t helper_fstl_ST0(CPUX86State *env)
 287{
 288    uint8_t old_flags = save_exception_flags(env);
 289    union {
 290        float64 f;
 291        uint64_t i;
 292    } u;
 293
 294    u.f = floatx80_to_float64(ST0, &env->fp_status);
 295    merge_exception_flags(env, old_flags);
 296    return u.i;
 297}
 298
 299int32_t helper_fist_ST0(CPUX86State *env)
 300{
 301    uint8_t old_flags = save_exception_flags(env);
 302    int32_t val;
 303
 304    val = floatx80_to_int32(ST0, &env->fp_status);
 305    if (val != (int16_t)val) {
 306        set_float_exception_flags(float_flag_invalid, &env->fp_status);
 307        val = -32768;
 308    }
 309    merge_exception_flags(env, old_flags);
 310    return val;
 311}
 312
 313int32_t helper_fistl_ST0(CPUX86State *env)
 314{
 315    uint8_t old_flags = save_exception_flags(env);
 316    int32_t val;
 317
 318    val = floatx80_to_int32(ST0, &env->fp_status);
 319    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 320        val = 0x80000000;
 321    }
 322    merge_exception_flags(env, old_flags);
 323    return val;
 324}
 325
 326int64_t helper_fistll_ST0(CPUX86State *env)
 327{
 328    uint8_t old_flags = save_exception_flags(env);
 329    int64_t val;
 330
 331    val = floatx80_to_int64(ST0, &env->fp_status);
 332    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 333        val = 0x8000000000000000ULL;
 334    }
 335    merge_exception_flags(env, old_flags);
 336    return val;
 337}
 338
 339int32_t helper_fistt_ST0(CPUX86State *env)
 340{
 341    uint8_t old_flags = save_exception_flags(env);
 342    int32_t val;
 343
 344    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 345    if (val != (int16_t)val) {
 346        set_float_exception_flags(float_flag_invalid, &env->fp_status);
 347        val = -32768;
 348    }
 349    merge_exception_flags(env, old_flags);
 350    return val;
 351}
 352
 353int32_t helper_fisttl_ST0(CPUX86State *env)
 354{
 355    uint8_t old_flags = save_exception_flags(env);
 356    int32_t val;
 357
 358    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 359    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 360        val = 0x80000000;
 361    }
 362    merge_exception_flags(env, old_flags);
 363    return val;
 364}
 365
 366int64_t helper_fisttll_ST0(CPUX86State *env)
 367{
 368    uint8_t old_flags = save_exception_flags(env);
 369    int64_t val;
 370
 371    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
 372    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 373        val = 0x8000000000000000ULL;
 374    }
 375    merge_exception_flags(env, old_flags);
 376    return val;
 377}
 378
 379void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
 380{
 381    int new_fpstt;
 382
 383    new_fpstt = (env->fpstt - 1) & 7;
 384    env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC());
 385    env->fpstt = new_fpstt;
 386    env->fptags[new_fpstt] = 0; /* validate stack entry */
 387}
 388
 389void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
 390{
 391    do_fstt(env, ST0, ptr, GETPC());
 392}
 393
 394void helper_fpush(CPUX86State *env)
 395{
 396    fpush(env);
 397}
 398
 399void helper_fpop(CPUX86State *env)
 400{
 401    fpop(env);
 402}
 403
 404void helper_fdecstp(CPUX86State *env)
 405{
 406    env->fpstt = (env->fpstt - 1) & 7;
 407    env->fpus &= ~0x4700;
 408}
 409
 410void helper_fincstp(CPUX86State *env)
 411{
 412    env->fpstt = (env->fpstt + 1) & 7;
 413    env->fpus &= ~0x4700;
 414}
 415
 416/* FPU move */
 417
 418void helper_ffree_STN(CPUX86State *env, int st_index)
 419{
 420    env->fptags[(env->fpstt + st_index) & 7] = 1;
 421}
 422
 423void helper_fmov_ST0_FT0(CPUX86State *env)
 424{
 425    ST0 = FT0;
 426}
 427
 428void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
 429{
 430    FT0 = ST(st_index);
 431}
 432
 433void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
 434{
 435    ST0 = ST(st_index);
 436}
 437
 438void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
 439{
 440    ST(st_index) = ST0;
 441}
 442
 443void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
 444{
 445    floatx80 tmp;
 446
 447    tmp = ST(st_index);
 448    ST(st_index) = ST0;
 449    ST0 = tmp;
 450}
 451
 452/* FPU operations */
 453
 454static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
 455
 456void helper_fcom_ST0_FT0(CPUX86State *env)
 457{
 458    uint8_t old_flags = save_exception_flags(env);
 459    FloatRelation ret;
 460
 461    ret = floatx80_compare(ST0, FT0, &env->fp_status);
 462    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 463    merge_exception_flags(env, old_flags);
 464}
 465
 466void helper_fucom_ST0_FT0(CPUX86State *env)
 467{
 468    uint8_t old_flags = save_exception_flags(env);
 469    FloatRelation ret;
 470
 471    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 472    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 473    merge_exception_flags(env, old_flags);
 474}
 475
 476static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
 477
 478void helper_fcomi_ST0_FT0(CPUX86State *env)
 479{
 480    uint8_t old_flags = save_exception_flags(env);
 481    int eflags;
 482    FloatRelation ret;
 483
 484    ret = floatx80_compare(ST0, FT0, &env->fp_status);
 485    eflags = cpu_cc_compute_all(env, CC_OP);
 486    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 487    CC_SRC = eflags;
 488    merge_exception_flags(env, old_flags);
 489}
 490
 491void helper_fucomi_ST0_FT0(CPUX86State *env)
 492{
 493    uint8_t old_flags = save_exception_flags(env);
 494    int eflags;
 495    FloatRelation ret;
 496
 497    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 498    eflags = cpu_cc_compute_all(env, CC_OP);
 499    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 500    CC_SRC = eflags;
 501    merge_exception_flags(env, old_flags);
 502}
 503
 504void helper_fadd_ST0_FT0(CPUX86State *env)
 505{
 506    uint8_t old_flags = save_exception_flags(env);
 507    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
 508    merge_exception_flags(env, old_flags);
 509}
 510
 511void helper_fmul_ST0_FT0(CPUX86State *env)
 512{
 513    uint8_t old_flags = save_exception_flags(env);
 514    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
 515    merge_exception_flags(env, old_flags);
 516}
 517
 518void helper_fsub_ST0_FT0(CPUX86State *env)
 519{
 520    uint8_t old_flags = save_exception_flags(env);
 521    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
 522    merge_exception_flags(env, old_flags);
 523}
 524
 525void helper_fsubr_ST0_FT0(CPUX86State *env)
 526{
 527    uint8_t old_flags = save_exception_flags(env);
 528    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
 529    merge_exception_flags(env, old_flags);
 530}
 531
 532void helper_fdiv_ST0_FT0(CPUX86State *env)
 533{
 534    ST0 = helper_fdiv(env, ST0, FT0);
 535}
 536
 537void helper_fdivr_ST0_FT0(CPUX86State *env)
 538{
 539    ST0 = helper_fdiv(env, FT0, ST0);
 540}
 541
 542/* fp operations between STN and ST0 */
 543
 544void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
 545{
 546    uint8_t old_flags = save_exception_flags(env);
 547    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
 548    merge_exception_flags(env, old_flags);
 549}
 550
 551void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
 552{
 553    uint8_t old_flags = save_exception_flags(env);
 554    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
 555    merge_exception_flags(env, old_flags);
 556}
 557
 558void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
 559{
 560    uint8_t old_flags = save_exception_flags(env);
 561    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
 562    merge_exception_flags(env, old_flags);
 563}
 564
 565void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
 566{
 567    uint8_t old_flags = save_exception_flags(env);
 568    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
 569    merge_exception_flags(env, old_flags);
 570}
 571
 572void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
 573{
 574    floatx80 *p;
 575
 576    p = &ST(st_index);
 577    *p = helper_fdiv(env, *p, ST0);
 578}
 579
 580void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
 581{
 582    floatx80 *p;
 583
 584    p = &ST(st_index);
 585    *p = helper_fdiv(env, ST0, *p);
 586}
 587
 588/* misc FPU operations */
 589void helper_fchs_ST0(CPUX86State *env)
 590{
 591    ST0 = floatx80_chs(ST0);
 592}
 593
 594void helper_fabs_ST0(CPUX86State *env)
 595{
 596    ST0 = floatx80_abs(ST0);
 597}
 598
 599void helper_fld1_ST0(CPUX86State *env)
 600{
 601    ST0 = floatx80_one;
 602}
 603
 604void helper_fldl2t_ST0(CPUX86State *env)
 605{
 606    switch (env->fpuc & FPU_RC_MASK) {
 607    case FPU_RC_UP:
 608        ST0 = floatx80_l2t_u;
 609        break;
 610    default:
 611        ST0 = floatx80_l2t;
 612        break;
 613    }
 614}
 615
 616void helper_fldl2e_ST0(CPUX86State *env)
 617{
 618    switch (env->fpuc & FPU_RC_MASK) {
 619    case FPU_RC_DOWN:
 620    case FPU_RC_CHOP:
 621        ST0 = floatx80_l2e_d;
 622        break;
 623    default:
 624        ST0 = floatx80_l2e;
 625        break;
 626    }
 627}
 628
 629void helper_fldpi_ST0(CPUX86State *env)
 630{
 631    switch (env->fpuc & FPU_RC_MASK) {
 632    case FPU_RC_DOWN:
 633    case FPU_RC_CHOP:
 634        ST0 = floatx80_pi_d;
 635        break;
 636    default:
 637        ST0 = floatx80_pi;
 638        break;
 639    }
 640}
 641
 642void helper_fldlg2_ST0(CPUX86State *env)
 643{
 644    switch (env->fpuc & FPU_RC_MASK) {
 645    case FPU_RC_DOWN:
 646    case FPU_RC_CHOP:
 647        ST0 = floatx80_lg2_d;
 648        break;
 649    default:
 650        ST0 = floatx80_lg2;
 651        break;
 652    }
 653}
 654
 655void helper_fldln2_ST0(CPUX86State *env)
 656{
 657    switch (env->fpuc & FPU_RC_MASK) {
 658    case FPU_RC_DOWN:
 659    case FPU_RC_CHOP:
 660        ST0 = floatx80_ln2_d;
 661        break;
 662    default:
 663        ST0 = floatx80_ln2;
 664        break;
 665    }
 666}
 667
 668void helper_fldz_ST0(CPUX86State *env)
 669{
 670    ST0 = floatx80_zero;
 671}
 672
 673void helper_fldz_FT0(CPUX86State *env)
 674{
 675    FT0 = floatx80_zero;
 676}
 677
 678uint32_t helper_fnstsw(CPUX86State *env)
 679{
 680    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 681}
 682
 683uint32_t helper_fnstcw(CPUX86State *env)
 684{
 685    return env->fpuc;
 686}
 687
 688void update_fp_status(CPUX86State *env)
 689{
 690    FloatRoundMode rnd_mode;
 691    FloatX80RoundPrec rnd_prec;
 692
 693    /* set rounding mode */
 694    switch (env->fpuc & FPU_RC_MASK) {
 695    default:
 696    case FPU_RC_NEAR:
 697        rnd_mode = float_round_nearest_even;
 698        break;
 699    case FPU_RC_DOWN:
 700        rnd_mode = float_round_down;
 701        break;
 702    case FPU_RC_UP:
 703        rnd_mode = float_round_up;
 704        break;
 705    case FPU_RC_CHOP:
 706        rnd_mode = float_round_to_zero;
 707        break;
 708    }
 709    set_float_rounding_mode(rnd_mode, &env->fp_status);
 710
 711    switch ((env->fpuc >> 8) & 3) {
 712    case 0:
 713        rnd_prec = floatx80_precision_s;
 714        break;
 715    case 2:
 716        rnd_prec = floatx80_precision_d;
 717        break;
 718    case 3:
 719    default:
 720        rnd_prec = floatx80_precision_x;
 721        break;
 722    }
 723    set_floatx80_rounding_precision(rnd_prec, &env->fp_status);
 724}
 725
  /* Load a new FPU control word: val is handed unchanged to cpu_set_fpuc(). */
  726void helper_fldcw(CPUX86State *env, uint32_t val)
  727{
  728    cpu_set_fpuc(env, val);
  729}
 730
 731void helper_fclex(CPUX86State *env)
 732{
 733    env->fpus &= 0x7f00;
 734}
 735
 736void helper_fwait(CPUX86State *env)
 737{
 738    if (env->fpus & FPUS_SE) {
 739        fpu_raise_exception(env, GETPC());
 740    }
 741}
 742
 743static void do_fninit(CPUX86State *env)
 744{
 745    env->fpus = 0;
 746    env->fpstt = 0;
 747    env->fpcs = 0;
 748    env->fpds = 0;
 749    env->fpip = 0;
 750    env->fpdp = 0;
 751    cpu_set_fpuc(env, 0x37f);
 752    env->fptags[0] = 1;
 753    env->fptags[1] = 1;
 754    env->fptags[2] = 1;
 755    env->fptags[3] = 1;
 756    env->fptags[4] = 1;
 757    env->fptags[5] = 1;
 758    env->fptags[6] = 1;
 759    env->fptags[7] = 1;
 760}
 761
  /* Helper entry point that forwards to do_fninit() to reset the FPU state. */
  762void helper_fninit(CPUX86State *env)
  763{
  764    do_fninit(env);
  765}
 766
 767/* BCD ops */
 768
 769void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
 770{
 771    floatx80 tmp;
 772    uint64_t val;
 773    unsigned int v;
 774    int i;
 775
 776    val = 0;
 777    for (i = 8; i >= 0; i--) {
 778        v = cpu_ldub_data_ra(env, ptr + i, GETPC());
 779        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
 780    }
 781    tmp = int64_to_floatx80(val, &env->fp_status);
 782    if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
 783        tmp = floatx80_chs(tmp);
 784    }
 785    fpush(env);
 786    ST0 = tmp;
 787}
 788
 789void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
 790{
 791    uint8_t old_flags = save_exception_flags(env);
 792    int v;
 793    target_ulong mem_ref, mem_end;
 794    int64_t val;
 795    CPU_LDoubleU temp;
 796
 797    temp.d = ST0;
 798
 799    val = floatx80_to_int64(ST0, &env->fp_status);
 800    mem_ref = ptr;
 801    if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
 802        set_float_exception_flags(float_flag_invalid, &env->fp_status);
 803        while (mem_ref < ptr + 7) {
 804            cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
 805        }
 806        cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
 807        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
 808        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
 809        merge_exception_flags(env, old_flags);
 810        return;
 811    }
 812    mem_end = mem_ref + 9;
 813    if (SIGND(temp)) {
 814        cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
 815        val = -val;
 816    } else {
 817        cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
 818    }
 819    while (mem_ref < mem_end) {
 820        if (val == 0) {
 821            break;
 822        }
 823        v = val % 100;
 824        val = val / 100;
 825        v = ((v / 10) << 4) | (v % 10);
 826        cpu_stb_data_ra(env, mem_ref++, v, GETPC());
 827    }
 828    while (mem_ref < mem_end) {
 829        cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
 830    }
 831    merge_exception_flags(env, old_flags);
 832}
 833
  834/* 128-bit significand of log(2), split into high and low 64-bit halves.  */
  835#define ln2_sig_high 0xb17217f7d1cf79abULL
  836#define ln2_sig_low 0xc9e3b39803f2f6afULL
  837
  838/*
  839 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
  840 * the interval [-1/64, 1/64].
   *
   * Each coefficient is a floatx80 given as (sign/exponent word, 64-bit
   * significand).  NOTE(review): f2xm1_coeff_0_low looks like a low-order
   * correction term paired with f2xm1_coeff_0 - confirm at the use site.
  841 */
  842#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
  843#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
  844#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
  845#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
  846#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
  847#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
  848#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
  849#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
  850#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
 851
  /*
   * One entry of f2xm1_table.  The table initializers are positional, so
   * the field order (t, exp2, exp2m1) must not change.
   */
  852struct f2xm1_data {
  853    /*
  854     * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
  855     * are very close to exact floatx80 values.
  856     */
  857    floatx80 t;
  858    /* The value of 2^t.  */
  859    floatx80 exp2;
  860    /* The value of 2^t - 1.  */
  861    floatx80 exp2m1;
  862};
 863
 864static const struct f2xm1_data f2xm1_table[65] = {
 865    { make_floatx80_init(0xbfff, 0x8000000000000000ULL),
 866      make_floatx80_init(0x3ffe, 0x8000000000000000ULL),
 867      make_floatx80_init(0xbffe, 0x8000000000000000ULL) },
 868    { make_floatx80_init(0xbffe, 0xf800000000002e7eULL),
 869      make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL),
 870      make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) },
 871    { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL),
 872      make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL),
 873      make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) },
 874    { make_floatx80_init(0xbffe, 0xe800000000006f10ULL),
 875      make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL),
 876      make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) },
 877    { make_floatx80_init(0xbffe, 0xe000000000008a45ULL),
 878      make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
 879      make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) },
 880    { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL),
 881      make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL),
 882      make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) },
 883    { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL),
 884      make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL),
 885      make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) },
 886    { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL),
 887      make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL),
 888      make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) },
 889    { make_floatx80_init(0xbffe, 0xc000000000006530ULL),
 890      make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL),
 891      make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) },
 892    { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL),
 893      make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL),
 894      make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) },
 895    { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL),
 896      make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL),
 897      make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) },
 898    { make_floatx80_init(0xbffe, 0xa800000000006f8aULL),
 899      make_floatx80_init(0x3ffe, 0xa27043030c49370aULL),
 900      make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) },
 901    { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL),
 902      make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL),
 903      make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) },
 904    { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL),
 905      make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
 906      make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) },
 907    { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL),
 908      make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL),
 909      make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) },
 910    { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL),
 911      make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL),
 912      make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) },
 913    { make_floatx80_init(0xbffe, 0x800000000000227dULL),
 914      make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL),
 915      make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) },
 916    { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL),
 917      make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
 918      make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) },
 919    { make_floatx80_init(0xbffd, 0xe00000000000df81ULL),
 920      make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL),
 921      make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) },
 922    { make_floatx80_init(0xbffd, 0xd00000000000bccfULL),
 923      make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL),
 924      make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) },
 925    { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL),
 926      make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL),
 927      make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) },
 928    { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL),
 929      make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL),
 930      make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) },
 931    { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL),
 932      make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL),
 933      make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) },
 934    { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL),
 935      make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL),
 936      make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) },
 937    { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL),
 938      make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL),
 939      make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
 940    { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL),
 941      make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL),
 942      make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) },
 943    { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL),
 944      make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL),
 945      make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) },
 946    { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL),
 947      make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL),
 948      make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) },
 949    { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL),
 950      make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL),
 951      make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) },
 952    { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL),
 953      make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL),
 954      make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) },
 955    { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL),
 956      make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL),
 957      make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) },
 958    { make_floatx80_init(0xbff9, 0xffffffffffff11feULL),
 959      make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL),
 960      make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) },
 961    { floatx80_zero_init,
 962      make_floatx80_init(0x3fff, 0x8000000000000000ULL),
 963      floatx80_zero_init },
 964    { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL),
 965      make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL),
 966      make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
 967    { make_floatx80_init(0x3ffb, 0x800000000000b500ULL),
 968      make_floatx80_init(0x3fff, 0x85aac367cc488345ULL),
 969      make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) },
 970    { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL),
 971      make_floatx80_init(0x3fff, 0x88980e8092da7cceULL),
 972      make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) },
 973    { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL),
 974      make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL),
 975      make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
 976    { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL),
 977      make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL),
 978      make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) },
 979    { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL),
 980      make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL),
 981      make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
 982    { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL),
 983      make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL),
 984      make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) },
 985    { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL),
 986      make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL),
 987      make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) },
 988    { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL),
 989      make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL),
 990      make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
 991    { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL),
 992      make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL),
 993      make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) },
 994    { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL),
 995      make_floatx80_init(0x3fff, 0xa27043030c49370aULL),
 996      make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) },
 997    { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL),
 998      make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL),
 999      make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) },
1000    { make_floatx80_init(0x3ffd, 0xd0000000000093beULL),
1001      make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL),
1002      make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) },
1003    { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL),
1004      make_floatx80_init(0x3fff, 0xad583eea42a17876ULL),
1005      make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) },
1006    { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL),
1007      make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL),
1008      make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) },
1009    { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL),
1010      make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL),
1011      make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) },
1012    { make_floatx80_init(0x3ffe, 0x8800000000006344ULL),
1013      make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL),
1014      make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) },
1015    { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL),
1016      make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL),
1017      make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) },
1018    { make_floatx80_init(0x3ffe, 0x9800000000009127ULL),
1019      make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL),
1020      make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) },
1021    { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL),
1022      make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL),
1023      make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) },
1024    { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL),
1025      make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL),
1026      make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) },
1027    { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL),
1028      make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL),
1029      make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) },
1030    { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL),
1031      make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL),
1032      make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) },
1033    { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL),
1034      make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL),
1035      make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) },
1036    { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL),
1037      make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL),
1038      make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
1039    { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL),
1040      make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
1041      make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
1042    { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
1043      make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
1044      make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
1045    { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
1046      make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
1047      make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
1048    { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
1049      make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
1050      make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
1051    { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
1052      make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
1053      make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
1054    { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
1055      make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
1056      make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
1057    { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
1058      make_floatx80_init(0x4000, 0x8000000000000000ULL),
1059      make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
1060};
1061
1062void helper_f2xm1(CPUX86State *env)
1063{
1064    uint8_t old_flags = save_exception_flags(env);
1065    uint64_t sig = extractFloatx80Frac(ST0);
1066    int32_t exp = extractFloatx80Exp(ST0);
1067    bool sign = extractFloatx80Sign(ST0);
1068
1069    if (floatx80_invalid_encoding(ST0)) {
1070        float_raise(float_flag_invalid, &env->fp_status);
1071        ST0 = floatx80_default_nan(&env->fp_status);
1072    } else if (floatx80_is_any_nan(ST0)) {
1073        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1074            float_raise(float_flag_invalid, &env->fp_status);
1075            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1076        }
1077    } else if (exp > 0x3fff ||
1078               (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
1079        /* Out of range for the instruction, treat as invalid.  */
1080        float_raise(float_flag_invalid, &env->fp_status);
1081        ST0 = floatx80_default_nan(&env->fp_status);
1082    } else if (exp == 0x3fff) {
1083        /* Argument 1 or -1, exact result 1 or -0.5.  */
1084        if (sign) {
1085            ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
1086        }
1087    } else if (exp < 0x3fb0) {
1088        if (!floatx80_is_zero(ST0)) {
1089            /*
1090             * Multiplying the argument by an extra-precision version
1091             * of log(2) is sufficiently precise.  Zero arguments are
1092             * returned unchanged.
1093             */
1094            uint64_t sig0, sig1, sig2;
1095            if (exp == 0) {
1096                normalizeFloatx80Subnormal(sig, &exp, &sig);
1097            }
1098            mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
1099                            &sig2);
1100            /* This result is inexact.  */
1101            sig1 |= 1;
1102            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1103                                                sign, exp, sig0, sig1,
1104                                                &env->fp_status);
1105        }
1106    } else {
1107        floatx80 tmp, y, accum;
1108        bool asign, bsign;
1109        int32_t n, aexp, bexp;
1110        uint64_t asig0, asig1, asig2, bsig0, bsig1;
1111        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1112        FloatX80RoundPrec save_prec =
1113            env->fp_status.floatx80_rounding_precision;
1114        env->fp_status.float_rounding_mode = float_round_nearest_even;
1115        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
1116
1117        /* Find the nearest multiple of 1/32 to the argument.  */
1118        tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
1119        n = 32 + floatx80_to_int32(tmp, &env->fp_status);
1120        y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);
1121
1122        if (floatx80_is_zero(y)) {
1123            /*
1124             * Use the value of 2^t - 1 from the table, to avoid
1125             * needing to special-case zero as a result of
1126             * multiplication below.
1127             */
1128            ST0 = f2xm1_table[n].t;
1129            set_float_exception_flags(float_flag_inexact, &env->fp_status);
1130            env->fp_status.float_rounding_mode = save_mode;
1131        } else {
1132            /*
1133             * Compute the lower parts of a polynomial expansion for
1134             * (2^y - 1) / y.
1135             */
1136            accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
1137            accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
1138            accum = floatx80_mul(accum, y, &env->fp_status);
1139            accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
1140            accum = floatx80_mul(accum, y, &env->fp_status);
1141            accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
1142            accum = floatx80_mul(accum, y, &env->fp_status);
1143            accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
1144            accum = floatx80_mul(accum, y, &env->fp_status);
1145            accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
1146            accum = floatx80_mul(accum, y, &env->fp_status);
1147            accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
1148            accum = floatx80_mul(accum, y, &env->fp_status);
1149            accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);
1150
1151            /*
1152             * The full polynomial expansion is f2xm1_coeff_0 + accum
1153             * (where accum has much lower magnitude, and so, in
1154             * particular, carry out of the addition is not possible).
1155             * (This expansion is only accurate to about 70 bits, not
1156             * 128 bits.)
1157             */
1158            aexp = extractFloatx80Exp(f2xm1_coeff_0);
1159            asign = extractFloatx80Sign(f2xm1_coeff_0);
1160            shift128RightJamming(extractFloatx80Frac(accum), 0,
1161                                 aexp - extractFloatx80Exp(accum),
1162                                 &asig0, &asig1);
1163            bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
1164            bsig1 = 0;
1165            if (asign == extractFloatx80Sign(accum)) {
1166                add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1167            } else {
1168                sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1169            }
1170            /* And thus compute an approximation to 2^y - 1.  */
1171            mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
1172                            &asig0, &asig1, &asig2);
1173            aexp += extractFloatx80Exp(y) - 0x3ffe;
1174            asign ^= extractFloatx80Sign(y);
1175            if (n != 32) {
1176                /*
1177                 * Multiply this by the precomputed value of 2^t and
1178                 * add that of 2^t - 1.
1179                 */
1180                mul128By64To192(asig0, asig1,
1181                                extractFloatx80Frac(f2xm1_table[n].exp2),
1182                                &asig0, &asig1, &asig2);
1183                aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
1184                bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
1185                bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
1186                bsig1 = 0;
1187                if (bexp < aexp) {
1188                    shift128RightJamming(bsig0, bsig1, aexp - bexp,
1189                                         &bsig0, &bsig1);
1190                } else if (aexp < bexp) {
1191                    shift128RightJamming(asig0, asig1, bexp - aexp,
1192                                         &asig0, &asig1);
1193                    aexp = bexp;
1194                }
1195                /* The sign of 2^t - 1 is always that of the result.  */
1196                bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
1197                if (asign == bsign) {
1198                    /* Avoid possible carry out of the addition.  */
1199                    shift128RightJamming(asig0, asig1, 1,
1200                                         &asig0, &asig1);
1201                    shift128RightJamming(bsig0, bsig1, 1,
1202                                         &bsig0, &bsig1);
1203                    ++aexp;
1204                    add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
1205                } else {
1206                    sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1207                    asign = bsign;
1208                }
1209            }
1210            env->fp_status.float_rounding_mode = save_mode;
1211            /* This result is inexact.  */
1212            asig1 |= 1;
1213            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1214                                                asign, aexp, asig0, asig1,
1215                                                &env->fp_status);
1216        }
1217
1218        env->fp_status.floatx80_rounding_precision = save_prec;
1219    }
1220    merge_exception_flags(env, old_flags);
1221}
1222
1223void helper_fptan(CPUX86State *env)
1224{
1225    double fptemp = floatx80_to_double(env, ST0);
1226
1227    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1228        env->fpus |= 0x400;
1229    } else {
1230        fptemp = tan(fptemp);
1231        ST0 = double_to_floatx80(env, fptemp);
1232        fpush(env);
1233        ST0 = floatx80_one;
1234        env->fpus &= ~0x400; /* C2 <-- 0 */
1235        /* the above code is for |arg| < 2**52 only */
1236    }
1237}
1238
/*
 * Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.
 *
 * Each value is given as a biased exponent (*_exp) together with a
 * 128-bit significand split into its high (*_sig_high) and low
 * (*_sig_low) 64-bit halves.  pi/4, pi/2 and pi share the same
 * significand and differ only in their exponent.
 */
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL
1252
/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 *
 * fpatan_coeff_k is the coefficient of x^(2k+1): helper_fpatan
 * evaluates the coeff_1 .. coeff_6 terms by Horner's rule in x^2, adds
 * fpatan_coeff_0 and finally multiplies the sum by x.
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)
1268
1269struct fpatan_data {
1270    /* High and low parts of atan(x).  */
1271    floatx80 atan_high, atan_low;
1272};
1273
1274static const struct fpatan_data fpatan_table[9] = {
1275    { floatx80_zero_init,
1276      floatx80_zero_init },
1277    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
1278      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
1279    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
1280      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
1281    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
1282      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
1283    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
1284      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
1285    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
1286      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
1287    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
1288      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
1289    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
1290      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
1291    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
1292      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
1293};
1294
1295void helper_fpatan(CPUX86State *env)
1296{
1297    uint8_t old_flags = save_exception_flags(env);
1298    uint64_t arg0_sig = extractFloatx80Frac(ST0);
1299    int32_t arg0_exp = extractFloatx80Exp(ST0);
1300    bool arg0_sign = extractFloatx80Sign(ST0);
1301    uint64_t arg1_sig = extractFloatx80Frac(ST1);
1302    int32_t arg1_exp = extractFloatx80Exp(ST1);
1303    bool arg1_sign = extractFloatx80Sign(ST1);
1304
1305    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1306        float_raise(float_flag_invalid, &env->fp_status);
1307        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
1308    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
1309        float_raise(float_flag_invalid, &env->fp_status);
1310        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
1311    } else if (floatx80_invalid_encoding(ST0) ||
1312               floatx80_invalid_encoding(ST1)) {
1313        float_raise(float_flag_invalid, &env->fp_status);
1314        ST1 = floatx80_default_nan(&env->fp_status);
1315    } else if (floatx80_is_any_nan(ST0)) {
1316        ST1 = ST0;
1317    } else if (floatx80_is_any_nan(ST1)) {
1318        /* Pass this NaN through.  */
1319    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
1320        /* Pass this zero through.  */
1321    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
1322                 arg0_exp - arg1_exp >= 80) &&
1323               !arg0_sign) {
1324        /*
1325         * Dividing ST1 by ST0 gives the correct result up to
1326         * rounding, and avoids spurious underflow exceptions that
1327         * might result from passing some small values through the
1328         * polynomial approximation, but if a finite nonzero result of
1329         * division is exact, the result of fpatan is still inexact
1330         * (and underflowing where appropriate).
1331         */
1332        FloatX80RoundPrec save_prec =
1333            env->fp_status.floatx80_rounding_precision;
1334        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
1335        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
1336        env->fp_status.floatx80_rounding_precision = save_prec;
1337        if (!floatx80_is_zero(ST1) &&
1338            !(get_float_exception_flags(&env->fp_status) &
1339              float_flag_inexact)) {
1340            /*
1341             * The mathematical result is very slightly closer to zero
1342             * than this exact result.  Round a value with the
1343             * significand adjusted accordingly to get the correct
1344             * exceptions, and possibly an adjusted result depending
1345             * on the rounding mode.
1346             */
1347            uint64_t sig = extractFloatx80Frac(ST1);
1348            int32_t exp = extractFloatx80Exp(ST1);
1349            bool sign = extractFloatx80Sign(ST1);
1350            if (exp == 0) {
1351                normalizeFloatx80Subnormal(sig, &exp, &sig);
1352            }
1353            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1354                                                sign, exp, sig - 1,
1355                                                -1, &env->fp_status);
1356        }
1357    } else {
1358        /* The result is inexact.  */
1359        bool rsign = arg1_sign;
1360        int32_t rexp;
1361        uint64_t rsig0, rsig1;
1362        if (floatx80_is_zero(ST1)) {
1363            /*
1364             * ST0 is negative.  The result is pi with the sign of
1365             * ST1.
1366             */
1367            rexp = pi_exp;
1368            rsig0 = pi_sig_high;
1369            rsig1 = pi_sig_low;
1370        } else if (floatx80_is_infinity(ST1)) {
1371            if (floatx80_is_infinity(ST0)) {
1372                if (arg0_sign) {
1373                    rexp = pi_34_exp;
1374                    rsig0 = pi_34_sig_high;
1375                    rsig1 = pi_34_sig_low;
1376                } else {
1377                    rexp = pi_4_exp;
1378                    rsig0 = pi_4_sig_high;
1379                    rsig1 = pi_4_sig_low;
1380                }
1381            } else {
1382                rexp = pi_2_exp;
1383                rsig0 = pi_2_sig_high;
1384                rsig1 = pi_2_sig_low;
1385            }
1386        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
1387            rexp = pi_2_exp;
1388            rsig0 = pi_2_sig_high;
1389            rsig1 = pi_2_sig_low;
1390        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
1391            /* ST0 is negative.  */
1392            rexp = pi_exp;
1393            rsig0 = pi_sig_high;
1394            rsig1 = pi_sig_low;
1395        } else {
1396            /*
1397             * ST0 and ST1 are finite, nonzero and with exponents not
1398             * too far apart.
1399             */
1400            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
1401            int32_t azexp, axexp;
1402            bool adj_sub, ysign, zsign;
1403            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
1404            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
1405            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
1406            uint64_t azsig0, azsig1;
1407            uint64_t azsig2, azsig3, axsig0, axsig1;
1408            floatx80 x8;
1409            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1410            FloatX80RoundPrec save_prec =
1411                env->fp_status.floatx80_rounding_precision;
1412            env->fp_status.float_rounding_mode = float_round_nearest_even;
1413            env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
1414
1415            if (arg0_exp == 0) {
1416                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
1417            }
1418            if (arg1_exp == 0) {
1419                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
1420            }
1421            if (arg0_exp > arg1_exp ||
1422                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
1423                /* Work with abs(ST1) / abs(ST0).  */
1424                num_exp = arg1_exp;
1425                num_sig = arg1_sig;
1426                den_exp = arg0_exp;
1427                den_sig = arg0_sig;
1428                if (arg0_sign) {
1429                    /* The result is subtracted from pi.  */
1430                    adj_exp = pi_exp;
1431                    adj_sig0 = pi_sig_high;
1432                    adj_sig1 = pi_sig_low;
1433                    adj_sub = true;
1434                } else {
1435                    /* The result is used as-is.  */
1436                    adj_exp = 0;
1437                    adj_sig0 = 0;
1438                    adj_sig1 = 0;
1439                    adj_sub = false;
1440                }
1441            } else {
1442                /* Work with abs(ST0) / abs(ST1).  */
1443                num_exp = arg0_exp;
1444                num_sig = arg0_sig;
1445                den_exp = arg1_exp;
1446                den_sig = arg1_sig;
1447                /* The result is added to or subtracted from pi/2.  */
1448                adj_exp = pi_2_exp;
1449                adj_sig0 = pi_2_sig_high;
1450                adj_sig1 = pi_2_sig_low;
1451                adj_sub = !arg0_sign;
1452            }
1453
1454            /*
1455             * Compute x = num/den, where 0 < x <= 1 and x is not too
1456             * small.
1457             */
1458            xexp = num_exp - den_exp + 0x3ffe;
1459            remsig0 = num_sig;
1460            remsig1 = 0;
1461            if (den_sig <= remsig0) {
1462                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
1463                ++xexp;
1464            }
1465            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
1466            mul64To128(den_sig, xsig0, &msig0, &msig1);
1467            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
1468            while ((int64_t) remsig0 < 0) {
1469                --xsig0;
1470                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
1471            }
1472            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
1473            /*
1474             * No need to correct any estimation error in xsig1; even
1475             * with such error, it is accurate enough.
1476             */
1477
1478            /*
1479             * Split x as x = t + y, where t = n/8 is the nearest
1480             * multiple of 1/8 to x.
1481             */
1482            x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1483                                               false, xexp + 3, xsig0,
1484                                               xsig1, &env->fp_status);
1485            n = floatx80_to_int32(x8, &env->fp_status);
1486            if (n == 0) {
1487                ysign = false;
1488                yexp = xexp;
1489                ysig0 = xsig0;
1490                ysig1 = xsig1;
1491                texp = 0;
1492                tsig = 0;
1493            } else {
1494                int shift = clz32(n) + 32;
1495                texp = 0x403b - shift;
1496                tsig = n;
1497                tsig <<= shift;
1498                if (texp == xexp) {
1499                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
1500                    if ((int64_t) ysig0 >= 0) {
1501                        ysign = false;
1502                        if (ysig0 == 0) {
1503                            if (ysig1 == 0) {
1504                                yexp = 0;
1505                            } else {
1506                                shift = clz64(ysig1) + 64;
1507                                yexp = xexp - shift;
1508                                shift128Left(ysig0, ysig1, shift,
1509                                             &ysig0, &ysig1);
1510                            }
1511                        } else {
1512                            shift = clz64(ysig0);
1513                            yexp = xexp - shift;
1514                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1515                        }
1516                    } else {
1517                        ysign = true;
1518                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
1519                        if (ysig0 == 0) {
1520                            shift = clz64(ysig1) + 64;
1521                        } else {
1522                            shift = clz64(ysig0);
1523                        }
1524                        yexp = xexp - shift;
1525                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1526                    }
1527                } else {
1528                    /*
1529                     * t's exponent must be greater than x's because t
1530                     * is positive and the nearest multiple of 1/8 to
1531                     * x, and if x has a greater exponent, the power
1532                     * of 2 with that exponent is also a multiple of
1533                     * 1/8.
1534                     */
1535                    uint64_t usig0, usig1;
1536                    shift128RightJamming(xsig0, xsig1, texp - xexp,
1537                                         &usig0, &usig1);
1538                    ysign = true;
1539                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
1540                    if (ysig0 == 0) {
1541                        shift = clz64(ysig1) + 64;
1542                    } else {
1543                        shift = clz64(ysig0);
1544                    }
1545                    yexp = texp - shift;
1546                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1547                }
1548            }
1549
1550            /*
1551             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
1552             * arctan(z).
1553             */
1554            zsign = ysign;
1555            if (texp == 0 || yexp == 0) {
1556                zexp = yexp;
1557                zsig0 = ysig0;
1558                zsig1 = ysig1;
1559            } else {
1560                /*
1561                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
1562                 */
1563                int32_t dexp = texp + xexp - 0x3ffe;
1564                uint64_t dsig0, dsig1, dsig2;
1565                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
1566                /*
1567                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
1568                 * bit).  Add 1 to produce the denominator 1+tx.
1569                 */
1570                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
1571                                     &dsig0, &dsig1);
1572                dsig0 |= 0x8000000000000000ULL;
1573                zexp = yexp - 1;
1574                remsig0 = ysig0;
1575                remsig1 = ysig1;
1576                remsig2 = 0;
1577                if (dsig0 <= remsig0) {
1578                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
1579                    ++zexp;
1580                }
1581                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
1582                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
1583                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
1584                       &remsig0, &remsig1, &remsig2);
1585                while ((int64_t) remsig0 < 0) {
1586                    --zsig0;
1587                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
1588                           &remsig0, &remsig1, &remsig2);
1589                }
1590                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
1591                /* No need to correct any estimation error in zsig1.  */
1592            }
1593
1594            if (zexp == 0) {
1595                azexp = 0;
1596                azsig0 = 0;
1597                azsig1 = 0;
1598            } else {
1599                floatx80 z2, accum;
1600                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
1601                /* Compute z^2.  */
1602                mul128To256(zsig0, zsig1, zsig0, zsig1,
1603                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
1604                z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
1605                                                   zexp + zexp - 0x3ffe,
1606                                                   z2sig0, z2sig1,
1607                                                   &env->fp_status);
1608
1609                /* Compute the lower parts of the polynomial expansion.  */
1610                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
1611                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
1612                accum = floatx80_mul(accum, z2, &env->fp_status);
1613                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
1614                accum = floatx80_mul(accum, z2, &env->fp_status);
1615                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
1616                accum = floatx80_mul(accum, z2, &env->fp_status);
1617                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
1618                accum = floatx80_mul(accum, z2, &env->fp_status);
1619                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
1620                accum = floatx80_mul(accum, z2, &env->fp_status);
1621
1622                /*
1623                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
1624                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
1625                 */
1626                aexp = extractFloatx80Exp(fpatan_coeff_0);
1627                shift128RightJamming(extractFloatx80Frac(accum), 0,
1628                                     aexp - extractFloatx80Exp(accum),
1629                                     &asig0, &asig1);
1630                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
1631                       &asig0, &asig1);
1632                /* Multiply by z to compute arctan(z).  */
1633                azexp = aexp + zexp - 0x3ffe;
1634                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
1635                            &azsig2, &azsig3);
1636            }
1637
1638            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign).  */
1639            if (texp == 0) {
1640                /* z is positive.  */
1641                axexp = azexp;
1642                axsig0 = azsig0;
1643                axsig1 = azsig1;
1644            } else {
1645                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
1646                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
1647                uint64_t low_sig0 =
1648                    extractFloatx80Frac(fpatan_table[n].atan_low);
1649                uint64_t low_sig1 = 0;
1650                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
1651                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
1652                axsig1 = 0;
1653                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
1654                                     &low_sig0, &low_sig1);
1655                if (low_sign) {
1656                    sub128(axsig0, axsig1, low_sig0, low_sig1,
1657                           &axsig0, &axsig1);
1658                } else {
1659                    add128(axsig0, axsig1, low_sig0, low_sig1,
1660                           &axsig0, &axsig1);
1661                }
1662                if (azexp >= axexp) {
1663                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
1664                                         &axsig0, &axsig1);
1665                    axexp = azexp + 1;
1666                    shift128RightJamming(azsig0, azsig1, 1,
1667                                         &azsig0, &azsig1);
1668                } else {
1669                    shift128RightJamming(axsig0, axsig1, 1,
1670                                         &axsig0, &axsig1);
1671                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
1672                                         &azsig0, &azsig1);
1673                    ++axexp;
1674                }
1675                if (zsign) {
1676                    sub128(axsig0, axsig1, azsig0, azsig1,
1677                           &axsig0, &axsig1);
1678                } else {
1679                    add128(axsig0, axsig1, azsig0, azsig1,
1680                           &axsig0, &axsig1);
1681                }
1682            }
1683
1684            if (adj_exp == 0) {
1685                rexp = axexp;
1686                rsig0 = axsig0;
1687                rsig1 = axsig1;
1688            } else {
1689                /*
1690                 * Add or subtract arctan(x) (exponent axexp,
1691                 * significand axsig0 and axsig1, positive, not
1692                 * necessarily normalized) to the number given by
1693                 * adj_exp, adj_sig0 and adj_sig1, according to
1694                 * adj_sub.
1695                 */
1696                if (adj_exp >= axexp) {
1697                    shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
1698                                         &axsig0, &axsig1);
1699                    rexp = adj_exp + 1;
1700                    shift128RightJamming(adj_sig0, adj_sig1, 1,
1701                                         &adj_sig0, &adj_sig1);
1702                } else {
1703                    shift128RightJamming(axsig0, axsig1, 1,
1704                                         &axsig0, &axsig1);
1705                    shift128RightJamming(adj_sig0, adj_sig1,
1706                                         axexp - adj_exp + 1,
1707                                         &adj_sig0, &adj_sig1);
1708                    rexp = axexp + 1;
1709                }
1710                if (adj_sub) {
1711                    sub128(adj_sig0, adj_sig1, axsig0, axsig1,
1712                           &rsig0, &rsig1);
1713                } else {
1714                    add128(adj_sig0, adj_sig1, axsig0, axsig1,
1715                           &rsig0, &rsig1);
1716                }
1717            }
1718
1719            env->fp_status.float_rounding_mode = save_mode;
1720            env->fp_status.floatx80_rounding_precision = save_prec;
1721        }
1722        /* This result is inexact.  */
1723        rsig1 |= 1;
1724        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
1725                                            rsig0, rsig1, &env->fp_status);
1726    }
1727
1728    fpop(env);
1729    merge_exception_flags(env, old_flags);
1730}
1731
1732void helper_fxtract(CPUX86State *env)
1733{
1734    uint8_t old_flags = save_exception_flags(env);
1735    CPU_LDoubleU temp;
1736
1737    temp.d = ST0;
1738
1739    if (floatx80_is_zero(ST0)) {
1740        /* Easy way to generate -inf and raising division by 0 exception */
1741        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
1742                           &env->fp_status);
1743        fpush(env);
1744        ST0 = temp.d;
1745    } else if (floatx80_invalid_encoding(ST0)) {
1746        float_raise(float_flag_invalid, &env->fp_status);
1747        ST0 = floatx80_default_nan(&env->fp_status);
1748        fpush(env);
1749        ST0 = ST1;
1750    } else if (floatx80_is_any_nan(ST0)) {
1751        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1752            float_raise(float_flag_invalid, &env->fp_status);
1753            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1754        }
1755        fpush(env);
1756        ST0 = ST1;
1757    } else if (floatx80_is_infinity(ST0)) {
1758        fpush(env);
1759        ST0 = ST1;
1760        ST1 = floatx80_infinity;
1761    } else {
1762        int expdif;
1763
1764        if (EXPD(temp) == 0) {
1765            int shift = clz64(temp.l.lower);
1766            temp.l.lower <<= shift;
1767            expdif = 1 - EXPBIAS - shift;
1768            float_raise(float_flag_input_denormal, &env->fp_status);
1769        } else {
1770            expdif = EXPD(temp) - EXPBIAS;
1771        }
1772        /* DP exponent bias */
1773        ST0 = int32_to_floatx80(expdif, &env->fp_status);
1774        fpush(env);
1775        BIASEXPONENT(temp);
1776        ST0 = temp.d;
1777    }
1778    merge_exception_flags(env, old_flags);
1779}
1780
1781static void helper_fprem_common(CPUX86State *env, bool mod)
1782{
1783    uint8_t old_flags = save_exception_flags(env);
1784    uint64_t quotient;
1785    CPU_LDoubleU temp0, temp1;
1786    int exp0, exp1, expdiff;
1787
1788    temp0.d = ST0;
1789    temp1.d = ST1;
1790    exp0 = EXPD(temp0);
1791    exp1 = EXPD(temp1);
1792
1793    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1794    if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
1795        exp0 == 0x7fff || exp1 == 0x7fff ||
1796        floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
1797        ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
1798    } else {
1799        if (exp0 == 0) {
1800            exp0 = 1 - clz64(temp0.l.lower);
1801        }
1802        if (exp1 == 0) {
1803            exp1 = 1 - clz64(temp1.l.lower);
1804        }
1805        expdiff = exp0 - exp1;
1806        if (expdiff < 64) {
1807            ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
1808            env->fpus |= (quotient & 0x4) << (8 - 2);  /* (C0) <-- q2 */
1809            env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
1810            env->fpus |= (quotient & 0x1) << (9 - 0);  /* (C1) <-- q0 */
1811        } else {
1812            /*
1813             * Partial remainder.  This choice of how many bits to
1814             * process at once is specified in AMD instruction set
1815             * manuals, and empirically is followed by Intel
1816             * processors as well; it ensures that the final remainder
1817             * operation in a loop does produce the correct low three
1818             * bits of the quotient.  AMD manuals specify that the
1819             * flags other than C2 are cleared, and empirically Intel
1820             * processors clear them as well.
1821             */
1822            int n = 32 + (expdiff % 32);
1823            temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
1824            ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
1825            env->fpus |= 0x400;  /* C2 <-- 1 */
1826        }
1827    }
1828    merge_exception_flags(env, old_flags);
1829}
1830
1831void helper_fprem1(CPUX86State *env)
1832{
1833    helper_fprem_common(env, false);
1834}
1835
1836void helper_fprem(CPUX86State *env)
1837{
1838    helper_fprem_common(env, true);
1839}
1840
/* High and low 64-bit halves of a 128-bit significand of log2(e).  */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low  0xbe87fed0691d3e89ULL

/*
 * Coefficients of a polynomial approximating log2((1+x)/(1-x)) that
 * uses only odd powers of x.  It is intended for x in the interval
 * [2*sqrt(2)-3, 3-2*sqrt(2)], i.e. for logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].  fyl2x_coeff_0_low is a separate
 * low-order term that is added alongside fyl2x_coeff_0.
 */
#define fyl2x_coeff_0     make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1     make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2     make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3     make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4     make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5     make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6     make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7     make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8     make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9     make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
1862
1863/*
1864 * Compute an approximation of log2(1+arg), where 1+arg is in the
1865 * interval [sqrt(2)/2, sqrt(2)].  It is assumed that when this
1866 * function is called, rounding precision is set to 80 and the
1867 * round-to-nearest mode is in effect.  arg must not be exactly zero,
1868 * and must not be so close to zero that underflow might occur.
1869 */
1870static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
1871                                uint64_t *sig0, uint64_t *sig1)
1872{
1873    uint64_t arg0_sig = extractFloatx80Frac(arg);
1874    int32_t arg0_exp = extractFloatx80Exp(arg);
1875    bool arg0_sign = extractFloatx80Sign(arg);
1876    bool asign;
1877    int32_t dexp, texp, aexp;
1878    uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
1879    uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
1880    uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
1881    floatx80 t2, accum;
1882
1883    /*
1884     * Compute an approximation of arg/(2+arg), with extra precision,
1885     * as the argument to a polynomial approximation.  The extra
1886     * precision is only needed for the first term of the
1887     * approximation, with subsequent terms being significantly
1888     * smaller; the approximation only uses odd exponents, and the
1889     * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
1890     */
1891    if (arg0_sign) {
1892        dexp = 0x3fff;
1893        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1894        sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
1895    } else {
1896        dexp = 0x4000;
1897        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1898        dsig0 |= 0x8000000000000000ULL;
1899    }
1900    texp = arg0_exp - dexp + 0x3ffe;
1901    rsig0 = arg0_sig;
1902    rsig1 = 0;
1903    rsig2 = 0;
1904    if (dsig0 <= rsig0) {
1905        shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
1906        ++texp;
1907    }
1908    tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
1909    mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
1910    sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
1911           &rsig0, &rsig1, &rsig2);
1912    while ((int64_t) rsig0 < 0) {
1913        --tsig0;
1914        add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
1915               &rsig0, &rsig1, &rsig2);
1916    }
1917    tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
1918    /*
1919     * No need to correct any estimation error in tsig1; even with
1920     * such error, it is accurate enough.  Now compute the square of
1921     * that approximation.
1922     */
1923    mul128To256(tsig0, tsig1, tsig0, tsig1,
1924                &t2sig0, &t2sig1, &t2sig2, &t2sig3);
1925    t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
1926                                       texp + texp - 0x3ffe,
1927                                       t2sig0, t2sig1, &env->fp_status);
1928
1929    /* Compute the lower parts of the polynomial expansion.  */
1930    accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
1931    accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
1932    accum = floatx80_mul(accum, t2, &env->fp_status);
1933    accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
1934    accum = floatx80_mul(accum, t2, &env->fp_status);
1935    accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
1936    accum = floatx80_mul(accum, t2, &env->fp_status);
1937    accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
1938    accum = floatx80_mul(accum, t2, &env->fp_status);
1939    accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
1940    accum = floatx80_mul(accum, t2, &env->fp_status);
1941    accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
1942    accum = floatx80_mul(accum, t2, &env->fp_status);
1943    accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
1944    accum = floatx80_mul(accum, t2, &env->fp_status);
1945    accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
1946    accum = floatx80_mul(accum, t2, &env->fp_status);
1947    accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);
1948
1949    /*
1950     * The full polynomial expansion is fyl2x_coeff_0 + accum (where
1951     * accum has much lower magnitude, and so, in particular, carry
1952     * out of the addition is not possible), multiplied by t.  (This
1953     * expansion is only accurate to about 70 bits, not 128 bits.)
1954     */
1955    aexp = extractFloatx80Exp(fyl2x_coeff_0);
1956    asign = extractFloatx80Sign(fyl2x_coeff_0);
1957    shift128RightJamming(extractFloatx80Frac(accum), 0,
1958                         aexp - extractFloatx80Exp(accum),
1959                         &asig0, &asig1);
1960    bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
1961    bsig1 = 0;
1962    if (asign == extractFloatx80Sign(accum)) {
1963        add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1964    } else {
1965        sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1966    }
1967    /* Multiply by t to compute the required result.  */
1968    mul128To256(asig0, asig1, tsig0, tsig1,
1969                &asig0, &asig1, &asig2, &asig3);
1970    aexp += texp - 0x3ffe;
1971    *exp = aexp;
1972    *sig0 = asig0;
1973    *sig1 = asig1;
1974}
1975
1976void helper_fyl2xp1(CPUX86State *env)
1977{
1978    uint8_t old_flags = save_exception_flags(env);
1979    uint64_t arg0_sig = extractFloatx80Frac(ST0);
1980    int32_t arg0_exp = extractFloatx80Exp(ST0);
1981    bool arg0_sign = extractFloatx80Sign(ST0);
1982    uint64_t arg1_sig = extractFloatx80Frac(ST1);
1983    int32_t arg1_exp = extractFloatx80Exp(ST1);
1984    bool arg1_sign = extractFloatx80Sign(ST1);
1985
1986    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1987        float_raise(float_flag_invalid, &env->fp_status);
1988        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
1989    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
1990        float_raise(float_flag_invalid, &env->fp_status);
1991        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
1992    } else if (floatx80_invalid_encoding(ST0) ||
1993               floatx80_invalid_encoding(ST1)) {
1994        float_raise(float_flag_invalid, &env->fp_status);
1995        ST1 = floatx80_default_nan(&env->fp_status);
1996    } else if (floatx80_is_any_nan(ST0)) {
1997        ST1 = ST0;
1998    } else if (floatx80_is_any_nan(ST1)) {
1999        /* Pass this NaN through.  */
2000    } else if (arg0_exp > 0x3ffd ||
2001               (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
2002                                                  0x95f619980c4336f7ULL :
2003                                                  0xd413cccfe7799211ULL))) {
2004        /*
2005         * Out of range for the instruction (ST0 must have absolute
2006         * value less than 1 - sqrt(2)/2 = 0.292..., according to
2007         * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
2008         * to sqrt(2) - 1, which we allow here), treat as invalid.
2009         */
2010        float_raise(float_flag_invalid, &env->fp_status);
2011        ST1 = floatx80_default_nan(&env->fp_status);
2012    } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
2013               arg1_exp == 0x7fff) {
2014        /*
2015         * One argument is zero, or multiplying by infinity; correct
2016         * result is exact and can be obtained by multiplying the
2017         * arguments.
2018         */
2019        ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
2020    } else if (arg0_exp < 0x3fb0) {
2021        /*
2022         * Multiplying both arguments and an extra-precision version
2023         * of log2(e) is sufficiently precise.
2024         */
2025        uint64_t sig0, sig1, sig2;
2026        int32_t exp;
2027        if (arg0_exp == 0) {
2028            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
2029        }
2030        if (arg1_exp == 0) {
2031            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2032        }
2033        mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
2034                        &sig0, &sig1, &sig2);
2035        exp = arg0_exp + 1;
2036        mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
2037        exp += arg1_exp - 0x3ffe;
2038        /* This result is inexact.  */
2039        sig1 |= 1;
2040        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
2041                                            arg0_sign ^ arg1_sign, exp,
2042                                            sig0, sig1, &env->fp_status);
2043    } else {
2044        int32_t aexp;
2045        uint64_t asig0, asig1, asig2;
2046        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
2047        FloatX80RoundPrec save_prec =
2048            env->fp_status.floatx80_rounding_precision;
2049        env->fp_status.float_rounding_mode = float_round_nearest_even;
2050        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
2051
2052        helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
2053        /*
2054         * Multiply by the second argument to compute the required
2055         * result.
2056         */
2057        if (arg1_exp == 0) {
2058            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2059        }
2060        mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
2061        aexp += arg1_exp - 0x3ffe;
2062        /* This result is inexact.  */
2063        asig1 |= 1;
2064        env->fp_status.float_rounding_mode = save_mode;
2065        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
2066                                            arg0_sign ^ arg1_sign, aexp,
2067                                            asig0, asig1, &env->fp_status);
2068        env->fp_status.floatx80_rounding_precision = save_prec;
2069    }
2070    fpop(env);
2071    merge_exception_flags(env, old_flags);
2072}
2073
2074void helper_fyl2x(CPUX86State *env)
2075{
2076    uint8_t old_flags = save_exception_flags(env);
2077    uint64_t arg0_sig = extractFloatx80Frac(ST0);
2078    int32_t arg0_exp = extractFloatx80Exp(ST0);
2079    bool arg0_sign = extractFloatx80Sign(ST0);
2080    uint64_t arg1_sig = extractFloatx80Frac(ST1);
2081    int32_t arg1_exp = extractFloatx80Exp(ST1);
2082    bool arg1_sign = extractFloatx80Sign(ST1);
2083
2084    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2085        float_raise(float_flag_invalid, &env->fp_status);
2086        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
2087    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
2088        float_raise(float_flag_invalid, &env->fp_status);
2089        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
2090    } else if (floatx80_invalid_encoding(ST0) ||
2091               floatx80_invalid_encoding(ST1)) {
2092        float_raise(float_flag_invalid, &env->fp_status);
2093        ST1 = floatx80_default_nan(&env->fp_status);
2094    } else if (floatx80_is_any_nan(ST0)) {
2095        ST1 = ST0;
2096    } else if (floatx80_is_any_nan(ST1)) {
2097        /* Pass this NaN through.  */
2098    } else if (arg0_sign && !floatx80_is_zero(ST0)) {
2099        float_raise(float_flag_invalid, &env->fp_status);
2100        ST1 = floatx80_default_nan(&env->fp_status);
2101    } else if (floatx80_is_infinity(ST1)) {
2102        FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
2103                                             &env->fp_status);
2104        switch (cmp) {
2105        case float_relation_less:
2106            ST1 = floatx80_chs(ST1);
2107            break;
2108        case float_relation_greater:
2109            /* Result is infinity of the same sign as ST1.  */
2110            break;
2111        default:
2112            float_raise(float_flag_invalid, &env->fp_status);
2113            ST1 = floatx80_default_nan(&env->fp_status);
2114            break;
2115        }
2116    } else if (floatx80_is_infinity(ST0)) {
2117        if (floatx80_is_zero(ST1)) {
2118            float_raise(float_flag_invalid, &env->fp_status);
2119            ST1 = floatx80_default_nan(&env->fp_status);
2120        } else if (arg1_sign) {
2121            ST1 = floatx80_chs(ST0);
2122        } else {
2123            ST1 = ST0;
2124        }
2125    } else if (floatx80_is_zero(ST0)) {
2126        if (floatx80_is_zero(ST1)) {
2127            float_raise(float_flag_invalid, &env->fp_status);
2128            ST1 = floatx80_default_nan(&env->fp_status);
2129        } else {
2130            /* Result is infinity with opposite sign to ST1.  */
2131            float_raise(float_flag_divbyzero, &env->fp_status);
2132            ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
2133                                0x8000000000000000ULL);
2134        }
2135    } else if (floatx80_is_zero(ST1)) {
2136        if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
2137            ST1 = floatx80_chs(ST1);
2138        }
2139        /* Otherwise, ST1 is already the correct result.  */
2140    } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
2141        if (arg1_sign) {
2142            ST1 = floatx80_chs(floatx80_zero);
2143        } else {
2144            ST1 = floatx80_zero;
2145        }
2146    } else {
2147        int32_t int_exp;
2148        floatx80 arg0_m1;
2149        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
2150        FloatX80RoundPrec save_prec =
2151            env->fp_status.floatx80_rounding_precision;
2152        env->fp_status.float_rounding_mode = float_round_nearest_even;
2153        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
2154
2155        if (arg0_exp == 0) {
2156            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
2157        }
2158        if (arg1_exp == 0) {
2159            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2160        }
2161        int_exp = arg0_exp - 0x3fff;
2162        if (arg0_sig > 0xb504f333f9de6484ULL) {
2163            ++int_exp;
2164        }
2165        arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
2166                                               &env->fp_status),
2167                               floatx80_one, &env->fp_status);
2168        if (floatx80_is_zero(arg0_m1)) {
2169            /* Exact power of 2; multiply by ST1.  */
2170            env->fp_status.float_rounding_mode = save_mode;
2171            ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
2172                               ST1, &env->fp_status);
2173        } else {
2174            bool asign = extractFloatx80Sign(arg0_m1);
2175            int32_t aexp;
2176            uint64_t asig0, asig1, asig2;
2177            helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
2178            if (int_exp != 0) {
2179                bool isign = (int_exp < 0);
2180                int32_t iexp;
2181                uint64_t isig;
2182                int shift;
2183                int_exp = isign ? -int_exp : int_exp;
2184                shift = clz32(int_exp) + 32;
2185                isig = int_exp;
2186                isig <<= shift;
2187                iexp = 0x403e - shift;
2188                shift128RightJamming(asig0, asig1, iexp - aexp,
2189                                     &asig0, &asig1);
2190                if (asign == isign) {
2191                    add128(isig, 0, asig0, asig1, &asig0, &asig1);
2192                } else {
2193                    sub128(isig, 0, asig0, asig1, &asig0, &asig1);
2194                }
2195                aexp = iexp;
2196                asign = isign;
2197            }
2198            /*
2199             * Multiply by the second argument to compute the required
2200             * result.
2201             */
2202            if (arg1_exp == 0) {
2203                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2204            }
2205            mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
2206            aexp += arg1_exp - 0x3ffe;
2207            /* This result is inexact.  */
2208            asig1 |= 1;
2209            env->fp_status.float_rounding_mode = save_mode;
2210            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
2211                                                asign ^ arg1_sign, aexp,
2212                                                asig0, asig1, &env->fp_status);
2213        }
2214
2215        env->fp_status.floatx80_rounding_precision = save_prec;
2216    }
2217    fpop(env);
2218    merge_exception_flags(env, old_flags);
2219}
2220
2221void helper_fsqrt(CPUX86State *env)
2222{
2223    uint8_t old_flags = save_exception_flags(env);
2224    if (floatx80_is_neg(ST0)) {
2225        env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
2226        env->fpus |= 0x400;
2227    }
2228    ST0 = floatx80_sqrt(ST0, &env->fp_status);
2229    merge_exception_flags(env, old_flags);
2230}
2231
2232void helper_fsincos(CPUX86State *env)
2233{
2234    double fptemp = floatx80_to_double(env, ST0);
2235
2236    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2237        env->fpus |= 0x400;
2238    } else {
2239        ST0 = double_to_floatx80(env, sin(fptemp));
2240        fpush(env);
2241        ST0 = double_to_floatx80(env, cos(fptemp));
2242        env->fpus &= ~0x400;  /* C2 <-- 0 */
2243        /* the above code is for |arg| < 2**63 only */
2244    }
2245}
2246
2247void helper_frndint(CPUX86State *env)
2248{
2249    uint8_t old_flags = save_exception_flags(env);
2250    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
2251    merge_exception_flags(env, old_flags);
2252}
2253
2254void helper_fscale(CPUX86State *env)
2255{
2256    uint8_t old_flags = save_exception_flags(env);
2257    if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
2258        float_raise(float_flag_invalid, &env->fp_status);
2259        ST0 = floatx80_default_nan(&env->fp_status);
2260    } else if (floatx80_is_any_nan(ST1)) {
2261        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2262            float_raise(float_flag_invalid, &env->fp_status);
2263        }
2264        ST0 = ST1;
2265        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2266            float_raise(float_flag_invalid, &env->fp_status);
2267            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
2268        }
2269    } else if (floatx80_is_infinity(ST1) &&
2270               !floatx80_invalid_encoding(ST0) &&
2271               !floatx80_is_any_nan(ST0)) {
2272        if (floatx80_is_neg(ST1)) {
2273            if (floatx80_is_infinity(ST0)) {
2274                float_raise(float_flag_invalid, &env->fp_status);
2275                ST0 = floatx80_default_nan(&env->fp_status);
2276            } else {
2277                ST0 = (floatx80_is_neg(ST0) ?
2278                       floatx80_chs(floatx80_zero) :
2279                       floatx80_zero);
2280            }
2281        } else {
2282            if (floatx80_is_zero(ST0)) {
2283                float_raise(float_flag_invalid, &env->fp_status);
2284                ST0 = floatx80_default_nan(&env->fp_status);
2285            } else {
2286                ST0 = (floatx80_is_neg(ST0) ?
2287                       floatx80_chs(floatx80_infinity) :
2288                       floatx80_infinity);
2289            }
2290        }
2291    } else {
2292        int n;
2293        FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision;
2294        uint8_t save_flags = get_float_exception_flags(&env->fp_status);
2295        set_float_exception_flags(0, &env->fp_status);
2296        n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
2297        set_float_exception_flags(save_flags, &env->fp_status);
2298        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
2299        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
2300        env->fp_status.floatx80_rounding_precision = save;
2301    }
2302    merge_exception_flags(env, old_flags);
2303}
2304
2305void helper_fsin(CPUX86State *env)
2306{
2307    double fptemp = floatx80_to_double(env, ST0);
2308
2309    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2310        env->fpus |= 0x400;
2311    } else {
2312        ST0 = double_to_floatx80(env, sin(fptemp));
2313        env->fpus &= ~0x400;  /* C2 <-- 0 */
2314        /* the above code is for |arg| < 2**53 only */
2315    }
2316}
2317
2318void helper_fcos(CPUX86State *env)
2319{
2320    double fptemp = floatx80_to_double(env, ST0);
2321
2322    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2323        env->fpus |= 0x400;
2324    } else {
2325        ST0 = double_to_floatx80(env, cos(fptemp));
2326        env->fpus &= ~0x400;  /* C2 <-- 0 */
2327        /* the above code is for |arg| < 2**63 only */
2328    }
2329}
2330
2331void helper_fxam_ST0(CPUX86State *env)
2332{
2333    CPU_LDoubleU temp;
2334    int expdif;
2335
2336    temp.d = ST0;
2337
2338    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
2339    if (SIGND(temp)) {
2340        env->fpus |= 0x200; /* C1 <-- 1 */
2341    }
2342
2343    if (env->fptags[env->fpstt]) {
2344        env->fpus |= 0x4100; /* Empty */
2345        return;
2346    }
2347
2348    expdif = EXPD(temp);
2349    if (expdif == MAXEXPD) {
2350        if (MANTD(temp) == 0x8000000000000000ULL) {
2351            env->fpus |= 0x500; /* Infinity */
2352        } else if (MANTD(temp) & 0x8000000000000000ULL) {
2353            env->fpus |= 0x100; /* NaN */
2354        }
2355    } else if (expdif == 0) {
2356        if (MANTD(temp) == 0) {
2357            env->fpus |=  0x4000; /* Zero */
2358        } else {
2359            env->fpus |= 0x4400; /* Denormal */
2360        }
2361    } else if (MANTD(temp) & 0x8000000000000000ULL) {
2362        env->fpus |= 0x400;
2363    }
2364}
2365
2366static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
2367                      uintptr_t retaddr)
2368{
2369    int fpus, fptag, exp, i;
2370    uint64_t mant;
2371    CPU_LDoubleU tmp;
2372
2373    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
2374    fptag = 0;
2375    for (i = 7; i >= 0; i--) {
2376        fptag <<= 2;
2377        if (env->fptags[i]) {
2378            fptag |= 3;
2379        } else {
2380            tmp.d = env->fpregs[i].d;
2381            exp = EXPD(tmp);
2382            mant = MANTD(tmp);
2383            if (exp == 0 && mant == 0) {
2384                /* zero */
2385                fptag |= 1;
2386            } else if (exp == 0 || exp == MAXEXPD
2387                       || (mant & (1LL << 63)) == 0) {
2388                /* NaNs, infinity, denormal */
2389                fptag |= 2;
2390            }
2391        }
2392    }
2393    if (data32) {
2394        /* 32 bit */
2395        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
2396        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
2397        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
2398        cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */
2399        cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */
2400        cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */
2401        cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */
2402    } else {
2403        /* 16 bit */
2404        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
2405        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
2406        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
2407        cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr);
2408        cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr);
2409        cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr);
2410        cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr);
2411    }
2412}
2413
2414void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
2415{
2416    do_fstenv(env, ptr, data32, GETPC());
2417}
2418
2419static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
2420{
2421    env->fpstt = (fpus >> 11) & 7;
2422    env->fpus = fpus & ~0x3800 & ~FPUS_B;
2423    env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
2424#if !defined(CONFIG_USER_ONLY)
2425    if (!(env->fpus & FPUS_SE)) {
2426        /*
2427         * Here the processor deasserts FERR#; in response, the chipset deasserts
2428         * IGNNE#.
2429         */
2430        cpu_clear_ignne();
2431    }
2432#endif
2433}
2434
2435static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
2436                      uintptr_t retaddr)
2437{
2438    int i, fpus, fptag;
2439
2440    if (data32) {
2441        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
2442        fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
2443        fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
2444    } else {
2445        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
2446        fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
2447        fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
2448    }
2449    cpu_set_fpus(env, fpus);
2450    for (i = 0; i < 8; i++) {
2451        env->fptags[i] = ((fptag & 3) == 3);
2452        fptag >>= 2;
2453    }
2454}
2455
2456void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
2457{
2458    do_fldenv(env, ptr, data32, GETPC());
2459}
2460
2461static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
2462                     uintptr_t retaddr)
2463{
2464    floatx80 tmp;
2465    int i;
2466
2467    do_fstenv(env, ptr, data32, retaddr);
2468
2469    ptr += (14 << data32);
2470    for (i = 0; i < 8; i++) {
2471        tmp = ST(i);
2472        do_fstt(env, tmp, ptr, retaddr);
2473        ptr += 10;
2474    }
2475
2476    do_fninit(env);
2477}
2478
2479void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
2480{
2481    do_fsave(env, ptr, data32, GETPC());
2482}
2483
2484static void do_frstor(CPUX86State *env, target_ulong ptr, int data32,
2485                      uintptr_t retaddr)
2486{
2487    floatx80 tmp;
2488    int i;
2489
2490    do_fldenv(env, ptr, data32, retaddr);
2491    ptr += (14 << data32);
2492
2493    for (i = 0; i < 8; i++) {
2494        tmp = do_fldt(env, ptr, retaddr);
2495        ST(i) = tmp;
2496        ptr += 10;
2497    }
2498}
2499
2500void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
2501{
2502    do_frstor(env, ptr, data32, GETPC());
2503}
2504
#if defined(CONFIG_USER_ONLY)
/*
 * User-mode-only entry points to the FSAVE/FRSTOR implementations.
 * They pass 0 as the return address to the guest memory accessors.
 */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    const uintptr_t no_retaddr = 0;

    do_fsave(env, ptr, data32, no_retaddr);
}

void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    const uintptr_t no_retaddr = 0;

    do_frstor(env, ptr, data32, no_retaddr);
}
#endif
2516
/* Byte offset of member X within X86XSaveArea; #undef'd after the XSAVE code. */
#define XO(X)  offsetof(X86XSaveArea, X)
2518
2519static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2520{
2521    int fpus, fptag, i;
2522    target_ulong addr;
2523
2524    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
2525    fptag = 0;
2526    for (i = 0; i < 8; i++) {
2527        fptag |= (env->fptags[i] << i);
2528    }
2529
2530    cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
2531    cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
2532    cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
2533
2534    /* In 32-bit mode this is eip, sel, dp, sel.
2535       In 64-bit mode this is rip, rdp.
2536       But in either case we don't write actual data, just zeros.  */
2537    cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
2538    cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
2539
2540    addr = ptr + XO(legacy.fpregs);
2541    for (i = 0; i < 8; i++) {
2542        floatx80 tmp = ST(i);
2543        do_fstt(env, tmp, addr, ra);
2544        addr += 16;
2545    }
2546}
2547
2548static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2549{
2550    update_mxcsr_from_sse_status(env);
2551    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
2552    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
2553}
2554
2555static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2556{
2557    int i, nb_xmm_regs;
2558    target_ulong addr;
2559
2560    if (env->hflags & HF_CS64_MASK) {
2561        nb_xmm_regs = 16;
2562    } else {
2563        nb_xmm_regs = 8;
2564    }
2565
2566    addr = ptr + XO(legacy.xmm_regs);
2567    for (i = 0; i < nb_xmm_regs; i++) {
2568        cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
2569        cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
2570        addr += 16;
2571    }
2572}
2573
2574static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2575{
2576    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
2577    int i;
2578
2579    for (i = 0; i < 4; i++, addr += 16) {
2580        cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
2581        cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
2582    }
2583}
2584
2585static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2586{
2587    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
2588                    env->bndcs_regs.cfgu, ra);
2589    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
2590                    env->bndcs_regs.sts, ra);
2591}
2592
2593static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2594{
2595    cpu_stq_data_ra(env, ptr, env->pkru, ra);
2596}
2597
2598static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2599{
2600    /* The operand must be 16 byte aligned */
2601    if (ptr & 0xf) {
2602        raise_exception_ra(env, EXCP0D_GPF, ra);
2603    }
2604
2605    do_xsave_fpu(env, ptr, ra);
2606
2607    if (env->cr[4] & CR4_OSFXSR_MASK) {
2608        do_xsave_mxcsr(env, ptr, ra);
2609        /* Fast FXSAVE leaves out the XMM registers */
2610        if (!(env->efer & MSR_EFER_FFXSR)
2611            || (env->hflags & HF_CPL_MASK)
2612            || !(env->hflags & HF_LMA_MASK)) {
2613            do_xsave_sse(env, ptr, ra);
2614        }
2615    }
2616}
2617
2618void helper_fxsave(CPUX86State *env, target_ulong ptr)
2619{
2620    do_fxsave(env, ptr, GETPC());
2621}
2622
2623static uint64_t get_xinuse(CPUX86State *env)
2624{
2625    uint64_t inuse = -1;
2626
2627    /* For the most part, we don't track XINUSE.  We could calculate it
2628       here for all components, but it's probably less work to simply
2629       indicate in use.  That said, the state of BNDREGS is important
2630       enough to track in HFLAGS, so we might as well use that here.  */
2631    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
2632       inuse &= ~XSTATE_BNDREGS_MASK;
2633    }
2634    return inuse;
2635}
2636
2637static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
2638                     uint64_t inuse, uint64_t opt, uintptr_t ra)
2639{
2640    uint64_t old_bv, new_bv;
2641
2642    /* The OS must have enabled XSAVE.  */
2643    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2644        raise_exception_ra(env, EXCP06_ILLOP, ra);
2645    }
2646
2647    /* The operand must be 64 byte aligned.  */
2648    if (ptr & 63) {
2649        raise_exception_ra(env, EXCP0D_GPF, ra);
2650    }
2651
2652    /* Never save anything not enabled by XCR0.  */
2653    rfbm &= env->xcr0;
2654    opt &= rfbm;
2655
2656    if (opt & XSTATE_FP_MASK) {
2657        do_xsave_fpu(env, ptr, ra);
2658    }
2659    if (rfbm & XSTATE_SSE_MASK) {
2660        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
2661        do_xsave_mxcsr(env, ptr, ra);
2662    }
2663    if (opt & XSTATE_SSE_MASK) {
2664        do_xsave_sse(env, ptr, ra);
2665    }
2666    if (opt & XSTATE_BNDREGS_MASK) {
2667        do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
2668    }
2669    if (opt & XSTATE_BNDCSR_MASK) {
2670        do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
2671    }
2672    if (opt & XSTATE_PKRU_MASK) {
2673        do_xsave_pkru(env, ptr + XO(pkru_state), ra);
2674    }
2675
2676    /* Update the XSTATE_BV field.  */
2677    old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
2678    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
2679    cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
2680}
2681
2682void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2683{
2684    do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
2685}
2686
2687void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2688{
2689    uint64_t inuse = get_xinuse(env);
2690    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
2691}
2692
2693static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2694{
2695    int i, fpuc, fpus, fptag;
2696    target_ulong addr;
2697
2698    fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
2699    fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
2700    fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
2701    cpu_set_fpuc(env, fpuc);
2702    cpu_set_fpus(env, fpus);
2703    fptag ^= 0xff;
2704    for (i = 0; i < 8; i++) {
2705        env->fptags[i] = ((fptag >> i) & 1);
2706    }
2707
2708    addr = ptr + XO(legacy.fpregs);
2709    for (i = 0; i < 8; i++) {
2710        floatx80 tmp = do_fldt(env, addr, ra);
2711        ST(i) = tmp;
2712        addr += 16;
2713    }
2714}
2715
2716static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2717{
2718    cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
2719}
2720
2721static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2722{
2723    int i, nb_xmm_regs;
2724    target_ulong addr;
2725
2726    if (env->hflags & HF_CS64_MASK) {
2727        nb_xmm_regs = 16;
2728    } else {
2729        nb_xmm_regs = 8;
2730    }
2731
2732    addr = ptr + XO(legacy.xmm_regs);
2733    for (i = 0; i < nb_xmm_regs; i++) {
2734        env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
2735        env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
2736        addr += 16;
2737    }
2738}
2739
2740static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2741{
2742    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
2743    int i;
2744
2745    for (i = 0; i < 4; i++, addr += 16) {
2746        env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
2747        env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
2748    }
2749}
2750
2751static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2752{
2753    /* FIXME: Extend highest implemented bit of linear address.  */
2754    env->bndcs_regs.cfgu
2755        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
2756    env->bndcs_regs.sts
2757        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
2758}
2759
2760static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2761{
2762    env->pkru = cpu_ldq_data_ra(env, ptr, ra);
2763}
2764
2765static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2766{
2767    /* The operand must be 16 byte aligned */
2768    if (ptr & 0xf) {
2769        raise_exception_ra(env, EXCP0D_GPF, ra);
2770    }
2771
2772    do_xrstor_fpu(env, ptr, ra);
2773
2774    if (env->cr[4] & CR4_OSFXSR_MASK) {
2775        do_xrstor_mxcsr(env, ptr, ra);
2776        /* Fast FXRSTOR leaves out the XMM registers */
2777        if (!(env->efer & MSR_EFER_FFXSR)
2778            || (env->hflags & HF_CPL_MASK)
2779            || !(env->hflags & HF_LMA_MASK)) {
2780            do_xrstor_sse(env, ptr, ra);
2781        }
2782    }
2783}
2784
2785void helper_fxrstor(CPUX86State *env, target_ulong ptr)
2786{
2787    do_fxrstor(env, ptr, GETPC());
2788}
2789
#if defined(CONFIG_USER_ONLY)
/*
 * User-mode-only entry points to the FXSAVE/FXRSTOR implementations.
 * They pass 0 as the return address to the guest memory accessors.
 */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    const uintptr_t no_retaddr = 0;

    do_fxsave(env, ptr, no_retaddr);
}

void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    const uintptr_t no_retaddr = 0;

    do_fxrstor(env, ptr, no_retaddr);
}
#endif
2801
2802void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2803{
2804    uintptr_t ra = GETPC();
2805    uint64_t xstate_bv, xcomp_bv, reserve0;
2806
2807    rfbm &= env->xcr0;
2808
2809    /* The OS must have enabled XSAVE.  */
2810    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2811        raise_exception_ra(env, EXCP06_ILLOP, ra);
2812    }
2813
2814    /* The operand must be 64 byte aligned.  */
2815    if (ptr & 63) {
2816        raise_exception_ra(env, EXCP0D_GPF, ra);
2817    }
2818
2819    xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
2820
2821    if ((int64_t)xstate_bv < 0) {
2822        /* FIXME: Compact form.  */
2823        raise_exception_ra(env, EXCP0D_GPF, ra);
2824    }
2825
2826    /* Standard form.  */
2827
2828    /* The XSTATE_BV field must not set bits not present in XCR0.  */
2829    if (xstate_bv & ~env->xcr0) {
2830        raise_exception_ra(env, EXCP0D_GPF, ra);
2831    }
2832
2833    /* The XCOMP_BV field must be zero.  Note that, as of the April 2016
2834       revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
2835       describes only XCOMP_BV, but the description of the standard form
2836       of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
2837       includes the next 64-bit field.  */
2838    xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
2839    reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
2840    if (xcomp_bv || reserve0) {
2841        raise_exception_ra(env, EXCP0D_GPF, ra);
2842    }
2843
2844    if (rfbm & XSTATE_FP_MASK) {
2845        if (xstate_bv & XSTATE_FP_MASK) {
2846            do_xrstor_fpu(env, ptr, ra);
2847        } else {
2848            do_fninit(env);
2849            memset(env->fpregs, 0, sizeof(env->fpregs));
2850        }
2851    }
2852    if (rfbm & XSTATE_SSE_MASK) {
2853        /* Note that the standard form of XRSTOR loads MXCSR from memory
2854           whether or not the XSTATE_BV bit is set.  */
2855        do_xrstor_mxcsr(env, ptr, ra);
2856        if (xstate_bv & XSTATE_SSE_MASK) {
2857            do_xrstor_sse(env, ptr, ra);
2858        } else {
2859            /* ??? When AVX is implemented, we may have to be more
2860               selective in the clearing.  */
2861            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
2862        }
2863    }
2864    if (rfbm & XSTATE_BNDREGS_MASK) {
2865        if (xstate_bv & XSTATE_BNDREGS_MASK) {
2866            do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
2867            env->hflags |= HF_MPX_IU_MASK;
2868        } else {
2869            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
2870            env->hflags &= ~HF_MPX_IU_MASK;
2871        }
2872    }
2873    if (rfbm & XSTATE_BNDCSR_MASK) {
2874        if (xstate_bv & XSTATE_BNDCSR_MASK) {
2875            do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
2876        } else {
2877            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
2878        }
2879        cpu_sync_bndcs_hflags(env);
2880    }
2881    if (rfbm & XSTATE_PKRU_MASK) {
2882        uint64_t old_pkru = env->pkru;
2883        if (xstate_bv & XSTATE_PKRU_MASK) {
2884            do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
2885        } else {
2886            env->pkru = 0;
2887        }
2888        if (env->pkru != old_pkru) {
2889            CPUState *cs = env_cpu(env);
2890            tlb_flush(cs);
2891        }
2892    }
2893}
2894
2895#undef XO
2896
2897uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
2898{
2899    /* The OS must have enabled XSAVE.  */
2900    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2901        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
2902    }
2903
2904    switch (ecx) {
2905    case 0:
2906        return env->xcr0;
2907    case 1:
2908        if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
2909            return env->xcr0 & get_xinuse(env);
2910        }
2911        break;
2912    }
2913    raise_exception_ra(env, EXCP0D_GPF, GETPC());
2914}
2915
2916void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
2917{
2918    uint32_t dummy, ena_lo, ena_hi;
2919    uint64_t ena;
2920
2921    /* The OS must have enabled XSAVE.  */
2922    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2923        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
2924    }
2925
2926    /* Only XCR0 is defined at present; the FPU may not be disabled.  */
2927    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
2928        goto do_gpf;
2929    }
2930
2931    /* Disallow enabling unimplemented features.  */
2932    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
2933    ena = ((uint64_t)ena_hi << 32) | ena_lo;
2934    if (mask & ~ena) {
2935        goto do_gpf;
2936    }
2937
2938    /* Disallow enabling only half of MPX.  */
2939    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
2940        & XSTATE_BNDCSR_MASK) {
2941        goto do_gpf;
2942    }
2943
2944    env->xcr0 = mask;
2945    cpu_sync_bndcs_hflags(env);
2946    return;
2947
2948 do_gpf:
2949    raise_exception_ra(env, EXCP0D_GPF, GETPC());
2950}
2951
2952/* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
2954
/* MXCSR control bits, decoded by update_mxcsr_status() */
#define SSE_DAZ             0x0040  /* flush input denormals to zero */
#define SSE_RC_MASK         0x6000  /* rounding-control field */
#define SSE_RC_NEAR         0x0000  /* round to nearest even */
#define SSE_RC_DOWN         0x2000  /* round down */
#define SSE_RC_UP           0x4000  /* round up */
#define SSE_RC_CHOP         0x6000  /* round toward zero */
#define SSE_FZ              0x8000  /* flush results to zero */
2962
2963void update_mxcsr_status(CPUX86State *env)
2964{
2965    uint32_t mxcsr = env->mxcsr;
2966    int rnd_type;
2967
2968    /* set rounding mode */
2969    switch (mxcsr & SSE_RC_MASK) {
2970    default:
2971    case SSE_RC_NEAR:
2972        rnd_type = float_round_nearest_even;
2973        break;
2974    case SSE_RC_DOWN:
2975        rnd_type = float_round_down;
2976        break;
2977    case SSE_RC_UP:
2978        rnd_type = float_round_up;
2979        break;
2980    case SSE_RC_CHOP:
2981        rnd_type = float_round_to_zero;
2982        break;
2983    }
2984    set_float_rounding_mode(rnd_type, &env->sse_status);
2985
2986    /* Set exception flags.  */
2987    set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
2988                              (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
2989                              (mxcsr & FPUS_OE ? float_flag_overflow : 0) |
2990                              (mxcsr & FPUS_UE ? float_flag_underflow : 0) |
2991                              (mxcsr & FPUS_PE ? float_flag_inexact : 0),
2992                              &env->sse_status);
2993
2994    /* set denormals are zero */
2995    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
2996
2997    /* set flush to zero */
2998    set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
2999}
3000
3001void update_mxcsr_from_sse_status(CPUX86State *env)
3002{
3003    uint8_t flags = get_float_exception_flags(&env->sse_status);
3004    /*
3005     * The MXCSR denormal flag has opposite semantics to
3006     * float_flag_input_denormal (the softfloat code sets that flag
3007     * only when flushing input denormals to zero, but SSE sets it
3008     * only when not flushing them to zero), so is not converted
3009     * here.
3010     */
3011    env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
3012                   (flags & float_flag_divbyzero ? FPUS_ZE : 0) |
3013                   (flags & float_flag_overflow ? FPUS_OE : 0) |
3014                   (flags & float_flag_underflow ? FPUS_UE : 0) |
3015                   (flags & float_flag_inexact ? FPUS_PE : 0) |
3016                   (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE :
3017                    0));
3018}
3019
3020void helper_update_mxcsr(CPUX86State *env)
3021{
3022    update_mxcsr_from_sse_status(env);
3023}
3024
3025void helper_ldmxcsr(CPUX86State *env, uint32_t val)
3026{
3027    cpu_set_mxcsr(env, val);
3028}
3029
3030void helper_enter_mmx(CPUX86State *env)
3031{
3032    env->fpstt = 0;
3033    *(uint32_t *)(env->fptags) = 0;
3034    *(uint32_t *)(env->fptags + 4) = 0;
3035}
3036
3037void helper_emms(CPUX86State *env)
3038{
3039    /* set to empty state */
3040    *(uint32_t *)(env->fptags) = 0x01010101;
3041    *(uint32_t *)(env->fptags + 4) = 0x01010101;
3042}
3043
3044/* XXX: suppress */
3045void helper_movq(CPUX86State *env, void *d, void *s)
3046{
3047    *(uint64_t *)d = *(uint64_t *)s;
3048}
3049
3050#define SHIFT 0
3051#include "ops_sse.h"
3052
3053#define SHIFT 1
3054#include "ops_sse.h"
3055