qemu/target/i386/tcg/fpu_helper.c
<<
>>
Prefs
   1/*
   2 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include <math.h>
  22#include "cpu.h"
  23#include "tcg-cpu.h"
  24#include "exec/helper-proto.h"
  25#include "fpu/softfloat.h"
  26#include "fpu/softfloat-macros.h"
  27#include "helper-tcg.h"
  28
  29/* float macros */
  30#define FT0    (env->ft0)
  31#define ST0    (env->fpregs[env->fpstt].d)
  32#define ST(n)  (env->fpregs[(env->fpstt + (n)) & 7].d)
  33#define ST1    ST(1)
  34
  35#define FPU_RC_SHIFT        10
  36#define FPU_RC_MASK         (3 << FPU_RC_SHIFT)
  37#define FPU_RC_NEAR         0x000
  38#define FPU_RC_DOWN         0x400
  39#define FPU_RC_UP           0x800
  40#define FPU_RC_CHOP         0xc00
  41
  42#define MAXTAN 9223372036854775808.0
  43
  44/* the following deal with x86 long double-precision numbers */
  45#define MAXEXPD 0x7fff
  46#define EXPBIAS 16383
  47#define EXPD(fp)        (fp.l.upper & 0x7fff)
  48#define SIGND(fp)       ((fp.l.upper) & 0x8000)
  49#define MANTD(fp)       (fp.l.lower)
  50#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
  51
  52#define FPUS_IE (1 << 0)
  53#define FPUS_DE (1 << 1)
  54#define FPUS_ZE (1 << 2)
  55#define FPUS_OE (1 << 3)
  56#define FPUS_UE (1 << 4)
  57#define FPUS_PE (1 << 5)
  58#define FPUS_SF (1 << 6)
  59#define FPUS_SE (1 << 7)
  60#define FPUS_B  (1 << 15)
  61
  62#define FPUC_EM 0x3f
  63
  64#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
  65#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
  66#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
  67#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
  68#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
  69#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
  70#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
  71#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
  72
  73static inline void fpush(CPUX86State *env)
  74{
  75    env->fpstt = (env->fpstt - 1) & 7;
  76    env->fptags[env->fpstt] = 0; /* validate stack entry */
  77}
  78
  79static inline void fpop(CPUX86State *env)
  80{
  81    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
  82    env->fpstt = (env->fpstt + 1) & 7;
  83}
  84
  85static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr)
  86{
  87    CPU_LDoubleU temp;
  88
  89    temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
  90    temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
  91    return temp.d;
  92}
  93
  94static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
  95                    uintptr_t retaddr)
  96{
  97    CPU_LDoubleU temp;
  98
  99    temp.d = f;
 100    cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
 101    cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
 102}
 103
 104/* x87 FPU helpers */
 105
 106static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
 107{
 108    union {
 109        float64 f64;
 110        double d;
 111    } u;
 112
 113    u.f64 = floatx80_to_float64(a, &env->fp_status);
 114    return u.d;
 115}
 116
 117static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
 118{
 119    union {
 120        float64 f64;
 121        double d;
 122    } u;
 123
 124    u.d = a;
 125    return float64_to_floatx80(u.f64, &env->fp_status);
 126}
 127
 128static void fpu_set_exception(CPUX86State *env, int mask)
 129{
 130    env->fpus |= mask;
 131    if (env->fpus & (~env->fpuc & FPUC_EM)) {
 132        env->fpus |= FPUS_SE | FPUS_B;
 133    }
 134}
 135
 136static inline uint8_t save_exception_flags(CPUX86State *env)
 137{
 138    uint8_t old_flags = get_float_exception_flags(&env->fp_status);
 139    set_float_exception_flags(0, &env->fp_status);
 140    return old_flags;
 141}
 142
 143static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
 144{
 145    uint8_t new_flags = get_float_exception_flags(&env->fp_status);
 146    float_raise(old_flags, &env->fp_status);
 147    fpu_set_exception(env,
 148                      ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
 149                       (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
 150                       (new_flags & float_flag_overflow ? FPUS_OE : 0) |
 151                       (new_flags & float_flag_underflow ? FPUS_UE : 0) |
 152                       (new_flags & float_flag_inexact ? FPUS_PE : 0) |
 153                       (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
 154}
 155
 156static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
 157{
 158    uint8_t old_flags = save_exception_flags(env);
 159    floatx80 ret = floatx80_div(a, b, &env->fp_status);
 160    merge_exception_flags(env, old_flags);
 161    return ret;
 162}
 163
 164static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
 165{
 166    if (env->cr[0] & CR0_NE_MASK) {
 167        raise_exception_ra(env, EXCP10_COPR, retaddr);
 168    }
 169#if !defined(CONFIG_USER_ONLY)
 170    else {
 171        fpu_check_raise_ferr_irq(env);
 172    }
 173#endif
 174}
 175
 176void helper_flds_FT0(CPUX86State *env, uint32_t val)
 177{
 178    uint8_t old_flags = save_exception_flags(env);
 179    union {
 180        float32 f;
 181        uint32_t i;
 182    } u;
 183
 184    u.i = val;
 185    FT0 = float32_to_floatx80(u.f, &env->fp_status);
 186    merge_exception_flags(env, old_flags);
 187}
 188
 189void helper_fldl_FT0(CPUX86State *env, uint64_t val)
 190{
 191    uint8_t old_flags = save_exception_flags(env);
 192    union {
 193        float64 f;
 194        uint64_t i;
 195    } u;
 196
 197    u.i = val;
 198    FT0 = float64_to_floatx80(u.f, &env->fp_status);
 199    merge_exception_flags(env, old_flags);
 200}
 201
 202void helper_fildl_FT0(CPUX86State *env, int32_t val)
 203{
 204    FT0 = int32_to_floatx80(val, &env->fp_status);
 205}
 206
 207void helper_flds_ST0(CPUX86State *env, uint32_t val)
 208{
 209    uint8_t old_flags = save_exception_flags(env);
 210    int new_fpstt;
 211    union {
 212        float32 f;
 213        uint32_t i;
 214    } u;
 215
 216    new_fpstt = (env->fpstt - 1) & 7;
 217    u.i = val;
 218    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
 219    env->fpstt = new_fpstt;
 220    env->fptags[new_fpstt] = 0; /* validate stack entry */
 221    merge_exception_flags(env, old_flags);
 222}
 223
 224void helper_fldl_ST0(CPUX86State *env, uint64_t val)
 225{
 226    uint8_t old_flags = save_exception_flags(env);
 227    int new_fpstt;
 228    union {
 229        float64 f;
 230        uint64_t i;
 231    } u;
 232
 233    new_fpstt = (env->fpstt - 1) & 7;
 234    u.i = val;
 235    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
 236    env->fpstt = new_fpstt;
 237    env->fptags[new_fpstt] = 0; /* validate stack entry */
 238    merge_exception_flags(env, old_flags);
 239}
 240
 241static FloatX80RoundPrec tmp_maximise_precision(float_status *st)
 242{
 243    FloatX80RoundPrec old = get_floatx80_rounding_precision(st);
 244    set_floatx80_rounding_precision(floatx80_precision_x, st);
 245    return old;
 246}
 247
 248void helper_fildl_ST0(CPUX86State *env, int32_t val)
 249{
 250    int new_fpstt;
 251    FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);
 252
 253    new_fpstt = (env->fpstt - 1) & 7;
 254    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
 255    env->fpstt = new_fpstt;
 256    env->fptags[new_fpstt] = 0; /* validate stack entry */
 257
 258    set_floatx80_rounding_precision(old, &env->fp_status);
 259}
 260
 261void helper_fildll_ST0(CPUX86State *env, int64_t val)
 262{
 263    int new_fpstt;
 264    FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);
 265
 266    new_fpstt = (env->fpstt - 1) & 7;
 267    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
 268    env->fpstt = new_fpstt;
 269    env->fptags[new_fpstt] = 0; /* validate stack entry */
 270
 271    set_floatx80_rounding_precision(old, &env->fp_status);
 272}
 273
 274uint32_t helper_fsts_ST0(CPUX86State *env)
 275{
 276    uint8_t old_flags = save_exception_flags(env);
 277    union {
 278        float32 f;
 279        uint32_t i;
 280    } u;
 281
 282    u.f = floatx80_to_float32(ST0, &env->fp_status);
 283    merge_exception_flags(env, old_flags);
 284    return u.i;
 285}
 286
 287uint64_t helper_fstl_ST0(CPUX86State *env)
 288{
 289    uint8_t old_flags = save_exception_flags(env);
 290    union {
 291        float64 f;
 292        uint64_t i;
 293    } u;
 294
 295    u.f = floatx80_to_float64(ST0, &env->fp_status);
 296    merge_exception_flags(env, old_flags);
 297    return u.i;
 298}
 299
 300int32_t helper_fist_ST0(CPUX86State *env)
 301{
 302    uint8_t old_flags = save_exception_flags(env);
 303    int32_t val;
 304
 305    val = floatx80_to_int32(ST0, &env->fp_status);
 306    if (val != (int16_t)val) {
 307        set_float_exception_flags(float_flag_invalid, &env->fp_status);
 308        val = -32768;
 309    }
 310    merge_exception_flags(env, old_flags);
 311    return val;
 312}
 313
 314int32_t helper_fistl_ST0(CPUX86State *env)
 315{
 316    uint8_t old_flags = save_exception_flags(env);
 317    int32_t val;
 318
 319    val = floatx80_to_int32(ST0, &env->fp_status);
 320    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 321        val = 0x80000000;
 322    }
 323    merge_exception_flags(env, old_flags);
 324    return val;
 325}
 326
 327int64_t helper_fistll_ST0(CPUX86State *env)
 328{
 329    uint8_t old_flags = save_exception_flags(env);
 330    int64_t val;
 331
 332    val = floatx80_to_int64(ST0, &env->fp_status);
 333    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 334        val = 0x8000000000000000ULL;
 335    }
 336    merge_exception_flags(env, old_flags);
 337    return val;
 338}
 339
 340int32_t helper_fistt_ST0(CPUX86State *env)
 341{
 342    uint8_t old_flags = save_exception_flags(env);
 343    int32_t val;
 344
 345    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 346    if (val != (int16_t)val) {
 347        set_float_exception_flags(float_flag_invalid, &env->fp_status);
 348        val = -32768;
 349    }
 350    merge_exception_flags(env, old_flags);
 351    return val;
 352}
 353
 354int32_t helper_fisttl_ST0(CPUX86State *env)
 355{
 356    uint8_t old_flags = save_exception_flags(env);
 357    int32_t val;
 358
 359    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 360    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 361        val = 0x80000000;
 362    }
 363    merge_exception_flags(env, old_flags);
 364    return val;
 365}
 366
 367int64_t helper_fisttll_ST0(CPUX86State *env)
 368{
 369    uint8_t old_flags = save_exception_flags(env);
 370    int64_t val;
 371
 372    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
 373    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 374        val = 0x8000000000000000ULL;
 375    }
 376    merge_exception_flags(env, old_flags);
 377    return val;
 378}
 379
 380void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
 381{
 382    int new_fpstt;
 383
 384    new_fpstt = (env->fpstt - 1) & 7;
 385    env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC());
 386    env->fpstt = new_fpstt;
 387    env->fptags[new_fpstt] = 0; /* validate stack entry */
 388}
 389
 390void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
 391{
 392    do_fstt(env, ST0, ptr, GETPC());
 393}
 394
 395void helper_fpush(CPUX86State *env)
 396{
 397    fpush(env);
 398}
 399
 400void helper_fpop(CPUX86State *env)
 401{
 402    fpop(env);
 403}
 404
 405void helper_fdecstp(CPUX86State *env)
 406{
 407    env->fpstt = (env->fpstt - 1) & 7;
 408    env->fpus &= ~0x4700;
 409}
 410
 411void helper_fincstp(CPUX86State *env)
 412{
 413    env->fpstt = (env->fpstt + 1) & 7;
 414    env->fpus &= ~0x4700;
 415}
 416
 417/* FPU move */
 418
 419void helper_ffree_STN(CPUX86State *env, int st_index)
 420{
 421    env->fptags[(env->fpstt + st_index) & 7] = 1;
 422}
 423
 424void helper_fmov_ST0_FT0(CPUX86State *env)
 425{
 426    ST0 = FT0;
 427}
 428
 429void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
 430{
 431    FT0 = ST(st_index);
 432}
 433
 434void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
 435{
 436    ST0 = ST(st_index);
 437}
 438
 439void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
 440{
 441    ST(st_index) = ST0;
 442}
 443
 444void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
 445{
 446    floatx80 tmp;
 447
 448    tmp = ST(st_index);
 449    ST(st_index) = ST0;
 450    ST0 = tmp;
 451}
 452
 453/* FPU operations */
 454
 455static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
 456
 457void helper_fcom_ST0_FT0(CPUX86State *env)
 458{
 459    uint8_t old_flags = save_exception_flags(env);
 460    FloatRelation ret;
 461
 462    ret = floatx80_compare(ST0, FT0, &env->fp_status);
 463    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 464    merge_exception_flags(env, old_flags);
 465}
 466
 467void helper_fucom_ST0_FT0(CPUX86State *env)
 468{
 469    uint8_t old_flags = save_exception_flags(env);
 470    FloatRelation ret;
 471
 472    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 473    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 474    merge_exception_flags(env, old_flags);
 475}
 476
 477static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
 478
 479void helper_fcomi_ST0_FT0(CPUX86State *env)
 480{
 481    uint8_t old_flags = save_exception_flags(env);
 482    int eflags;
 483    FloatRelation ret;
 484
 485    ret = floatx80_compare(ST0, FT0, &env->fp_status);
 486    eflags = cpu_cc_compute_all(env, CC_OP);
 487    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 488    CC_SRC = eflags;
 489    merge_exception_flags(env, old_flags);
 490}
 491
 492void helper_fucomi_ST0_FT0(CPUX86State *env)
 493{
 494    uint8_t old_flags = save_exception_flags(env);
 495    int eflags;
 496    FloatRelation ret;
 497
 498    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 499    eflags = cpu_cc_compute_all(env, CC_OP);
 500    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 501    CC_SRC = eflags;
 502    merge_exception_flags(env, old_flags);
 503}
 504
 505void helper_fadd_ST0_FT0(CPUX86State *env)
 506{
 507    uint8_t old_flags = save_exception_flags(env);
 508    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
 509    merge_exception_flags(env, old_flags);
 510}
 511
 512void helper_fmul_ST0_FT0(CPUX86State *env)
 513{
 514    uint8_t old_flags = save_exception_flags(env);
 515    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
 516    merge_exception_flags(env, old_flags);
 517}
 518
 519void helper_fsub_ST0_FT0(CPUX86State *env)
 520{
 521    uint8_t old_flags = save_exception_flags(env);
 522    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
 523    merge_exception_flags(env, old_flags);
 524}
 525
 526void helper_fsubr_ST0_FT0(CPUX86State *env)
 527{
 528    uint8_t old_flags = save_exception_flags(env);
 529    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
 530    merge_exception_flags(env, old_flags);
 531}
 532
 533void helper_fdiv_ST0_FT0(CPUX86State *env)
 534{
 535    ST0 = helper_fdiv(env, ST0, FT0);
 536}
 537
 538void helper_fdivr_ST0_FT0(CPUX86State *env)
 539{
 540    ST0 = helper_fdiv(env, FT0, ST0);
 541}
 542
 543/* fp operations between STN and ST0 */
 544
 545void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
 546{
 547    uint8_t old_flags = save_exception_flags(env);
 548    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
 549    merge_exception_flags(env, old_flags);
 550}
 551
 552void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
 553{
 554    uint8_t old_flags = save_exception_flags(env);
 555    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
 556    merge_exception_flags(env, old_flags);
 557}
 558
 559void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
 560{
 561    uint8_t old_flags = save_exception_flags(env);
 562    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
 563    merge_exception_flags(env, old_flags);
 564}
 565
 566void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
 567{
 568    uint8_t old_flags = save_exception_flags(env);
 569    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
 570    merge_exception_flags(env, old_flags);
 571}
 572
 573void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
 574{
 575    floatx80 *p;
 576
 577    p = &ST(st_index);
 578    *p = helper_fdiv(env, *p, ST0);
 579}
 580
 581void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
 582{
 583    floatx80 *p;
 584
 585    p = &ST(st_index);
 586    *p = helper_fdiv(env, ST0, *p);
 587}
 588
 589/* misc FPU operations */
 590void helper_fchs_ST0(CPUX86State *env)
 591{
 592    ST0 = floatx80_chs(ST0);
 593}
 594
 595void helper_fabs_ST0(CPUX86State *env)
 596{
 597    ST0 = floatx80_abs(ST0);
 598}
 599
 600void helper_fld1_ST0(CPUX86State *env)
 601{
 602    ST0 = floatx80_one;
 603}
 604
 605void helper_fldl2t_ST0(CPUX86State *env)
 606{
 607    switch (env->fpuc & FPU_RC_MASK) {
 608    case FPU_RC_UP:
 609        ST0 = floatx80_l2t_u;
 610        break;
 611    default:
 612        ST0 = floatx80_l2t;
 613        break;
 614    }
 615}
 616
 617void helper_fldl2e_ST0(CPUX86State *env)
 618{
 619    switch (env->fpuc & FPU_RC_MASK) {
 620    case FPU_RC_DOWN:
 621    case FPU_RC_CHOP:
 622        ST0 = floatx80_l2e_d;
 623        break;
 624    default:
 625        ST0 = floatx80_l2e;
 626        break;
 627    }
 628}
 629
 630void helper_fldpi_ST0(CPUX86State *env)
 631{
 632    switch (env->fpuc & FPU_RC_MASK) {
 633    case FPU_RC_DOWN:
 634    case FPU_RC_CHOP:
 635        ST0 = floatx80_pi_d;
 636        break;
 637    default:
 638        ST0 = floatx80_pi;
 639        break;
 640    }
 641}
 642
 643void helper_fldlg2_ST0(CPUX86State *env)
 644{
 645    switch (env->fpuc & FPU_RC_MASK) {
 646    case FPU_RC_DOWN:
 647    case FPU_RC_CHOP:
 648        ST0 = floatx80_lg2_d;
 649        break;
 650    default:
 651        ST0 = floatx80_lg2;
 652        break;
 653    }
 654}
 655
 656void helper_fldln2_ST0(CPUX86State *env)
 657{
 658    switch (env->fpuc & FPU_RC_MASK) {
 659    case FPU_RC_DOWN:
 660    case FPU_RC_CHOP:
 661        ST0 = floatx80_ln2_d;
 662        break;
 663    default:
 664        ST0 = floatx80_ln2;
 665        break;
 666    }
 667}
 668
 669void helper_fldz_ST0(CPUX86State *env)
 670{
 671    ST0 = floatx80_zero;
 672}
 673
 674void helper_fldz_FT0(CPUX86State *env)
 675{
 676    FT0 = floatx80_zero;
 677}
 678
 679uint32_t helper_fnstsw(CPUX86State *env)
 680{
 681    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 682}
 683
 684uint32_t helper_fnstcw(CPUX86State *env)
 685{
 686    return env->fpuc;
 687}
 688
 689static void set_x86_rounding_mode(unsigned mode, float_status *status)
 690{
 691    static FloatRoundMode x86_round_mode[4] = {
 692        float_round_nearest_even,
 693        float_round_down,
 694        float_round_up,
 695        float_round_to_zero
 696    };
 697    assert(mode < ARRAY_SIZE(x86_round_mode));
 698    set_float_rounding_mode(x86_round_mode[mode], status);
 699}
 700
 701void update_fp_status(CPUX86State *env)
 702{
 703    int rnd_mode;
 704    FloatX80RoundPrec rnd_prec;
 705
 706    /* set rounding mode */
 707    rnd_mode = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT;
 708    set_x86_rounding_mode(rnd_mode, &env->fp_status);
 709
 710    switch ((env->fpuc >> 8) & 3) {
 711    case 0:
 712        rnd_prec = floatx80_precision_s;
 713        break;
 714    case 2:
 715        rnd_prec = floatx80_precision_d;
 716        break;
 717    case 3:
 718    default:
 719        rnd_prec = floatx80_precision_x;
 720        break;
 721    }
 722    set_floatx80_rounding_precision(rnd_prec, &env->fp_status);
 723}
 724
 725void helper_fldcw(CPUX86State *env, uint32_t val)
 726{
 727    cpu_set_fpuc(env, val);
 728}
 729
 730void helper_fclex(CPUX86State *env)
 731{
 732    env->fpus &= 0x7f00;
 733}
 734
 735void helper_fwait(CPUX86State *env)
 736{
 737    if (env->fpus & FPUS_SE) {
 738        fpu_raise_exception(env, GETPC());
 739    }
 740}
 741
 742static void do_fninit(CPUX86State *env)
 743{
 744    env->fpus = 0;
 745    env->fpstt = 0;
 746    env->fpcs = 0;
 747    env->fpds = 0;
 748    env->fpip = 0;
 749    env->fpdp = 0;
 750    cpu_set_fpuc(env, 0x37f);
 751    env->fptags[0] = 1;
 752    env->fptags[1] = 1;
 753    env->fptags[2] = 1;
 754    env->fptags[3] = 1;
 755    env->fptags[4] = 1;
 756    env->fptags[5] = 1;
 757    env->fptags[6] = 1;
 758    env->fptags[7] = 1;
 759}
 760
 761void helper_fninit(CPUX86State *env)
 762{
 763    do_fninit(env);
 764}
 765
 766/* BCD ops */
 767
 768void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
 769{
 770    floatx80 tmp;
 771    uint64_t val;
 772    unsigned int v;
 773    int i;
 774
 775    val = 0;
 776    for (i = 8; i >= 0; i--) {
 777        v = cpu_ldub_data_ra(env, ptr + i, GETPC());
 778        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
 779    }
 780    tmp = int64_to_floatx80(val, &env->fp_status);
 781    if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
 782        tmp = floatx80_chs(tmp);
 783    }
 784    fpush(env);
 785    ST0 = tmp;
 786}
 787
 788void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
 789{
 790    uint8_t old_flags = save_exception_flags(env);
 791    int v;
 792    target_ulong mem_ref, mem_end;
 793    int64_t val;
 794    CPU_LDoubleU temp;
 795
 796    temp.d = ST0;
 797
 798    val = floatx80_to_int64(ST0, &env->fp_status);
 799    mem_ref = ptr;
 800    if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
 801        set_float_exception_flags(float_flag_invalid, &env->fp_status);
 802        while (mem_ref < ptr + 7) {
 803            cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
 804        }
 805        cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
 806        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
 807        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
 808        merge_exception_flags(env, old_flags);
 809        return;
 810    }
 811    mem_end = mem_ref + 9;
 812    if (SIGND(temp)) {
 813        cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
 814        val = -val;
 815    } else {
 816        cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
 817    }
 818    while (mem_ref < mem_end) {
 819        if (val == 0) {
 820            break;
 821        }
 822        v = val % 100;
 823        val = val / 100;
 824        v = ((v / 10) << 4) | (v % 10);
 825        cpu_stb_data_ra(env, mem_ref++, v, GETPC());
 826    }
 827    while (mem_ref < mem_end) {
 828        cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
 829    }
 830    merge_exception_flags(env, old_flags);
 831}
 832
 833/* 128-bit significand of log(2).  */
 834#define ln2_sig_high 0xb17217f7d1cf79abULL
 835#define ln2_sig_low 0xc9e3b39803f2f6afULL
 836
 837/*
 838 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 839 * the interval [-1/64, 1/64].
 840 */
 841#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
 842#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
 843#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
 844#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
 845#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
 846#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
 847#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
 848#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
 849#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
 850
 851struct f2xm1_data {
 852    /*
 853     * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
 854     * are very close to exact floatx80 values.
 855     */
 856    floatx80 t;
 857    /* The value of 2^t.  */
 858    floatx80 exp2;
 859    /* The value of 2^t - 1.  */
 860    floatx80 exp2m1;
 861};
 862
 863static const struct f2xm1_data f2xm1_table[65] = {
 864    { make_floatx80_init(0xbfff, 0x8000000000000000ULL),
 865      make_floatx80_init(0x3ffe, 0x8000000000000000ULL),
 866      make_floatx80_init(0xbffe, 0x8000000000000000ULL) },
 867    { make_floatx80_init(0xbffe, 0xf800000000002e7eULL),
 868      make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL),
 869      make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) },
 870    { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL),
 871      make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL),
 872      make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) },
 873    { make_floatx80_init(0xbffe, 0xe800000000006f10ULL),
 874      make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL),
 875      make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) },
 876    { make_floatx80_init(0xbffe, 0xe000000000008a45ULL),
 877      make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
 878      make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) },
 879    { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL),
 880      make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL),
 881      make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) },
 882    { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL),
 883      make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL),
 884      make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) },
 885    { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL),
 886      make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL),
 887      make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) },
 888    { make_floatx80_init(0xbffe, 0xc000000000006530ULL),
 889      make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL),
 890      make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) },
 891    { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL),
 892      make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL),
 893      make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) },
 894    { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL),
 895      make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL),
 896      make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) },
 897    { make_floatx80_init(0xbffe, 0xa800000000006f8aULL),
 898      make_floatx80_init(0x3ffe, 0xa27043030c49370aULL),
 899      make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) },
 900    { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL),
 901      make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL),
 902      make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) },
 903    { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL),
 904      make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
 905      make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) },
 906    { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL),
 907      make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL),
 908      make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) },
 909    { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL),
 910      make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL),
 911      make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) },
 912    { make_floatx80_init(0xbffe, 0x800000000000227dULL),
 913      make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL),
 914      make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) },
 915    { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL),
 916      make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
 917      make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) },
 918    { make_floatx80_init(0xbffd, 0xe00000000000df81ULL),
 919      make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL),
 920      make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) },
 921    { make_floatx80_init(0xbffd, 0xd00000000000bccfULL),
 922      make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL),
 923      make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) },
 924    { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL),
 925      make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL),
 926      make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) },
 927    { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL),
 928      make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL),
 929      make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) },
 930    { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL),
 931      make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL),
 932      make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) },
 933    { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL),
 934      make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL),
 935      make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) },
 936    { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL),
 937      make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL),
 938      make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
 939    { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL),
 940      make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL),
 941      make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) },
 942    { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL),
 943      make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL),
 944      make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) },
 945    { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL),
 946      make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL),
 947      make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) },
 948    { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL),
 949      make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL),
 950      make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) },
 951    { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL),
 952      make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL),
 953      make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) },
 954    { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL),
 955      make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL),
 956      make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) },
 957    { make_floatx80_init(0xbff9, 0xffffffffffff11feULL),
 958      make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL),
 959      make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) },
 960    { floatx80_zero_init,
 961      make_floatx80_init(0x3fff, 0x8000000000000000ULL),
 962      floatx80_zero_init },
 963    { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL),
 964      make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL),
 965      make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
 966    { make_floatx80_init(0x3ffb, 0x800000000000b500ULL),
 967      make_floatx80_init(0x3fff, 0x85aac367cc488345ULL),
 968      make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) },
 969    { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL),
 970      make_floatx80_init(0x3fff, 0x88980e8092da7cceULL),
 971      make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) },
 972    { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL),
 973      make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL),
 974      make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
 975    { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL),
 976      make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL),
 977      make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) },
 978    { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL),
 979      make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL),
 980      make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
 981    { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL),
 982      make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL),
 983      make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) },
 984    { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL),
 985      make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL),
 986      make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) },
 987    { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL),
 988      make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL),
 989      make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
 990    { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL),
 991      make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL),
 992      make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) },
 993    { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL),
 994      make_floatx80_init(0x3fff, 0xa27043030c49370aULL),
 995      make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) },
 996    { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL),
 997      make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL),
 998      make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) },
 999    { make_floatx80_init(0x3ffd, 0xd0000000000093beULL),
1000      make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL),
1001      make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) },
1002    { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL),
1003      make_floatx80_init(0x3fff, 0xad583eea42a17876ULL),
1004      make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) },
1005    { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL),
1006      make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL),
1007      make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) },
1008    { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL),
1009      make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL),
1010      make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) },
1011    { make_floatx80_init(0x3ffe, 0x8800000000006344ULL),
1012      make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL),
1013      make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) },
1014    { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL),
1015      make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL),
1016      make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) },
1017    { make_floatx80_init(0x3ffe, 0x9800000000009127ULL),
1018      make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL),
1019      make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) },
1020    { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL),
1021      make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL),
1022      make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) },
1023    { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL),
1024      make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL),
1025      make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) },
1026    { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL),
1027      make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL),
1028      make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) },
1029    { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL),
1030      make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL),
1031      make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) },
1032    { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL),
1033      make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL),
1034      make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) },
1035    { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL),
1036      make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL),
1037      make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
1038    { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL),
1039      make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
1040      make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
1041    { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
1042      make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
1043      make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
1044    { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
1045      make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
1046      make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
1047    { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
1048      make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
1049      make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
1050    { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
1051      make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
1052      make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
1053    { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
1054      make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
1055      make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
1056    { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
1057      make_floatx80_init(0x4000, 0x8000000000000000ULL),
1058      make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
1059};
1060
1061void helper_f2xm1(CPUX86State *env)
1062{
1063    uint8_t old_flags = save_exception_flags(env);
1064    uint64_t sig = extractFloatx80Frac(ST0);
1065    int32_t exp = extractFloatx80Exp(ST0);
1066    bool sign = extractFloatx80Sign(ST0);
1067
1068    if (floatx80_invalid_encoding(ST0)) {
1069        float_raise(float_flag_invalid, &env->fp_status);
1070        ST0 = floatx80_default_nan(&env->fp_status);
1071    } else if (floatx80_is_any_nan(ST0)) {
1072        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1073            float_raise(float_flag_invalid, &env->fp_status);
1074            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1075        }
1076    } else if (exp > 0x3fff ||
1077               (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
1078        /* Out of range for the instruction, treat as invalid.  */
1079        float_raise(float_flag_invalid, &env->fp_status);
1080        ST0 = floatx80_default_nan(&env->fp_status);
1081    } else if (exp == 0x3fff) {
1082        /* Argument 1 or -1, exact result 1 or -0.5.  */
1083        if (sign) {
1084            ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
1085        }
1086    } else if (exp < 0x3fb0) {
1087        if (!floatx80_is_zero(ST0)) {
1088            /*
1089             * Multiplying the argument by an extra-precision version
1090             * of log(2) is sufficiently precise.  Zero arguments are
1091             * returned unchanged.
1092             */
1093            uint64_t sig0, sig1, sig2;
1094            if (exp == 0) {
1095                normalizeFloatx80Subnormal(sig, &exp, &sig);
1096            }
1097            mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
1098                            &sig2);
1099            /* This result is inexact.  */
1100            sig1 |= 1;
1101            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1102                                                sign, exp, sig0, sig1,
1103                                                &env->fp_status);
1104        }
1105    } else {
1106        floatx80 tmp, y, accum;
1107        bool asign, bsign;
1108        int32_t n, aexp, bexp;
1109        uint64_t asig0, asig1, asig2, bsig0, bsig1;
1110        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1111        FloatX80RoundPrec save_prec =
1112            env->fp_status.floatx80_rounding_precision;
1113        env->fp_status.float_rounding_mode = float_round_nearest_even;
1114        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
1115
1116        /* Find the nearest multiple of 1/32 to the argument.  */
1117        tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
1118        n = 32 + floatx80_to_int32(tmp, &env->fp_status);
1119        y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);
1120
1121        if (floatx80_is_zero(y)) {
1122            /*
1123             * Use the value of 2^t - 1 from the table, to avoid
1124             * needing to special-case zero as a result of
1125             * multiplication below.
1126             */
1127            ST0 = f2xm1_table[n].t;
1128            set_float_exception_flags(float_flag_inexact, &env->fp_status);
1129            env->fp_status.float_rounding_mode = save_mode;
1130        } else {
1131            /*
1132             * Compute the lower parts of a polynomial expansion for
1133             * (2^y - 1) / y.
1134             */
1135            accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
1136            accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
1137            accum = floatx80_mul(accum, y, &env->fp_status);
1138            accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
1139            accum = floatx80_mul(accum, y, &env->fp_status);
1140            accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
1141            accum = floatx80_mul(accum, y, &env->fp_status);
1142            accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
1143            accum = floatx80_mul(accum, y, &env->fp_status);
1144            accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
1145            accum = floatx80_mul(accum, y, &env->fp_status);
1146            accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
1147            accum = floatx80_mul(accum, y, &env->fp_status);
1148            accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);
1149
1150            /*
1151             * The full polynomial expansion is f2xm1_coeff_0 + accum
1152             * (where accum has much lower magnitude, and so, in
1153             * particular, carry out of the addition is not possible).
1154             * (This expansion is only accurate to about 70 bits, not
1155             * 128 bits.)
1156             */
1157            aexp = extractFloatx80Exp(f2xm1_coeff_0);
1158            asign = extractFloatx80Sign(f2xm1_coeff_0);
1159            shift128RightJamming(extractFloatx80Frac(accum), 0,
1160                                 aexp - extractFloatx80Exp(accum),
1161                                 &asig0, &asig1);
1162            bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
1163            bsig1 = 0;
1164            if (asign == extractFloatx80Sign(accum)) {
1165                add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1166            } else {
1167                sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1168            }
1169            /* And thus compute an approximation to 2^y - 1.  */
1170            mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
1171                            &asig0, &asig1, &asig2);
1172            aexp += extractFloatx80Exp(y) - 0x3ffe;
1173            asign ^= extractFloatx80Sign(y);
1174            if (n != 32) {
1175                /*
1176                 * Multiply this by the precomputed value of 2^t and
1177                 * add that of 2^t - 1.
1178                 */
1179                mul128By64To192(asig0, asig1,
1180                                extractFloatx80Frac(f2xm1_table[n].exp2),
1181                                &asig0, &asig1, &asig2);
1182                aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
1183                bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
1184                bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
1185                bsig1 = 0;
1186                if (bexp < aexp) {
1187                    shift128RightJamming(bsig0, bsig1, aexp - bexp,
1188                                         &bsig0, &bsig1);
1189                } else if (aexp < bexp) {
1190                    shift128RightJamming(asig0, asig1, bexp - aexp,
1191                                         &asig0, &asig1);
1192                    aexp = bexp;
1193                }
1194                /* The sign of 2^t - 1 is always that of the result.  */
1195                bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
1196                if (asign == bsign) {
1197                    /* Avoid possible carry out of the addition.  */
1198                    shift128RightJamming(asig0, asig1, 1,
1199                                         &asig0, &asig1);
1200                    shift128RightJamming(bsig0, bsig1, 1,
1201                                         &bsig0, &bsig1);
1202                    ++aexp;
1203                    add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
1204                } else {
1205                    sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1206                    asign = bsign;
1207                }
1208            }
1209            env->fp_status.float_rounding_mode = save_mode;
1210            /* This result is inexact.  */
1211            asig1 |= 1;
1212            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1213                                                asign, aexp, asig0, asig1,
1214                                                &env->fp_status);
1215        }
1216
1217        env->fp_status.floatx80_rounding_precision = save_prec;
1218    }
1219    merge_exception_flags(env, old_flags);
1220}
1221
1222void helper_fptan(CPUX86State *env)
1223{
1224    double fptemp = floatx80_to_double(env, ST0);
1225
1226    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
1227        env->fpus |= 0x400;
1228    } else {
1229        fptemp = tan(fptemp);
1230        ST0 = double_to_floatx80(env, fptemp);
1231        fpush(env);
1232        ST0 = floatx80_one;
1233        env->fpus &= ~0x400; /* C2 <-- 0 */
1234        /* the above code is for |arg| < 2**52 only */
1235    }
1236}
1237
/*
 * Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.  Each is
 * given as a biased exponent plus the high and low 64 bits of the
 * significand.  pi/4, pi/2 and pi differ only by a power of two, so they
 * share one significand and differ only in the exponent.
 */
#define pi_4_exp        0x3ffe
#define pi_4_sig_high   0xc90fdaa22168c234ULL
#define pi_4_sig_low    0xc4c6628b80dc1cd1ULL

#define pi_2_exp        0x3fff
#define pi_2_sig_high   pi_4_sig_high
#define pi_2_sig_low    pi_4_sig_low

#define pi_34_exp       0x4000
#define pi_34_sig_high  0x96cbe3f9990e91a7ULL
#define pi_34_sig_low   0x9394c9e8a0a5159dULL

#define pi_exp          0x4000
#define pi_sig_high     pi_4_sig_high
#define pi_sig_low      pi_4_sig_low
1251
/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 *
 * fpatan_coeff_k multiplies x^(2k+1).  The signs alternate and the
 * values lie close to the Taylor coefficients (-1)^k / (2k+1).
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL) /* 1 */
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL) /* ~ -1/3 */
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL) /* ~ 1/5 */
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL) /* ~ -1/7 */
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL) /* ~ 1/9 */
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL) /* ~ -1/11 */
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL) /* ~ 1/13 */
1267
1268struct fpatan_data {
1269    /* High and low parts of atan(x).  */
1270    floatx80 atan_high, atan_low;
1271};
1272
/*
 * atan(n/8) for n = 0 .. 8, each as a high part plus a signed low-order
 * correction.  helper_fpatan indexes this table with n, where n/8 is the
 * multiple of 1/8 nearest to its reduced argument.
 */
static const struct fpatan_data fpatan_table[9] = {
    /* n = 0: atan(0) = 0 */
    { floatx80_zero_init,
      floatx80_zero_init },
    /* n = 1: atan(1/8) */
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    /* n = 2: atan(1/4) */
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    /* n = 3: atan(3/8) */
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    /* n = 4: atan(1/2) */
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    /* n = 5: atan(5/8) */
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    /* n = 6: atan(3/4) */
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    /* n = 7: atan(7/8) */
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    /* n = 8: atan(1) = pi/4 */
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};
1293
1294void helper_fpatan(CPUX86State *env)
1295{
1296    uint8_t old_flags = save_exception_flags(env);
1297    uint64_t arg0_sig = extractFloatx80Frac(ST0);
1298    int32_t arg0_exp = extractFloatx80Exp(ST0);
1299    bool arg0_sign = extractFloatx80Sign(ST0);
1300    uint64_t arg1_sig = extractFloatx80Frac(ST1);
1301    int32_t arg1_exp = extractFloatx80Exp(ST1);
1302    bool arg1_sign = extractFloatx80Sign(ST1);
1303
1304    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1305        float_raise(float_flag_invalid, &env->fp_status);
1306        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
1307    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
1308        float_raise(float_flag_invalid, &env->fp_status);
1309        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
1310    } else if (floatx80_invalid_encoding(ST0) ||
1311               floatx80_invalid_encoding(ST1)) {
1312        float_raise(float_flag_invalid, &env->fp_status);
1313        ST1 = floatx80_default_nan(&env->fp_status);
1314    } else if (floatx80_is_any_nan(ST0)) {
1315        ST1 = ST0;
1316    } else if (floatx80_is_any_nan(ST1)) {
1317        /* Pass this NaN through.  */
1318    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
1319        /* Pass this zero through.  */
1320    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
1321                 arg0_exp - arg1_exp >= 80) &&
1322               !arg0_sign) {
1323        /*
1324         * Dividing ST1 by ST0 gives the correct result up to
1325         * rounding, and avoids spurious underflow exceptions that
1326         * might result from passing some small values through the
1327         * polynomial approximation, but if a finite nonzero result of
1328         * division is exact, the result of fpatan is still inexact
1329         * (and underflowing where appropriate).
1330         */
1331        FloatX80RoundPrec save_prec =
1332            env->fp_status.floatx80_rounding_precision;
1333        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
1334        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
1335        env->fp_status.floatx80_rounding_precision = save_prec;
1336        if (!floatx80_is_zero(ST1) &&
1337            !(get_float_exception_flags(&env->fp_status) &
1338              float_flag_inexact)) {
1339            /*
1340             * The mathematical result is very slightly closer to zero
1341             * than this exact result.  Round a value with the
1342             * significand adjusted accordingly to get the correct
1343             * exceptions, and possibly an adjusted result depending
1344             * on the rounding mode.
1345             */
1346            uint64_t sig = extractFloatx80Frac(ST1);
1347            int32_t exp = extractFloatx80Exp(ST1);
1348            bool sign = extractFloatx80Sign(ST1);
1349            if (exp == 0) {
1350                normalizeFloatx80Subnormal(sig, &exp, &sig);
1351            }
1352            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1353                                                sign, exp, sig - 1,
1354                                                -1, &env->fp_status);
1355        }
1356    } else {
1357        /* The result is inexact.  */
1358        bool rsign = arg1_sign;
1359        int32_t rexp;
1360        uint64_t rsig0, rsig1;
1361        if (floatx80_is_zero(ST1)) {
1362            /*
1363             * ST0 is negative.  The result is pi with the sign of
1364             * ST1.
1365             */
1366            rexp = pi_exp;
1367            rsig0 = pi_sig_high;
1368            rsig1 = pi_sig_low;
1369        } else if (floatx80_is_infinity(ST1)) {
1370            if (floatx80_is_infinity(ST0)) {
1371                if (arg0_sign) {
1372                    rexp = pi_34_exp;
1373                    rsig0 = pi_34_sig_high;
1374                    rsig1 = pi_34_sig_low;
1375                } else {
1376                    rexp = pi_4_exp;
1377                    rsig0 = pi_4_sig_high;
1378                    rsig1 = pi_4_sig_low;
1379                }
1380            } else {
1381                rexp = pi_2_exp;
1382                rsig0 = pi_2_sig_high;
1383                rsig1 = pi_2_sig_low;
1384            }
1385        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
1386            rexp = pi_2_exp;
1387            rsig0 = pi_2_sig_high;
1388            rsig1 = pi_2_sig_low;
1389        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
1390            /* ST0 is negative.  */
1391            rexp = pi_exp;
1392            rsig0 = pi_sig_high;
1393            rsig1 = pi_sig_low;
1394        } else {
1395            /*
1396             * ST0 and ST1 are finite, nonzero and with exponents not
1397             * too far apart.
1398             */
1399            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
1400            int32_t azexp, axexp;
1401            bool adj_sub, ysign, zsign;
1402            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
1403            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
1404            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
1405            uint64_t azsig0, azsig1;
1406            uint64_t azsig2, azsig3, axsig0, axsig1;
1407            floatx80 x8;
1408            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1409            FloatX80RoundPrec save_prec =
1410                env->fp_status.floatx80_rounding_precision;
1411            env->fp_status.float_rounding_mode = float_round_nearest_even;
1412            env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
1413
1414            if (arg0_exp == 0) {
1415                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
1416            }
1417            if (arg1_exp == 0) {
1418                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
1419            }
1420            if (arg0_exp > arg1_exp ||
1421                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
1422                /* Work with abs(ST1) / abs(ST0).  */
1423                num_exp = arg1_exp;
1424                num_sig = arg1_sig;
1425                den_exp = arg0_exp;
1426                den_sig = arg0_sig;
1427                if (arg0_sign) {
1428                    /* The result is subtracted from pi.  */
1429                    adj_exp = pi_exp;
1430                    adj_sig0 = pi_sig_high;
1431                    adj_sig1 = pi_sig_low;
1432                    adj_sub = true;
1433                } else {
1434                    /* The result is used as-is.  */
1435                    adj_exp = 0;
1436                    adj_sig0 = 0;
1437                    adj_sig1 = 0;
1438                    adj_sub = false;
1439                }
1440            } else {
1441                /* Work with abs(ST0) / abs(ST1).  */
1442                num_exp = arg0_exp;
1443                num_sig = arg0_sig;
1444                den_exp = arg1_exp;
1445                den_sig = arg1_sig;
1446                /* The result is added to or subtracted from pi/2.  */
1447                adj_exp = pi_2_exp;
1448                adj_sig0 = pi_2_sig_high;
1449                adj_sig1 = pi_2_sig_low;
1450                adj_sub = !arg0_sign;
1451            }
1452
1453            /*
1454             * Compute x = num/den, where 0 < x <= 1 and x is not too
1455             * small.
1456             */
1457            xexp = num_exp - den_exp + 0x3ffe;
1458            remsig0 = num_sig;
1459            remsig1 = 0;
1460            if (den_sig <= remsig0) {
1461                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
1462                ++xexp;
1463            }
1464            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
1465            mul64To128(den_sig, xsig0, &msig0, &msig1);
1466            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
1467            while ((int64_t) remsig0 < 0) {
1468                --xsig0;
1469                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
1470            }
1471            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
1472            /*
1473             * No need to correct any estimation error in xsig1; even
1474             * with such error, it is accurate enough.
1475             */
1476
1477            /*
1478             * Split x as x = t + y, where t = n/8 is the nearest
1479             * multiple of 1/8 to x.
1480             */
1481            x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1482                                               false, xexp + 3, xsig0,
1483                                               xsig1, &env->fp_status);
1484            n = floatx80_to_int32(x8, &env->fp_status);
1485            if (n == 0) {
1486                ysign = false;
1487                yexp = xexp;
1488                ysig0 = xsig0;
1489                ysig1 = xsig1;
1490                texp = 0;
1491                tsig = 0;
1492            } else {
1493                int shift = clz32(n) + 32;
1494                texp = 0x403b - shift;
1495                tsig = n;
1496                tsig <<= shift;
1497                if (texp == xexp) {
1498                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
1499                    if ((int64_t) ysig0 >= 0) {
1500                        ysign = false;
1501                        if (ysig0 == 0) {
1502                            if (ysig1 == 0) {
1503                                yexp = 0;
1504                            } else {
1505                                shift = clz64(ysig1) + 64;
1506                                yexp = xexp - shift;
1507                                shift128Left(ysig0, ysig1, shift,
1508                                             &ysig0, &ysig1);
1509                            }
1510                        } else {
1511                            shift = clz64(ysig0);
1512                            yexp = xexp - shift;
1513                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1514                        }
1515                    } else {
1516                        ysign = true;
1517                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
1518                        if (ysig0 == 0) {
1519                            shift = clz64(ysig1) + 64;
1520                        } else {
1521                            shift = clz64(ysig0);
1522                        }
1523                        yexp = xexp - shift;
1524                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1525                    }
1526                } else {
1527                    /*
1528                     * t's exponent must be greater than x's because t
1529                     * is positive and the nearest multiple of 1/8 to
1530                     * x, and if x has a greater exponent, the power
1531                     * of 2 with that exponent is also a multiple of
1532                     * 1/8.
1533                     */
1534                    uint64_t usig0, usig1;
1535                    shift128RightJamming(xsig0, xsig1, texp - xexp,
1536                                         &usig0, &usig1);
1537                    ysign = true;
1538                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
1539                    if (ysig0 == 0) {
1540                        shift = clz64(ysig1) + 64;
1541                    } else {
1542                        shift = clz64(ysig0);
1543                    }
1544                    yexp = texp - shift;
1545                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1546                }
1547            }
1548
1549            /*
1550             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
1551             * arctan(z).
1552             */
1553            zsign = ysign;
1554            if (texp == 0 || yexp == 0) {
1555                zexp = yexp;
1556                zsig0 = ysig0;
1557                zsig1 = ysig1;
1558            } else {
1559                /*
1560                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
1561                 */
1562                int32_t dexp = texp + xexp - 0x3ffe;
1563                uint64_t dsig0, dsig1, dsig2;
1564                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
1565                /*
1566                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
1567                 * bit).  Add 1 to produce the denominator 1+tx.
1568                 */
1569                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
1570                                     &dsig0, &dsig1);
1571                dsig0 |= 0x8000000000000000ULL;
1572                zexp = yexp - 1;
1573                remsig0 = ysig0;
1574                remsig1 = ysig1;
1575                remsig2 = 0;
1576                if (dsig0 <= remsig0) {
1577                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
1578                    ++zexp;
1579                }
1580                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
1581                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
1582                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
1583                       &remsig0, &remsig1, &remsig2);
1584                while ((int64_t) remsig0 < 0) {
1585                    --zsig0;
1586                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
1587                           &remsig0, &remsig1, &remsig2);
1588                }
1589                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
1590                /* No need to correct any estimation error in zsig1.  */
1591            }
1592
1593            if (zexp == 0) {
1594                azexp = 0;
1595                azsig0 = 0;
1596                azsig1 = 0;
1597            } else {
1598                floatx80 z2, accum;
1599                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
1600                /* Compute z^2.  */
1601                mul128To256(zsig0, zsig1, zsig0, zsig1,
1602                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
1603                z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
1604                                                   zexp + zexp - 0x3ffe,
1605                                                   z2sig0, z2sig1,
1606                                                   &env->fp_status);
1607
1608                /* Compute the lower parts of the polynomial expansion.  */
1609                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
1610                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
1611                accum = floatx80_mul(accum, z2, &env->fp_status);
1612                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
1613                accum = floatx80_mul(accum, z2, &env->fp_status);
1614                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
1615                accum = floatx80_mul(accum, z2, &env->fp_status);
1616                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
1617                accum = floatx80_mul(accum, z2, &env->fp_status);
1618                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
1619                accum = floatx80_mul(accum, z2, &env->fp_status);
1620
1621                /*
1622                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
1623                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
1624                 */
1625                aexp = extractFloatx80Exp(fpatan_coeff_0);
1626                shift128RightJamming(extractFloatx80Frac(accum), 0,
1627                                     aexp - extractFloatx80Exp(accum),
1628                                     &asig0, &asig1);
1629                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
1630                       &asig0, &asig1);
1631                /* Multiply by z to compute arctan(z).  */
1632                azexp = aexp + zexp - 0x3ffe;
1633                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
1634                            &azsig2, &azsig3);
1635            }
1636
1637            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign).  */
1638            if (texp == 0) {
1639                /* z is positive.  */
1640                axexp = azexp;
1641                axsig0 = azsig0;
1642                axsig1 = azsig1;
1643            } else {
1644                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
1645                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
1646                uint64_t low_sig0 =
1647                    extractFloatx80Frac(fpatan_table[n].atan_low);
1648                uint64_t low_sig1 = 0;
1649                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
1650                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
1651                axsig1 = 0;
1652                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
1653                                     &low_sig0, &low_sig1);
1654                if (low_sign) {
1655                    sub128(axsig0, axsig1, low_sig0, low_sig1,
1656                           &axsig0, &axsig1);
1657                } else {
1658                    add128(axsig0, axsig1, low_sig0, low_sig1,
1659                           &axsig0, &axsig1);
1660                }
1661                if (azexp >= axexp) {
1662                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
1663                                         &axsig0, &axsig1);
1664                    axexp = azexp + 1;
1665                    shift128RightJamming(azsig0, azsig1, 1,
1666                                         &azsig0, &azsig1);
1667                } else {
1668                    shift128RightJamming(axsig0, axsig1, 1,
1669                                         &axsig0, &axsig1);
1670                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
1671                                         &azsig0, &azsig1);
1672                    ++axexp;
1673                }
1674                if (zsign) {
1675                    sub128(axsig0, axsig1, azsig0, azsig1,
1676                           &axsig0, &axsig1);
1677                } else {
1678                    add128(axsig0, axsig1, azsig0, azsig1,
1679                           &axsig0, &axsig1);
1680                }
1681            }
1682
1683            if (adj_exp == 0) {
1684                rexp = axexp;
1685                rsig0 = axsig0;
1686                rsig1 = axsig1;
1687            } else {
1688                /*
1689                 * Add or subtract arctan(x) (exponent axexp,
1690                 * significand axsig0 and axsig1, positive, not
1691                 * necessarily normalized) to the number given by
1692                 * adj_exp, adj_sig0 and adj_sig1, according to
1693                 * adj_sub.
1694                 */
1695                if (adj_exp >= axexp) {
1696                    shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
1697                                         &axsig0, &axsig1);
1698                    rexp = adj_exp + 1;
1699                    shift128RightJamming(adj_sig0, adj_sig1, 1,
1700                                         &adj_sig0, &adj_sig1);
1701                } else {
1702                    shift128RightJamming(axsig0, axsig1, 1,
1703                                         &axsig0, &axsig1);
1704                    shift128RightJamming(adj_sig0, adj_sig1,
1705                                         axexp - adj_exp + 1,
1706                                         &adj_sig0, &adj_sig1);
1707                    rexp = axexp + 1;
1708                }
1709                if (adj_sub) {
1710                    sub128(adj_sig0, adj_sig1, axsig0, axsig1,
1711                           &rsig0, &rsig1);
1712                } else {
1713                    add128(adj_sig0, adj_sig1, axsig0, axsig1,
1714                           &rsig0, &rsig1);
1715                }
1716            }
1717
1718            env->fp_status.float_rounding_mode = save_mode;
1719            env->fp_status.floatx80_rounding_precision = save_prec;
1720        }
1721        /* This result is inexact.  */
1722        rsig1 |= 1;
1723        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
1724                                            rsig0, rsig1, &env->fp_status);
1725    }
1726
1727    fpop(env);
1728    merge_exception_flags(env, old_flags);
1729}
1730
1731void helper_fxtract(CPUX86State *env)
1732{
1733    uint8_t old_flags = save_exception_flags(env);
1734    CPU_LDoubleU temp;
1735
1736    temp.d = ST0;
1737
1738    if (floatx80_is_zero(ST0)) {
1739        /* Easy way to generate -inf and raising division by 0 exception */
1740        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
1741                           &env->fp_status);
1742        fpush(env);
1743        ST0 = temp.d;
1744    } else if (floatx80_invalid_encoding(ST0)) {
1745        float_raise(float_flag_invalid, &env->fp_status);
1746        ST0 = floatx80_default_nan(&env->fp_status);
1747        fpush(env);
1748        ST0 = ST1;
1749    } else if (floatx80_is_any_nan(ST0)) {
1750        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1751            float_raise(float_flag_invalid, &env->fp_status);
1752            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
1753        }
1754        fpush(env);
1755        ST0 = ST1;
1756    } else if (floatx80_is_infinity(ST0)) {
1757        fpush(env);
1758        ST0 = ST1;
1759        ST1 = floatx80_infinity;
1760    } else {
1761        int expdif;
1762
1763        if (EXPD(temp) == 0) {
1764            int shift = clz64(temp.l.lower);
1765            temp.l.lower <<= shift;
1766            expdif = 1 - EXPBIAS - shift;
1767            float_raise(float_flag_input_denormal, &env->fp_status);
1768        } else {
1769            expdif = EXPD(temp) - EXPBIAS;
1770        }
1771        /* DP exponent bias */
1772        ST0 = int32_to_floatx80(expdif, &env->fp_status);
1773        fpush(env);
1774        BIASEXPONENT(temp);
1775        ST0 = temp.d;
1776    }
1777    merge_exception_flags(env, old_flags);
1778}
1779
1780static void helper_fprem_common(CPUX86State *env, bool mod)
1781{
1782    uint8_t old_flags = save_exception_flags(env);
1783    uint64_t quotient;
1784    CPU_LDoubleU temp0, temp1;
1785    int exp0, exp1, expdiff;
1786
1787    temp0.d = ST0;
1788    temp1.d = ST1;
1789    exp0 = EXPD(temp0);
1790    exp1 = EXPD(temp1);
1791
1792    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
1793    if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
1794        exp0 == 0x7fff || exp1 == 0x7fff ||
1795        floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
1796        ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
1797    } else {
1798        if (exp0 == 0) {
1799            exp0 = 1 - clz64(temp0.l.lower);
1800        }
1801        if (exp1 == 0) {
1802            exp1 = 1 - clz64(temp1.l.lower);
1803        }
1804        expdiff = exp0 - exp1;
1805        if (expdiff < 64) {
1806            ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
1807            env->fpus |= (quotient & 0x4) << (8 - 2);  /* (C0) <-- q2 */
1808            env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
1809            env->fpus |= (quotient & 0x1) << (9 - 0);  /* (C1) <-- q0 */
1810        } else {
1811            /*
1812             * Partial remainder.  This choice of how many bits to
1813             * process at once is specified in AMD instruction set
1814             * manuals, and empirically is followed by Intel
1815             * processors as well; it ensures that the final remainder
1816             * operation in a loop does produce the correct low three
1817             * bits of the quotient.  AMD manuals specify that the
1818             * flags other than C2 are cleared, and empirically Intel
1819             * processors clear them as well.
1820             */
1821            int n = 32 + (expdiff % 32);
1822            temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
1823            ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
1824            env->fpus |= 0x400;  /* C2 <-- 1 */
1825        }
1826    }
1827    merge_exception_flags(env, old_flags);
1828}
1829
1830void helper_fprem1(CPUX86State *env)
1831{
1832    helper_fprem_common(env, false);
1833}
1834
1835void helper_fprem(CPUX86State *env)
1836{
1837    helper_fprem_common(env, true);
1838}
1839
/*
 * 128-bit significand of log2(e), split into high and low 64-bit
 * halves.  Used by helper_fyl2xp1 for arguments small enough that
 * multiplying by log2(e) is sufficiently precise.
 */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low 0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 *
 * The leading coefficient is carried in two pieces: fyl2x_coeff_0 is
 * combined with the rest of the expansion using 128-bit integer
 * arithmetic, while fyl2x_coeff_0_low is added to the floatx80
 * accumulator at the end of the evaluation in helper_fyl2x_common.
 */
#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
1861
1862/*
1863 * Compute an approximation of log2(1+arg), where 1+arg is in the
1864 * interval [sqrt(2)/2, sqrt(2)].  It is assumed that when this
1865 * function is called, rounding precision is set to 80 and the
1866 * round-to-nearest mode is in effect.  arg must not be exactly zero,
1867 * and must not be so close to zero that underflow might occur.
1868 */
1869static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
1870                                uint64_t *sig0, uint64_t *sig1)
1871{
1872    uint64_t arg0_sig = extractFloatx80Frac(arg);
1873    int32_t arg0_exp = extractFloatx80Exp(arg);
1874    bool arg0_sign = extractFloatx80Sign(arg);
1875    bool asign;
1876    int32_t dexp, texp, aexp;
1877    uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
1878    uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
1879    uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
1880    floatx80 t2, accum;
1881
1882    /*
1883     * Compute an approximation of arg/(2+arg), with extra precision,
1884     * as the argument to a polynomial approximation.  The extra
1885     * precision is only needed for the first term of the
1886     * approximation, with subsequent terms being significantly
1887     * smaller; the approximation only uses odd exponents, and the
1888     * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
1889     */
1890    if (arg0_sign) {
1891        dexp = 0x3fff;
1892        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1893        sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
1894    } else {
1895        dexp = 0x4000;
1896        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1897        dsig0 |= 0x8000000000000000ULL;
1898    }
1899    texp = arg0_exp - dexp + 0x3ffe;
1900    rsig0 = arg0_sig;
1901    rsig1 = 0;
1902    rsig2 = 0;
1903    if (dsig0 <= rsig0) {
1904        shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
1905        ++texp;
1906    }
1907    tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
1908    mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
1909    sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
1910           &rsig0, &rsig1, &rsig2);
1911    while ((int64_t) rsig0 < 0) {
1912        --tsig0;
1913        add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
1914               &rsig0, &rsig1, &rsig2);
1915    }
1916    tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
1917    /*
1918     * No need to correct any estimation error in tsig1; even with
1919     * such error, it is accurate enough.  Now compute the square of
1920     * that approximation.
1921     */
1922    mul128To256(tsig0, tsig1, tsig0, tsig1,
1923                &t2sig0, &t2sig1, &t2sig2, &t2sig3);
1924    t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
1925                                       texp + texp - 0x3ffe,
1926                                       t2sig0, t2sig1, &env->fp_status);
1927
1928    /* Compute the lower parts of the polynomial expansion.  */
1929    accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
1930    accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
1931    accum = floatx80_mul(accum, t2, &env->fp_status);
1932    accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
1933    accum = floatx80_mul(accum, t2, &env->fp_status);
1934    accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
1935    accum = floatx80_mul(accum, t2, &env->fp_status);
1936    accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
1937    accum = floatx80_mul(accum, t2, &env->fp_status);
1938    accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
1939    accum = floatx80_mul(accum, t2, &env->fp_status);
1940    accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
1941    accum = floatx80_mul(accum, t2, &env->fp_status);
1942    accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
1943    accum = floatx80_mul(accum, t2, &env->fp_status);
1944    accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
1945    accum = floatx80_mul(accum, t2, &env->fp_status);
1946    accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);
1947
1948    /*
1949     * The full polynomial expansion is fyl2x_coeff_0 + accum (where
1950     * accum has much lower magnitude, and so, in particular, carry
1951     * out of the addition is not possible), multiplied by t.  (This
1952     * expansion is only accurate to about 70 bits, not 128 bits.)
1953     */
1954    aexp = extractFloatx80Exp(fyl2x_coeff_0);
1955    asign = extractFloatx80Sign(fyl2x_coeff_0);
1956    shift128RightJamming(extractFloatx80Frac(accum), 0,
1957                         aexp - extractFloatx80Exp(accum),
1958                         &asig0, &asig1);
1959    bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
1960    bsig1 = 0;
1961    if (asign == extractFloatx80Sign(accum)) {
1962        add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1963    } else {
1964        sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
1965    }
1966    /* Multiply by t to compute the required result.  */
1967    mul128To256(asig0, asig1, tsig0, tsig1,
1968                &asig0, &asig1, &asig2, &asig3);
1969    aexp += texp - 0x3ffe;
1970    *exp = aexp;
1971    *sig0 = asig0;
1972    *sig1 = asig1;
1973}
1974
1975void helper_fyl2xp1(CPUX86State *env)
1976{
1977    uint8_t old_flags = save_exception_flags(env);
1978    uint64_t arg0_sig = extractFloatx80Frac(ST0);
1979    int32_t arg0_exp = extractFloatx80Exp(ST0);
1980    bool arg0_sign = extractFloatx80Sign(ST0);
1981    uint64_t arg1_sig = extractFloatx80Frac(ST1);
1982    int32_t arg1_exp = extractFloatx80Exp(ST1);
1983    bool arg1_sign = extractFloatx80Sign(ST1);
1984
1985    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1986        float_raise(float_flag_invalid, &env->fp_status);
1987        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
1988    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
1989        float_raise(float_flag_invalid, &env->fp_status);
1990        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
1991    } else if (floatx80_invalid_encoding(ST0) ||
1992               floatx80_invalid_encoding(ST1)) {
1993        float_raise(float_flag_invalid, &env->fp_status);
1994        ST1 = floatx80_default_nan(&env->fp_status);
1995    } else if (floatx80_is_any_nan(ST0)) {
1996        ST1 = ST0;
1997    } else if (floatx80_is_any_nan(ST1)) {
1998        /* Pass this NaN through.  */
1999    } else if (arg0_exp > 0x3ffd ||
2000               (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
2001                                                  0x95f619980c4336f7ULL :
2002                                                  0xd413cccfe7799211ULL))) {
2003        /*
2004         * Out of range for the instruction (ST0 must have absolute
2005         * value less than 1 - sqrt(2)/2 = 0.292..., according to
2006         * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
2007         * to sqrt(2) - 1, which we allow here), treat as invalid.
2008         */
2009        float_raise(float_flag_invalid, &env->fp_status);
2010        ST1 = floatx80_default_nan(&env->fp_status);
2011    } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
2012               arg1_exp == 0x7fff) {
2013        /*
2014         * One argument is zero, or multiplying by infinity; correct
2015         * result is exact and can be obtained by multiplying the
2016         * arguments.
2017         */
2018        ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
2019    } else if (arg0_exp < 0x3fb0) {
2020        /*
2021         * Multiplying both arguments and an extra-precision version
2022         * of log2(e) is sufficiently precise.
2023         */
2024        uint64_t sig0, sig1, sig2;
2025        int32_t exp;
2026        if (arg0_exp == 0) {
2027            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
2028        }
2029        if (arg1_exp == 0) {
2030            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2031        }
2032        mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
2033                        &sig0, &sig1, &sig2);
2034        exp = arg0_exp + 1;
2035        mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
2036        exp += arg1_exp - 0x3ffe;
2037        /* This result is inexact.  */
2038        sig1 |= 1;
2039        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
2040                                            arg0_sign ^ arg1_sign, exp,
2041                                            sig0, sig1, &env->fp_status);
2042    } else {
2043        int32_t aexp;
2044        uint64_t asig0, asig1, asig2;
2045        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
2046        FloatX80RoundPrec save_prec =
2047            env->fp_status.floatx80_rounding_precision;
2048        env->fp_status.float_rounding_mode = float_round_nearest_even;
2049        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
2050
2051        helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
2052        /*
2053         * Multiply by the second argument to compute the required
2054         * result.
2055         */
2056        if (arg1_exp == 0) {
2057            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2058        }
2059        mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
2060        aexp += arg1_exp - 0x3ffe;
2061        /* This result is inexact.  */
2062        asig1 |= 1;
2063        env->fp_status.float_rounding_mode = save_mode;
2064        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
2065                                            arg0_sign ^ arg1_sign, aexp,
2066                                            asig0, asig1, &env->fp_status);
2067        env->fp_status.floatx80_rounding_precision = save_prec;
2068    }
2069    fpop(env);
2070    merge_exception_flags(env, old_flags);
2071}
2072
2073void helper_fyl2x(CPUX86State *env)
2074{
2075    uint8_t old_flags = save_exception_flags(env);
2076    uint64_t arg0_sig = extractFloatx80Frac(ST0);
2077    int32_t arg0_exp = extractFloatx80Exp(ST0);
2078    bool arg0_sign = extractFloatx80Sign(ST0);
2079    uint64_t arg1_sig = extractFloatx80Frac(ST1);
2080    int32_t arg1_exp = extractFloatx80Exp(ST1);
2081    bool arg1_sign = extractFloatx80Sign(ST1);
2082
2083    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2084        float_raise(float_flag_invalid, &env->fp_status);
2085        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
2086    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
2087        float_raise(float_flag_invalid, &env->fp_status);
2088        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
2089    } else if (floatx80_invalid_encoding(ST0) ||
2090               floatx80_invalid_encoding(ST1)) {
2091        float_raise(float_flag_invalid, &env->fp_status);
2092        ST1 = floatx80_default_nan(&env->fp_status);
2093    } else if (floatx80_is_any_nan(ST0)) {
2094        ST1 = ST0;
2095    } else if (floatx80_is_any_nan(ST1)) {
2096        /* Pass this NaN through.  */
2097    } else if (arg0_sign && !floatx80_is_zero(ST0)) {
2098        float_raise(float_flag_invalid, &env->fp_status);
2099        ST1 = floatx80_default_nan(&env->fp_status);
2100    } else if (floatx80_is_infinity(ST1)) {
2101        FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
2102                                             &env->fp_status);
2103        switch (cmp) {
2104        case float_relation_less:
2105            ST1 = floatx80_chs(ST1);
2106            break;
2107        case float_relation_greater:
2108            /* Result is infinity of the same sign as ST1.  */
2109            break;
2110        default:
2111            float_raise(float_flag_invalid, &env->fp_status);
2112            ST1 = floatx80_default_nan(&env->fp_status);
2113            break;
2114        }
2115    } else if (floatx80_is_infinity(ST0)) {
2116        if (floatx80_is_zero(ST1)) {
2117            float_raise(float_flag_invalid, &env->fp_status);
2118            ST1 = floatx80_default_nan(&env->fp_status);
2119        } else if (arg1_sign) {
2120            ST1 = floatx80_chs(ST0);
2121        } else {
2122            ST1 = ST0;
2123        }
2124    } else if (floatx80_is_zero(ST0)) {
2125        if (floatx80_is_zero(ST1)) {
2126            float_raise(float_flag_invalid, &env->fp_status);
2127            ST1 = floatx80_default_nan(&env->fp_status);
2128        } else {
2129            /* Result is infinity with opposite sign to ST1.  */
2130            float_raise(float_flag_divbyzero, &env->fp_status);
2131            ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
2132                                0x8000000000000000ULL);
2133        }
2134    } else if (floatx80_is_zero(ST1)) {
2135        if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
2136            ST1 = floatx80_chs(ST1);
2137        }
2138        /* Otherwise, ST1 is already the correct result.  */
2139    } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
2140        if (arg1_sign) {
2141            ST1 = floatx80_chs(floatx80_zero);
2142        } else {
2143            ST1 = floatx80_zero;
2144        }
2145    } else {
2146        int32_t int_exp;
2147        floatx80 arg0_m1;
2148        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
2149        FloatX80RoundPrec save_prec =
2150            env->fp_status.floatx80_rounding_precision;
2151        env->fp_status.float_rounding_mode = float_round_nearest_even;
2152        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
2153
2154        if (arg0_exp == 0) {
2155            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
2156        }
2157        if (arg1_exp == 0) {
2158            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2159        }
2160        int_exp = arg0_exp - 0x3fff;
2161        if (arg0_sig > 0xb504f333f9de6484ULL) {
2162            ++int_exp;
2163        }
2164        arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
2165                                               &env->fp_status),
2166                               floatx80_one, &env->fp_status);
2167        if (floatx80_is_zero(arg0_m1)) {
2168            /* Exact power of 2; multiply by ST1.  */
2169            env->fp_status.float_rounding_mode = save_mode;
2170            ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
2171                               ST1, &env->fp_status);
2172        } else {
2173            bool asign = extractFloatx80Sign(arg0_m1);
2174            int32_t aexp;
2175            uint64_t asig0, asig1, asig2;
2176            helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
2177            if (int_exp != 0) {
2178                bool isign = (int_exp < 0);
2179                int32_t iexp;
2180                uint64_t isig;
2181                int shift;
2182                int_exp = isign ? -int_exp : int_exp;
2183                shift = clz32(int_exp) + 32;
2184                isig = int_exp;
2185                isig <<= shift;
2186                iexp = 0x403e - shift;
2187                shift128RightJamming(asig0, asig1, iexp - aexp,
2188                                     &asig0, &asig1);
2189                if (asign == isign) {
2190                    add128(isig, 0, asig0, asig1, &asig0, &asig1);
2191                } else {
2192                    sub128(isig, 0, asig0, asig1, &asig0, &asig1);
2193                }
2194                aexp = iexp;
2195                asign = isign;
2196            }
2197            /*
2198             * Multiply by the second argument to compute the required
2199             * result.
2200             */
2201            if (arg1_exp == 0) {
2202                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2203            }
2204            mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
2205            aexp += arg1_exp - 0x3ffe;
2206            /* This result is inexact.  */
2207            asig1 |= 1;
2208            env->fp_status.float_rounding_mode = save_mode;
2209            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
2210                                                asign ^ arg1_sign, aexp,
2211                                                asig0, asig1, &env->fp_status);
2212        }
2213
2214        env->fp_status.floatx80_rounding_precision = save_prec;
2215    }
2216    fpop(env);
2217    merge_exception_flags(env, old_flags);
2218}
2219
2220void helper_fsqrt(CPUX86State *env)
2221{
2222    uint8_t old_flags = save_exception_flags(env);
2223    if (floatx80_is_neg(ST0)) {
2224        env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
2225        env->fpus |= 0x400;
2226    }
2227    ST0 = floatx80_sqrt(ST0, &env->fp_status);
2228    merge_exception_flags(env, old_flags);
2229}
2230
2231void helper_fsincos(CPUX86State *env)
2232{
2233    double fptemp = floatx80_to_double(env, ST0);
2234
2235    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2236        env->fpus |= 0x400;
2237    } else {
2238        ST0 = double_to_floatx80(env, sin(fptemp));
2239        fpush(env);
2240        ST0 = double_to_floatx80(env, cos(fptemp));
2241        env->fpus &= ~0x400;  /* C2 <-- 0 */
2242        /* the above code is for |arg| < 2**63 only */
2243    }
2244}
2245
2246void helper_frndint(CPUX86State *env)
2247{
2248    uint8_t old_flags = save_exception_flags(env);
2249    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
2250    merge_exception_flags(env, old_flags);
2251}
2252
2253void helper_fscale(CPUX86State *env)
2254{
2255    uint8_t old_flags = save_exception_flags(env);
2256    if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
2257        float_raise(float_flag_invalid, &env->fp_status);
2258        ST0 = floatx80_default_nan(&env->fp_status);
2259    } else if (floatx80_is_any_nan(ST1)) {
2260        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2261            float_raise(float_flag_invalid, &env->fp_status);
2262        }
2263        ST0 = ST1;
2264        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2265            float_raise(float_flag_invalid, &env->fp_status);
2266            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
2267        }
2268    } else if (floatx80_is_infinity(ST1) &&
2269               !floatx80_invalid_encoding(ST0) &&
2270               !floatx80_is_any_nan(ST0)) {
2271        if (floatx80_is_neg(ST1)) {
2272            if (floatx80_is_infinity(ST0)) {
2273                float_raise(float_flag_invalid, &env->fp_status);
2274                ST0 = floatx80_default_nan(&env->fp_status);
2275            } else {
2276                ST0 = (floatx80_is_neg(ST0) ?
2277                       floatx80_chs(floatx80_zero) :
2278                       floatx80_zero);
2279            }
2280        } else {
2281            if (floatx80_is_zero(ST0)) {
2282                float_raise(float_flag_invalid, &env->fp_status);
2283                ST0 = floatx80_default_nan(&env->fp_status);
2284            } else {
2285                ST0 = (floatx80_is_neg(ST0) ?
2286                       floatx80_chs(floatx80_infinity) :
2287                       floatx80_infinity);
2288            }
2289        }
2290    } else {
2291        int n;
2292        FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision;
2293        uint8_t save_flags = get_float_exception_flags(&env->fp_status);
2294        set_float_exception_flags(0, &env->fp_status);
2295        n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
2296        set_float_exception_flags(save_flags, &env->fp_status);
2297        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
2298        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
2299        env->fp_status.floatx80_rounding_precision = save;
2300    }
2301    merge_exception_flags(env, old_flags);
2302}
2303
2304void helper_fsin(CPUX86State *env)
2305{
2306    double fptemp = floatx80_to_double(env, ST0);
2307
2308    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2309        env->fpus |= 0x400;
2310    } else {
2311        ST0 = double_to_floatx80(env, sin(fptemp));
2312        env->fpus &= ~0x400;  /* C2 <-- 0 */
2313        /* the above code is for |arg| < 2**53 only */
2314    }
2315}
2316
2317void helper_fcos(CPUX86State *env)
2318{
2319    double fptemp = floatx80_to_double(env, ST0);
2320
2321    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
2322        env->fpus |= 0x400;
2323    } else {
2324        ST0 = double_to_floatx80(env, cos(fptemp));
2325        env->fpus &= ~0x400;  /* C2 <-- 0 */
2326        /* the above code is for |arg| < 2**63 only */
2327    }
2328}
2329
2330void helper_fxam_ST0(CPUX86State *env)
2331{
2332    CPU_LDoubleU temp;
2333    int expdif;
2334
2335    temp.d = ST0;
2336
2337    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
2338    if (SIGND(temp)) {
2339        env->fpus |= 0x200; /* C1 <-- 1 */
2340    }
2341
2342    if (env->fptags[env->fpstt]) {
2343        env->fpus |= 0x4100; /* Empty */
2344        return;
2345    }
2346
2347    expdif = EXPD(temp);
2348    if (expdif == MAXEXPD) {
2349        if (MANTD(temp) == 0x8000000000000000ULL) {
2350            env->fpus |= 0x500; /* Infinity */
2351        } else if (MANTD(temp) & 0x8000000000000000ULL) {
2352            env->fpus |= 0x100; /* NaN */
2353        }
2354    } else if (expdif == 0) {
2355        if (MANTD(temp) == 0) {
2356            env->fpus |=  0x4000; /* Zero */
2357        } else {
2358            env->fpus |= 0x4400; /* Denormal */
2359        }
2360    } else if (MANTD(temp) & 0x8000000000000000ULL) {
2361        env->fpus |= 0x400;
2362    }
2363}
2364
2365static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
2366                      uintptr_t retaddr)
2367{
2368    int fpus, fptag, exp, i;
2369    uint64_t mant;
2370    CPU_LDoubleU tmp;
2371
2372    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
2373    fptag = 0;
2374    for (i = 7; i >= 0; i--) {
2375        fptag <<= 2;
2376        if (env->fptags[i]) {
2377            fptag |= 3;
2378        } else {
2379            tmp.d = env->fpregs[i].d;
2380            exp = EXPD(tmp);
2381            mant = MANTD(tmp);
2382            if (exp == 0 && mant == 0) {
2383                /* zero */
2384                fptag |= 1;
2385            } else if (exp == 0 || exp == MAXEXPD
2386                       || (mant & (1LL << 63)) == 0) {
2387                /* NaNs, infinity, denormal */
2388                fptag |= 2;
2389            }
2390        }
2391    }
2392    if (data32) {
2393        /* 32 bit */
2394        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
2395        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
2396        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
2397        cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */
2398        cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */
2399        cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */
2400        cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */
2401    } else {
2402        /* 16 bit */
2403        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
2404        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
2405        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
2406        cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr);
2407        cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr);
2408        cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr);
2409        cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr);
2410    }
2411}
2412
2413void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
2414{
2415    do_fstenv(env, ptr, data32, GETPC());
2416}
2417
2418static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
2419{
2420    env->fpstt = (fpus >> 11) & 7;
2421    env->fpus = fpus & ~0x3800 & ~FPUS_B;
2422    env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
2423#if !defined(CONFIG_USER_ONLY)
2424    if (!(env->fpus & FPUS_SE)) {
2425        /*
2426         * Here the processor deasserts FERR#; in response, the chipset deasserts
2427         * IGNNE#.
2428         */
2429        cpu_clear_ignne();
2430    }
2431#endif
2432}
2433
2434static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
2435                      uintptr_t retaddr)
2436{
2437    int i, fpus, fptag;
2438
2439    if (data32) {
2440        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
2441        fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
2442        fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
2443    } else {
2444        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
2445        fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
2446        fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
2447    }
2448    cpu_set_fpus(env, fpus);
2449    for (i = 0; i < 8; i++) {
2450        env->fptags[i] = ((fptag & 3) == 3);
2451        fptag >>= 2;
2452    }
2453}
2454
2455void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
2456{
2457    do_fldenv(env, ptr, data32, GETPC());
2458}
2459
2460static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
2461                     uintptr_t retaddr)
2462{
2463    floatx80 tmp;
2464    int i;
2465
2466    do_fstenv(env, ptr, data32, retaddr);
2467
2468    ptr += (target_ulong)14 << data32;
2469    for (i = 0; i < 8; i++) {
2470        tmp = ST(i);
2471        do_fstt(env, tmp, ptr, retaddr);
2472        ptr += 10;
2473    }
2474
2475    do_fninit(env);
2476}
2477
2478void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
2479{
2480    do_fsave(env, ptr, data32, GETPC());
2481}
2482
2483static void do_frstor(CPUX86State *env, target_ulong ptr, int data32,
2484                      uintptr_t retaddr)
2485{
2486    floatx80 tmp;
2487    int i;
2488
2489    do_fldenv(env, ptr, data32, retaddr);
2490    ptr += (target_ulong)14 << data32;
2491
2492    for (i = 0; i < 8; i++) {
2493        tmp = do_fldt(env, ptr, retaddr);
2494        ST(i) = tmp;
2495        ptr += 10;
2496    }
2497}
2498
2499void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
2500{
2501    do_frstor(env, ptr, data32, GETPC());
2502}
2503
2504#define XO(X)  offsetof(X86XSaveArea, X)
2505
2506static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2507{
2508    int fpus, fptag, i;
2509    target_ulong addr;
2510
2511    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
2512    fptag = 0;
2513    for (i = 0; i < 8; i++) {
2514        fptag |= (env->fptags[i] << i);
2515    }
2516
2517    cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
2518    cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
2519    cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
2520
2521    /* In 32-bit mode this is eip, sel, dp, sel.
2522       In 64-bit mode this is rip, rdp.
2523       But in either case we don't write actual data, just zeros.  */
2524    cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
2525    cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
2526
2527    addr = ptr + XO(legacy.fpregs);
2528    for (i = 0; i < 8; i++) {
2529        floatx80 tmp = ST(i);
2530        do_fstt(env, tmp, addr, ra);
2531        addr += 16;
2532    }
2533}
2534
2535static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2536{
2537    update_mxcsr_from_sse_status(env);
2538    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
2539    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
2540}
2541
2542static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2543{
2544    int i, nb_xmm_regs;
2545    target_ulong addr;
2546
2547    if (env->hflags & HF_CS64_MASK) {
2548        nb_xmm_regs = 16;
2549    } else {
2550        nb_xmm_regs = 8;
2551    }
2552
2553    addr = ptr + XO(legacy.xmm_regs);
2554    for (i = 0; i < nb_xmm_regs; i++) {
2555        cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
2556        cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
2557        addr += 16;
2558    }
2559}
2560
2561static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2562{
2563    int i, nb_xmm_regs;
2564
2565    if (env->hflags & HF_CS64_MASK) {
2566        nb_xmm_regs = 16;
2567    } else {
2568        nb_xmm_regs = 8;
2569    }
2570
2571    for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
2572        cpu_stq_data_ra(env, ptr, env->xmm_regs[i].ZMM_Q(2), ra);
2573        cpu_stq_data_ra(env, ptr + 8, env->xmm_regs[i].ZMM_Q(3), ra);
2574    }
2575}
2576
2577static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2578{
2579    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
2580    int i;
2581
2582    for (i = 0; i < 4; i++, addr += 16) {
2583        cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
2584        cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
2585    }
2586}
2587
2588static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2589{
2590    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
2591                    env->bndcs_regs.cfgu, ra);
2592    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
2593                    env->bndcs_regs.sts, ra);
2594}
2595
2596static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2597{
2598    cpu_stq_data_ra(env, ptr, env->pkru, ra);
2599}
2600
2601static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2602{
2603    /* The operand must be 16 byte aligned */
2604    if (ptr & 0xf) {
2605        raise_exception_ra(env, EXCP0D_GPF, ra);
2606    }
2607
2608    do_xsave_fpu(env, ptr, ra);
2609
2610    if (env->cr[4] & CR4_OSFXSR_MASK) {
2611        do_xsave_mxcsr(env, ptr, ra);
2612        /* Fast FXSAVE leaves out the XMM registers */
2613        if (!(env->efer & MSR_EFER_FFXSR)
2614            || (env->hflags & HF_CPL_MASK)
2615            || !(env->hflags & HF_LMA_MASK)) {
2616            do_xsave_sse(env, ptr, ra);
2617        }
2618    }
2619}
2620
2621void helper_fxsave(CPUX86State *env, target_ulong ptr)
2622{
2623    do_fxsave(env, ptr, GETPC());
2624}
2625
2626static uint64_t get_xinuse(CPUX86State *env)
2627{
2628    uint64_t inuse = -1;
2629
2630    /* For the most part, we don't track XINUSE.  We could calculate it
2631       here for all components, but it's probably less work to simply
2632       indicate in use.  That said, the state of BNDREGS is important
2633       enough to track in HFLAGS, so we might as well use that here.  */
2634    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
2635       inuse &= ~XSTATE_BNDREGS_MASK;
2636    }
2637    return inuse;
2638}
2639
2640static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
2641                     uint64_t inuse, uint64_t opt, uintptr_t ra)
2642{
2643    uint64_t old_bv, new_bv;
2644
2645    /* The OS must have enabled XSAVE.  */
2646    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2647        raise_exception_ra(env, EXCP06_ILLOP, ra);
2648    }
2649
2650    /* The operand must be 64 byte aligned.  */
2651    if (ptr & 63) {
2652        raise_exception_ra(env, EXCP0D_GPF, ra);
2653    }
2654
2655    /* Never save anything not enabled by XCR0.  */
2656    rfbm &= env->xcr0;
2657    opt &= rfbm;
2658
2659    if (opt & XSTATE_FP_MASK) {
2660        do_xsave_fpu(env, ptr, ra);
2661    }
2662    if (rfbm & XSTATE_SSE_MASK) {
2663        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
2664        do_xsave_mxcsr(env, ptr, ra);
2665    }
2666    if (opt & XSTATE_SSE_MASK) {
2667        do_xsave_sse(env, ptr, ra);
2668    }
2669    if (opt & XSTATE_YMM_MASK) {
2670        do_xsave_ymmh(env, ptr + XO(avx_state), ra);
2671    }
2672    if (opt & XSTATE_BNDREGS_MASK) {
2673        do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
2674    }
2675    if (opt & XSTATE_BNDCSR_MASK) {
2676        do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
2677    }
2678    if (opt & XSTATE_PKRU_MASK) {
2679        do_xsave_pkru(env, ptr + XO(pkru_state), ra);
2680    }
2681
2682    /* Update the XSTATE_BV field.  */
2683    old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
2684    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
2685    cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
2686}
2687
2688void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2689{
2690    do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
2691}
2692
2693void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2694{
2695    uint64_t inuse = get_xinuse(env);
2696    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
2697}
2698
2699static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2700{
2701    int i, fpuc, fpus, fptag;
2702    target_ulong addr;
2703
2704    fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
2705    fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
2706    fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
2707    cpu_set_fpuc(env, fpuc);
2708    cpu_set_fpus(env, fpus);
2709    fptag ^= 0xff;
2710    for (i = 0; i < 8; i++) {
2711        env->fptags[i] = ((fptag >> i) & 1);
2712    }
2713
2714    addr = ptr + XO(legacy.fpregs);
2715    for (i = 0; i < 8; i++) {
2716        floatx80 tmp = do_fldt(env, addr, ra);
2717        ST(i) = tmp;
2718        addr += 16;
2719    }
2720}
2721
2722static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2723{
2724    cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
2725}
2726
2727static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2728{
2729    int i, nb_xmm_regs;
2730    target_ulong addr;
2731
2732    if (env->hflags & HF_CS64_MASK) {
2733        nb_xmm_regs = 16;
2734    } else {
2735        nb_xmm_regs = 8;
2736    }
2737
2738    addr = ptr + XO(legacy.xmm_regs);
2739    for (i = 0; i < nb_xmm_regs; i++) {
2740        env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
2741        env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
2742        addr += 16;
2743    }
2744}
2745
2746static void do_clear_sse(CPUX86State *env)
2747{
2748    int i, nb_xmm_regs;
2749
2750    if (env->hflags & HF_CS64_MASK) {
2751        nb_xmm_regs = 16;
2752    } else {
2753        nb_xmm_regs = 8;
2754    }
2755
2756    for (i = 0; i < nb_xmm_regs; i++) {
2757        env->xmm_regs[i].ZMM_Q(0) = 0;
2758        env->xmm_regs[i].ZMM_Q(1) = 0;
2759    }
2760}
2761
2762static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2763{
2764    int i, nb_xmm_regs;
2765
2766    if (env->hflags & HF_CS64_MASK) {
2767        nb_xmm_regs = 16;
2768    } else {
2769        nb_xmm_regs = 8;
2770    }
2771
2772    for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
2773        env->xmm_regs[i].ZMM_Q(2) = cpu_ldq_data_ra(env, ptr, ra);
2774        env->xmm_regs[i].ZMM_Q(3) = cpu_ldq_data_ra(env, ptr + 8, ra);
2775    }
2776}
2777
2778static void do_clear_ymmh(CPUX86State *env)
2779{
2780    int i, nb_xmm_regs;
2781
2782    if (env->hflags & HF_CS64_MASK) {
2783        nb_xmm_regs = 16;
2784    } else {
2785        nb_xmm_regs = 8;
2786    }
2787
2788    for (i = 0; i < nb_xmm_regs; i++) {
2789        env->xmm_regs[i].ZMM_Q(2) = 0;
2790        env->xmm_regs[i].ZMM_Q(3) = 0;
2791    }
2792}
2793
2794static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2795{
2796    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
2797    int i;
2798
2799    for (i = 0; i < 4; i++, addr += 16) {
2800        env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
2801        env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
2802    }
2803}
2804
2805static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2806{
2807    /* FIXME: Extend highest implemented bit of linear address.  */
2808    env->bndcs_regs.cfgu
2809        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
2810    env->bndcs_regs.sts
2811        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
2812}
2813
2814static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2815{
2816    env->pkru = cpu_ldq_data_ra(env, ptr, ra);
2817}
2818
2819static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2820{
2821    /* The operand must be 16 byte aligned */
2822    if (ptr & 0xf) {
2823        raise_exception_ra(env, EXCP0D_GPF, ra);
2824    }
2825
2826    do_xrstor_fpu(env, ptr, ra);
2827
2828    if (env->cr[4] & CR4_OSFXSR_MASK) {
2829        do_xrstor_mxcsr(env, ptr, ra);
2830        /* Fast FXRSTOR leaves out the XMM registers */
2831        if (!(env->efer & MSR_EFER_FFXSR)
2832            || (env->hflags & HF_CPL_MASK)
2833            || !(env->hflags & HF_LMA_MASK)) {
2834            do_xrstor_sse(env, ptr, ra);
2835        }
2836    }
2837}
2838
2839void helper_fxrstor(CPUX86State *env, target_ulong ptr)
2840{
2841    do_fxrstor(env, ptr, GETPC());
2842}
2843
2844static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra)
2845{
2846    uint64_t xstate_bv, xcomp_bv, reserve0;
2847
2848    rfbm &= env->xcr0;
2849
2850    /* The OS must have enabled XSAVE.  */
2851    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2852        raise_exception_ra(env, EXCP06_ILLOP, ra);
2853    }
2854
2855    /* The operand must be 64 byte aligned.  */
2856    if (ptr & 63) {
2857        raise_exception_ra(env, EXCP0D_GPF, ra);
2858    }
2859
2860    xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
2861
2862    if ((int64_t)xstate_bv < 0) {
2863        /* FIXME: Compact form.  */
2864        raise_exception_ra(env, EXCP0D_GPF, ra);
2865    }
2866
2867    /* Standard form.  */
2868
2869    /* The XSTATE_BV field must not set bits not present in XCR0.  */
2870    if (xstate_bv & ~env->xcr0) {
2871        raise_exception_ra(env, EXCP0D_GPF, ra);
2872    }
2873
2874    /* The XCOMP_BV field must be zero.  Note that, as of the April 2016
2875       revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
2876       describes only XCOMP_BV, but the description of the standard form
2877       of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
2878       includes the next 64-bit field.  */
2879    xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
2880    reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
2881    if (xcomp_bv || reserve0) {
2882        raise_exception_ra(env, EXCP0D_GPF, ra);
2883    }
2884
2885    if (rfbm & XSTATE_FP_MASK) {
2886        if (xstate_bv & XSTATE_FP_MASK) {
2887            do_xrstor_fpu(env, ptr, ra);
2888        } else {
2889            do_fninit(env);
2890            memset(env->fpregs, 0, sizeof(env->fpregs));
2891        }
2892    }
2893    if (rfbm & XSTATE_SSE_MASK) {
2894        /* Note that the standard form of XRSTOR loads MXCSR from memory
2895           whether or not the XSTATE_BV bit is set.  */
2896        do_xrstor_mxcsr(env, ptr, ra);
2897        if (xstate_bv & XSTATE_SSE_MASK) {
2898            do_xrstor_sse(env, ptr, ra);
2899        } else {
2900            do_clear_sse(env);
2901        }
2902    }
2903    if (rfbm & XSTATE_YMM_MASK) {
2904        if (xstate_bv & XSTATE_YMM_MASK) {
2905            do_xrstor_ymmh(env, ptr + XO(avx_state), ra);
2906        } else {
2907            do_clear_ymmh(env);
2908        }
2909    }
2910    if (rfbm & XSTATE_BNDREGS_MASK) {
2911        if (xstate_bv & XSTATE_BNDREGS_MASK) {
2912            do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
2913            env->hflags |= HF_MPX_IU_MASK;
2914        } else {
2915            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
2916            env->hflags &= ~HF_MPX_IU_MASK;
2917        }
2918    }
2919    if (rfbm & XSTATE_BNDCSR_MASK) {
2920        if (xstate_bv & XSTATE_BNDCSR_MASK) {
2921            do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
2922        } else {
2923            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
2924        }
2925        cpu_sync_bndcs_hflags(env);
2926    }
2927    if (rfbm & XSTATE_PKRU_MASK) {
2928        uint64_t old_pkru = env->pkru;
2929        if (xstate_bv & XSTATE_PKRU_MASK) {
2930            do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
2931        } else {
2932            env->pkru = 0;
2933        }
2934        if (env->pkru != old_pkru) {
2935            CPUState *cs = env_cpu(env);
2936            tlb_flush(cs);
2937        }
2938    }
2939}
2940
2941#undef XO
2942
2943void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
2944{
2945    do_xrstor(env, ptr, rfbm, GETPC());
2946}
2947
2948#if defined(CONFIG_USER_ONLY)
2949void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
2950{
2951    do_fsave(env, ptr, data32, 0);
2952}
2953
2954void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
2955{
2956    do_frstor(env, ptr, data32, 0);
2957}
2958
2959void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
2960{
2961    do_fxsave(env, ptr, 0);
2962}
2963
2964void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
2965{
2966    do_fxrstor(env, ptr, 0);
2967}
2968
2969void cpu_x86_xsave(CPUX86State *env, target_ulong ptr)
2970{
2971    do_xsave(env, ptr, -1, get_xinuse(env), -1, 0);
2972}
2973
2974void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr)
2975{
2976    do_xrstor(env, ptr, -1, 0);
2977}
2978#endif
2979
2980uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
2981{
2982    /* The OS must have enabled XSAVE.  */
2983    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
2984        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
2985    }
2986
2987    switch (ecx) {
2988    case 0:
2989        return env->xcr0;
2990    case 1:
2991        if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
2992            return env->xcr0 & get_xinuse(env);
2993        }
2994        break;
2995    }
2996    raise_exception_ra(env, EXCP0D_GPF, GETPC());
2997}
2998
2999void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
3000{
3001    uint32_t dummy, ena_lo, ena_hi;
3002    uint64_t ena;
3003
3004    /* The OS must have enabled XSAVE.  */
3005    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
3006        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
3007    }
3008
3009    /* Only XCR0 is defined at present; the FPU may not be disabled.  */
3010    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
3011        goto do_gpf;
3012    }
3013
3014    /* Disallow enabling unimplemented features.  */
3015    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
3016    ena = ((uint64_t)ena_hi << 32) | ena_lo;
3017    if (mask & ~ena) {
3018        goto do_gpf;
3019    }
3020
3021    /* Disallow enabling only half of MPX.  */
3022    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
3023        & XSTATE_BNDCSR_MASK) {
3024        goto do_gpf;
3025    }
3026
3027    env->xcr0 = mask;
3028    cpu_sync_bndcs_hflags(env);
3029    cpu_sync_avx_hflag(env);
3030    return;
3031
3032 do_gpf:
3033    raise_exception_ra(env, EXCP0D_GPF, GETPC());
3034}
3035
3036/* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
3038
3039#define SSE_DAZ             0x0040
3040#define SSE_RC_SHIFT        13
3041#define SSE_RC_MASK         (3 << SSE_RC_SHIFT)
3042#define SSE_FZ              0x8000
3043
3044void update_mxcsr_status(CPUX86State *env)
3045{
3046    uint32_t mxcsr = env->mxcsr;
3047    int rnd_type;
3048
3049    /* set rounding mode */
3050    rnd_type = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT;
3051    set_x86_rounding_mode(rnd_type, &env->sse_status);
3052
3053    /* Set exception flags.  */
3054    set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
3055                              (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
3056                              (mxcsr & FPUS_OE ? float_flag_overflow : 0) |
3057                              (mxcsr & FPUS_UE ? float_flag_underflow : 0) |
3058                              (mxcsr & FPUS_PE ? float_flag_inexact : 0),
3059                              &env->sse_status);
3060
3061    /* set denormals are zero */
3062    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
3063
3064    /* set flush to zero */
3065    set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
3066}
3067
3068void update_mxcsr_from_sse_status(CPUX86State *env)
3069{
3070    uint8_t flags = get_float_exception_flags(&env->sse_status);
3071    /*
3072     * The MXCSR denormal flag has opposite semantics to
3073     * float_flag_input_denormal (the softfloat code sets that flag
3074     * only when flushing input denormals to zero, but SSE sets it
3075     * only when not flushing them to zero), so is not converted
3076     * here.
3077     */
3078    env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
3079                   (flags & float_flag_divbyzero ? FPUS_ZE : 0) |
3080                   (flags & float_flag_overflow ? FPUS_OE : 0) |
3081                   (flags & float_flag_underflow ? FPUS_UE : 0) |
3082                   (flags & float_flag_inexact ? FPUS_PE : 0) |
3083                   (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE :
3084                    0));
3085}
3086
3087void helper_update_mxcsr(CPUX86State *env)
3088{
3089    update_mxcsr_from_sse_status(env);
3090}
3091
3092void helper_ldmxcsr(CPUX86State *env, uint32_t val)
3093{
3094    cpu_set_mxcsr(env, val);
3095}
3096
3097void helper_enter_mmx(CPUX86State *env)
3098{
3099    env->fpstt = 0;
3100    *(uint32_t *)(env->fptags) = 0;
3101    *(uint32_t *)(env->fptags + 4) = 0;
3102}
3103
3104void helper_emms(CPUX86State *env)
3105{
3106    /* set to empty state */
3107    *(uint32_t *)(env->fptags) = 0x01010101;
3108    *(uint32_t *)(env->fptags + 4) = 0x01010101;
3109}
3110
3111#define SHIFT 0
3112#include "ops_sse.h"
3113
3114#define SHIFT 1
3115#include "ops_sse.h"
3116
3117#define SHIFT 2
3118#include "ops_sse.h"
3119