qemu/target/i386/fpu_helper.c
<<
>>
Prefs
   1/*
   2 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include <math.h>
  22#include "cpu.h"
  23#include "exec/helper-proto.h"
  24#include "qemu/host-utils.h"
  25#include "exec/exec-all.h"
  26#include "exec/cpu_ldst.h"
  27#include "fpu/softfloat.h"
  28
  29#ifdef CONFIG_SOFTMMU
  30#include "hw/irq.h"
  31#endif
  32
  33#define FPU_RC_MASK         0xc00
  34#define FPU_RC_NEAR         0x000
  35#define FPU_RC_DOWN         0x400
  36#define FPU_RC_UP           0x800
  37#define FPU_RC_CHOP         0xc00
  38
  39#define MAXTAN 9223372036854775808.0
  40
  41/* the following deal with x86 long double-precision numbers */
  42#define MAXEXPD 0x7fff
  43#define EXPBIAS 16383
  44#define EXPD(fp)        (fp.l.upper & 0x7fff)
  45#define SIGND(fp)       ((fp.l.upper) & 0x8000)
  46#define MANTD(fp)       (fp.l.lower)
  47#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
  48
  49#define FPUS_IE (1 << 0)
  50#define FPUS_DE (1 << 1)
  51#define FPUS_ZE (1 << 2)
  52#define FPUS_OE (1 << 3)
  53#define FPUS_UE (1 << 4)
  54#define FPUS_PE (1 << 5)
  55#define FPUS_SF (1 << 6)
  56#define FPUS_SE (1 << 7)
  57#define FPUS_B  (1 << 15)
  58
  59#define FPUC_EM 0x3f
  60
  61#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
  62#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
  63#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
  64
  65#if !defined(CONFIG_USER_ONLY)
  66static qemu_irq ferr_irq;
  67
  68void x86_register_ferr_irq(qemu_irq irq)
  69{
  70    ferr_irq = irq;
  71}
  72
  73static void cpu_clear_ignne(void)
  74{
  75    CPUX86State *env = &X86_CPU(first_cpu)->env;
  76    env->hflags2 &= ~HF2_IGNNE_MASK;
  77}
  78
  79void cpu_set_ignne(void)
  80{
  81    CPUX86State *env = &X86_CPU(first_cpu)->env;
  82    env->hflags2 |= HF2_IGNNE_MASK;
  83    /*
  84     * We get here in response to a write to port F0h.  The chipset should
  85     * deassert FP_IRQ and FERR# instead should stay signaled until FPSW_SE is
  86     * cleared, because FERR# and FP_IRQ are two separate pins on real
  87     * hardware.  However, we don't model FERR# as a qemu_irq, so we just
  88     * do directly what the chipset would do, i.e. deassert FP_IRQ.
  89     */
  90    qemu_irq_lower(ferr_irq);
  91}
  92#endif
  93
  94
  95static inline void fpush(CPUX86State *env)
  96{
  97    env->fpstt = (env->fpstt - 1) & 7;
  98    env->fptags[env->fpstt] = 0; /* validate stack entry */
  99}
 100
 101static inline void fpop(CPUX86State *env)
 102{
 103    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
 104    env->fpstt = (env->fpstt + 1) & 7;
 105}
 106
 107static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
 108                                   uintptr_t retaddr)
 109{
 110    CPU_LDoubleU temp;
 111
 112    temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
 113    temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
 114    return temp.d;
 115}
 116
 117static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
 118                               uintptr_t retaddr)
 119{
 120    CPU_LDoubleU temp;
 121
 122    temp.d = f;
 123    cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
 124    cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
 125}
 126
 127/* x87 FPU helpers */
 128
 129static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
 130{
 131    union {
 132        float64 f64;
 133        double d;
 134    } u;
 135
 136    u.f64 = floatx80_to_float64(a, &env->fp_status);
 137    return u.d;
 138}
 139
 140static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
 141{
 142    union {
 143        float64 f64;
 144        double d;
 145    } u;
 146
 147    u.d = a;
 148    return float64_to_floatx80(u.f64, &env->fp_status);
 149}
 150
 151static void fpu_set_exception(CPUX86State *env, int mask)
 152{
 153    env->fpus |= mask;
 154    if (env->fpus & (~env->fpuc & FPUC_EM)) {
 155        env->fpus |= FPUS_SE | FPUS_B;
 156    }
 157}
 158
 159static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
 160{
 161    if (floatx80_is_zero(b)) {
 162        fpu_set_exception(env, FPUS_ZE);
 163    }
 164    return floatx80_div(a, b, &env->fp_status);
 165}
 166
 167static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
 168{
 169    if (env->cr[0] & CR0_NE_MASK) {
 170        raise_exception_ra(env, EXCP10_COPR, retaddr);
 171    }
 172#if !defined(CONFIG_USER_ONLY)
 173    else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) {
 174        qemu_irq_raise(ferr_irq);
 175    }
 176#endif
 177}
 178
 179void helper_flds_FT0(CPUX86State *env, uint32_t val)
 180{
 181    union {
 182        float32 f;
 183        uint32_t i;
 184    } u;
 185
 186    u.i = val;
 187    FT0 = float32_to_floatx80(u.f, &env->fp_status);
 188}
 189
 190void helper_fldl_FT0(CPUX86State *env, uint64_t val)
 191{
 192    union {
 193        float64 f;
 194        uint64_t i;
 195    } u;
 196
 197    u.i = val;
 198    FT0 = float64_to_floatx80(u.f, &env->fp_status);
 199}
 200
 201void helper_fildl_FT0(CPUX86State *env, int32_t val)
 202{
 203    FT0 = int32_to_floatx80(val, &env->fp_status);
 204}
 205
 206void helper_flds_ST0(CPUX86State *env, uint32_t val)
 207{
 208    int new_fpstt;
 209    union {
 210        float32 f;
 211        uint32_t i;
 212    } u;
 213
 214    new_fpstt = (env->fpstt - 1) & 7;
 215    u.i = val;
 216    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
 217    env->fpstt = new_fpstt;
 218    env->fptags[new_fpstt] = 0; /* validate stack entry */
 219}
 220
 221void helper_fldl_ST0(CPUX86State *env, uint64_t val)
 222{
 223    int new_fpstt;
 224    union {
 225        float64 f;
 226        uint64_t i;
 227    } u;
 228
 229    new_fpstt = (env->fpstt - 1) & 7;
 230    u.i = val;
 231    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
 232    env->fpstt = new_fpstt;
 233    env->fptags[new_fpstt] = 0; /* validate stack entry */
 234}
 235
 236void helper_fildl_ST0(CPUX86State *env, int32_t val)
 237{
 238    int new_fpstt;
 239
 240    new_fpstt = (env->fpstt - 1) & 7;
 241    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
 242    env->fpstt = new_fpstt;
 243    env->fptags[new_fpstt] = 0; /* validate stack entry */
 244}
 245
 246void helper_fildll_ST0(CPUX86State *env, int64_t val)
 247{
 248    int new_fpstt;
 249
 250    new_fpstt = (env->fpstt - 1) & 7;
 251    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
 252    env->fpstt = new_fpstt;
 253    env->fptags[new_fpstt] = 0; /* validate stack entry */
 254}
 255
 256uint32_t helper_fsts_ST0(CPUX86State *env)
 257{
 258    union {
 259        float32 f;
 260        uint32_t i;
 261    } u;
 262
 263    u.f = floatx80_to_float32(ST0, &env->fp_status);
 264    return u.i;
 265}
 266
 267uint64_t helper_fstl_ST0(CPUX86State *env)
 268{
 269    union {
 270        float64 f;
 271        uint64_t i;
 272    } u;
 273
 274    u.f = floatx80_to_float64(ST0, &env->fp_status);
 275    return u.i;
 276}
 277
 278int32_t helper_fist_ST0(CPUX86State *env)
 279{
 280    int32_t val;
 281
 282    val = floatx80_to_int32(ST0, &env->fp_status);
 283    if (val != (int16_t)val) {
 284        val = -32768;
 285    }
 286    return val;
 287}
 288
 289int32_t helper_fistl_ST0(CPUX86State *env)
 290{
 291    int32_t val;
 292    signed char old_exp_flags;
 293
 294    old_exp_flags = get_float_exception_flags(&env->fp_status);
 295    set_float_exception_flags(0, &env->fp_status);
 296
 297    val = floatx80_to_int32(ST0, &env->fp_status);
 298    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 299        val = 0x80000000;
 300    }
 301    set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 302                                | old_exp_flags, &env->fp_status);
 303    return val;
 304}
 305
 306int64_t helper_fistll_ST0(CPUX86State *env)
 307{
 308    int64_t val;
 309    signed char old_exp_flags;
 310
 311    old_exp_flags = get_float_exception_flags(&env->fp_status);
 312    set_float_exception_flags(0, &env->fp_status);
 313
 314    val = floatx80_to_int64(ST0, &env->fp_status);
 315    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 316        val = 0x8000000000000000ULL;
 317    }
 318    set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 319                                | old_exp_flags, &env->fp_status);
 320    return val;
 321}
 322
 323int32_t helper_fistt_ST0(CPUX86State *env)
 324{
 325    int32_t val;
 326
 327    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 328    if (val != (int16_t)val) {
 329        val = -32768;
 330    }
 331    return val;
 332}
 333
 334int32_t helper_fisttl_ST0(CPUX86State *env)
 335{
 336    return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 337}
 338
 339int64_t helper_fisttll_ST0(CPUX86State *env)
 340{
 341    return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
 342}
 343
 344void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
 345{
 346    int new_fpstt;
 347
 348    new_fpstt = (env->fpstt - 1) & 7;
 349    env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
 350    env->fpstt = new_fpstt;
 351    env->fptags[new_fpstt] = 0; /* validate stack entry */
 352}
 353
 354void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
 355{
 356    helper_fstt(env, ST0, ptr, GETPC());
 357}
 358
 359void helper_fpush(CPUX86State *env)
 360{
 361    fpush(env);
 362}
 363
 364void helper_fpop(CPUX86State *env)
 365{
 366    fpop(env);
 367}
 368
 369void helper_fdecstp(CPUX86State *env)
 370{
 371    env->fpstt = (env->fpstt - 1) & 7;
 372    env->fpus &= ~0x4700;
 373}
 374
 375void helper_fincstp(CPUX86State *env)
 376{
 377    env->fpstt = (env->fpstt + 1) & 7;
 378    env->fpus &= ~0x4700;
 379}
 380
 381/* FPU move */
 382
 383void helper_ffree_STN(CPUX86State *env, int st_index)
 384{
 385    env->fptags[(env->fpstt + st_index) & 7] = 1;
 386}
 387
 388void helper_fmov_ST0_FT0(CPUX86State *env)
 389{
 390    ST0 = FT0;
 391}
 392
 393void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
 394{
 395    FT0 = ST(st_index);
 396}
 397
 398void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
 399{
 400    ST0 = ST(st_index);
 401}
 402
 403void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
 404{
 405    ST(st_index) = ST0;
 406}
 407
 408void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
 409{
 410    floatx80 tmp;
 411
 412    tmp = ST(st_index);
 413    ST(st_index) = ST0;
 414    ST0 = tmp;
 415}
 416
 417/* FPU operations */
 418
 419static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
 420
 421void helper_fcom_ST0_FT0(CPUX86State *env)
 422{
 423    int ret;
 424
 425    ret = floatx80_compare(ST0, FT0, &env->fp_status);
 426    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 427}
 428
 429void helper_fucom_ST0_FT0(CPUX86State *env)
 430{
 431    int ret;
 432
 433    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 434    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 435}
 436
 437static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
 438
 439void helper_fcomi_ST0_FT0(CPUX86State *env)
 440{
 441    int eflags;
 442    int ret;
 443
 444    ret = floatx80_compare(ST0, FT0, &env->fp_status);
 445    eflags = cpu_cc_compute_all(env, CC_OP);
 446    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 447    CC_SRC = eflags;
 448}
 449
 450void helper_fucomi_ST0_FT0(CPUX86State *env)
 451{
 452    int eflags;
 453    int ret;
 454
 455    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 456    eflags = cpu_cc_compute_all(env, CC_OP);
 457    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 458    CC_SRC = eflags;
 459}
 460
 461void helper_fadd_ST0_FT0(CPUX86State *env)
 462{
 463    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
 464}
 465
 466void helper_fmul_ST0_FT0(CPUX86State *env)
 467{
 468    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
 469}
 470
 471void helper_fsub_ST0_FT0(CPUX86State *env)
 472{
 473    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
 474}
 475
 476void helper_fsubr_ST0_FT0(CPUX86State *env)
 477{
 478    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
 479}
 480
 481void helper_fdiv_ST0_FT0(CPUX86State *env)
 482{
 483    ST0 = helper_fdiv(env, ST0, FT0);
 484}
 485
 486void helper_fdivr_ST0_FT0(CPUX86State *env)
 487{
 488    ST0 = helper_fdiv(env, FT0, ST0);
 489}
 490
 491/* fp operations between STN and ST0 */
 492
 493void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
 494{
 495    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
 496}
 497
 498void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
 499{
 500    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
 501}
 502
 503void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
 504{
 505    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
 506}
 507
 508void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
 509{
 510    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
 511}
 512
 513void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
 514{
 515    floatx80 *p;
 516
 517    p = &ST(st_index);
 518    *p = helper_fdiv(env, *p, ST0);
 519}
 520
 521void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
 522{
 523    floatx80 *p;
 524
 525    p = &ST(st_index);
 526    *p = helper_fdiv(env, ST0, *p);
 527}
 528
 529/* misc FPU operations */
 530void helper_fchs_ST0(CPUX86State *env)
 531{
 532    ST0 = floatx80_chs(ST0);
 533}
 534
 535void helper_fabs_ST0(CPUX86State *env)
 536{
 537    ST0 = floatx80_abs(ST0);
 538}
 539
 540void helper_fld1_ST0(CPUX86State *env)
 541{
 542    ST0 = floatx80_one;
 543}
 544
 545void helper_fldl2t_ST0(CPUX86State *env)
 546{
 547    ST0 = floatx80_l2t;
 548}
 549
 550void helper_fldl2e_ST0(CPUX86State *env)
 551{
 552    ST0 = floatx80_l2e;
 553}
 554
 555void helper_fldpi_ST0(CPUX86State *env)
 556{
 557    ST0 = floatx80_pi;
 558}
 559
 560void helper_fldlg2_ST0(CPUX86State *env)
 561{
 562    ST0 = floatx80_lg2;
 563}
 564
 565void helper_fldln2_ST0(CPUX86State *env)
 566{
 567    ST0 = floatx80_ln2;
 568}
 569
 570void helper_fldz_ST0(CPUX86State *env)
 571{
 572    ST0 = floatx80_zero;
 573}
 574
 575void helper_fldz_FT0(CPUX86State *env)
 576{
 577    FT0 = floatx80_zero;
 578}
 579
 580uint32_t helper_fnstsw(CPUX86State *env)
 581{
 582    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 583}
 584
 585uint32_t helper_fnstcw(CPUX86State *env)
 586{
 587    return env->fpuc;
 588}
 589
 590void update_fp_status(CPUX86State *env)
 591{
 592    int rnd_type;
 593
 594    /* set rounding mode */
 595    switch (env->fpuc & FPU_RC_MASK) {
 596    default:
 597    case FPU_RC_NEAR:
 598        rnd_type = float_round_nearest_even;
 599        break;
 600    case FPU_RC_DOWN:
 601        rnd_type = float_round_down;
 602        break;
 603    case FPU_RC_UP:
 604        rnd_type = float_round_up;
 605        break;
 606    case FPU_RC_CHOP:
 607        rnd_type = float_round_to_zero;
 608        break;
 609    }
 610    set_float_rounding_mode(rnd_type, &env->fp_status);
 611    switch ((env->fpuc >> 8) & 3) {
 612    case 0:
 613        rnd_type = 32;
 614        break;
 615    case 2:
 616        rnd_type = 64;
 617        break;
 618    case 3:
 619    default:
 620        rnd_type = 80;
 621        break;
 622    }
 623    set_floatx80_rounding_precision(rnd_type, &env->fp_status);
 624}
 625
 626void helper_fldcw(CPUX86State *env, uint32_t val)
 627{
 628    cpu_set_fpuc(env, val);
 629}
 630
 631void helper_fclex(CPUX86State *env)
 632{
 633    env->fpus &= 0x7f00;
 634}
 635
 636void helper_fwait(CPUX86State *env)
 637{
 638    if (env->fpus & FPUS_SE) {
 639        fpu_raise_exception(env, GETPC());
 640    }
 641}
 642
 643void helper_fninit(CPUX86State *env)
 644{
 645    env->fpus = 0;
 646    env->fpstt = 0;
 647    cpu_set_fpuc(env, 0x37f);
 648    env->fptags[0] = 1;
 649    env->fptags[1] = 1;
 650    env->fptags[2] = 1;
 651    env->fptags[3] = 1;
 652    env->fptags[4] = 1;
 653    env->fptags[5] = 1;
 654    env->fptags[6] = 1;
 655    env->fptags[7] = 1;
 656}
 657
 658/* BCD ops */
 659
 660void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
 661{
 662    floatx80 tmp;
 663    uint64_t val;
 664    unsigned int v;
 665    int i;
 666
 667    val = 0;
 668    for (i = 8; i >= 0; i--) {
 669        v = cpu_ldub_data_ra(env, ptr + i, GETPC());
 670        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
 671    }
 672    tmp = int64_to_floatx80(val, &env->fp_status);
 673    if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
 674        tmp = floatx80_chs(tmp);
 675    }
 676    fpush(env);
 677    ST0 = tmp;
 678}
 679
 680void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
 681{
 682    int v;
 683    target_ulong mem_ref, mem_end;
 684    int64_t val;
 685
 686    val = floatx80_to_int64(ST0, &env->fp_status);
 687    mem_ref = ptr;
 688    mem_end = mem_ref + 9;
 689    if (val < 0) {
 690        cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
 691        val = -val;
 692    } else {
 693        cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
 694    }
 695    while (mem_ref < mem_end) {
 696        if (val == 0) {
 697            break;
 698        }
 699        v = val % 100;
 700        val = val / 100;
 701        v = ((v / 10) << 4) | (v % 10);
 702        cpu_stb_data_ra(env, mem_ref++, v, GETPC());
 703    }
 704    while (mem_ref < mem_end) {
 705        cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
 706    }
 707}
 708
 709void helper_f2xm1(CPUX86State *env)
 710{
 711    double val = floatx80_to_double(env, ST0);
 712
 713    val = pow(2.0, val) - 1.0;
 714    ST0 = double_to_floatx80(env, val);
 715}
 716
 717void helper_fyl2x(CPUX86State *env)
 718{
 719    double fptemp = floatx80_to_double(env, ST0);
 720
 721    if (fptemp > 0.0) {
 722        fptemp = log(fptemp) / log(2.0); /* log2(ST) */
 723        fptemp *= floatx80_to_double(env, ST1);
 724        ST1 = double_to_floatx80(env, fptemp);
 725        fpop(env);
 726    } else {
 727        env->fpus &= ~0x4700;
 728        env->fpus |= 0x400;
 729    }
 730}
 731
 732void helper_fptan(CPUX86State *env)
 733{
 734    double fptemp = floatx80_to_double(env, ST0);
 735
 736    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 737        env->fpus |= 0x400;
 738    } else {
 739        fptemp = tan(fptemp);
 740        ST0 = double_to_floatx80(env, fptemp);
 741        fpush(env);
 742        ST0 = floatx80_one;
 743        env->fpus &= ~0x400; /* C2 <-- 0 */
 744        /* the above code is for |arg| < 2**52 only */
 745    }
 746}
 747
 748void helper_fpatan(CPUX86State *env)
 749{
 750    double fptemp, fpsrcop;
 751
 752    fpsrcop = floatx80_to_double(env, ST1);
 753    fptemp = floatx80_to_double(env, ST0);
 754    ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
 755    fpop(env);
 756}
 757
 758void helper_fxtract(CPUX86State *env)
 759{
 760    CPU_LDoubleU temp;
 761
 762    temp.d = ST0;
 763
 764    if (floatx80_is_zero(ST0)) {
 765        /* Easy way to generate -inf and raising division by 0 exception */
 766        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
 767                           &env->fp_status);
 768        fpush(env);
 769        ST0 = temp.d;
 770    } else {
 771        int expdif;
 772
 773        expdif = EXPD(temp) - EXPBIAS;
 774        /* DP exponent bias */
 775        ST0 = int32_to_floatx80(expdif, &env->fp_status);
 776        fpush(env);
 777        BIASEXPONENT(temp);
 778        ST0 = temp.d;
 779    }
 780}
 781
 782void helper_fprem1(CPUX86State *env)
 783{
 784    double st0, st1, dblq, fpsrcop, fptemp;
 785    CPU_LDoubleU fpsrcop1, fptemp1;
 786    int expdif;
 787    signed long long int q;
 788
 789    st0 = floatx80_to_double(env, ST0);
 790    st1 = floatx80_to_double(env, ST1);
 791
 792    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
 793        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
 794        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 795        return;
 796    }
 797
 798    fpsrcop = st0;
 799    fptemp = st1;
 800    fpsrcop1.d = ST0;
 801    fptemp1.d = ST1;
 802    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
 803
 804    if (expdif < 0) {
 805        /* optimisation? taken from the AMD docs */
 806        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 807        /* ST0 is unchanged */
 808        return;
 809    }
 810
 811    if (expdif < 53) {
 812        dblq = fpsrcop / fptemp;
 813        /* round dblq towards nearest integer */
 814        dblq = rint(dblq);
 815        st0 = fpsrcop - fptemp * dblq;
 816
 817        /* convert dblq to q by truncating towards zero */
 818        if (dblq < 0.0) {
 819            q = (signed long long int)(-dblq);
 820        } else {
 821            q = (signed long long int)dblq;
 822        }
 823
 824        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 825        /* (C0,C3,C1) <-- (q2,q1,q0) */
 826        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
 827        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
 828        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
 829    } else {
 830        env->fpus |= 0x400;  /* C2 <-- 1 */
 831        fptemp = pow(2.0, expdif - 50);
 832        fpsrcop = (st0 / st1) / fptemp;
 833        /* fpsrcop = integer obtained by chopping */
 834        fpsrcop = (fpsrcop < 0.0) ?
 835                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
 836        st0 -= (st1 * fpsrcop * fptemp);
 837    }
 838    ST0 = double_to_floatx80(env, st0);
 839}
 840
 841void helper_fprem(CPUX86State *env)
 842{
 843    double st0, st1, dblq, fpsrcop, fptemp;
 844    CPU_LDoubleU fpsrcop1, fptemp1;
 845    int expdif;
 846    signed long long int q;
 847
 848    st0 = floatx80_to_double(env, ST0);
 849    st1 = floatx80_to_double(env, ST1);
 850
 851    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
 852        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
 853        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 854        return;
 855    }
 856
 857    fpsrcop = st0;
 858    fptemp = st1;
 859    fpsrcop1.d = ST0;
 860    fptemp1.d = ST1;
 861    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
 862
 863    if (expdif < 0) {
 864        /* optimisation? taken from the AMD docs */
 865        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 866        /* ST0 is unchanged */
 867        return;
 868    }
 869
 870    if (expdif < 53) {
 871        dblq = fpsrcop / fptemp; /* ST0 / ST1 */
 872        /* round dblq towards zero */
 873        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
 874        st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
 875
 876        /* convert dblq to q by truncating towards zero */
 877        if (dblq < 0.0) {
 878            q = (signed long long int)(-dblq);
 879        } else {
 880            q = (signed long long int)dblq;
 881        }
 882
 883        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 884        /* (C0,C3,C1) <-- (q2,q1,q0) */
 885        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
 886        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
 887        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
 888    } else {
 889        int N = 32 + (expdif % 32); /* as per AMD docs */
 890
 891        env->fpus |= 0x400;  /* C2 <-- 1 */
 892        fptemp = pow(2.0, (double)(expdif - N));
 893        fpsrcop = (st0 / st1) / fptemp;
 894        /* fpsrcop = integer obtained by chopping */
 895        fpsrcop = (fpsrcop < 0.0) ?
 896                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
 897        st0 -= (st1 * fpsrcop * fptemp);
 898    }
 899    ST0 = double_to_floatx80(env, st0);
 900}
 901
 902void helper_fyl2xp1(CPUX86State *env)
 903{
 904    double fptemp = floatx80_to_double(env, ST0);
 905
 906    if ((fptemp + 1.0) > 0.0) {
 907        fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
 908        fptemp *= floatx80_to_double(env, ST1);
 909        ST1 = double_to_floatx80(env, fptemp);
 910        fpop(env);
 911    } else {
 912        env->fpus &= ~0x4700;
 913        env->fpus |= 0x400;
 914    }
 915}
 916
 917void helper_fsqrt(CPUX86State *env)
 918{
 919    if (floatx80_is_neg(ST0)) {
 920        env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
 921        env->fpus |= 0x400;
 922    }
 923    ST0 = floatx80_sqrt(ST0, &env->fp_status);
 924}
 925
 926void helper_fsincos(CPUX86State *env)
 927{
 928    double fptemp = floatx80_to_double(env, ST0);
 929
 930    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 931        env->fpus |= 0x400;
 932    } else {
 933        ST0 = double_to_floatx80(env, sin(fptemp));
 934        fpush(env);
 935        ST0 = double_to_floatx80(env, cos(fptemp));
 936        env->fpus &= ~0x400;  /* C2 <-- 0 */
 937        /* the above code is for |arg| < 2**63 only */
 938    }
 939}
 940
 941void helper_frndint(CPUX86State *env)
 942{
 943    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
 944}
 945
 946void helper_fscale(CPUX86State *env)
 947{
 948    if (floatx80_is_any_nan(ST1)) {
 949        ST0 = ST1;
 950    } else {
 951        int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
 952        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
 953    }
 954}
 955
 956void helper_fsin(CPUX86State *env)
 957{
 958    double fptemp = floatx80_to_double(env, ST0);
 959
 960    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 961        env->fpus |= 0x400;
 962    } else {
 963        ST0 = double_to_floatx80(env, sin(fptemp));
 964        env->fpus &= ~0x400;  /* C2 <-- 0 */
 965        /* the above code is for |arg| < 2**53 only */
 966    }
 967}
 968
 969void helper_fcos(CPUX86State *env)
 970{
 971    double fptemp = floatx80_to_double(env, ST0);
 972
 973    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 974        env->fpus |= 0x400;
 975    } else {
 976        ST0 = double_to_floatx80(env, cos(fptemp));
 977        env->fpus &= ~0x400;  /* C2 <-- 0 */
 978        /* the above code is for |arg| < 2**63 only */
 979    }
 980}
 981
 982void helper_fxam_ST0(CPUX86State *env)
 983{
 984    CPU_LDoubleU temp;
 985    int expdif;
 986
 987    temp.d = ST0;
 988
 989    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 990    if (SIGND(temp)) {
 991        env->fpus |= 0x200; /* C1 <-- 1 */
 992    }
 993
 994    /* XXX: test fptags too */
 995    expdif = EXPD(temp);
 996    if (expdif == MAXEXPD) {
 997        if (MANTD(temp) == 0x8000000000000000ULL) {
 998            env->fpus |= 0x500; /* Infinity */
 999        } else {
1000            env->fpus |= 0x100; /* NaN */
1001        }
1002    } else if (expdif == 0) {
1003        if (MANTD(temp) == 0) {
1004            env->fpus |=  0x4000; /* Zero */
1005        } else {
1006            env->fpus |= 0x4400; /* Denormal */
1007        }
1008    } else {
1009        env->fpus |= 0x400;
1010    }
1011}
1012
1013static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
1014                      uintptr_t retaddr)
1015{
1016    int fpus, fptag, exp, i;
1017    uint64_t mant;
1018    CPU_LDoubleU tmp;
1019
1020    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1021    fptag = 0;
1022    for (i = 7; i >= 0; i--) {
1023        fptag <<= 2;
1024        if (env->fptags[i]) {
1025            fptag |= 3;
1026        } else {
1027            tmp.d = env->fpregs[i].d;
1028            exp = EXPD(tmp);
1029            mant = MANTD(tmp);
1030            if (exp == 0 && mant == 0) {
1031                /* zero */
1032                fptag |= 1;
1033            } else if (exp == 0 || exp == MAXEXPD
1034                       || (mant & (1LL << 63)) == 0) {
1035                /* NaNs, infinity, denormal */
1036                fptag |= 2;
1037            }
1038        }
1039    }
1040    if (data32) {
1041        /* 32 bit */
1042        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1043        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1044        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1045        cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1046        cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1047        cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1048        cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1049    } else {
1050        /* 16 bit */
1051        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1052        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1053        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1054        cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1055        cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1056        cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1057        cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1058    }
1059}
1060
1061void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1062{
1063    do_fstenv(env, ptr, data32, GETPC());
1064}
1065
1066static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
1067{
1068    env->fpstt = (fpus >> 11) & 7;
1069    env->fpus = fpus & ~0x3800 & ~FPUS_B;
1070    env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
1071#if !defined(CONFIG_USER_ONLY)
1072    if (!(env->fpus & FPUS_SE)) {
1073        /*
1074         * Here the processor deasserts FERR#; in response, the chipset deasserts
1075         * IGNNE#.
1076         */
1077        cpu_clear_ignne();
1078    }
1079#endif
1080}
1081
1082static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1083                      uintptr_t retaddr)
1084{
1085    int i, fpus, fptag;
1086
1087    if (data32) {
1088        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1089        fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1090        fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1091    } else {
1092        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1093        fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1094        fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1095    }
1096    cpu_set_fpus(env, fpus);
1097    for (i = 0; i < 8; i++) {
1098        env->fptags[i] = ((fptag & 3) == 3);
1099        fptag >>= 2;
1100    }
1101}
1102
1103void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1104{
1105    do_fldenv(env, ptr, data32, GETPC());
1106}
1107
1108void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1109{
1110    floatx80 tmp;
1111    int i;
1112
1113    do_fstenv(env, ptr, data32, GETPC());
1114
1115    ptr += (14 << data32);
1116    for (i = 0; i < 8; i++) {
1117        tmp = ST(i);
1118        helper_fstt(env, tmp, ptr, GETPC());
1119        ptr += 10;
1120    }
1121
1122    /* fninit */
1123    env->fpus = 0;
1124    env->fpstt = 0;
1125    cpu_set_fpuc(env, 0x37f);
1126    env->fptags[0] = 1;
1127    env->fptags[1] = 1;
1128    env->fptags[2] = 1;
1129    env->fptags[3] = 1;
1130    env->fptags[4] = 1;
1131    env->fptags[5] = 1;
1132    env->fptags[6] = 1;
1133    env->fptags[7] = 1;
1134}
1135
1136void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1137{
1138    floatx80 tmp;
1139    int i;
1140
1141    do_fldenv(env, ptr, data32, GETPC());
1142    ptr += (14 << data32);
1143
1144    for (i = 0; i < 8; i++) {
1145        tmp = helper_fldt(env, ptr, GETPC());
1146        ST(i) = tmp;
1147        ptr += 10;
1148    }
1149}
1150
#if defined(CONFIG_USER_ONLY)
/*
 * User-only builds: externally visible wrappers that forward unchanged to
 * helper_fsave() / helper_frstor().
 */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}

/* Counterpart of cpu_x86_fsave(): forwards to helper_frstor(). */
void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
#endif
1162
/* Byte offset of member X within X86XSaveArea. */
#define XO(X)  offsetof(X86XSaveArea, X)
1164
1165static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1166{
1167    int fpus, fptag, i;
1168    target_ulong addr;
1169
1170    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1171    fptag = 0;
1172    for (i = 0; i < 8; i++) {
1173        fptag |= (env->fptags[i] << i);
1174    }
1175
1176    cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1177    cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1178    cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
1179
1180    /* In 32-bit mode this is eip, sel, dp, sel.
1181       In 64-bit mode this is rip, rdp.
1182       But in either case we don't write actual data, just zeros.  */
1183    cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1184    cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
1185
1186    addr = ptr + XO(legacy.fpregs);
1187    for (i = 0; i < 8; i++) {
1188        floatx80 tmp = ST(i);
1189        helper_fstt(env, tmp, addr, ra);
1190        addr += 16;
1191    }
1192}
1193
1194static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1195{
1196    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1197    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
1198}
1199
1200static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1201{
1202    int i, nb_xmm_regs;
1203    target_ulong addr;
1204
1205    if (env->hflags & HF_CS64_MASK) {
1206        nb_xmm_regs = 16;
1207    } else {
1208        nb_xmm_regs = 8;
1209    }
1210
1211    addr = ptr + XO(legacy.xmm_regs);
1212    for (i = 0; i < nb_xmm_regs; i++) {
1213        cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1214        cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1215        addr += 16;
1216    }
1217}
1218
1219static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1220{
1221    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1222    int i;
1223
1224    for (i = 0; i < 4; i++, addr += 16) {
1225        cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1226        cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1227    }
1228}
1229
1230static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1231{
1232    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1233                    env->bndcs_regs.cfgu, ra);
1234    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1235                    env->bndcs_regs.sts, ra);
1236}
1237
1238static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1239{
1240    cpu_stq_data_ra(env, ptr, env->pkru, ra);
1241}
1242
1243void helper_fxsave(CPUX86State *env, target_ulong ptr)
1244{
1245    uintptr_t ra = GETPC();
1246
1247    /* The operand must be 16 byte aligned */
1248    if (ptr & 0xf) {
1249        raise_exception_ra(env, EXCP0D_GPF, ra);
1250    }
1251
1252    do_xsave_fpu(env, ptr, ra);
1253
1254    if (env->cr[4] & CR4_OSFXSR_MASK) {
1255        do_xsave_mxcsr(env, ptr, ra);
1256        /* Fast FXSAVE leaves out the XMM registers */
1257        if (!(env->efer & MSR_EFER_FFXSR)
1258            || (env->hflags & HF_CPL_MASK)
1259            || !(env->hflags & HF_LMA_MASK)) {
1260            do_xsave_sse(env, ptr, ra);
1261        }
1262    }
1263}
1264
1265static uint64_t get_xinuse(CPUX86State *env)
1266{
1267    uint64_t inuse = -1;
1268
1269    /* For the most part, we don't track XINUSE.  We could calculate it
1270       here for all components, but it's probably less work to simply
1271       indicate in use.  That said, the state of BNDREGS is important
1272       enough to track in HFLAGS, so we might as well use that here.  */
1273    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1274       inuse &= ~XSTATE_BNDREGS_MASK;
1275    }
1276    return inuse;
1277}
1278
1279static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1280                     uint64_t inuse, uint64_t opt, uintptr_t ra)
1281{
1282    uint64_t old_bv, new_bv;
1283
1284    /* The OS must have enabled XSAVE.  */
1285    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1286        raise_exception_ra(env, EXCP06_ILLOP, ra);
1287    }
1288
1289    /* The operand must be 64 byte aligned.  */
1290    if (ptr & 63) {
1291        raise_exception_ra(env, EXCP0D_GPF, ra);
1292    }
1293
1294    /* Never save anything not enabled by XCR0.  */
1295    rfbm &= env->xcr0;
1296    opt &= rfbm;
1297
1298    if (opt & XSTATE_FP_MASK) {
1299        do_xsave_fpu(env, ptr, ra);
1300    }
1301    if (rfbm & XSTATE_SSE_MASK) {
1302        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
1303        do_xsave_mxcsr(env, ptr, ra);
1304    }
1305    if (opt & XSTATE_SSE_MASK) {
1306        do_xsave_sse(env, ptr, ra);
1307    }
1308    if (opt & XSTATE_BNDREGS_MASK) {
1309        do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1310    }
1311    if (opt & XSTATE_BNDCSR_MASK) {
1312        do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1313    }
1314    if (opt & XSTATE_PKRU_MASK) {
1315        do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1316    }
1317
1318    /* Update the XSTATE_BV field.  */
1319    old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1320    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1321    cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1322}
1323
1324void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1325{
1326    do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1327}
1328
1329void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1330{
1331    uint64_t inuse = get_xinuse(env);
1332    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1333}
1334
1335static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1336{
1337    int i, fpuc, fpus, fptag;
1338    target_ulong addr;
1339
1340    fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1341    fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1342    fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1343    cpu_set_fpuc(env, fpuc);
1344    cpu_set_fpus(env, fpus);
1345    fptag ^= 0xff;
1346    for (i = 0; i < 8; i++) {
1347        env->fptags[i] = ((fptag >> i) & 1);
1348    }
1349
1350    addr = ptr + XO(legacy.fpregs);
1351    for (i = 0; i < 8; i++) {
1352        floatx80 tmp = helper_fldt(env, addr, ra);
1353        ST(i) = tmp;
1354        addr += 16;
1355    }
1356}
1357
1358static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1359{
1360    cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1361}
1362
1363static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1364{
1365    int i, nb_xmm_regs;
1366    target_ulong addr;
1367
1368    if (env->hflags & HF_CS64_MASK) {
1369        nb_xmm_regs = 16;
1370    } else {
1371        nb_xmm_regs = 8;
1372    }
1373
1374    addr = ptr + XO(legacy.xmm_regs);
1375    for (i = 0; i < nb_xmm_regs; i++) {
1376        env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1377        env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1378        addr += 16;
1379    }
1380}
1381
1382static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1383{
1384    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1385    int i;
1386
1387    for (i = 0; i < 4; i++, addr += 16) {
1388        env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1389        env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1390    }
1391}
1392
1393static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1394{
1395    /* FIXME: Extend highest implemented bit of linear address.  */
1396    env->bndcs_regs.cfgu
1397        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1398    env->bndcs_regs.sts
1399        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1400}
1401
1402static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1403{
1404    env->pkru = cpu_ldq_data_ra(env, ptr, ra);
1405}
1406
1407void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1408{
1409    uintptr_t ra = GETPC();
1410
1411    /* The operand must be 16 byte aligned */
1412    if (ptr & 0xf) {
1413        raise_exception_ra(env, EXCP0D_GPF, ra);
1414    }
1415
1416    do_xrstor_fpu(env, ptr, ra);
1417
1418    if (env->cr[4] & CR4_OSFXSR_MASK) {
1419        do_xrstor_mxcsr(env, ptr, ra);
1420        /* Fast FXRSTOR leaves out the XMM registers */
1421        if (!(env->efer & MSR_EFER_FFXSR)
1422            || (env->hflags & HF_CPL_MASK)
1423            || !(env->hflags & HF_LMA_MASK)) {
1424            do_xrstor_sse(env, ptr, ra);
1425        }
1426    }
1427}
1428
#if defined(CONFIG_USER_ONLY)
/*
 * User-only builds: externally visible wrappers that forward unchanged to
 * helper_fxsave() / helper_fxrstor().
 */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    helper_fxsave(env, ptr);
}

/* Counterpart of cpu_x86_fxsave(): forwards to helper_fxrstor(). */
void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    helper_fxrstor(env, ptr);
}
#endif
1440
1441void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1442{
1443    uintptr_t ra = GETPC();
1444    uint64_t xstate_bv, xcomp_bv, reserve0;
1445
1446    rfbm &= env->xcr0;
1447
1448    /* The OS must have enabled XSAVE.  */
1449    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1450        raise_exception_ra(env, EXCP06_ILLOP, ra);
1451    }
1452
1453    /* The operand must be 64 byte aligned.  */
1454    if (ptr & 63) {
1455        raise_exception_ra(env, EXCP0D_GPF, ra);
1456    }
1457
1458    xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1459
1460    if ((int64_t)xstate_bv < 0) {
1461        /* FIXME: Compact form.  */
1462        raise_exception_ra(env, EXCP0D_GPF, ra);
1463    }
1464
1465    /* Standard form.  */
1466
1467    /* The XSTATE_BV field must not set bits not present in XCR0.  */
1468    if (xstate_bv & ~env->xcr0) {
1469        raise_exception_ra(env, EXCP0D_GPF, ra);
1470    }
1471
1472    /* The XCOMP_BV field must be zero.  Note that, as of the April 2016
1473       revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
1474       describes only XCOMP_BV, but the description of the standard form
1475       of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
1476       includes the next 64-bit field.  */
1477    xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
1478    reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
1479    if (xcomp_bv || reserve0) {
1480        raise_exception_ra(env, EXCP0D_GPF, ra);
1481    }
1482
1483    if (rfbm & XSTATE_FP_MASK) {
1484        if (xstate_bv & XSTATE_FP_MASK) {
1485            do_xrstor_fpu(env, ptr, ra);
1486        } else {
1487            helper_fninit(env);
1488            memset(env->fpregs, 0, sizeof(env->fpregs));
1489        }
1490    }
1491    if (rfbm & XSTATE_SSE_MASK) {
1492        /* Note that the standard form of XRSTOR loads MXCSR from memory
1493           whether or not the XSTATE_BV bit is set.  */
1494        do_xrstor_mxcsr(env, ptr, ra);
1495        if (xstate_bv & XSTATE_SSE_MASK) {
1496            do_xrstor_sse(env, ptr, ra);
1497        } else {
1498            /* ??? When AVX is implemented, we may have to be more
1499               selective in the clearing.  */
1500            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1501        }
1502    }
1503    if (rfbm & XSTATE_BNDREGS_MASK) {
1504        if (xstate_bv & XSTATE_BNDREGS_MASK) {
1505            do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
1506            env->hflags |= HF_MPX_IU_MASK;
1507        } else {
1508            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1509            env->hflags &= ~HF_MPX_IU_MASK;
1510        }
1511    }
1512    if (rfbm & XSTATE_BNDCSR_MASK) {
1513        if (xstate_bv & XSTATE_BNDCSR_MASK) {
1514            do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
1515        } else {
1516            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1517        }
1518        cpu_sync_bndcs_hflags(env);
1519    }
1520    if (rfbm & XSTATE_PKRU_MASK) {
1521        uint64_t old_pkru = env->pkru;
1522        if (xstate_bv & XSTATE_PKRU_MASK) {
1523            do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
1524        } else {
1525            env->pkru = 0;
1526        }
1527        if (env->pkru != old_pkru) {
1528            CPUState *cs = env_cpu(env);
1529            tlb_flush(cs);
1530        }
1531    }
1532}
1533
1534#undef XO
1535
1536uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1537{
1538    /* The OS must have enabled XSAVE.  */
1539    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1540        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1541    }
1542
1543    switch (ecx) {
1544    case 0:
1545        return env->xcr0;
1546    case 1:
1547        if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1548            return env->xcr0 & get_xinuse(env);
1549        }
1550        break;
1551    }
1552    raise_exception_ra(env, EXCP0D_GPF, GETPC());
1553}
1554
1555void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1556{
1557    uint32_t dummy, ena_lo, ena_hi;
1558    uint64_t ena;
1559
1560    /* The OS must have enabled XSAVE.  */
1561    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1562        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1563    }
1564
1565    /* Only XCR0 is defined at present; the FPU may not be disabled.  */
1566    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1567        goto do_gpf;
1568    }
1569
1570    /* Disallow enabling unimplemented features.  */
1571    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1572    ena = ((uint64_t)ena_hi << 32) | ena_lo;
1573    if (mask & ~ena) {
1574        goto do_gpf;
1575    }
1576
1577    /* Disallow enabling only half of MPX.  */
1578    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1579        & XSTATE_BNDCSR_MASK) {
1580        goto do_gpf;
1581    }
1582
1583    env->xcr0 = mask;
1584    cpu_sync_bndcs_hflags(env);
1585    return;
1586
1587 do_gpf:
1588    raise_exception_ra(env, EXCP0D_GPF, GETPC());
1589}
1590
1591/* MMX/SSE */
1592/* XXX: optimize by storing fptt and fptags in the static cpu state */
1593
/* MXCSR control bits decoded by update_mxcsr_status(). */
#define SSE_DAZ             0x0040  /* denormals are zero */
#define SSE_RC_MASK         0x6000  /* rounding-control field */
#define SSE_RC_NEAR         0x0000  /* round to nearest even */
#define SSE_RC_DOWN         0x2000  /* round down */
#define SSE_RC_UP           0x4000  /* round up */
#define SSE_RC_CHOP         0x6000  /* round to zero */
#define SSE_FZ              0x8000  /* flush to zero */
1601
1602void update_mxcsr_status(CPUX86State *env)
1603{
1604    uint32_t mxcsr = env->mxcsr;
1605    int rnd_type;
1606
1607    /* set rounding mode */
1608    switch (mxcsr & SSE_RC_MASK) {
1609    default:
1610    case SSE_RC_NEAR:
1611        rnd_type = float_round_nearest_even;
1612        break;
1613    case SSE_RC_DOWN:
1614        rnd_type = float_round_down;
1615        break;
1616    case SSE_RC_UP:
1617        rnd_type = float_round_up;
1618        break;
1619    case SSE_RC_CHOP:
1620        rnd_type = float_round_to_zero;
1621        break;
1622    }
1623    set_float_rounding_mode(rnd_type, &env->sse_status);
1624
1625    /* set denormals are zero */
1626    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1627
1628    /* set flush to zero */
1629    set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
1630}
1631
/* Helper that installs @val as the new MXCSR value via cpu_set_mxcsr(). */
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}
1636
1637void helper_enter_mmx(CPUX86State *env)
1638{
1639    env->fpstt = 0;
1640    *(uint32_t *)(env->fptags) = 0;
1641    *(uint32_t *)(env->fptags + 4) = 0;
1642}
1643
1644void helper_emms(CPUX86State *env)
1645{
1646    /* set to empty state */
1647    *(uint32_t *)(env->fptags) = 0x01010101;
1648    *(uint32_t *)(env->fptags + 4) = 0x01010101;
1649}
1650
1651/* XXX: suppress */
1652void helper_movq(CPUX86State *env, void *d, void *s)
1653{
1654    *(uint64_t *)d = *(uint64_t *)s;
1655}
1656
1657#define SHIFT 0
1658#include "ops_sse.h"
1659
1660#define SHIFT 1
1661#include "ops_sse.h"
1662