qemu/target/i386/fpu_helper.c
<<
>>
Prefs
   1/*
   2 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include <math.h>
  22#include "cpu.h"
  23#include "exec/helper-proto.h"
  24#include "qemu/host-utils.h"
  25#include "exec/exec-all.h"
  26#include "exec/cpu_ldst.h"
  27#include "fpu/softfloat.h"
  28
  29#ifdef CONFIG_SOFTMMU
  30#include "hw/irq.h"
  31#endif
  32
  33#define FPU_RC_MASK         0xc00
  34#define FPU_RC_NEAR         0x000
  35#define FPU_RC_DOWN         0x400
  36#define FPU_RC_UP           0x800
  37#define FPU_RC_CHOP         0xc00
  38
  39#define MAXTAN 9223372036854775808.0
  40
  41/* the following deal with x86 long double-precision numbers */
  42#define MAXEXPD 0x7fff
  43#define EXPBIAS 16383
  44#define EXPD(fp)        (fp.l.upper & 0x7fff)
  45#define SIGND(fp)       ((fp.l.upper) & 0x8000)
  46#define MANTD(fp)       (fp.l.lower)
  47#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
  48
  49#define FPUS_IE (1 << 0)
  50#define FPUS_DE (1 << 1)
  51#define FPUS_ZE (1 << 2)
  52#define FPUS_OE (1 << 3)
  53#define FPUS_UE (1 << 4)
  54#define FPUS_PE (1 << 5)
  55#define FPUS_SF (1 << 6)
  56#define FPUS_SE (1 << 7)
  57#define FPUS_B  (1 << 15)
  58
  59#define FPUC_EM 0x3f
  60
  61#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
  62#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
  63#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
  64
  65#if !defined(CONFIG_USER_ONLY)
  66static qemu_irq ferr_irq;
  67
  68void x86_register_ferr_irq(qemu_irq irq)
  69{
  70    ferr_irq = irq;
  71}
  72
  73static void cpu_clear_ignne(void)
  74{
  75    CPUX86State *env = &X86_CPU(first_cpu)->env;
  76    env->hflags2 &= ~HF2_IGNNE_MASK;
  77}
  78
  79void cpu_set_ignne(void)
  80{
  81    CPUX86State *env = &X86_CPU(first_cpu)->env;
  82    env->hflags2 |= HF2_IGNNE_MASK;
  83    /*
  84     * We get here in response to a write to port F0h.  The chipset should
  85     * deassert FP_IRQ and FERR# instead should stay signaled until FPSW_SE is
  86     * cleared, because FERR# and FP_IRQ are two separate pins on real
  87     * hardware.  However, we don't model FERR# as a qemu_irq, so we just
  88     * do directly what the chipset would do, i.e. deassert FP_IRQ.
  89     */
  90    qemu_irq_lower(ferr_irq);
  91}
  92#endif
  93
  94
  95static inline void fpush(CPUX86State *env)
  96{
  97    env->fpstt = (env->fpstt - 1) & 7;
  98    env->fptags[env->fpstt] = 0; /* validate stack entry */
  99}
 100
 101static inline void fpop(CPUX86State *env)
 102{
 103    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
 104    env->fpstt = (env->fpstt + 1) & 7;
 105}
 106
 107static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
 108                                   uintptr_t retaddr)
 109{
 110    CPU_LDoubleU temp;
 111
 112    temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
 113    temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
 114    return temp.d;
 115}
 116
 117static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
 118                               uintptr_t retaddr)
 119{
 120    CPU_LDoubleU temp;
 121
 122    temp.d = f;
 123    cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
 124    cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
 125}
 126
 127/* x87 FPU helpers */
 128
 129static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
 130{
 131    union {
 132        float64 f64;
 133        double d;
 134    } u;
 135
 136    u.f64 = floatx80_to_float64(a, &env->fp_status);
 137    return u.d;
 138}
 139
 140static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
 141{
 142    union {
 143        float64 f64;
 144        double d;
 145    } u;
 146
 147    u.d = a;
 148    return float64_to_floatx80(u.f64, &env->fp_status);
 149}
 150
 151static void fpu_set_exception(CPUX86State *env, int mask)
 152{
 153    env->fpus |= mask;
 154    if (env->fpus & (~env->fpuc & FPUC_EM)) {
 155        env->fpus |= FPUS_SE | FPUS_B;
 156    }
 157}
 158
 159static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
 160{
 161    if (floatx80_is_zero(b)) {
 162        fpu_set_exception(env, FPUS_ZE);
 163    }
 164    return floatx80_div(a, b, &env->fp_status);
 165}
 166
 167static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
 168{
 169    if (env->cr[0] & CR0_NE_MASK) {
 170        raise_exception_ra(env, EXCP10_COPR, retaddr);
 171    }
 172#if !defined(CONFIG_USER_ONLY)
 173    else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) {
 174        qemu_irq_raise(ferr_irq);
 175    }
 176#endif
 177}
 178
 179void helper_flds_FT0(CPUX86State *env, uint32_t val)
 180{
 181    union {
 182        float32 f;
 183        uint32_t i;
 184    } u;
 185
 186    u.i = val;
 187    FT0 = float32_to_floatx80(u.f, &env->fp_status);
 188}
 189
 190void helper_fldl_FT0(CPUX86State *env, uint64_t val)
 191{
 192    union {
 193        float64 f;
 194        uint64_t i;
 195    } u;
 196
 197    u.i = val;
 198    FT0 = float64_to_floatx80(u.f, &env->fp_status);
 199}
 200
 201void helper_fildl_FT0(CPUX86State *env, int32_t val)
 202{
 203    FT0 = int32_to_floatx80(val, &env->fp_status);
 204}
 205
 206void helper_flds_ST0(CPUX86State *env, uint32_t val)
 207{
 208    int new_fpstt;
 209    union {
 210        float32 f;
 211        uint32_t i;
 212    } u;
 213
 214    new_fpstt = (env->fpstt - 1) & 7;
 215    u.i = val;
 216    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
 217    env->fpstt = new_fpstt;
 218    env->fptags[new_fpstt] = 0; /* validate stack entry */
 219}
 220
 221void helper_fldl_ST0(CPUX86State *env, uint64_t val)
 222{
 223    int new_fpstt;
 224    union {
 225        float64 f;
 226        uint64_t i;
 227    } u;
 228
 229    new_fpstt = (env->fpstt - 1) & 7;
 230    u.i = val;
 231    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
 232    env->fpstt = new_fpstt;
 233    env->fptags[new_fpstt] = 0; /* validate stack entry */
 234}
 235
 236void helper_fildl_ST0(CPUX86State *env, int32_t val)
 237{
 238    int new_fpstt;
 239
 240    new_fpstt = (env->fpstt - 1) & 7;
 241    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
 242    env->fpstt = new_fpstt;
 243    env->fptags[new_fpstt] = 0; /* validate stack entry */
 244}
 245
 246void helper_fildll_ST0(CPUX86State *env, int64_t val)
 247{
 248    int new_fpstt;
 249
 250    new_fpstt = (env->fpstt - 1) & 7;
 251    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
 252    env->fpstt = new_fpstt;
 253    env->fptags[new_fpstt] = 0; /* validate stack entry */
 254}
 255
 256uint32_t helper_fsts_ST0(CPUX86State *env)
 257{
 258    union {
 259        float32 f;
 260        uint32_t i;
 261    } u;
 262
 263    u.f = floatx80_to_float32(ST0, &env->fp_status);
 264    return u.i;
 265}
 266
 267uint64_t helper_fstl_ST0(CPUX86State *env)
 268{
 269    union {
 270        float64 f;
 271        uint64_t i;
 272    } u;
 273
 274    u.f = floatx80_to_float64(ST0, &env->fp_status);
 275    return u.i;
 276}
 277
 278int32_t helper_fist_ST0(CPUX86State *env)
 279{
 280    int32_t val;
 281
 282    val = floatx80_to_int32(ST0, &env->fp_status);
 283    if (val != (int16_t)val) {
 284        val = -32768;
 285    }
 286    return val;
 287}
 288
 289int32_t helper_fistl_ST0(CPUX86State *env)
 290{
 291    int32_t val;
 292    signed char old_exp_flags;
 293
 294    old_exp_flags = get_float_exception_flags(&env->fp_status);
 295    set_float_exception_flags(0, &env->fp_status);
 296
 297    val = floatx80_to_int32(ST0, &env->fp_status);
 298    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 299        val = 0x80000000;
 300    }
 301    set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 302                                | old_exp_flags, &env->fp_status);
 303    return val;
 304}
 305
 306int64_t helper_fistll_ST0(CPUX86State *env)
 307{
 308    int64_t val;
 309    signed char old_exp_flags;
 310
 311    old_exp_flags = get_float_exception_flags(&env->fp_status);
 312    set_float_exception_flags(0, &env->fp_status);
 313
 314    val = floatx80_to_int64(ST0, &env->fp_status);
 315    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 316        val = 0x8000000000000000ULL;
 317    }
 318    set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 319                                | old_exp_flags, &env->fp_status);
 320    return val;
 321}
 322
 323int32_t helper_fistt_ST0(CPUX86State *env)
 324{
 325    int32_t val;
 326
 327    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 328    if (val != (int16_t)val) {
 329        val = -32768;
 330    }
 331    return val;
 332}
 333
 334int32_t helper_fisttl_ST0(CPUX86State *env)
 335{
 336    return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 337}
 338
 339int64_t helper_fisttll_ST0(CPUX86State *env)
 340{
 341    return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
 342}
 343
 344void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
 345{
 346    int new_fpstt;
 347
 348    new_fpstt = (env->fpstt - 1) & 7;
 349    env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
 350    env->fpstt = new_fpstt;
 351    env->fptags[new_fpstt] = 0; /* validate stack entry */
 352}
 353
 354void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
 355{
 356    helper_fstt(env, ST0, ptr, GETPC());
 357}
 358
 359void helper_fpush(CPUX86State *env)
 360{
 361    fpush(env);
 362}
 363
 364void helper_fpop(CPUX86State *env)
 365{
 366    fpop(env);
 367}
 368
 369void helper_fdecstp(CPUX86State *env)
 370{
 371    env->fpstt = (env->fpstt - 1) & 7;
 372    env->fpus &= ~0x4700;
 373}
 374
 375void helper_fincstp(CPUX86State *env)
 376{
 377    env->fpstt = (env->fpstt + 1) & 7;
 378    env->fpus &= ~0x4700;
 379}
 380
 381/* FPU move */
 382
 383void helper_ffree_STN(CPUX86State *env, int st_index)
 384{
 385    env->fptags[(env->fpstt + st_index) & 7] = 1;
 386}
 387
 388void helper_fmov_ST0_FT0(CPUX86State *env)
 389{
 390    ST0 = FT0;
 391}
 392
 393void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
 394{
 395    FT0 = ST(st_index);
 396}
 397
 398void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
 399{
 400    ST0 = ST(st_index);
 401}
 402
 403void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
 404{
 405    ST(st_index) = ST0;
 406}
 407
 408void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
 409{
 410    floatx80 tmp;
 411
 412    tmp = ST(st_index);
 413    ST(st_index) = ST0;
 414    ST0 = tmp;
 415}
 416
 417/* FPU operations */
 418
 419static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
 420
 421void helper_fcom_ST0_FT0(CPUX86State *env)
 422{
 423    int ret;
 424
 425    ret = floatx80_compare(ST0, FT0, &env->fp_status);
 426    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 427}
 428
 429void helper_fucom_ST0_FT0(CPUX86State *env)
 430{
 431    int ret;
 432
 433    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 434    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 435}
 436
 437static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
 438
 439void helper_fcomi_ST0_FT0(CPUX86State *env)
 440{
 441    int eflags;
 442    int ret;
 443
 444    ret = floatx80_compare(ST0, FT0, &env->fp_status);
 445    eflags = cpu_cc_compute_all(env, CC_OP);
 446    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 447    CC_SRC = eflags;
 448}
 449
 450void helper_fucomi_ST0_FT0(CPUX86State *env)
 451{
 452    int eflags;
 453    int ret;
 454
 455    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 456    eflags = cpu_cc_compute_all(env, CC_OP);
 457    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 458    CC_SRC = eflags;
 459}
 460
 461void helper_fadd_ST0_FT0(CPUX86State *env)
 462{
 463    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
 464}
 465
 466void helper_fmul_ST0_FT0(CPUX86State *env)
 467{
 468    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
 469}
 470
 471void helper_fsub_ST0_FT0(CPUX86State *env)
 472{
 473    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
 474}
 475
 476void helper_fsubr_ST0_FT0(CPUX86State *env)
 477{
 478    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
 479}
 480
 481void helper_fdiv_ST0_FT0(CPUX86State *env)
 482{
 483    ST0 = helper_fdiv(env, ST0, FT0);
 484}
 485
 486void helper_fdivr_ST0_FT0(CPUX86State *env)
 487{
 488    ST0 = helper_fdiv(env, FT0, ST0);
 489}
 490
 491/* fp operations between STN and ST0 */
 492
 493void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
 494{
 495    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
 496}
 497
 498void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
 499{
 500    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
 501}
 502
 503void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
 504{
 505    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
 506}
 507
 508void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
 509{
 510    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
 511}
 512
 513void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
 514{
 515    floatx80 *p;
 516
 517    p = &ST(st_index);
 518    *p = helper_fdiv(env, *p, ST0);
 519}
 520
 521void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
 522{
 523    floatx80 *p;
 524
 525    p = &ST(st_index);
 526    *p = helper_fdiv(env, ST0, *p);
 527}
 528
 529/* misc FPU operations */
 530void helper_fchs_ST0(CPUX86State *env)
 531{
 532    ST0 = floatx80_chs(ST0);
 533}
 534
 535void helper_fabs_ST0(CPUX86State *env)
 536{
 537    ST0 = floatx80_abs(ST0);
 538}
 539
 540void helper_fld1_ST0(CPUX86State *env)
 541{
 542    ST0 = floatx80_one;
 543}
 544
 545void helper_fldl2t_ST0(CPUX86State *env)
 546{
 547    ST0 = floatx80_l2t;
 548}
 549
 550void helper_fldl2e_ST0(CPUX86State *env)
 551{
 552    ST0 = floatx80_l2e;
 553}
 554
 555void helper_fldpi_ST0(CPUX86State *env)
 556{
 557    ST0 = floatx80_pi;
 558}
 559
 560void helper_fldlg2_ST0(CPUX86State *env)
 561{
 562    ST0 = floatx80_lg2;
 563}
 564
 565void helper_fldln2_ST0(CPUX86State *env)
 566{
 567    ST0 = floatx80_ln2;
 568}
 569
 570void helper_fldz_ST0(CPUX86State *env)
 571{
 572    ST0 = floatx80_zero;
 573}
 574
 575void helper_fldz_FT0(CPUX86State *env)
 576{
 577    FT0 = floatx80_zero;
 578}
 579
 580uint32_t helper_fnstsw(CPUX86State *env)
 581{
 582    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 583}
 584
 585uint32_t helper_fnstcw(CPUX86State *env)
 586{
 587    return env->fpuc;
 588}
 589
 590void update_fp_status(CPUX86State *env)
 591{
 592    int rnd_type;
 593
 594    /* set rounding mode */
 595    switch (env->fpuc & FPU_RC_MASK) {
 596    default:
 597    case FPU_RC_NEAR:
 598        rnd_type = float_round_nearest_even;
 599        break;
 600    case FPU_RC_DOWN:
 601        rnd_type = float_round_down;
 602        break;
 603    case FPU_RC_UP:
 604        rnd_type = float_round_up;
 605        break;
 606    case FPU_RC_CHOP:
 607        rnd_type = float_round_to_zero;
 608        break;
 609    }
 610    set_float_rounding_mode(rnd_type, &env->fp_status);
 611    switch ((env->fpuc >> 8) & 3) {
 612    case 0:
 613        rnd_type = 32;
 614        break;
 615    case 2:
 616        rnd_type = 64;
 617        break;
 618    case 3:
 619    default:
 620        rnd_type = 80;
 621        break;
 622    }
 623    set_floatx80_rounding_precision(rnd_type, &env->fp_status);
 624}
 625
 626void helper_fldcw(CPUX86State *env, uint32_t val)
 627{
 628    cpu_set_fpuc(env, val);
 629}
 630
 631void helper_fclex(CPUX86State *env)
 632{
 633    env->fpus &= 0x7f00;
 634}
 635
 636void helper_fwait(CPUX86State *env)
 637{
 638    if (env->fpus & FPUS_SE) {
 639        fpu_raise_exception(env, GETPC());
 640    }
 641}
 642
 643void helper_fninit(CPUX86State *env)
 644{
 645    env->fpus = 0;
 646    env->fpstt = 0;
 647    cpu_set_fpuc(env, 0x37f);
 648    env->fptags[0] = 1;
 649    env->fptags[1] = 1;
 650    env->fptags[2] = 1;
 651    env->fptags[3] = 1;
 652    env->fptags[4] = 1;
 653    env->fptags[5] = 1;
 654    env->fptags[6] = 1;
 655    env->fptags[7] = 1;
 656}
 657
 658/* BCD ops */
 659
 660void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
 661{
 662    floatx80 tmp;
 663    uint64_t val;
 664    unsigned int v;
 665    int i;
 666
 667    val = 0;
 668    for (i = 8; i >= 0; i--) {
 669        v = cpu_ldub_data_ra(env, ptr + i, GETPC());
 670        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
 671    }
 672    tmp = int64_to_floatx80(val, &env->fp_status);
 673    if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
 674        tmp = floatx80_chs(tmp);
 675    }
 676    fpush(env);
 677    ST0 = tmp;
 678}
 679
 680void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
 681{
 682    int v;
 683    target_ulong mem_ref, mem_end;
 684    int64_t val;
 685
 686    val = floatx80_to_int64(ST0, &env->fp_status);
 687    mem_ref = ptr;
 688    mem_end = mem_ref + 9;
 689    if (val < 0) {
 690        cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
 691        val = -val;
 692    } else {
 693        cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
 694    }
 695    while (mem_ref < mem_end) {
 696        if (val == 0) {
 697            break;
 698        }
 699        v = val % 100;
 700        val = val / 100;
 701        v = ((v / 10) << 4) | (v % 10);
 702        cpu_stb_data_ra(env, mem_ref++, v, GETPC());
 703    }
 704    while (mem_ref < mem_end) {
 705        cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
 706    }
 707}
 708
 709void helper_f2xm1(CPUX86State *env)
 710{
 711    double val = floatx80_to_double(env, ST0);
 712
 713    val = pow(2.0, val) - 1.0;
 714    ST0 = double_to_floatx80(env, val);
 715}
 716
 717void helper_fyl2x(CPUX86State *env)
 718{
 719    double fptemp = floatx80_to_double(env, ST0);
 720
 721    if (fptemp > 0.0) {
 722        fptemp = log(fptemp) / log(2.0); /* log2(ST) */
 723        fptemp *= floatx80_to_double(env, ST1);
 724        ST1 = double_to_floatx80(env, fptemp);
 725        fpop(env);
 726    } else {
 727        env->fpus &= ~0x4700;
 728        env->fpus |= 0x400;
 729    }
 730}
 731
 732void helper_fptan(CPUX86State *env)
 733{
 734    double fptemp = floatx80_to_double(env, ST0);
 735
 736    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 737        env->fpus |= 0x400;
 738    } else {
 739        fptemp = tan(fptemp);
 740        ST0 = double_to_floatx80(env, fptemp);
 741        fpush(env);
 742        ST0 = floatx80_one;
 743        env->fpus &= ~0x400; /* C2 <-- 0 */
 744        /* the above code is for |arg| < 2**52 only */
 745    }
 746}
 747
 748void helper_fpatan(CPUX86State *env)
 749{
 750    double fptemp, fpsrcop;
 751
 752    fpsrcop = floatx80_to_double(env, ST1);
 753    fptemp = floatx80_to_double(env, ST0);
 754    ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
 755    fpop(env);
 756}
 757
 758void helper_fxtract(CPUX86State *env)
 759{
 760    CPU_LDoubleU temp;
 761
 762    temp.d = ST0;
 763
 764    if (floatx80_is_zero(ST0)) {
 765        /* Easy way to generate -inf and raising division by 0 exception */
 766        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
 767                           &env->fp_status);
 768        fpush(env);
 769        ST0 = temp.d;
 770    } else {
 771        int expdif;
 772
 773        expdif = EXPD(temp) - EXPBIAS;
 774        /* DP exponent bias */
 775        ST0 = int32_to_floatx80(expdif, &env->fp_status);
 776        fpush(env);
 777        BIASEXPONENT(temp);
 778        ST0 = temp.d;
 779    }
 780}
 781
 782void helper_fprem1(CPUX86State *env)
 783{
 784    double st0, st1, dblq, fpsrcop, fptemp;
 785    CPU_LDoubleU fpsrcop1, fptemp1;
 786    int expdif;
 787    signed long long int q;
 788
 789    st0 = floatx80_to_double(env, ST0);
 790    st1 = floatx80_to_double(env, ST1);
 791
 792    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
 793        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
 794        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 795        return;
 796    }
 797
 798    fpsrcop = st0;
 799    fptemp = st1;
 800    fpsrcop1.d = ST0;
 801    fptemp1.d = ST1;
 802    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
 803
 804    if (expdif < 0) {
 805        /* optimisation? taken from the AMD docs */
 806        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 807        /* ST0 is unchanged */
 808        return;
 809    }
 810
 811    if (expdif < 53) {
 812        dblq = fpsrcop / fptemp;
 813        /* round dblq towards nearest integer */
 814        dblq = rint(dblq);
 815        st0 = fpsrcop - fptemp * dblq;
 816
 817        /* convert dblq to q by truncating towards zero */
 818        if (dblq < 0.0) {
 819            q = (signed long long int)(-dblq);
 820        } else {
 821            q = (signed long long int)dblq;
 822        }
 823
 824        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 825        /* (C0,C3,C1) <-- (q2,q1,q0) */
 826        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
 827        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
 828        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
 829    } else {
 830        env->fpus |= 0x400;  /* C2 <-- 1 */
 831        fptemp = pow(2.0, expdif - 50);
 832        fpsrcop = (st0 / st1) / fptemp;
 833        /* fpsrcop = integer obtained by chopping */
 834        fpsrcop = (fpsrcop < 0.0) ?
 835                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
 836        st0 -= (st1 * fpsrcop * fptemp);
 837    }
 838    ST0 = double_to_floatx80(env, st0);
 839}
 840
 841void helper_fprem(CPUX86State *env)
 842{
 843    double st0, st1, dblq, fpsrcop, fptemp;
 844    CPU_LDoubleU fpsrcop1, fptemp1;
 845    int expdif;
 846    signed long long int q;
 847
 848    st0 = floatx80_to_double(env, ST0);
 849    st1 = floatx80_to_double(env, ST1);
 850
 851    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
 852        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
 853        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 854        return;
 855    }
 856
 857    fpsrcop = st0;
 858    fptemp = st1;
 859    fpsrcop1.d = ST0;
 860    fptemp1.d = ST1;
 861    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
 862
 863    if (expdif < 0) {
 864        /* optimisation? taken from the AMD docs */
 865        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 866        /* ST0 is unchanged */
 867        return;
 868    }
 869
 870    if (expdif < 53) {
 871        dblq = fpsrcop / fptemp; /* ST0 / ST1 */
 872        /* round dblq towards zero */
 873        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
 874        st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
 875
 876        /* convert dblq to q by truncating towards zero */
 877        if (dblq < 0.0) {
 878            q = (signed long long int)(-dblq);
 879        } else {
 880            q = (signed long long int)dblq;
 881        }
 882
 883        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 884        /* (C0,C3,C1) <-- (q2,q1,q0) */
 885        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
 886        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
 887        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
 888    } else {
 889        int N = 32 + (expdif % 32); /* as per AMD docs */
 890
 891        env->fpus |= 0x400;  /* C2 <-- 1 */
 892        fptemp = pow(2.0, (double)(expdif - N));
 893        fpsrcop = (st0 / st1) / fptemp;
 894        /* fpsrcop = integer obtained by chopping */
 895        fpsrcop = (fpsrcop < 0.0) ?
 896                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
 897        st0 -= (st1 * fpsrcop * fptemp);
 898    }
 899    ST0 = double_to_floatx80(env, st0);
 900}
 901
 902void helper_fyl2xp1(CPUX86State *env)
 903{
 904    double fptemp = floatx80_to_double(env, ST0);
 905
 906    if ((fptemp + 1.0) > 0.0) {
 907        fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
 908        fptemp *= floatx80_to_double(env, ST1);
 909        ST1 = double_to_floatx80(env, fptemp);
 910        fpop(env);
 911    } else {
 912        env->fpus &= ~0x4700;
 913        env->fpus |= 0x400;
 914    }
 915}
 916
 917void helper_fsqrt(CPUX86State *env)
 918{
 919    if (floatx80_is_neg(ST0)) {
 920        env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
 921        env->fpus |= 0x400;
 922    }
 923    ST0 = floatx80_sqrt(ST0, &env->fp_status);
 924}
 925
 926void helper_fsincos(CPUX86State *env)
 927{
 928    double fptemp = floatx80_to_double(env, ST0);
 929
 930    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 931        env->fpus |= 0x400;
 932    } else {
 933        ST0 = double_to_floatx80(env, sin(fptemp));
 934        fpush(env);
 935        ST0 = double_to_floatx80(env, cos(fptemp));
 936        env->fpus &= ~0x400;  /* C2 <-- 0 */
 937        /* the above code is for |arg| < 2**63 only */
 938    }
 939}
 940
 941void helper_frndint(CPUX86State *env)
 942{
 943    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
 944}
 945
 946void helper_fscale(CPUX86State *env)
 947{
 948    if (floatx80_is_any_nan(ST1)) {
 949        ST0 = ST1;
 950    } else {
 951        int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
 952        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
 953    }
 954}
 955
 956void helper_fsin(CPUX86State *env)
 957{
 958    double fptemp = floatx80_to_double(env, ST0);
 959
 960    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 961        env->fpus |= 0x400;
 962    } else {
 963        ST0 = double_to_floatx80(env, sin(fptemp));
 964        env->fpus &= ~0x400;  /* C2 <-- 0 */
 965        /* the above code is for |arg| < 2**53 only */
 966    }
 967}
 968
 969void helper_fcos(CPUX86State *env)
 970{
 971    double fptemp = floatx80_to_double(env, ST0);
 972
 973    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 974        env->fpus |= 0x400;
 975    } else {
 976        ST0 = double_to_floatx80(env, cos(fptemp));
 977        env->fpus &= ~0x400;  /* C2 <-- 0 */
 978        /* the above code is for |arg| < 2**63 only */
 979    }
 980}
 981
 982void helper_fxam_ST0(CPUX86State *env)
 983{
 984    CPU_LDoubleU temp;
 985    int expdif;
 986
 987    temp.d = ST0;
 988
 989    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 990    if (SIGND(temp)) {
 991        env->fpus |= 0x200; /* C1 <-- 1 */
 992    }
 993
 994    if (env->fptags[env->fpstt]) {
 995        env->fpus |= 0x4100; /* Empty */
 996        return;
 997    }
 998
 999    expdif = EXPD(temp);
1000    if (expdif == MAXEXPD) {
1001        if (MANTD(temp) == 0x8000000000000000ULL) {
1002            env->fpus |= 0x500; /* Infinity */
1003        } else {
1004            env->fpus |= 0x100; /* NaN */
1005        }
1006    } else if (expdif == 0) {
1007        if (MANTD(temp) == 0) {
1008            env->fpus |=  0x4000; /* Zero */
1009        } else {
1010            env->fpus |= 0x4400; /* Denormal */
1011        }
1012    } else {
1013        env->fpus |= 0x400;
1014    }
1015}
1016
1017static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
1018                      uintptr_t retaddr)
1019{
1020    int fpus, fptag, exp, i;
1021    uint64_t mant;
1022    CPU_LDoubleU tmp;
1023
1024    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1025    fptag = 0;
1026    for (i = 7; i >= 0; i--) {
1027        fptag <<= 2;
1028        if (env->fptags[i]) {
1029            fptag |= 3;
1030        } else {
1031            tmp.d = env->fpregs[i].d;
1032            exp = EXPD(tmp);
1033            mant = MANTD(tmp);
1034            if (exp == 0 && mant == 0) {
1035                /* zero */
1036                fptag |= 1;
1037            } else if (exp == 0 || exp == MAXEXPD
1038                       || (mant & (1LL << 63)) == 0) {
1039                /* NaNs, infinity, denormal */
1040                fptag |= 2;
1041            }
1042        }
1043    }
1044    if (data32) {
1045        /* 32 bit */
1046        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1047        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1048        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1049        cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1050        cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1051        cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1052        cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1053    } else {
1054        /* 16 bit */
1055        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1056        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1057        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1058        cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1059        cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1060        cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1061        cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1062    }
1063}
1064
1065void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1066{
1067    do_fstenv(env, ptr, data32, GETPC());
1068}
1069
1070static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
1071{
1072    env->fpstt = (fpus >> 11) & 7;
1073    env->fpus = fpus & ~0x3800 & ~FPUS_B;
1074    env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
1075#if !defined(CONFIG_USER_ONLY)
1076    if (!(env->fpus & FPUS_SE)) {
1077        /*
1078         * Here the processor deasserts FERR#; in response, the chipset deasserts
1079         * IGNNE#.
1080         */
1081        cpu_clear_ignne();
1082    }
1083#endif
1084}
1085
1086static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1087                      uintptr_t retaddr)
1088{
1089    int i, fpus, fptag;
1090
1091    if (data32) {
1092        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1093        fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1094        fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1095    } else {
1096        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1097        fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1098        fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1099    }
1100    cpu_set_fpus(env, fpus);
1101    for (i = 0; i < 8; i++) {
1102        env->fptags[i] = ((fptag & 3) == 3);
1103        fptag >>= 2;
1104    }
1105}
1106
1107void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1108{
1109    do_fldenv(env, ptr, data32, GETPC());
1110}
1111
1112void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1113{
1114    floatx80 tmp;
1115    int i;
1116
1117    do_fstenv(env, ptr, data32, GETPC());
1118
1119    ptr += (14 << data32);
1120    for (i = 0; i < 8; i++) {
1121        tmp = ST(i);
1122        helper_fstt(env, tmp, ptr, GETPC());
1123        ptr += 10;
1124    }
1125
1126    /* fninit */
1127    env->fpus = 0;
1128    env->fpstt = 0;
1129    cpu_set_fpuc(env, 0x37f);
1130    env->fptags[0] = 1;
1131    env->fptags[1] = 1;
1132    env->fptags[2] = 1;
1133    env->fptags[3] = 1;
1134    env->fptags[4] = 1;
1135    env->fptags[5] = 1;
1136    env->fptags[6] = 1;
1137    env->fptags[7] = 1;
1138}
1139
1140void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1141{
1142    floatx80 tmp;
1143    int i;
1144
1145    do_fldenv(env, ptr, data32, GETPC());
1146    ptr += (14 << data32);
1147
1148    for (i = 0; i < 8; i++) {
1149        tmp = helper_fldt(env, ptr, GETPC());
1150        ST(i) = tmp;
1151        ptr += 10;
1152    }
1153}
1154
#if defined(CONFIG_USER_ONLY)
/*
 * User-mode-only entry points: plain-C names that forward directly to
 * the corresponding TCG helpers.
 */

/* Forward to helper_fsave() with the same arguments. */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}

/* Forward to helper_frstor() with the same arguments. */
void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
#endif
1166
1167#define XO(X)  offsetof(X86XSaveArea, X)
1168
1169static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1170{
1171    int fpus, fptag, i;
1172    target_ulong addr;
1173
1174    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1175    fptag = 0;
1176    for (i = 0; i < 8; i++) {
1177        fptag |= (env->fptags[i] << i);
1178    }
1179
1180    cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1181    cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1182    cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
1183
1184    /* In 32-bit mode this is eip, sel, dp, sel.
1185       In 64-bit mode this is rip, rdp.
1186       But in either case we don't write actual data, just zeros.  */
1187    cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1188    cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
1189
1190    addr = ptr + XO(legacy.fpregs);
1191    for (i = 0; i < 8; i++) {
1192        floatx80 tmp = ST(i);
1193        helper_fstt(env, tmp, addr, ra);
1194        addr += 16;
1195    }
1196}
1197
1198static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1199{
1200    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1201    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
1202}
1203
1204static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1205{
1206    int i, nb_xmm_regs;
1207    target_ulong addr;
1208
1209    if (env->hflags & HF_CS64_MASK) {
1210        nb_xmm_regs = 16;
1211    } else {
1212        nb_xmm_regs = 8;
1213    }
1214
1215    addr = ptr + XO(legacy.xmm_regs);
1216    for (i = 0; i < nb_xmm_regs; i++) {
1217        cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1218        cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1219        addr += 16;
1220    }
1221}
1222
1223static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1224{
1225    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1226    int i;
1227
1228    for (i = 0; i < 4; i++, addr += 16) {
1229        cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1230        cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1231    }
1232}
1233
1234static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1235{
1236    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1237                    env->bndcs_regs.cfgu, ra);
1238    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1239                    env->bndcs_regs.sts, ra);
1240}
1241
1242static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1243{
1244    cpu_stq_data_ra(env, ptr, env->pkru, ra);
1245}
1246
1247void helper_fxsave(CPUX86State *env, target_ulong ptr)
1248{
1249    uintptr_t ra = GETPC();
1250
1251    /* The operand must be 16 byte aligned */
1252    if (ptr & 0xf) {
1253        raise_exception_ra(env, EXCP0D_GPF, ra);
1254    }
1255
1256    do_xsave_fpu(env, ptr, ra);
1257
1258    if (env->cr[4] & CR4_OSFXSR_MASK) {
1259        do_xsave_mxcsr(env, ptr, ra);
1260        /* Fast FXSAVE leaves out the XMM registers */
1261        if (!(env->efer & MSR_EFER_FFXSR)
1262            || (env->hflags & HF_CPL_MASK)
1263            || !(env->hflags & HF_LMA_MASK)) {
1264            do_xsave_sse(env, ptr, ra);
1265        }
1266    }
1267}
1268
1269static uint64_t get_xinuse(CPUX86State *env)
1270{
1271    uint64_t inuse = -1;
1272
1273    /* For the most part, we don't track XINUSE.  We could calculate it
1274       here for all components, but it's probably less work to simply
1275       indicate in use.  That said, the state of BNDREGS is important
1276       enough to track in HFLAGS, so we might as well use that here.  */
1277    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1278       inuse &= ~XSTATE_BNDREGS_MASK;
1279    }
1280    return inuse;
1281}
1282
1283static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1284                     uint64_t inuse, uint64_t opt, uintptr_t ra)
1285{
1286    uint64_t old_bv, new_bv;
1287
1288    /* The OS must have enabled XSAVE.  */
1289    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1290        raise_exception_ra(env, EXCP06_ILLOP, ra);
1291    }
1292
1293    /* The operand must be 64 byte aligned.  */
1294    if (ptr & 63) {
1295        raise_exception_ra(env, EXCP0D_GPF, ra);
1296    }
1297
1298    /* Never save anything not enabled by XCR0.  */
1299    rfbm &= env->xcr0;
1300    opt &= rfbm;
1301
1302    if (opt & XSTATE_FP_MASK) {
1303        do_xsave_fpu(env, ptr, ra);
1304    }
1305    if (rfbm & XSTATE_SSE_MASK) {
1306        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
1307        do_xsave_mxcsr(env, ptr, ra);
1308    }
1309    if (opt & XSTATE_SSE_MASK) {
1310        do_xsave_sse(env, ptr, ra);
1311    }
1312    if (opt & XSTATE_BNDREGS_MASK) {
1313        do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1314    }
1315    if (opt & XSTATE_BNDCSR_MASK) {
1316        do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1317    }
1318    if (opt & XSTATE_PKRU_MASK) {
1319        do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1320    }
1321
1322    /* Update the XSTATE_BV field.  */
1323    old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1324    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1325    cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1326}
1327
1328void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1329{
1330    do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1331}
1332
1333void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1334{
1335    uint64_t inuse = get_xinuse(env);
1336    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1337}
1338
1339static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1340{
1341    int i, fpuc, fpus, fptag;
1342    target_ulong addr;
1343
1344    fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1345    fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1346    fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1347    cpu_set_fpuc(env, fpuc);
1348    cpu_set_fpus(env, fpus);
1349    fptag ^= 0xff;
1350    for (i = 0; i < 8; i++) {
1351        env->fptags[i] = ((fptag >> i) & 1);
1352    }
1353
1354    addr = ptr + XO(legacy.fpregs);
1355    for (i = 0; i < 8; i++) {
1356        floatx80 tmp = helper_fldt(env, addr, ra);
1357        ST(i) = tmp;
1358        addr += 16;
1359    }
1360}
1361
1362static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1363{
1364    cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1365}
1366
1367static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1368{
1369    int i, nb_xmm_regs;
1370    target_ulong addr;
1371
1372    if (env->hflags & HF_CS64_MASK) {
1373        nb_xmm_regs = 16;
1374    } else {
1375        nb_xmm_regs = 8;
1376    }
1377
1378    addr = ptr + XO(legacy.xmm_regs);
1379    for (i = 0; i < nb_xmm_regs; i++) {
1380        env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1381        env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1382        addr += 16;
1383    }
1384}
1385
1386static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1387{
1388    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1389    int i;
1390
1391    for (i = 0; i < 4; i++, addr += 16) {
1392        env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1393        env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1394    }
1395}
1396
1397static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1398{
1399    /* FIXME: Extend highest implemented bit of linear address.  */
1400    env->bndcs_regs.cfgu
1401        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1402    env->bndcs_regs.sts
1403        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1404}
1405
1406static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1407{
1408    env->pkru = cpu_ldq_data_ra(env, ptr, ra);
1409}
1410
1411void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1412{
1413    uintptr_t ra = GETPC();
1414
1415    /* The operand must be 16 byte aligned */
1416    if (ptr & 0xf) {
1417        raise_exception_ra(env, EXCP0D_GPF, ra);
1418    }
1419
1420    do_xrstor_fpu(env, ptr, ra);
1421
1422    if (env->cr[4] & CR4_OSFXSR_MASK) {
1423        do_xrstor_mxcsr(env, ptr, ra);
1424        /* Fast FXRSTOR leaves out the XMM registers */
1425        if (!(env->efer & MSR_EFER_FFXSR)
1426            || (env->hflags & HF_CPL_MASK)
1427            || !(env->hflags & HF_LMA_MASK)) {
1428            do_xrstor_sse(env, ptr, ra);
1429        }
1430    }
1431}
1432
#if defined(CONFIG_USER_ONLY)
/*
 * User-mode-only entry points: plain-C names that forward directly to
 * the corresponding TCG helpers.
 */

/* Forward to helper_fxsave() with the same arguments. */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    helper_fxsave(env, ptr);
}

/* Forward to helper_fxrstor() with the same arguments. */
void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    helper_fxrstor(env, ptr);
}
#endif
1444
1445void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1446{
1447    uintptr_t ra = GETPC();
1448    uint64_t xstate_bv, xcomp_bv, reserve0;
1449
1450    rfbm &= env->xcr0;
1451
1452    /* The OS must have enabled XSAVE.  */
1453    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1454        raise_exception_ra(env, EXCP06_ILLOP, ra);
1455    }
1456
1457    /* The operand must be 64 byte aligned.  */
1458    if (ptr & 63) {
1459        raise_exception_ra(env, EXCP0D_GPF, ra);
1460    }
1461
1462    xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1463
1464    if ((int64_t)xstate_bv < 0) {
1465        /* FIXME: Compact form.  */
1466        raise_exception_ra(env, EXCP0D_GPF, ra);
1467    }
1468
1469    /* Standard form.  */
1470
1471    /* The XSTATE_BV field must not set bits not present in XCR0.  */
1472    if (xstate_bv & ~env->xcr0) {
1473        raise_exception_ra(env, EXCP0D_GPF, ra);
1474    }
1475
1476    /* The XCOMP_BV field must be zero.  Note that, as of the April 2016
1477       revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
1478       describes only XCOMP_BV, but the description of the standard form
1479       of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
1480       includes the next 64-bit field.  */
1481    xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
1482    reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
1483    if (xcomp_bv || reserve0) {
1484        raise_exception_ra(env, EXCP0D_GPF, ra);
1485    }
1486
1487    if (rfbm & XSTATE_FP_MASK) {
1488        if (xstate_bv & XSTATE_FP_MASK) {
1489            do_xrstor_fpu(env, ptr, ra);
1490        } else {
1491            helper_fninit(env);
1492            memset(env->fpregs, 0, sizeof(env->fpregs));
1493        }
1494    }
1495    if (rfbm & XSTATE_SSE_MASK) {
1496        /* Note that the standard form of XRSTOR loads MXCSR from memory
1497           whether or not the XSTATE_BV bit is set.  */
1498        do_xrstor_mxcsr(env, ptr, ra);
1499        if (xstate_bv & XSTATE_SSE_MASK) {
1500            do_xrstor_sse(env, ptr, ra);
1501        } else {
1502            /* ??? When AVX is implemented, we may have to be more
1503               selective in the clearing.  */
1504            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1505        }
1506    }
1507    if (rfbm & XSTATE_BNDREGS_MASK) {
1508        if (xstate_bv & XSTATE_BNDREGS_MASK) {
1509            do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
1510            env->hflags |= HF_MPX_IU_MASK;
1511        } else {
1512            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1513            env->hflags &= ~HF_MPX_IU_MASK;
1514        }
1515    }
1516    if (rfbm & XSTATE_BNDCSR_MASK) {
1517        if (xstate_bv & XSTATE_BNDCSR_MASK) {
1518            do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
1519        } else {
1520            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1521        }
1522        cpu_sync_bndcs_hflags(env);
1523    }
1524    if (rfbm & XSTATE_PKRU_MASK) {
1525        uint64_t old_pkru = env->pkru;
1526        if (xstate_bv & XSTATE_PKRU_MASK) {
1527            do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
1528        } else {
1529            env->pkru = 0;
1530        }
1531        if (env->pkru != old_pkru) {
1532            CPUState *cs = env_cpu(env);
1533            tlb_flush(cs);
1534        }
1535    }
1536}
1537
1538#undef XO
1539
1540uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1541{
1542    /* The OS must have enabled XSAVE.  */
1543    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1544        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1545    }
1546
1547    switch (ecx) {
1548    case 0:
1549        return env->xcr0;
1550    case 1:
1551        if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1552            return env->xcr0 & get_xinuse(env);
1553        }
1554        break;
1555    }
1556    raise_exception_ra(env, EXCP0D_GPF, GETPC());
1557}
1558
1559void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1560{
1561    uint32_t dummy, ena_lo, ena_hi;
1562    uint64_t ena;
1563
1564    /* The OS must have enabled XSAVE.  */
1565    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1566        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1567    }
1568
1569    /* Only XCR0 is defined at present; the FPU may not be disabled.  */
1570    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1571        goto do_gpf;
1572    }
1573
1574    /* Disallow enabling unimplemented features.  */
1575    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1576    ena = ((uint64_t)ena_hi << 32) | ena_lo;
1577    if (mask & ~ena) {
1578        goto do_gpf;
1579    }
1580
1581    /* Disallow enabling only half of MPX.  */
1582    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1583        & XSTATE_BNDCSR_MASK) {
1584        goto do_gpf;
1585    }
1586
1587    env->xcr0 = mask;
1588    cpu_sync_bndcs_hflags(env);
1589    return;
1590
1591 do_gpf:
1592    raise_exception_ra(env, EXCP0D_GPF, GETPC());
1593}
1594
1595/* MMX/SSE */
1596/* XXX: optimize by storing fptt and fptags in the static cpu state */
1597
1598#define SSE_DAZ             0x0040
1599#define SSE_RC_MASK         0x6000
1600#define SSE_RC_NEAR         0x0000
1601#define SSE_RC_DOWN         0x2000
1602#define SSE_RC_UP           0x4000
1603#define SSE_RC_CHOP         0x6000
1604#define SSE_FZ              0x8000
1605
1606void update_mxcsr_status(CPUX86State *env)
1607{
1608    uint32_t mxcsr = env->mxcsr;
1609    int rnd_type;
1610
1611    /* set rounding mode */
1612    switch (mxcsr & SSE_RC_MASK) {
1613    default:
1614    case SSE_RC_NEAR:
1615        rnd_type = float_round_nearest_even;
1616        break;
1617    case SSE_RC_DOWN:
1618        rnd_type = float_round_down;
1619        break;
1620    case SSE_RC_UP:
1621        rnd_type = float_round_up;
1622        break;
1623    case SSE_RC_CHOP:
1624        rnd_type = float_round_to_zero;
1625        break;
1626    }
1627    set_float_rounding_mode(rnd_type, &env->sse_status);
1628
1629    /* set denormals are zero */
1630    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1631
1632    /* set flush to zero */
1633    set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
1634}
1635
1636void helper_ldmxcsr(CPUX86State *env, uint32_t val)
1637{
1638    cpu_set_mxcsr(env, val);
1639}
1640
1641void helper_enter_mmx(CPUX86State *env)
1642{
1643    env->fpstt = 0;
1644    *(uint32_t *)(env->fptags) = 0;
1645    *(uint32_t *)(env->fptags + 4) = 0;
1646}
1647
1648void helper_emms(CPUX86State *env)
1649{
1650    /* set to empty state */
1651    *(uint32_t *)(env->fptags) = 0x01010101;
1652    *(uint32_t *)(env->fptags + 4) = 0x01010101;
1653}
1654
1655/* XXX: suppress */
1656void helper_movq(CPUX86State *env, void *d, void *s)
1657{
1658    *(uint64_t *)d = *(uint64_t *)s;
1659}
1660
1661#define SHIFT 0
1662#include "ops_sse.h"
1663
1664#define SHIFT 1
1665#include "ops_sse.h"
1666