qemu/target/i386/fpu_helper.c
<<
>>
Prefs
   1/*
   2 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include <math.h>
  22#include "cpu.h"
  23#include "exec/helper-proto.h"
  24#include "qemu/host-utils.h"
  25#include "exec/exec-all.h"
  26#include "exec/cpu_ldst.h"
  27
  28#define FPU_RC_MASK         0xc00
  29#define FPU_RC_NEAR         0x000
  30#define FPU_RC_DOWN         0x400
  31#define FPU_RC_UP           0x800
  32#define FPU_RC_CHOP         0xc00
  33
  34#define MAXTAN 9223372036854775808.0
  35
  36/* the following deal with x86 long double-precision numbers */
  37#define MAXEXPD 0x7fff
  38#define EXPBIAS 16383
  39#define EXPD(fp)        (fp.l.upper & 0x7fff)
  40#define SIGND(fp)       ((fp.l.upper) & 0x8000)
  41#define MANTD(fp)       (fp.l.lower)
  42#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
  43
  44#define FPUS_IE (1 << 0)
  45#define FPUS_DE (1 << 1)
  46#define FPUS_ZE (1 << 2)
  47#define FPUS_OE (1 << 3)
  48#define FPUS_UE (1 << 4)
  49#define FPUS_PE (1 << 5)
  50#define FPUS_SF (1 << 6)
  51#define FPUS_SE (1 << 7)
  52#define FPUS_B  (1 << 15)
  53
  54#define FPUC_EM 0x3f
  55
  56#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
  57#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
  58#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
  59
  60static inline void fpush(CPUX86State *env)
  61{
  62    env->fpstt = (env->fpstt - 1) & 7;
  63    env->fptags[env->fpstt] = 0; /* validate stack entry */
  64}
  65
  66static inline void fpop(CPUX86State *env)
  67{
  68    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
  69    env->fpstt = (env->fpstt + 1) & 7;
  70}
  71
  72static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
  73                                   uintptr_t retaddr)
  74{
  75    CPU_LDoubleU temp;
  76
  77    temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
  78    temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
  79    return temp.d;
  80}
  81
  82static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
  83                               uintptr_t retaddr)
  84{
  85    CPU_LDoubleU temp;
  86
  87    temp.d = f;
  88    cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
  89    cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
  90}
  91
  92/* x87 FPU helpers */
  93
  94static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
  95{
  96    union {
  97        float64 f64;
  98        double d;
  99    } u;
 100
 101    u.f64 = floatx80_to_float64(a, &env->fp_status);
 102    return u.d;
 103}
 104
 105static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
 106{
 107    union {
 108        float64 f64;
 109        double d;
 110    } u;
 111
 112    u.d = a;
 113    return float64_to_floatx80(u.f64, &env->fp_status);
 114}
 115
 116static void fpu_set_exception(CPUX86State *env, int mask)
 117{
 118    env->fpus |= mask;
 119    if (env->fpus & (~env->fpuc & FPUC_EM)) {
 120        env->fpus |= FPUS_SE | FPUS_B;
 121    }
 122}
 123
 124static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
 125{
 126    if (floatx80_is_zero(b)) {
 127        fpu_set_exception(env, FPUS_ZE);
 128    }
 129    return floatx80_div(a, b, &env->fp_status);
 130}
 131
 132static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
 133{
 134    if (env->cr[0] & CR0_NE_MASK) {
 135        raise_exception_ra(env, EXCP10_COPR, retaddr);
 136    }
 137#if !defined(CONFIG_USER_ONLY)
 138    else {
 139        cpu_set_ferr(env);
 140    }
 141#endif
 142}
 143
 144void helper_flds_FT0(CPUX86State *env, uint32_t val)
 145{
 146    union {
 147        float32 f;
 148        uint32_t i;
 149    } u;
 150
 151    u.i = val;
 152    FT0 = float32_to_floatx80(u.f, &env->fp_status);
 153}
 154
 155void helper_fldl_FT0(CPUX86State *env, uint64_t val)
 156{
 157    union {
 158        float64 f;
 159        uint64_t i;
 160    } u;
 161
 162    u.i = val;
 163    FT0 = float64_to_floatx80(u.f, &env->fp_status);
 164}
 165
 166void helper_fildl_FT0(CPUX86State *env, int32_t val)
 167{
 168    FT0 = int32_to_floatx80(val, &env->fp_status);
 169}
 170
 171void helper_flds_ST0(CPUX86State *env, uint32_t val)
 172{
 173    int new_fpstt;
 174    union {
 175        float32 f;
 176        uint32_t i;
 177    } u;
 178
 179    new_fpstt = (env->fpstt - 1) & 7;
 180    u.i = val;
 181    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
 182    env->fpstt = new_fpstt;
 183    env->fptags[new_fpstt] = 0; /* validate stack entry */
 184}
 185
 186void helper_fldl_ST0(CPUX86State *env, uint64_t val)
 187{
 188    int new_fpstt;
 189    union {
 190        float64 f;
 191        uint64_t i;
 192    } u;
 193
 194    new_fpstt = (env->fpstt - 1) & 7;
 195    u.i = val;
 196    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
 197    env->fpstt = new_fpstt;
 198    env->fptags[new_fpstt] = 0; /* validate stack entry */
 199}
 200
 201void helper_fildl_ST0(CPUX86State *env, int32_t val)
 202{
 203    int new_fpstt;
 204
 205    new_fpstt = (env->fpstt - 1) & 7;
 206    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
 207    env->fpstt = new_fpstt;
 208    env->fptags[new_fpstt] = 0; /* validate stack entry */
 209}
 210
 211void helper_fildll_ST0(CPUX86State *env, int64_t val)
 212{
 213    int new_fpstt;
 214
 215    new_fpstt = (env->fpstt - 1) & 7;
 216    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
 217    env->fpstt = new_fpstt;
 218    env->fptags[new_fpstt] = 0; /* validate stack entry */
 219}
 220
 221uint32_t helper_fsts_ST0(CPUX86State *env)
 222{
 223    union {
 224        float32 f;
 225        uint32_t i;
 226    } u;
 227
 228    u.f = floatx80_to_float32(ST0, &env->fp_status);
 229    return u.i;
 230}
 231
 232uint64_t helper_fstl_ST0(CPUX86State *env)
 233{
 234    union {
 235        float64 f;
 236        uint64_t i;
 237    } u;
 238
 239    u.f = floatx80_to_float64(ST0, &env->fp_status);
 240    return u.i;
 241}
 242
 243int32_t helper_fist_ST0(CPUX86State *env)
 244{
 245    int32_t val;
 246
 247    val = floatx80_to_int32(ST0, &env->fp_status);
 248    if (val != (int16_t)val) {
 249        val = -32768;
 250    }
 251    return val;
 252}
 253
 254int32_t helper_fistl_ST0(CPUX86State *env)
 255{
 256    int32_t val;
 257    signed char old_exp_flags;
 258
 259    old_exp_flags = get_float_exception_flags(&env->fp_status);
 260    set_float_exception_flags(0, &env->fp_status);
 261
 262    val = floatx80_to_int32(ST0, &env->fp_status);
 263    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 264        val = 0x80000000;
 265    }
 266    set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 267                                | old_exp_flags, &env->fp_status);
 268    return val;
 269}
 270
 271int64_t helper_fistll_ST0(CPUX86State *env)
 272{
 273    int64_t val;
 274    signed char old_exp_flags;
 275
 276    old_exp_flags = get_float_exception_flags(&env->fp_status);
 277    set_float_exception_flags(0, &env->fp_status);
 278
 279    val = floatx80_to_int64(ST0, &env->fp_status);
 280    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 281        val = 0x8000000000000000ULL;
 282    }
 283    set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 284                                | old_exp_flags, &env->fp_status);
 285    return val;
 286}
 287
 288int32_t helper_fistt_ST0(CPUX86State *env)
 289{
 290    int32_t val;
 291
 292    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 293    if (val != (int16_t)val) {
 294        val = -32768;
 295    }
 296    return val;
 297}
 298
 299int32_t helper_fisttl_ST0(CPUX86State *env)
 300{
 301    return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 302}
 303
 304int64_t helper_fisttll_ST0(CPUX86State *env)
 305{
 306    return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
 307}
 308
 309void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
 310{
 311    int new_fpstt;
 312
 313    new_fpstt = (env->fpstt - 1) & 7;
 314    env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
 315    env->fpstt = new_fpstt;
 316    env->fptags[new_fpstt] = 0; /* validate stack entry */
 317}
 318
 319void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
 320{
 321    helper_fstt(env, ST0, ptr, GETPC());
 322}
 323
 324void helper_fpush(CPUX86State *env)
 325{
 326    fpush(env);
 327}
 328
 329void helper_fpop(CPUX86State *env)
 330{
 331    fpop(env);
 332}
 333
 334void helper_fdecstp(CPUX86State *env)
 335{
 336    env->fpstt = (env->fpstt - 1) & 7;
 337    env->fpus &= ~0x4700;
 338}
 339
 340void helper_fincstp(CPUX86State *env)
 341{
 342    env->fpstt = (env->fpstt + 1) & 7;
 343    env->fpus &= ~0x4700;
 344}
 345
 346/* FPU move */
 347
 348void helper_ffree_STN(CPUX86State *env, int st_index)
 349{
 350    env->fptags[(env->fpstt + st_index) & 7] = 1;
 351}
 352
 353void helper_fmov_ST0_FT0(CPUX86State *env)
 354{
 355    ST0 = FT0;
 356}
 357
 358void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
 359{
 360    FT0 = ST(st_index);
 361}
 362
 363void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
 364{
 365    ST0 = ST(st_index);
 366}
 367
 368void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
 369{
 370    ST(st_index) = ST0;
 371}
 372
 373void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
 374{
 375    floatx80 tmp;
 376
 377    tmp = ST(st_index);
 378    ST(st_index) = ST0;
 379    ST0 = tmp;
 380}
 381
 382/* FPU operations */
 383
 384static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
 385
 386void helper_fcom_ST0_FT0(CPUX86State *env)
 387{
 388    int ret;
 389
 390    ret = floatx80_compare(ST0, FT0, &env->fp_status);
 391    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 392}
 393
 394void helper_fucom_ST0_FT0(CPUX86State *env)
 395{
 396    int ret;
 397
 398    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 399    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 400}
 401
 402static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
 403
 404void helper_fcomi_ST0_FT0(CPUX86State *env)
 405{
 406    int eflags;
 407    int ret;
 408
 409    ret = floatx80_compare(ST0, FT0, &env->fp_status);
 410    eflags = cpu_cc_compute_all(env, CC_OP);
 411    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 412    CC_SRC = eflags;
 413}
 414
 415void helper_fucomi_ST0_FT0(CPUX86State *env)
 416{
 417    int eflags;
 418    int ret;
 419
 420    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 421    eflags = cpu_cc_compute_all(env, CC_OP);
 422    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 423    CC_SRC = eflags;
 424}
 425
 426void helper_fadd_ST0_FT0(CPUX86State *env)
 427{
 428    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
 429}
 430
 431void helper_fmul_ST0_FT0(CPUX86State *env)
 432{
 433    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
 434}
 435
 436void helper_fsub_ST0_FT0(CPUX86State *env)
 437{
 438    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
 439}
 440
 441void helper_fsubr_ST0_FT0(CPUX86State *env)
 442{
 443    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
 444}
 445
 446void helper_fdiv_ST0_FT0(CPUX86State *env)
 447{
 448    ST0 = helper_fdiv(env, ST0, FT0);
 449}
 450
 451void helper_fdivr_ST0_FT0(CPUX86State *env)
 452{
 453    ST0 = helper_fdiv(env, FT0, ST0);
 454}
 455
 456/* fp operations between STN and ST0 */
 457
 458void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
 459{
 460    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
 461}
 462
 463void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
 464{
 465    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
 466}
 467
 468void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
 469{
 470    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
 471}
 472
 473void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
 474{
 475    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
 476}
 477
 478void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
 479{
 480    floatx80 *p;
 481
 482    p = &ST(st_index);
 483    *p = helper_fdiv(env, *p, ST0);
 484}
 485
 486void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
 487{
 488    floatx80 *p;
 489
 490    p = &ST(st_index);
 491    *p = helper_fdiv(env, ST0, *p);
 492}
 493
 494/* misc FPU operations */
 495void helper_fchs_ST0(CPUX86State *env)
 496{
 497    ST0 = floatx80_chs(ST0);
 498}
 499
 500void helper_fabs_ST0(CPUX86State *env)
 501{
 502    ST0 = floatx80_abs(ST0);
 503}
 504
 505void helper_fld1_ST0(CPUX86State *env)
 506{
 507    ST0 = floatx80_one;
 508}
 509
 510void helper_fldl2t_ST0(CPUX86State *env)
 511{
 512    ST0 = floatx80_l2t;
 513}
 514
 515void helper_fldl2e_ST0(CPUX86State *env)
 516{
 517    ST0 = floatx80_l2e;
 518}
 519
 520void helper_fldpi_ST0(CPUX86State *env)
 521{
 522    ST0 = floatx80_pi;
 523}
 524
 525void helper_fldlg2_ST0(CPUX86State *env)
 526{
 527    ST0 = floatx80_lg2;
 528}
 529
 530void helper_fldln2_ST0(CPUX86State *env)
 531{
 532    ST0 = floatx80_ln2;
 533}
 534
 535void helper_fldz_ST0(CPUX86State *env)
 536{
 537    ST0 = floatx80_zero;
 538}
 539
 540void helper_fldz_FT0(CPUX86State *env)
 541{
 542    FT0 = floatx80_zero;
 543}
 544
 545uint32_t helper_fnstsw(CPUX86State *env)
 546{
 547    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 548}
 549
 550uint32_t helper_fnstcw(CPUX86State *env)
 551{
 552    return env->fpuc;
 553}
 554
 555void update_fp_status(CPUX86State *env)
 556{
 557    int rnd_type;
 558
 559    /* set rounding mode */
 560    switch (env->fpuc & FPU_RC_MASK) {
 561    default:
 562    case FPU_RC_NEAR:
 563        rnd_type = float_round_nearest_even;
 564        break;
 565    case FPU_RC_DOWN:
 566        rnd_type = float_round_down;
 567        break;
 568    case FPU_RC_UP:
 569        rnd_type = float_round_up;
 570        break;
 571    case FPU_RC_CHOP:
 572        rnd_type = float_round_to_zero;
 573        break;
 574    }
 575    set_float_rounding_mode(rnd_type, &env->fp_status);
 576    switch ((env->fpuc >> 8) & 3) {
 577    case 0:
 578        rnd_type = 32;
 579        break;
 580    case 2:
 581        rnd_type = 64;
 582        break;
 583    case 3:
 584    default:
 585        rnd_type = 80;
 586        break;
 587    }
 588    set_floatx80_rounding_precision(rnd_type, &env->fp_status);
 589}
 590
 591void helper_fldcw(CPUX86State *env, uint32_t val)
 592{
 593    cpu_set_fpuc(env, val);
 594}
 595
 596void helper_fclex(CPUX86State *env)
 597{
 598    env->fpus &= 0x7f00;
 599}
 600
 601void helper_fwait(CPUX86State *env)
 602{
 603    if (env->fpus & FPUS_SE) {
 604        fpu_raise_exception(env, GETPC());
 605    }
 606}
 607
 608void helper_fninit(CPUX86State *env)
 609{
 610    env->fpus = 0;
 611    env->fpstt = 0;
 612    cpu_set_fpuc(env, 0x37f);
 613    env->fptags[0] = 1;
 614    env->fptags[1] = 1;
 615    env->fptags[2] = 1;
 616    env->fptags[3] = 1;
 617    env->fptags[4] = 1;
 618    env->fptags[5] = 1;
 619    env->fptags[6] = 1;
 620    env->fptags[7] = 1;
 621}
 622
 623/* BCD ops */
 624
 625void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
 626{
 627    floatx80 tmp;
 628    uint64_t val;
 629    unsigned int v;
 630    int i;
 631
 632    val = 0;
 633    for (i = 8; i >= 0; i--) {
 634        v = cpu_ldub_data_ra(env, ptr + i, GETPC());
 635        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
 636    }
 637    tmp = int64_to_floatx80(val, &env->fp_status);
 638    if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
 639        tmp = floatx80_chs(tmp);
 640    }
 641    fpush(env);
 642    ST0 = tmp;
 643}
 644
 645void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
 646{
 647    int v;
 648    target_ulong mem_ref, mem_end;
 649    int64_t val;
 650
 651    val = floatx80_to_int64(ST0, &env->fp_status);
 652    mem_ref = ptr;
 653    mem_end = mem_ref + 9;
 654    if (val < 0) {
 655        cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
 656        val = -val;
 657    } else {
 658        cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
 659    }
 660    while (mem_ref < mem_end) {
 661        if (val == 0) {
 662            break;
 663        }
 664        v = val % 100;
 665        val = val / 100;
 666        v = ((v / 10) << 4) | (v % 10);
 667        cpu_stb_data_ra(env, mem_ref++, v, GETPC());
 668    }
 669    while (mem_ref < mem_end) {
 670        cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
 671    }
 672}
 673
 674void helper_f2xm1(CPUX86State *env)
 675{
 676    double val = floatx80_to_double(env, ST0);
 677
 678    val = pow(2.0, val) - 1.0;
 679    ST0 = double_to_floatx80(env, val);
 680}
 681
 682void helper_fyl2x(CPUX86State *env)
 683{
 684    double fptemp = floatx80_to_double(env, ST0);
 685
 686    if (fptemp > 0.0) {
 687        fptemp = log(fptemp) / log(2.0); /* log2(ST) */
 688        fptemp *= floatx80_to_double(env, ST1);
 689        ST1 = double_to_floatx80(env, fptemp);
 690        fpop(env);
 691    } else {
 692        env->fpus &= ~0x4700;
 693        env->fpus |= 0x400;
 694    }
 695}
 696
 697void helper_fptan(CPUX86State *env)
 698{
 699    double fptemp = floatx80_to_double(env, ST0);
 700
 701    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 702        env->fpus |= 0x400;
 703    } else {
 704        fptemp = tan(fptemp);
 705        ST0 = double_to_floatx80(env, fptemp);
 706        fpush(env);
 707        ST0 = floatx80_one;
 708        env->fpus &= ~0x400; /* C2 <-- 0 */
 709        /* the above code is for |arg| < 2**52 only */
 710    }
 711}
 712
 713void helper_fpatan(CPUX86State *env)
 714{
 715    double fptemp, fpsrcop;
 716
 717    fpsrcop = floatx80_to_double(env, ST1);
 718    fptemp = floatx80_to_double(env, ST0);
 719    ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
 720    fpop(env);
 721}
 722
 723void helper_fxtract(CPUX86State *env)
 724{
 725    CPU_LDoubleU temp;
 726
 727    temp.d = ST0;
 728
 729    if (floatx80_is_zero(ST0)) {
 730        /* Easy way to generate -inf and raising division by 0 exception */
 731        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
 732                           &env->fp_status);
 733        fpush(env);
 734        ST0 = temp.d;
 735    } else {
 736        int expdif;
 737
 738        expdif = EXPD(temp) - EXPBIAS;
 739        /* DP exponent bias */
 740        ST0 = int32_to_floatx80(expdif, &env->fp_status);
 741        fpush(env);
 742        BIASEXPONENT(temp);
 743        ST0 = temp.d;
 744    }
 745}
 746
 747void helper_fprem1(CPUX86State *env)
 748{
 749    double st0, st1, dblq, fpsrcop, fptemp;
 750    CPU_LDoubleU fpsrcop1, fptemp1;
 751    int expdif;
 752    signed long long int q;
 753
 754    st0 = floatx80_to_double(env, ST0);
 755    st1 = floatx80_to_double(env, ST1);
 756
 757    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
 758        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
 759        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 760        return;
 761    }
 762
 763    fpsrcop = st0;
 764    fptemp = st1;
 765    fpsrcop1.d = ST0;
 766    fptemp1.d = ST1;
 767    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
 768
 769    if (expdif < 0) {
 770        /* optimisation? taken from the AMD docs */
 771        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 772        /* ST0 is unchanged */
 773        return;
 774    }
 775
 776    if (expdif < 53) {
 777        dblq = fpsrcop / fptemp;
 778        /* round dblq towards nearest integer */
 779        dblq = rint(dblq);
 780        st0 = fpsrcop - fptemp * dblq;
 781
 782        /* convert dblq to q by truncating towards zero */
 783        if (dblq < 0.0) {
 784            q = (signed long long int)(-dblq);
 785        } else {
 786            q = (signed long long int)dblq;
 787        }
 788
 789        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 790        /* (C0,C3,C1) <-- (q2,q1,q0) */
 791        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
 792        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
 793        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
 794    } else {
 795        env->fpus |= 0x400;  /* C2 <-- 1 */
 796        fptemp = pow(2.0, expdif - 50);
 797        fpsrcop = (st0 / st1) / fptemp;
 798        /* fpsrcop = integer obtained by chopping */
 799        fpsrcop = (fpsrcop < 0.0) ?
 800                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
 801        st0 -= (st1 * fpsrcop * fptemp);
 802    }
 803    ST0 = double_to_floatx80(env, st0);
 804}
 805
 806void helper_fprem(CPUX86State *env)
 807{
 808    double st0, st1, dblq, fpsrcop, fptemp;
 809    CPU_LDoubleU fpsrcop1, fptemp1;
 810    int expdif;
 811    signed long long int q;
 812
 813    st0 = floatx80_to_double(env, ST0);
 814    st1 = floatx80_to_double(env, ST1);
 815
 816    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
 817        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
 818        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 819        return;
 820    }
 821
 822    fpsrcop = st0;
 823    fptemp = st1;
 824    fpsrcop1.d = ST0;
 825    fptemp1.d = ST1;
 826    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
 827
 828    if (expdif < 0) {
 829        /* optimisation? taken from the AMD docs */
 830        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 831        /* ST0 is unchanged */
 832        return;
 833    }
 834
 835    if (expdif < 53) {
 836        dblq = fpsrcop / fptemp; /* ST0 / ST1 */
 837        /* round dblq towards zero */
 838        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
 839        st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
 840
 841        /* convert dblq to q by truncating towards zero */
 842        if (dblq < 0.0) {
 843            q = (signed long long int)(-dblq);
 844        } else {
 845            q = (signed long long int)dblq;
 846        }
 847
 848        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 849        /* (C0,C3,C1) <-- (q2,q1,q0) */
 850        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
 851        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
 852        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
 853    } else {
 854        int N = 32 + (expdif % 32); /* as per AMD docs */
 855
 856        env->fpus |= 0x400;  /* C2 <-- 1 */
 857        fptemp = pow(2.0, (double)(expdif - N));
 858        fpsrcop = (st0 / st1) / fptemp;
 859        /* fpsrcop = integer obtained by chopping */
 860        fpsrcop = (fpsrcop < 0.0) ?
 861                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
 862        st0 -= (st1 * fpsrcop * fptemp);
 863    }
 864    ST0 = double_to_floatx80(env, st0);
 865}
 866
 867void helper_fyl2xp1(CPUX86State *env)
 868{
 869    double fptemp = floatx80_to_double(env, ST0);
 870
 871    if ((fptemp + 1.0) > 0.0) {
 872        fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
 873        fptemp *= floatx80_to_double(env, ST1);
 874        ST1 = double_to_floatx80(env, fptemp);
 875        fpop(env);
 876    } else {
 877        env->fpus &= ~0x4700;
 878        env->fpus |= 0x400;
 879    }
 880}
 881
 882void helper_fsqrt(CPUX86State *env)
 883{
 884    if (floatx80_is_neg(ST0)) {
 885        env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
 886        env->fpus |= 0x400;
 887    }
 888    ST0 = floatx80_sqrt(ST0, &env->fp_status);
 889}
 890
 891void helper_fsincos(CPUX86State *env)
 892{
 893    double fptemp = floatx80_to_double(env, ST0);
 894
 895    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 896        env->fpus |= 0x400;
 897    } else {
 898        ST0 = double_to_floatx80(env, sin(fptemp));
 899        fpush(env);
 900        ST0 = double_to_floatx80(env, cos(fptemp));
 901        env->fpus &= ~0x400;  /* C2 <-- 0 */
 902        /* the above code is for |arg| < 2**63 only */
 903    }
 904}
 905
 906void helper_frndint(CPUX86State *env)
 907{
 908    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
 909}
 910
 911void helper_fscale(CPUX86State *env)
 912{
 913    if (floatx80_is_any_nan(ST1)) {
 914        ST0 = ST1;
 915    } else {
 916        int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
 917        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
 918    }
 919}
 920
 921void helper_fsin(CPUX86State *env)
 922{
 923    double fptemp = floatx80_to_double(env, ST0);
 924
 925    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 926        env->fpus |= 0x400;
 927    } else {
 928        ST0 = double_to_floatx80(env, sin(fptemp));
 929        env->fpus &= ~0x400;  /* C2 <-- 0 */
 930        /* the above code is for |arg| < 2**53 only */
 931    }
 932}
 933
 934void helper_fcos(CPUX86State *env)
 935{
 936    double fptemp = floatx80_to_double(env, ST0);
 937
 938    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 939        env->fpus |= 0x400;
 940    } else {
 941        ST0 = double_to_floatx80(env, cos(fptemp));
 942        env->fpus &= ~0x400;  /* C2 <-- 0 */
 943        /* the above code is for |arg| < 2**63 only */
 944    }
 945}
 946
 947void helper_fxam_ST0(CPUX86State *env)
 948{
 949    CPU_LDoubleU temp;
 950    int expdif;
 951
 952    temp.d = ST0;
 953
 954    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 955    if (SIGND(temp)) {
 956        env->fpus |= 0x200; /* C1 <-- 1 */
 957    }
 958
 959    /* XXX: test fptags too */
 960    expdif = EXPD(temp);
 961    if (expdif == MAXEXPD) {
 962        if (MANTD(temp) == 0x8000000000000000ULL) {
 963            env->fpus |= 0x500; /* Infinity */
 964        } else {
 965            env->fpus |= 0x100; /* NaN */
 966        }
 967    } else if (expdif == 0) {
 968        if (MANTD(temp) == 0) {
 969            env->fpus |=  0x4000; /* Zero */
 970        } else {
 971            env->fpus |= 0x4400; /* Denormal */
 972        }
 973    } else {
 974        env->fpus |= 0x400;
 975    }
 976}
 977
 978static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
 979                      uintptr_t retaddr)
 980{
 981    int fpus, fptag, exp, i;
 982    uint64_t mant;
 983    CPU_LDoubleU tmp;
 984
 985    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 986    fptag = 0;
 987    for (i = 7; i >= 0; i--) {
 988        fptag <<= 2;
 989        if (env->fptags[i]) {
 990            fptag |= 3;
 991        } else {
 992            tmp.d = env->fpregs[i].d;
 993            exp = EXPD(tmp);
 994            mant = MANTD(tmp);
 995            if (exp == 0 && mant == 0) {
 996                /* zero */
 997                fptag |= 1;
 998            } else if (exp == 0 || exp == MAXEXPD
 999                       || (mant & (1LL << 63)) == 0) {
1000                /* NaNs, infinity, denormal */
1001                fptag |= 2;
1002            }
1003        }
1004    }
1005    if (data32) {
1006        /* 32 bit */
1007        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1008        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1009        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1010        cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1011        cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1012        cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1013        cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1014    } else {
1015        /* 16 bit */
1016        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1017        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1018        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1019        cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1020        cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1021        cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1022        cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1023    }
1024}
1025
1026void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1027{
1028    do_fstenv(env, ptr, data32, GETPC());
1029}
1030
1031static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1032                      uintptr_t retaddr)
1033{
1034    int i, fpus, fptag;
1035
1036    if (data32) {
1037        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1038        fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1039        fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1040    } else {
1041        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1042        fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1043        fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1044    }
1045    env->fpstt = (fpus >> 11) & 7;
1046    env->fpus = fpus & ~0x3800;
1047    for (i = 0; i < 8; i++) {
1048        env->fptags[i] = ((fptag & 3) == 3);
1049        fptag >>= 2;
1050    }
1051}
1052
1053void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1054{
1055    do_fldenv(env, ptr, data32, GETPC());
1056}
1057
1058void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1059{
1060    floatx80 tmp;
1061    int i;
1062
1063    do_fstenv(env, ptr, data32, GETPC());
1064
1065    ptr += (14 << data32);
1066    for (i = 0; i < 8; i++) {
1067        tmp = ST(i);
1068        helper_fstt(env, tmp, ptr, GETPC());
1069        ptr += 10;
1070    }
1071
1072    /* fninit */
1073    env->fpus = 0;
1074    env->fpstt = 0;
1075    cpu_set_fpuc(env, 0x37f);
1076    env->fptags[0] = 1;
1077    env->fptags[1] = 1;
1078    env->fptags[2] = 1;
1079    env->fptags[3] = 1;
1080    env->fptags[4] = 1;
1081    env->fptags[5] = 1;
1082    env->fptags[6] = 1;
1083    env->fptags[7] = 1;
1084}
1085
1086void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1087{
1088    floatx80 tmp;
1089    int i;
1090
1091    do_fldenv(env, ptr, data32, GETPC());
1092    ptr += (14 << data32);
1093
1094    for (i = 0; i < 8; i++) {
1095        tmp = helper_fldt(env, ptr, GETPC());
1096        ST(i) = tmp;
1097        ptr += 10;
1098    }
1099}
1100
1101#if defined(CONFIG_USER_ONLY)
1102void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
1103{
1104    helper_fsave(env, ptr, data32);
1105}
1106
1107void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
1108{
1109    helper_frstor(env, ptr, data32);
1110}
1111#endif
1112
1113#define XO(X)  offsetof(X86XSaveArea, X)
1114
1115static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1116{
1117    int fpus, fptag, i;
1118    target_ulong addr;
1119
1120    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1121    fptag = 0;
1122    for (i = 0; i < 8; i++) {
1123        fptag |= (env->fptags[i] << i);
1124    }
1125
1126    cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1127    cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1128    cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
1129
1130    /* In 32-bit mode this is eip, sel, dp, sel.
1131       In 64-bit mode this is rip, rdp.
1132       But in either case we don't write actual data, just zeros.  */
1133    cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1134    cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
1135
1136    addr = ptr + XO(legacy.fpregs);
1137    for (i = 0; i < 8; i++) {
1138        floatx80 tmp = ST(i);
1139        helper_fstt(env, tmp, addr, ra);
1140        addr += 16;
1141    }
1142}
1143
1144static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1145{
1146    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1147    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
1148}
1149
1150static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1151{
1152    int i, nb_xmm_regs;
1153    target_ulong addr;
1154
1155    if (env->hflags & HF_CS64_MASK) {
1156        nb_xmm_regs = 16;
1157    } else {
1158        nb_xmm_regs = 8;
1159    }
1160
1161    addr = ptr + XO(legacy.xmm_regs);
1162    for (i = 0; i < nb_xmm_regs; i++) {
1163        cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1164        cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1165        addr += 16;
1166    }
1167}
1168
1169static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1170{
1171    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1172    int i;
1173
1174    for (i = 0; i < 4; i++, addr += 16) {
1175        cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1176        cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1177    }
1178}
1179
1180static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1181{
1182    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1183                    env->bndcs_regs.cfgu, ra);
1184    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1185                    env->bndcs_regs.sts, ra);
1186}
1187
1188static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1189{
1190    cpu_stq_data_ra(env, ptr, env->pkru, ra);
1191}
1192
1193void helper_fxsave(CPUX86State *env, target_ulong ptr)
1194{
1195    uintptr_t ra = GETPC();
1196
1197    /* The operand must be 16 byte aligned */
1198    if (ptr & 0xf) {
1199        raise_exception_ra(env, EXCP0D_GPF, ra);
1200    }
1201
1202    do_xsave_fpu(env, ptr, ra);
1203
1204    if (env->cr[4] & CR4_OSFXSR_MASK) {
1205        do_xsave_mxcsr(env, ptr, ra);
1206        /* Fast FXSAVE leaves out the XMM registers */
1207        if (!(env->efer & MSR_EFER_FFXSR)
1208            || (env->hflags & HF_CPL_MASK)
1209            || !(env->hflags & HF_LMA_MASK)) {
1210            do_xsave_sse(env, ptr, ra);
1211        }
1212    }
1213}
1214
1215static uint64_t get_xinuse(CPUX86State *env)
1216{
1217    uint64_t inuse = -1;
1218
1219    /* For the most part, we don't track XINUSE.  We could calculate it
1220       here for all components, but it's probably less work to simply
1221       indicate in use.  That said, the state of BNDREGS is important
1222       enough to track in HFLAGS, so we might as well use that here.  */
1223    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1224       inuse &= ~XSTATE_BNDREGS_MASK;
1225    }
1226    return inuse;
1227}
1228
1229static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1230                     uint64_t inuse, uint64_t opt, uintptr_t ra)
1231{
1232    uint64_t old_bv, new_bv;
1233
1234    /* The OS must have enabled XSAVE.  */
1235    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1236        raise_exception_ra(env, EXCP06_ILLOP, ra);
1237    }
1238
1239    /* The operand must be 64 byte aligned.  */
1240    if (ptr & 63) {
1241        raise_exception_ra(env, EXCP0D_GPF, ra);
1242    }
1243
1244    /* Never save anything not enabled by XCR0.  */
1245    rfbm &= env->xcr0;
1246    opt &= rfbm;
1247
1248    if (opt & XSTATE_FP_MASK) {
1249        do_xsave_fpu(env, ptr, ra);
1250    }
1251    if (rfbm & XSTATE_SSE_MASK) {
1252        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
1253        do_xsave_mxcsr(env, ptr, ra);
1254    }
1255    if (opt & XSTATE_SSE_MASK) {
1256        do_xsave_sse(env, ptr, ra);
1257    }
1258    if (opt & XSTATE_BNDREGS_MASK) {
1259        do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1260    }
1261    if (opt & XSTATE_BNDCSR_MASK) {
1262        do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1263    }
1264    if (opt & XSTATE_PKRU_MASK) {
1265        do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1266    }
1267
1268    /* Update the XSTATE_BV field.  */
1269    old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1270    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1271    cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1272}
1273
1274void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1275{
1276    do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1277}
1278
1279void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1280{
1281    uint64_t inuse = get_xinuse(env);
1282    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1283}
1284
1285static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1286{
1287    int i, fpuc, fpus, fptag;
1288    target_ulong addr;
1289
1290    fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1291    fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1292    fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1293    cpu_set_fpuc(env, fpuc);
1294    env->fpstt = (fpus >> 11) & 7;
1295    env->fpus = fpus & ~0x3800;
1296    fptag ^= 0xff;
1297    for (i = 0; i < 8; i++) {
1298        env->fptags[i] = ((fptag >> i) & 1);
1299    }
1300
1301    addr = ptr + XO(legacy.fpregs);
1302    for (i = 0; i < 8; i++) {
1303        floatx80 tmp = helper_fldt(env, addr, ra);
1304        ST(i) = tmp;
1305        addr += 16;
1306    }
1307}
1308
1309static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1310{
1311    cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1312}
1313
1314static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1315{
1316    int i, nb_xmm_regs;
1317    target_ulong addr;
1318
1319    if (env->hflags & HF_CS64_MASK) {
1320        nb_xmm_regs = 16;
1321    } else {
1322        nb_xmm_regs = 8;
1323    }
1324
1325    addr = ptr + XO(legacy.xmm_regs);
1326    for (i = 0; i < nb_xmm_regs; i++) {
1327        env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1328        env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1329        addr += 16;
1330    }
1331}
1332
1333static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1334{
1335    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1336    int i;
1337
1338    for (i = 0; i < 4; i++, addr += 16) {
1339        env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1340        env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1341    }
1342}
1343
1344static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1345{
1346    /* FIXME: Extend highest implemented bit of linear address.  */
1347    env->bndcs_regs.cfgu
1348        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1349    env->bndcs_regs.sts
1350        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1351}
1352
1353static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1354{
1355    env->pkru = cpu_ldq_data_ra(env, ptr, ra);
1356}
1357
1358void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1359{
1360    uintptr_t ra = GETPC();
1361
1362    /* The operand must be 16 byte aligned */
1363    if (ptr & 0xf) {
1364        raise_exception_ra(env, EXCP0D_GPF, ra);
1365    }
1366
1367    do_xrstor_fpu(env, ptr, ra);
1368
1369    if (env->cr[4] & CR4_OSFXSR_MASK) {
1370        do_xrstor_mxcsr(env, ptr, ra);
1371        /* Fast FXRSTOR leaves out the XMM registers */
1372        if (!(env->efer & MSR_EFER_FFXSR)
1373            || (env->hflags & HF_CPL_MASK)
1374            || !(env->hflags & HF_LMA_MASK)) {
1375            do_xrstor_sse(env, ptr, ra);
1376        }
1377    }
1378}
1379
#ifdef CONFIG_USER_ONLY
/* User-mode entry point: forwards unchanged to helper_fxsave().  */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    helper_fxsave(env, ptr);
}

/* User-mode entry point: forwards unchanged to helper_fxrstor().  */
void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    helper_fxrstor(env, ptr);
}
#endif /* CONFIG_USER_ONLY */
1391
1392void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1393{
1394    uintptr_t ra = GETPC();
1395    uint64_t xstate_bv, xcomp_bv, reserve0;
1396
1397    rfbm &= env->xcr0;
1398
1399    /* The OS must have enabled XSAVE.  */
1400    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1401        raise_exception_ra(env, EXCP06_ILLOP, ra);
1402    }
1403
1404    /* The operand must be 64 byte aligned.  */
1405    if (ptr & 63) {
1406        raise_exception_ra(env, EXCP0D_GPF, ra);
1407    }
1408
1409    xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1410
1411    if ((int64_t)xstate_bv < 0) {
1412        /* FIXME: Compact form.  */
1413        raise_exception_ra(env, EXCP0D_GPF, ra);
1414    }
1415
1416    /* Standard form.  */
1417
1418    /* The XSTATE_BV field must not set bits not present in XCR0.  */
1419    if (xstate_bv & ~env->xcr0) {
1420        raise_exception_ra(env, EXCP0D_GPF, ra);
1421    }
1422
1423    /* The XCOMP_BV field must be zero.  Note that, as of the April 2016
1424       revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
1425       describes only XCOMP_BV, but the description of the standard form
1426       of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
1427       includes the next 64-bit field.  */
1428    xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
1429    reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
1430    if (xcomp_bv || reserve0) {
1431        raise_exception_ra(env, EXCP0D_GPF, ra);
1432    }
1433
1434    if (rfbm & XSTATE_FP_MASK) {
1435        if (xstate_bv & XSTATE_FP_MASK) {
1436            do_xrstor_fpu(env, ptr, ra);
1437        } else {
1438            helper_fninit(env);
1439            memset(env->fpregs, 0, sizeof(env->fpregs));
1440        }
1441    }
1442    if (rfbm & XSTATE_SSE_MASK) {
1443        /* Note that the standard form of XRSTOR loads MXCSR from memory
1444           whether or not the XSTATE_BV bit is set.  */
1445        do_xrstor_mxcsr(env, ptr, ra);
1446        if (xstate_bv & XSTATE_SSE_MASK) {
1447            do_xrstor_sse(env, ptr, ra);
1448        } else {
1449            /* ??? When AVX is implemented, we may have to be more
1450               selective in the clearing.  */
1451            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1452        }
1453    }
1454    if (rfbm & XSTATE_BNDREGS_MASK) {
1455        if (xstate_bv & XSTATE_BNDREGS_MASK) {
1456            do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
1457            env->hflags |= HF_MPX_IU_MASK;
1458        } else {
1459            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1460            env->hflags &= ~HF_MPX_IU_MASK;
1461        }
1462    }
1463    if (rfbm & XSTATE_BNDCSR_MASK) {
1464        if (xstate_bv & XSTATE_BNDCSR_MASK) {
1465            do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
1466        } else {
1467            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1468        }
1469        cpu_sync_bndcs_hflags(env);
1470    }
1471    if (rfbm & XSTATE_PKRU_MASK) {
1472        uint64_t old_pkru = env->pkru;
1473        if (xstate_bv & XSTATE_PKRU_MASK) {
1474            do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
1475        } else {
1476            env->pkru = 0;
1477        }
1478        if (env->pkru != old_pkru) {
1479            CPUState *cs = CPU(x86_env_get_cpu(env));
1480            tlb_flush(cs);
1481        }
1482    }
1483}
1484
1485#undef XO
1486
1487uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1488{
1489    /* The OS must have enabled XSAVE.  */
1490    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1491        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1492    }
1493
1494    switch (ecx) {
1495    case 0:
1496        return env->xcr0;
1497    case 1:
1498        if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1499            return env->xcr0 & get_xinuse(env);
1500        }
1501        break;
1502    }
1503    raise_exception_ra(env, EXCP0D_GPF, GETPC());
1504}
1505
1506void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1507{
1508    uint32_t dummy, ena_lo, ena_hi;
1509    uint64_t ena;
1510
1511    /* The OS must have enabled XSAVE.  */
1512    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1513        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1514    }
1515
1516    /* Only XCR0 is defined at present; the FPU may not be disabled.  */
1517    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1518        goto do_gpf;
1519    }
1520
1521    /* Disallow enabling unimplemented features.  */
1522    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1523    ena = ((uint64_t)ena_hi << 32) | ena_lo;
1524    if (mask & ~ena) {
1525        goto do_gpf;
1526    }
1527
1528    /* Disallow enabling only half of MPX.  */
1529    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1530        & XSTATE_BNDCSR_MASK) {
1531        goto do_gpf;
1532    }
1533
1534    env->xcr0 = mask;
1535    cpu_sync_bndcs_hflags(env);
1536    return;
1537
1538 do_gpf:
1539    raise_exception_ra(env, EXCP0D_GPF, GETPC());
1540}
1541
1542/* MMX/SSE */
1543/* XXX: optimize by storing fptt and fptags in the static cpu state */
1544
1545#define SSE_DAZ             0x0040
1546#define SSE_RC_MASK         0x6000
1547#define SSE_RC_NEAR         0x0000
1548#define SSE_RC_DOWN         0x2000
1549#define SSE_RC_UP           0x4000
1550#define SSE_RC_CHOP         0x6000
1551#define SSE_FZ              0x8000
1552
1553void update_mxcsr_status(CPUX86State *env)
1554{
1555    uint32_t mxcsr = env->mxcsr;
1556    int rnd_type;
1557
1558    /* set rounding mode */
1559    switch (mxcsr & SSE_RC_MASK) {
1560    default:
1561    case SSE_RC_NEAR:
1562        rnd_type = float_round_nearest_even;
1563        break;
1564    case SSE_RC_DOWN:
1565        rnd_type = float_round_down;
1566        break;
1567    case SSE_RC_UP:
1568        rnd_type = float_round_up;
1569        break;
1570    case SSE_RC_CHOP:
1571        rnd_type = float_round_to_zero;
1572        break;
1573    }
1574    set_float_rounding_mode(rnd_type, &env->sse_status);
1575
1576    /* set denormals are zero */
1577    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1578
1579    /* set flush to zero */
1580    set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
1581}
1582
1583void helper_ldmxcsr(CPUX86State *env, uint32_t val)
1584{
1585    cpu_set_mxcsr(env, val);
1586}
1587
1588void helper_enter_mmx(CPUX86State *env)
1589{
1590    env->fpstt = 0;
1591    *(uint32_t *)(env->fptags) = 0;
1592    *(uint32_t *)(env->fptags + 4) = 0;
1593}
1594
1595void helper_emms(CPUX86State *env)
1596{
1597    /* set to empty state */
1598    *(uint32_t *)(env->fptags) = 0x01010101;
1599    *(uint32_t *)(env->fptags + 4) = 0x01010101;
1600}
1601
1602/* XXX: suppress */
1603void helper_movq(CPUX86State *env, void *d, void *s)
1604{
1605    *(uint64_t *)d = *(uint64_t *)s;
1606}
1607
1608#define SHIFT 0
1609#include "ops_sse.h"
1610
1611#define SHIFT 1
1612#include "ops_sse.h"
1613