qemu/target-i386/fpu_helper.c
<<
>>
Prefs
   1/*
   2 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include <math.h>
  22#include "cpu.h"
  23#include "exec/helper-proto.h"
  24#include "qemu/host-utils.h"
  25#include "exec/cpu_ldst.h"
  26
  27#define FPU_RC_MASK         0xc00
  28#define FPU_RC_NEAR         0x000
  29#define FPU_RC_DOWN         0x400
  30#define FPU_RC_UP           0x800
  31#define FPU_RC_CHOP         0xc00
  32
  33#define MAXTAN 9223372036854775808.0
  34
  35/* the following deal with x86 long double-precision numbers */
  36#define MAXEXPD 0x7fff
  37#define EXPBIAS 16383
  38#define EXPD(fp)        (fp.l.upper & 0x7fff)
  39#define SIGND(fp)       ((fp.l.upper) & 0x8000)
  40#define MANTD(fp)       (fp.l.lower)
  41#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
  42
  43#define FPUS_IE (1 << 0)
  44#define FPUS_DE (1 << 1)
  45#define FPUS_ZE (1 << 2)
  46#define FPUS_OE (1 << 3)
  47#define FPUS_UE (1 << 4)
  48#define FPUS_PE (1 << 5)
  49#define FPUS_SF (1 << 6)
  50#define FPUS_SE (1 << 7)
  51#define FPUS_B  (1 << 15)
  52
  53#define FPUC_EM 0x3f
  54
  55#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
  56#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
  57#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
  58
  59static inline void fpush(CPUX86State *env)
  60{
  61    env->fpstt = (env->fpstt - 1) & 7;
  62    env->fptags[env->fpstt] = 0; /* validate stack entry */
  63}
  64
  65static inline void fpop(CPUX86State *env)
  66{
  67    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
  68    env->fpstt = (env->fpstt + 1) & 7;
  69}
  70
  71static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
  72                                   uintptr_t retaddr)
  73{
  74    CPU_LDoubleU temp;
  75
  76    temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
  77    temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
  78    return temp.d;
  79}
  80
  81static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
  82                               uintptr_t retaddr)
  83{
  84    CPU_LDoubleU temp;
  85
  86    temp.d = f;
  87    cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
  88    cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
  89}
  90
  91/* x87 FPU helpers */
  92
  93static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
  94{
  95    union {
  96        float64 f64;
  97        double d;
  98    } u;
  99
 100    u.f64 = floatx80_to_float64(a, &env->fp_status);
 101    return u.d;
 102}
 103
 104static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
 105{
 106    union {
 107        float64 f64;
 108        double d;
 109    } u;
 110
 111    u.d = a;
 112    return float64_to_floatx80(u.f64, &env->fp_status);
 113}
 114
 115static void fpu_set_exception(CPUX86State *env, int mask)
 116{
 117    env->fpus |= mask;
 118    if (env->fpus & (~env->fpuc & FPUC_EM)) {
 119        env->fpus |= FPUS_SE | FPUS_B;
 120    }
 121}
 122
 123static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
 124{
 125    if (floatx80_is_zero(b)) {
 126        fpu_set_exception(env, FPUS_ZE);
 127    }
 128    return floatx80_div(a, b, &env->fp_status);
 129}
 130
 131static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
 132{
 133    if (env->cr[0] & CR0_NE_MASK) {
 134        raise_exception_ra(env, EXCP10_COPR, retaddr);
 135    }
 136#if !defined(CONFIG_USER_ONLY)
 137    else {
 138        cpu_set_ferr(env);
 139    }
 140#endif
 141}
 142
 143void helper_flds_FT0(CPUX86State *env, uint32_t val)
 144{
 145    union {
 146        float32 f;
 147        uint32_t i;
 148    } u;
 149
 150    u.i = val;
 151    FT0 = float32_to_floatx80(u.f, &env->fp_status);
 152}
 153
 154void helper_fldl_FT0(CPUX86State *env, uint64_t val)
 155{
 156    union {
 157        float64 f;
 158        uint64_t i;
 159    } u;
 160
 161    u.i = val;
 162    FT0 = float64_to_floatx80(u.f, &env->fp_status);
 163}
 164
/* Load FT0 with the signed 32-bit integer val converted to floatx80. */
void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FT0 = int32_to_floatx80(val, &env->fp_status);
}
 169
 170void helper_flds_ST0(CPUX86State *env, uint32_t val)
 171{
 172    int new_fpstt;
 173    union {
 174        float32 f;
 175        uint32_t i;
 176    } u;
 177
 178    new_fpstt = (env->fpstt - 1) & 7;
 179    u.i = val;
 180    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
 181    env->fpstt = new_fpstt;
 182    env->fptags[new_fpstt] = 0; /* validate stack entry */
 183}
 184
 185void helper_fldl_ST0(CPUX86State *env, uint64_t val)
 186{
 187    int new_fpstt;
 188    union {
 189        float64 f;
 190        uint64_t i;
 191    } u;
 192
 193    new_fpstt = (env->fpstt - 1) & 7;
 194    u.i = val;
 195    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
 196    env->fpstt = new_fpstt;
 197    env->fptags[new_fpstt] = 0; /* validate stack entry */
 198}
 199
 200void helper_fildl_ST0(CPUX86State *env, int32_t val)
 201{
 202    int new_fpstt;
 203
 204    new_fpstt = (env->fpstt - 1) & 7;
 205    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
 206    env->fpstt = new_fpstt;
 207    env->fptags[new_fpstt] = 0; /* validate stack entry */
 208}
 209
 210void helper_fildll_ST0(CPUX86State *env, int64_t val)
 211{
 212    int new_fpstt;
 213
 214    new_fpstt = (env->fpstt - 1) & 7;
 215    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
 216    env->fpstt = new_fpstt;
 217    env->fptags[new_fpstt] = 0; /* validate stack entry */
 218}
 219
 220uint32_t helper_fsts_ST0(CPUX86State *env)
 221{
 222    union {
 223        float32 f;
 224        uint32_t i;
 225    } u;
 226
 227    u.f = floatx80_to_float32(ST0, &env->fp_status);
 228    return u.i;
 229}
 230
 231uint64_t helper_fstl_ST0(CPUX86State *env)
 232{
 233    union {
 234        float64 f;
 235        uint64_t i;
 236    } u;
 237
 238    u.f = floatx80_to_float64(ST0, &env->fp_status);
 239    return u.i;
 240}
 241
 242int32_t helper_fist_ST0(CPUX86State *env)
 243{
 244    int32_t val;
 245
 246    val = floatx80_to_int32(ST0, &env->fp_status);
 247    if (val != (int16_t)val) {
 248        val = -32768;
 249    }
 250    return val;
 251}
 252
 253int32_t helper_fistl_ST0(CPUX86State *env)
 254{
 255    int32_t val;
 256    signed char old_exp_flags;
 257
 258    old_exp_flags = get_float_exception_flags(&env->fp_status);
 259    set_float_exception_flags(0, &env->fp_status);
 260
 261    val = floatx80_to_int32(ST0, &env->fp_status);
 262    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 263        val = 0x80000000;
 264    }
 265    set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 266                                | old_exp_flags, &env->fp_status);
 267    return val;
 268}
 269
 270int64_t helper_fistll_ST0(CPUX86State *env)
 271{
 272    int64_t val;
 273    signed char old_exp_flags;
 274
 275    old_exp_flags = get_float_exception_flags(&env->fp_status);
 276    set_float_exception_flags(0, &env->fp_status);
 277
 278    val = floatx80_to_int64(ST0, &env->fp_status);
 279    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 280        val = 0x8000000000000000ULL;
 281    }
 282    set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 283                                | old_exp_flags, &env->fp_status);
 284    return val;
 285}
 286
 287int32_t helper_fistt_ST0(CPUX86State *env)
 288{
 289    int32_t val;
 290
 291    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 292    if (val != (int16_t)val) {
 293        val = -32768;
 294    }
 295    return val;
 296}
 297
 298int32_t helper_fisttl_ST0(CPUX86State *env)
 299{
 300    int32_t val;
 301
 302    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 303    return val;
 304}
 305
 306int64_t helper_fisttll_ST0(CPUX86State *env)
 307{
 308    int64_t val;
 309
 310    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
 311    return val;
 312}
 313
 314void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
 315{
 316    int new_fpstt;
 317
 318    new_fpstt = (env->fpstt - 1) & 7;
 319    env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
 320    env->fpstt = new_fpstt;
 321    env->fptags[new_fpstt] = 0; /* validate stack entry */
 322}
 323
/* Store ST0 as an 80-bit value at guest address ptr (see helper_fstt()). */
void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    helper_fstt(env, ST0, ptr, GETPC());
}
 328
 329void helper_fpush(CPUX86State *env)
 330{
 331    fpush(env);
 332}
 333
 334void helper_fpop(CPUX86State *env)
 335{
 336    fpop(env);
 337}
 338
 339void helper_fdecstp(CPUX86State *env)
 340{
 341    env->fpstt = (env->fpstt - 1) & 7;
 342    env->fpus &= ~0x4700;
 343}
 344
 345void helper_fincstp(CPUX86State *env)
 346{
 347    env->fpstt = (env->fpstt + 1) & 7;
 348    env->fpus &= ~0x4700;
 349}
 350
 351/* FPU move */
 352
 353void helper_ffree_STN(CPUX86State *env, int st_index)
 354{
 355    env->fptags[(env->fpstt + st_index) & 7] = 1;
 356}
 357
/* Copy FT0 into ST0. */
void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}
 362
/* Copy stack register ST(st_index) into FT0. */
void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}
 367
/* Copy stack register ST(st_index) into ST0. */
void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}
 372
/* Copy ST0 into stack register ST(st_index). */
void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}
 377
 378void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
 379{
 380    floatx80 tmp;
 381
 382    tmp = ST(st_index);
 383    ST(st_index) = ST0;
 384    ST0 = tmp;
 385}
 386
 387/* FPU operations */
 388
 389static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
 390
 391void helper_fcom_ST0_FT0(CPUX86State *env)
 392{
 393    int ret;
 394
 395    ret = floatx80_compare(ST0, FT0, &env->fp_status);
 396    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 397}
 398
 399void helper_fucom_ST0_FT0(CPUX86State *env)
 400{
 401    int ret;
 402
 403    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 404    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 405}
 406
 407static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
 408
 409void helper_fcomi_ST0_FT0(CPUX86State *env)
 410{
 411    int eflags;
 412    int ret;
 413
 414    ret = floatx80_compare(ST0, FT0, &env->fp_status);
 415    eflags = cpu_cc_compute_all(env, CC_OP);
 416    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 417    CC_SRC = eflags;
 418}
 419
 420void helper_fucomi_ST0_FT0(CPUX86State *env)
 421{
 422    int eflags;
 423    int ret;
 424
 425    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 426    eflags = cpu_cc_compute_all(env, CC_OP);
 427    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 428    CC_SRC = eflags;
 429}
 430
/* ST0 <- ST0 + FT0, computed by floatx80_add() under env->fp_status. */
void helper_fadd_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
}
 435
/* ST0 <- ST0 * FT0, computed by floatx80_mul() under env->fp_status. */
void helper_fmul_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
}
 440
/* ST0 <- ST0 - FT0, computed by floatx80_sub() under env->fp_status. */
void helper_fsub_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
}
 445
/* Reversed subtract: ST0 <- FT0 - ST0. */
void helper_fsubr_ST0_FT0(CPUX86State *env)
{
    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
}
 450
/* ST0 <- ST0 / FT0 via helper_fdiv(), which also flags a zero divisor. */
void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}
 455
/* Reversed divide: ST0 <- FT0 / ST0 via helper_fdiv(). */
void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}
 460
 461/* fp operations between STN and ST0 */
 462
/* ST(st_index) <- ST(st_index) + ST0. */
void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
}
 467
/* ST(st_index) <- ST(st_index) * ST0. */
void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
}
 472
/* ST(st_index) <- ST(st_index) - ST0. */
void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
}
 477
/* Reversed subtract: ST(st_index) <- ST0 - ST(st_index). */
void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
}
 482
 483void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
 484{
 485    floatx80 *p;
 486
 487    p = &ST(st_index);
 488    *p = helper_fdiv(env, *p, ST0);
 489}
 490
 491void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
 492{
 493    floatx80 *p;
 494
 495    p = &ST(st_index);
 496    *p = helper_fdiv(env, ST0, *p);
 497}
 498
 499/* misc FPU operations */
/* Replace ST0 with floatx80_chs(ST0) (sign change). */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}
 504
/* Replace ST0 with floatx80_abs(ST0) (absolute value). */
void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}
 509
/* Overwrite ST0 with floatx80_one; the stack top is not moved here. */
void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}
 514
/*
 * Overwrite ST0 with the constant floatx80_l2t (presumably log2(10) --
 * confirm); the stack top is not moved here.
 */
void helper_fldl2t_ST0(CPUX86State *env)
{
    ST0 = floatx80_l2t;
}
 519
/*
 * Overwrite ST0 with the constant floatx80_l2e (presumably log2(e) --
 * confirm); the stack top is not moved here.
 */
void helper_fldl2e_ST0(CPUX86State *env)
{
    ST0 = floatx80_l2e;
}
 524
/* Overwrite ST0 with the constant floatx80_pi; the stack top is not moved. */
void helper_fldpi_ST0(CPUX86State *env)
{
    ST0 = floatx80_pi;
}
 529
/*
 * Overwrite ST0 with the constant floatx80_lg2 (presumably log10(2) --
 * confirm); the stack top is not moved here.
 */
void helper_fldlg2_ST0(CPUX86State *env)
{
    ST0 = floatx80_lg2;
}
 534
/*
 * Overwrite ST0 with the constant floatx80_ln2 (presumably ln(2) --
 * confirm); the stack top is not moved here.
 */
void helper_fldln2_ST0(CPUX86State *env)
{
    ST0 = floatx80_ln2;
}
 539
/* Overwrite ST0 with floatx80_zero; the stack top is not moved here. */
void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}
 544
/* Set FT0 to floatx80_zero. */
void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}
 549
 550uint32_t helper_fnstsw(CPUX86State *env)
 551{
 552    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 553}
 554
/* Return the control word as stored in env->fpuc. */
uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}
 559
 560void update_fp_status(CPUX86State *env)
 561{
 562    int rnd_type;
 563
 564    /* set rounding mode */
 565    switch (env->fpuc & FPU_RC_MASK) {
 566    default:
 567    case FPU_RC_NEAR:
 568        rnd_type = float_round_nearest_even;
 569        break;
 570    case FPU_RC_DOWN:
 571        rnd_type = float_round_down;
 572        break;
 573    case FPU_RC_UP:
 574        rnd_type = float_round_up;
 575        break;
 576    case FPU_RC_CHOP:
 577        rnd_type = float_round_to_zero;
 578        break;
 579    }
 580    set_float_rounding_mode(rnd_type, &env->fp_status);
 581    switch ((env->fpuc >> 8) & 3) {
 582    case 0:
 583        rnd_type = 32;
 584        break;
 585    case 2:
 586        rnd_type = 64;
 587        break;
 588    case 3:
 589    default:
 590        rnd_type = 80;
 591        break;
 592    }
 593    set_floatx80_rounding_precision(rnd_type, &env->fp_status);
 594}
 595
/* Install val as the new control word through cpu_set_fpuc(). */
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}
 600
/*
 * Clear the low byte (exception flags and summary bit) and bit 15 (busy)
 * of the status word, keeping bits 8..14.
 */
void helper_fclex(CPUX86State *env)
{
    env->fpus &= 0x7f00;
}
 605
 606void helper_fwait(CPUX86State *env)
 607{
 608    if (env->fpus & FPUS_SE) {
 609        fpu_raise_exception(env, GETPC());
 610    }
 611}
 612
 613void helper_fninit(CPUX86State *env)
 614{
 615    env->fpus = 0;
 616    env->fpstt = 0;
 617    cpu_set_fpuc(env, 0x37f);
 618    env->fptags[0] = 1;
 619    env->fptags[1] = 1;
 620    env->fptags[2] = 1;
 621    env->fptags[3] = 1;
 622    env->fptags[4] = 1;
 623    env->fptags[5] = 1;
 624    env->fptags[6] = 1;
 625    env->fptags[7] = 1;
 626}
 627
 628/* BCD ops */
 629
 630void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
 631{
 632    floatx80 tmp;
 633    uint64_t val;
 634    unsigned int v;
 635    int i;
 636
 637    val = 0;
 638    for (i = 8; i >= 0; i--) {
 639        v = cpu_ldub_data_ra(env, ptr + i, GETPC());
 640        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
 641    }
 642    tmp = int64_to_floatx80(val, &env->fp_status);
 643    if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
 644        tmp = floatx80_chs(tmp);
 645    }
 646    fpush(env);
 647    ST0 = tmp;
 648}
 649
 650void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
 651{
 652    int v;
 653    target_ulong mem_ref, mem_end;
 654    int64_t val;
 655
 656    val = floatx80_to_int64(ST0, &env->fp_status);
 657    mem_ref = ptr;
 658    mem_end = mem_ref + 9;
 659    if (val < 0) {
 660        cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
 661        val = -val;
 662    } else {
 663        cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
 664    }
 665    while (mem_ref < mem_end) {
 666        if (val == 0) {
 667            break;
 668        }
 669        v = val % 100;
 670        val = val / 100;
 671        v = ((v / 10) << 4) | (v % 10);
 672        cpu_stb_data_ra(env, mem_ref++, v, GETPC());
 673    }
 674    while (mem_ref < mem_end) {
 675        cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
 676    }
 677}
 678
 679void helper_f2xm1(CPUX86State *env)
 680{
 681    double val = floatx80_to_double(env, ST0);
 682
 683    val = pow(2.0, val) - 1.0;
 684    ST0 = double_to_floatx80(env, val);
 685}
 686
 687void helper_fyl2x(CPUX86State *env)
 688{
 689    double fptemp = floatx80_to_double(env, ST0);
 690
 691    if (fptemp > 0.0) {
 692        fptemp = log(fptemp) / log(2.0); /* log2(ST) */
 693        fptemp *= floatx80_to_double(env, ST1);
 694        ST1 = double_to_floatx80(env, fptemp);
 695        fpop(env);
 696    } else {
 697        env->fpus &= ~0x4700;
 698        env->fpus |= 0x400;
 699    }
 700}
 701
 702void helper_fptan(CPUX86State *env)
 703{
 704    double fptemp = floatx80_to_double(env, ST0);
 705
 706    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 707        env->fpus |= 0x400;
 708    } else {
 709        fptemp = tan(fptemp);
 710        ST0 = double_to_floatx80(env, fptemp);
 711        fpush(env);
 712        ST0 = floatx80_one;
 713        env->fpus &= ~0x400; /* C2 <-- 0 */
 714        /* the above code is for |arg| < 2**52 only */
 715    }
 716}
 717
 718void helper_fpatan(CPUX86State *env)
 719{
 720    double fptemp, fpsrcop;
 721
 722    fpsrcop = floatx80_to_double(env, ST1);
 723    fptemp = floatx80_to_double(env, ST0);
 724    ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
 725    fpop(env);
 726}
 727
 728void helper_fxtract(CPUX86State *env)
 729{
 730    CPU_LDoubleU temp;
 731
 732    temp.d = ST0;
 733
 734    if (floatx80_is_zero(ST0)) {
 735        /* Easy way to generate -inf and raising division by 0 exception */
 736        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
 737                           &env->fp_status);
 738        fpush(env);
 739        ST0 = temp.d;
 740    } else {
 741        int expdif;
 742
 743        expdif = EXPD(temp) - EXPBIAS;
 744        /* DP exponent bias */
 745        ST0 = int32_to_floatx80(expdif, &env->fp_status);
 746        fpush(env);
 747        BIASEXPONENT(temp);
 748        ST0 = temp.d;
 749    }
 750}
 751
 752void helper_fprem1(CPUX86State *env)
 753{
 754    double st0, st1, dblq, fpsrcop, fptemp;
 755    CPU_LDoubleU fpsrcop1, fptemp1;
 756    int expdif;
 757    signed long long int q;
 758
 759    st0 = floatx80_to_double(env, ST0);
 760    st1 = floatx80_to_double(env, ST1);
 761
 762    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
 763        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
 764        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 765        return;
 766    }
 767
 768    fpsrcop = st0;
 769    fptemp = st1;
 770    fpsrcop1.d = ST0;
 771    fptemp1.d = ST1;
 772    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
 773
 774    if (expdif < 0) {
 775        /* optimisation? taken from the AMD docs */
 776        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 777        /* ST0 is unchanged */
 778        return;
 779    }
 780
 781    if (expdif < 53) {
 782        dblq = fpsrcop / fptemp;
 783        /* round dblq towards nearest integer */
 784        dblq = rint(dblq);
 785        st0 = fpsrcop - fptemp * dblq;
 786
 787        /* convert dblq to q by truncating towards zero */
 788        if (dblq < 0.0) {
 789            q = (signed long long int)(-dblq);
 790        } else {
 791            q = (signed long long int)dblq;
 792        }
 793
 794        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 795        /* (C0,C3,C1) <-- (q2,q1,q0) */
 796        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
 797        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
 798        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
 799    } else {
 800        env->fpus |= 0x400;  /* C2 <-- 1 */
 801        fptemp = pow(2.0, expdif - 50);
 802        fpsrcop = (st0 / st1) / fptemp;
 803        /* fpsrcop = integer obtained by chopping */
 804        fpsrcop = (fpsrcop < 0.0) ?
 805                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
 806        st0 -= (st1 * fpsrcop * fptemp);
 807    }
 808    ST0 = double_to_floatx80(env, st0);
 809}
 810
 811void helper_fprem(CPUX86State *env)
 812{
 813    double st0, st1, dblq, fpsrcop, fptemp;
 814    CPU_LDoubleU fpsrcop1, fptemp1;
 815    int expdif;
 816    signed long long int q;
 817
 818    st0 = floatx80_to_double(env, ST0);
 819    st1 = floatx80_to_double(env, ST1);
 820
 821    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
 822        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
 823        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 824        return;
 825    }
 826
 827    fpsrcop = st0;
 828    fptemp = st1;
 829    fpsrcop1.d = ST0;
 830    fptemp1.d = ST1;
 831    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
 832
 833    if (expdif < 0) {
 834        /* optimisation? taken from the AMD docs */
 835        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 836        /* ST0 is unchanged */
 837        return;
 838    }
 839
 840    if (expdif < 53) {
 841        dblq = fpsrcop / fptemp; /* ST0 / ST1 */
 842        /* round dblq towards zero */
 843        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
 844        st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
 845
 846        /* convert dblq to q by truncating towards zero */
 847        if (dblq < 0.0) {
 848            q = (signed long long int)(-dblq);
 849        } else {
 850            q = (signed long long int)dblq;
 851        }
 852
 853        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 854        /* (C0,C3,C1) <-- (q2,q1,q0) */
 855        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
 856        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
 857        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
 858    } else {
 859        int N = 32 + (expdif % 32); /* as per AMD docs */
 860
 861        env->fpus |= 0x400;  /* C2 <-- 1 */
 862        fptemp = pow(2.0, (double)(expdif - N));
 863        fpsrcop = (st0 / st1) / fptemp;
 864        /* fpsrcop = integer obtained by chopping */
 865        fpsrcop = (fpsrcop < 0.0) ?
 866                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
 867        st0 -= (st1 * fpsrcop * fptemp);
 868    }
 869    ST0 = double_to_floatx80(env, st0);
 870}
 871
 872void helper_fyl2xp1(CPUX86State *env)
 873{
 874    double fptemp = floatx80_to_double(env, ST0);
 875
 876    if ((fptemp + 1.0) > 0.0) {
 877        fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
 878        fptemp *= floatx80_to_double(env, ST1);
 879        ST1 = double_to_floatx80(env, fptemp);
 880        fpop(env);
 881    } else {
 882        env->fpus &= ~0x4700;
 883        env->fpus |= 0x400;
 884    }
 885}
 886
 887void helper_fsqrt(CPUX86State *env)
 888{
 889    if (floatx80_is_neg(ST0)) {
 890        env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
 891        env->fpus |= 0x400;
 892    }
 893    ST0 = floatx80_sqrt(ST0, &env->fp_status);
 894}
 895
 896void helper_fsincos(CPUX86State *env)
 897{
 898    double fptemp = floatx80_to_double(env, ST0);
 899
 900    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 901        env->fpus |= 0x400;
 902    } else {
 903        ST0 = double_to_floatx80(env, sin(fptemp));
 904        fpush(env);
 905        ST0 = double_to_floatx80(env, cos(fptemp));
 906        env->fpus &= ~0x400;  /* C2 <-- 0 */
 907        /* the above code is for |arg| < 2**63 only */
 908    }
 909}
 910
/* Round ST0 to an integral value via floatx80_round_to_int(). */
void helper_frndint(CPUX86State *env)
{
    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
}
 915
 916void helper_fscale(CPUX86State *env)
 917{
 918    if (floatx80_is_any_nan(ST1)) {
 919        ST0 = ST1;
 920    } else {
 921        int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
 922        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
 923    }
 924}
 925
 926void helper_fsin(CPUX86State *env)
 927{
 928    double fptemp = floatx80_to_double(env, ST0);
 929
 930    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 931        env->fpus |= 0x400;
 932    } else {
 933        ST0 = double_to_floatx80(env, sin(fptemp));
 934        env->fpus &= ~0x400;  /* C2 <-- 0 */
 935        /* the above code is for |arg| < 2**53 only */
 936    }
 937}
 938
 939void helper_fcos(CPUX86State *env)
 940{
 941    double fptemp = floatx80_to_double(env, ST0);
 942
 943    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 944        env->fpus |= 0x400;
 945    } else {
 946        ST0 = double_to_floatx80(env, cos(fptemp));
 947        env->fpus &= ~0x400;  /* C2 <-- 0 */
 948        /* the above code is for |arg| < 2**63 only */
 949    }
 950}
 951
 952void helper_fxam_ST0(CPUX86State *env)
 953{
 954    CPU_LDoubleU temp;
 955    int expdif;
 956
 957    temp.d = ST0;
 958
 959    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 960    if (SIGND(temp)) {
 961        env->fpus |= 0x200; /* C1 <-- 1 */
 962    }
 963
 964    /* XXX: test fptags too */
 965    expdif = EXPD(temp);
 966    if (expdif == MAXEXPD) {
 967        if (MANTD(temp) == 0x8000000000000000ULL) {
 968            env->fpus |= 0x500; /* Infinity */
 969        } else {
 970            env->fpus |= 0x100; /* NaN */
 971        }
 972    } else if (expdif == 0) {
 973        if (MANTD(temp) == 0) {
 974            env->fpus |=  0x4000; /* Zero */
 975        } else {
 976            env->fpus |= 0x4400; /* Denormal */
 977        }
 978    } else {
 979        env->fpus |= 0x400;
 980    }
 981}
 982
 983static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
 984                      uintptr_t retaddr)
 985{
 986    int fpus, fptag, exp, i;
 987    uint64_t mant;
 988    CPU_LDoubleU tmp;
 989
 990    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 991    fptag = 0;
 992    for (i = 7; i >= 0; i--) {
 993        fptag <<= 2;
 994        if (env->fptags[i]) {
 995            fptag |= 3;
 996        } else {
 997            tmp.d = env->fpregs[i].d;
 998            exp = EXPD(tmp);
 999            mant = MANTD(tmp);
1000            if (exp == 0 && mant == 0) {
1001                /* zero */
1002                fptag |= 1;
1003            } else if (exp == 0 || exp == MAXEXPD
1004                       || (mant & (1LL << 63)) == 0) {
1005                /* NaNs, infinity, denormal */
1006                fptag |= 2;
1007            }
1008        }
1009    }
1010    if (data32) {
1011        /* 32 bit */
1012        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1013        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1014        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1015        cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1016        cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1017        cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1018        cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1019    } else {
1020        /* 16 bit */
1021        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1022        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1023        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1024        cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1025        cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1026        cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1027        cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1028    }
1029}
1030
/* Store the FPU environment at ptr; see do_fstenv() for the layout. */
void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fstenv(env, ptr, data32, GETPC());
}
1035
1036static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1037                      uintptr_t retaddr)
1038{
1039    int i, fpus, fptag;
1040
1041    if (data32) {
1042        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1043        fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1044        fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1045    } else {
1046        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1047        fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1048        fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1049    }
1050    env->fpstt = (fpus >> 11) & 7;
1051    env->fpus = fpus & ~0x3800;
1052    for (i = 0; i < 8; i++) {
1053        env->fptags[i] = ((fptag & 3) == 3);
1054        fptag >>= 2;
1055    }
1056}
1057
/* Load the FPU environment from ptr; see do_fldenv() for the layout. */
void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fldenv(env, ptr, data32, GETPC());
}
1062
1063void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1064{
1065    floatx80 tmp;
1066    int i;
1067
1068    do_fstenv(env, ptr, data32, GETPC());
1069
1070    ptr += (14 << data32);
1071    for (i = 0; i < 8; i++) {
1072        tmp = ST(i);
1073        helper_fstt(env, tmp, ptr, GETPC());
1074        ptr += 10;
1075    }
1076
1077    /* fninit */
1078    env->fpus = 0;
1079    env->fpstt = 0;
1080    cpu_set_fpuc(env, 0x37f);
1081    env->fptags[0] = 1;
1082    env->fptags[1] = 1;
1083    env->fptags[2] = 1;
1084    env->fptags[3] = 1;
1085    env->fptags[4] = 1;
1086    env->fptags[5] = 1;
1087    env->fptags[6] = 1;
1088    env->fptags[7] = 1;
1089}
1090
1091void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1092{
1093    floatx80 tmp;
1094    int i;
1095
1096    do_fldenv(env, ptr, data32, GETPC());
1097    ptr += (14 << data32);
1098
1099    for (i = 0; i < 8; i++) {
1100        tmp = helper_fldt(env, ptr, GETPC());
1101        ST(i) = tmp;
1102        ptr += 10;
1103    }
1104}
1105
#ifdef CONFIG_USER_ONLY
/* User-mode entry point that forwards to helper_fsave(). */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}

/* User-mode entry point that forwards to helper_frstor(). */
void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
#endif /* CONFIG_USER_ONLY */
1117
1118static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1119{
1120    int fpus, fptag, i;
1121    target_ulong addr;
1122
1123    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1124    fptag = 0;
1125    for (i = 0; i < 8; i++) {
1126        fptag |= (env->fptags[i] << i);
1127    }
1128    cpu_stw_data_ra(env, ptr, env->fpuc, ra);
1129    cpu_stw_data_ra(env, ptr + 2, fpus, ra);
1130    cpu_stw_data_ra(env, ptr + 4, fptag ^ 0xff, ra);
1131
1132    /* In 32-bit mode this is eip, sel, dp, sel.
1133       In 64-bit mode this is rip, rdp.
1134       But in either case we don't write actual data, just zeros.  */
1135    cpu_stq_data_ra(env, ptr + 0x08, 0, ra); /* eip+sel; rip */
1136    cpu_stq_data_ra(env, ptr + 0x10, 0, ra); /* edp+sel; rdp */
1137
1138    addr = ptr + 0x20;
1139    for (i = 0; i < 8; i++) {
1140        floatx80 tmp = ST(i);
1141        helper_fstt(env, tmp, addr, ra);
1142        addr += 16;
1143    }
1144}
1145
1146static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1147{
1148    cpu_stl_data_ra(env, ptr + 0x18, env->mxcsr, ra); /* mxcsr */
1149    cpu_stl_data_ra(env, ptr + 0x1c, 0x0000ffff, ra); /* mxcsr_mask */
1150}
1151
1152static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1153{
1154    int i, nb_xmm_regs;
1155    target_ulong addr;
1156
1157    if (env->hflags & HF_CS64_MASK) {
1158        nb_xmm_regs = 16;
1159    } else {
1160        nb_xmm_regs = 8;
1161    }
1162
1163    addr = ptr + 0xa0;
1164    for (i = 0; i < nb_xmm_regs; i++) {
1165        cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1166        cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1167        addr += 16;
1168    }
1169}
1170
1171static void do_xsave_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
1172{
1173    int i;
1174
1175    for (i = 0; i < 4; i++, addr += 16) {
1176        cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1177        cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1178    }
1179}
1180
1181static void do_xsave_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
1182{
1183    cpu_stq_data_ra(env, addr, env->bndcs_regs.cfgu, ra);
1184    cpu_stq_data_ra(env, addr + 8, env->bndcs_regs.sts, ra);
1185}
1186
1187static void do_xsave_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra)
1188{
1189    cpu_stq_data_ra(env, addr, env->pkru, ra);
1190}
1191
1192void helper_fxsave(CPUX86State *env, target_ulong ptr)
1193{
1194    uintptr_t ra = GETPC();
1195
1196    /* The operand must be 16 byte aligned */
1197    if (ptr & 0xf) {
1198        raise_exception_ra(env, EXCP0D_GPF, ra);
1199    }
1200
1201    do_xsave_fpu(env, ptr, ra);
1202
1203    if (env->cr[4] & CR4_OSFXSR_MASK) {
1204        do_xsave_mxcsr(env, ptr, ra);
1205        /* Fast FXSAVE leaves out the XMM registers */
1206        if (!(env->efer & MSR_EFER_FFXSR)
1207            || (env->hflags & HF_CPL_MASK)
1208            || !(env->hflags & HF_LMA_MASK)) {
1209            do_xsave_sse(env, ptr, ra);
1210        }
1211    }
1212}
1213
1214static uint64_t get_xinuse(CPUX86State *env)
1215{
1216    uint64_t inuse = -1;
1217
1218    /* For the most part, we don't track XINUSE.  We could calculate it
1219       here for all components, but it's probably less work to simply
1220       indicate in use.  That said, the state of BNDREGS is important
1221       enough to track in HFLAGS, so we might as well use that here.  */
1222    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1223       inuse &= ~XSTATE_BNDREGS_MASK;
1224    }
1225    return inuse;
1226}
1227
1228static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1229                     uint64_t inuse, uint64_t opt, uintptr_t ra)
1230{
1231    uint64_t old_bv, new_bv;
1232
1233    /* The OS must have enabled XSAVE.  */
1234    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1235        raise_exception_ra(env, EXCP06_ILLOP, ra);
1236    }
1237
1238    /* The operand must be 64 byte aligned.  */
1239    if (ptr & 63) {
1240        raise_exception_ra(env, EXCP0D_GPF, ra);
1241    }
1242
1243    /* Never save anything not enabled by XCR0.  */
1244    rfbm &= env->xcr0;
1245    opt &= rfbm;
1246
1247    if (opt & XSTATE_FP_MASK) {
1248        do_xsave_fpu(env, ptr, ra);
1249    }
1250    if (rfbm & XSTATE_SSE_MASK) {
1251        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
1252        do_xsave_mxcsr(env, ptr, ra);
1253    }
1254    if (opt & XSTATE_SSE_MASK) {
1255        do_xsave_sse(env, ptr, ra);
1256    }
1257    if (opt & XSTATE_BNDREGS_MASK) {
1258        target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
1259        do_xsave_bndregs(env, ptr + off, ra);
1260    }
1261    if (opt & XSTATE_BNDCSR_MASK) {
1262        target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
1263        do_xsave_bndcsr(env, ptr + off, ra);
1264    }
1265    if (opt & XSTATE_PKRU_MASK) {
1266        target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset;
1267        do_xsave_pkru(env, ptr + off, ra);
1268    }
1269
1270    /* Update the XSTATE_BV field.  */
1271    old_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
1272    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1273    cpu_stq_data_ra(env, ptr + 512, new_bv, ra);
1274}
1275
1276void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1277{
1278    do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1279}
1280
1281void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1282{
1283    uint64_t inuse = get_xinuse(env);
1284    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1285}
1286
1287static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1288{
1289    int i, fpus, fptag;
1290    target_ulong addr;
1291
1292    cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, ra));
1293    fpus = cpu_lduw_data_ra(env, ptr + 2, ra);
1294    fptag = cpu_lduw_data_ra(env, ptr + 4, ra);
1295    env->fpstt = (fpus >> 11) & 7;
1296    env->fpus = fpus & ~0x3800;
1297    fptag ^= 0xff;
1298    for (i = 0; i < 8; i++) {
1299        env->fptags[i] = ((fptag >> i) & 1);
1300    }
1301
1302    addr = ptr + 0x20;
1303    for (i = 0; i < 8; i++) {
1304        floatx80 tmp = helper_fldt(env, addr, ra);
1305        ST(i) = tmp;
1306        addr += 16;
1307    }
1308}
1309
1310static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1311{
1312    cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + 0x18, ra));
1313}
1314
1315static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1316{
1317    int i, nb_xmm_regs;
1318    target_ulong addr;
1319
1320    if (env->hflags & HF_CS64_MASK) {
1321        nb_xmm_regs = 16;
1322    } else {
1323        nb_xmm_regs = 8;
1324    }
1325
1326    addr = ptr + 0xa0;
1327    for (i = 0; i < nb_xmm_regs; i++) {
1328        env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1329        env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1330        addr += 16;
1331    }
1332}
1333
1334static void do_xrstor_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
1335{
1336    int i;
1337
1338    for (i = 0; i < 4; i++, addr += 16) {
1339        env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1340        env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1341    }
1342}
1343
1344static void do_xrstor_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
1345{
1346    /* FIXME: Extend highest implemented bit of linear address.  */
1347    env->bndcs_regs.cfgu = cpu_ldq_data_ra(env, addr, ra);
1348    env->bndcs_regs.sts = cpu_ldq_data_ra(env, addr + 8, ra);
1349}
1350
1351static void do_xrstor_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra)
1352{
1353    env->pkru = cpu_ldq_data_ra(env, addr, ra);
1354}
1355
1356void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1357{
1358    uintptr_t ra = GETPC();
1359
1360    /* The operand must be 16 byte aligned */
1361    if (ptr & 0xf) {
1362        raise_exception_ra(env, EXCP0D_GPF, ra);
1363    }
1364
1365    do_xrstor_fpu(env, ptr, ra);
1366
1367    if (env->cr[4] & CR4_OSFXSR_MASK) {
1368        do_xrstor_mxcsr(env, ptr, ra);
1369        /* Fast FXRSTOR leaves out the XMM registers */
1370        if (!(env->efer & MSR_EFER_FFXSR)
1371            || (env->hflags & HF_CPL_MASK)
1372            || !(env->hflags & HF_LMA_MASK)) {
1373            do_xrstor_sse(env, ptr, ra);
1374        }
1375    }
1376}
1377
1378void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1379{
1380    uintptr_t ra = GETPC();
1381    uint64_t xstate_bv, xcomp_bv0, xcomp_bv1;
1382
1383    rfbm &= env->xcr0;
1384
1385    /* The OS must have enabled XSAVE.  */
1386    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1387        raise_exception_ra(env, EXCP06_ILLOP, ra);
1388    }
1389
1390    /* The operand must be 64 byte aligned.  */
1391    if (ptr & 63) {
1392        raise_exception_ra(env, EXCP0D_GPF, ra);
1393    }
1394
1395    xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
1396
1397    if ((int64_t)xstate_bv < 0) {
1398        /* FIXME: Compact form.  */
1399        raise_exception_ra(env, EXCP0D_GPF, ra);
1400    }
1401
1402    /* Standard form.  */
1403
1404    /* The XSTATE field must not set bits not present in XCR0.  */
1405    if (xstate_bv & ~env->xcr0) {
1406        raise_exception_ra(env, EXCP0D_GPF, ra);
1407    }
1408
1409    /* The XCOMP field must be zero.  */
1410    xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra);
1411    xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra);
1412    if (xcomp_bv0 || xcomp_bv1) {
1413        raise_exception_ra(env, EXCP0D_GPF, ra);
1414    }
1415
1416    if (rfbm & XSTATE_FP_MASK) {
1417        if (xstate_bv & XSTATE_FP_MASK) {
1418            do_xrstor_fpu(env, ptr, ra);
1419        } else {
1420            helper_fninit(env);
1421            memset(env->fpregs, 0, sizeof(env->fpregs));
1422        }
1423    }
1424    if (rfbm & XSTATE_SSE_MASK) {
1425        /* Note that the standard form of XRSTOR loads MXCSR from memory
1426           whether or not the XSTATE_BV bit is set.  */
1427        do_xrstor_mxcsr(env, ptr, ra);
1428        if (xstate_bv & XSTATE_SSE_MASK) {
1429            do_xrstor_sse(env, ptr, ra);
1430        } else {
1431            /* ??? When AVX is implemented, we may have to be more
1432               selective in the clearing.  */
1433            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1434        }
1435    }
1436    if (rfbm & XSTATE_BNDREGS_MASK) {
1437        if (xstate_bv & XSTATE_BNDREGS_MASK) {
1438            target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset;
1439            do_xrstor_bndregs(env, ptr + off, ra);
1440            env->hflags |= HF_MPX_IU_MASK;
1441        } else {
1442            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1443            env->hflags &= ~HF_MPX_IU_MASK;
1444        }
1445    }
1446    if (rfbm & XSTATE_BNDCSR_MASK) {
1447        if (xstate_bv & XSTATE_BNDCSR_MASK) {
1448            target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset;
1449            do_xrstor_bndcsr(env, ptr + off, ra);
1450        } else {
1451            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1452        }
1453        cpu_sync_bndcs_hflags(env);
1454    }
1455    if (rfbm & XSTATE_PKRU_MASK) {
1456        uint64_t old_pkru = env->pkru;
1457        if (xstate_bv & XSTATE_PKRU_MASK) {
1458            target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset;
1459            do_xrstor_pkru(env, ptr + off, ra);
1460        } else {
1461            env->pkru = 0;
1462        }
1463        if (env->pkru != old_pkru) {
1464            CPUState *cs = CPU(x86_env_get_cpu(env));
1465            tlb_flush(cs, 1);
1466        }
1467    }
1468}
1469
1470uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1471{
1472    /* The OS must have enabled XSAVE.  */
1473    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1474        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1475    }
1476
1477    switch (ecx) {
1478    case 0:
1479        return env->xcr0;
1480    case 1:
1481        if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1482            return env->xcr0 & get_xinuse(env);
1483        }
1484        break;
1485    }
1486    raise_exception_ra(env, EXCP0D_GPF, GETPC());
1487}
1488
1489void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1490{
1491    uint32_t dummy, ena_lo, ena_hi;
1492    uint64_t ena;
1493
1494    /* The OS must have enabled XSAVE.  */
1495    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1496        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1497    }
1498
1499    /* Only XCR0 is defined at present; the FPU may not be disabled.  */
1500    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1501        goto do_gpf;
1502    }
1503
1504    /* Disallow enabling unimplemented features.  */
1505    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1506    ena = ((uint64_t)ena_hi << 32) | ena_lo;
1507    if (mask & ~ena) {
1508        goto do_gpf;
1509    }
1510
1511    /* Disallow enabling only half of MPX.  */
1512    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1513        & XSTATE_BNDCSR_MASK) {
1514        goto do_gpf;
1515    }
1516
1517    env->xcr0 = mask;
1518    cpu_sync_bndcs_hflags(env);
1519    return;
1520
1521 do_gpf:
1522    raise_exception_ra(env, EXCP0D_GPF, GETPC());
1523}
1524
1525void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
1526{
1527    CPU_LDoubleU temp;
1528
1529    temp.d = f;
1530    *pmant = temp.l.lower;
1531    *pexp = temp.l.upper;
1532}
1533
1534floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
1535{
1536    CPU_LDoubleU temp;
1537
1538    temp.l.upper = upper;
1539    temp.l.lower = mant;
1540    return temp.d;
1541}
1542
1543/* MMX/SSE */
1544/* XXX: optimize by storing fptt and fptags in the static cpu state */
1545
1546#define SSE_DAZ             0x0040
1547#define SSE_RC_MASK         0x6000
1548#define SSE_RC_NEAR         0x0000
1549#define SSE_RC_DOWN         0x2000
1550#define SSE_RC_UP           0x4000
1551#define SSE_RC_CHOP         0x6000
1552#define SSE_FZ              0x8000
1553
1554void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr)
1555{
1556    int rnd_type;
1557
1558    env->mxcsr = mxcsr;
1559
1560    /* set rounding mode */
1561    switch (mxcsr & SSE_RC_MASK) {
1562    default:
1563    case SSE_RC_NEAR:
1564        rnd_type = float_round_nearest_even;
1565        break;
1566    case SSE_RC_DOWN:
1567        rnd_type = float_round_down;
1568        break;
1569    case SSE_RC_UP:
1570        rnd_type = float_round_up;
1571        break;
1572    case SSE_RC_CHOP:
1573        rnd_type = float_round_to_zero;
1574        break;
1575    }
1576    set_float_rounding_mode(rnd_type, &env->sse_status);
1577
1578    /* set denormals are zero */
1579    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1580
1581    /* set flush to zero */
1582    set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
1583}
1584
1585void cpu_set_fpuc(CPUX86State *env, uint16_t val)
1586{
1587    env->fpuc = val;
1588    update_fp_status(env);
1589}
1590
1591void helper_ldmxcsr(CPUX86State *env, uint32_t val)
1592{
1593    cpu_set_mxcsr(env, val);
1594}
1595
1596void helper_enter_mmx(CPUX86State *env)
1597{
1598    env->fpstt = 0;
1599    *(uint32_t *)(env->fptags) = 0;
1600    *(uint32_t *)(env->fptags + 4) = 0;
1601}
1602
1603void helper_emms(CPUX86State *env)
1604{
1605    /* set to empty state */
1606    *(uint32_t *)(env->fptags) = 0x01010101;
1607    *(uint32_t *)(env->fptags + 4) = 0x01010101;
1608}
1609
1610/* XXX: suppress */
1611void helper_movq(CPUX86State *env, void *d, void *s)
1612{
1613    *(uint64_t *)d = *(uint64_t *)s;
1614}
1615
1616#define SHIFT 0
1617#include "ops_sse.h"
1618
1619#define SHIFT 1
1620#include "ops_sse.h"
1621