qemu/target/i386/fpu_helper.c
<<
>>
Prefs
   1/*
   2 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include <math.h>
  22#include "cpu.h"
  23#include "exec/helper-proto.h"
  24#include "qemu/host-utils.h"
  25#include "exec/exec-all.h"
  26#include "exec/cpu_ldst.h"
  27#include "fpu/softfloat.h"
  28
  29#define FPU_RC_MASK         0xc00
  30#define FPU_RC_NEAR         0x000
  31#define FPU_RC_DOWN         0x400
  32#define FPU_RC_UP           0x800
  33#define FPU_RC_CHOP         0xc00
  34
  35#define MAXTAN 9223372036854775808.0
  36
  37/* the following deal with x86 long double-precision numbers */
  38#define MAXEXPD 0x7fff
  39#define EXPBIAS 16383
  40#define EXPD(fp)        (fp.l.upper & 0x7fff)
  41#define SIGND(fp)       ((fp.l.upper) & 0x8000)
  42#define MANTD(fp)       (fp.l.lower)
  43#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
  44
  45#define FPUS_IE (1 << 0)
  46#define FPUS_DE (1 << 1)
  47#define FPUS_ZE (1 << 2)
  48#define FPUS_OE (1 << 3)
  49#define FPUS_UE (1 << 4)
  50#define FPUS_PE (1 << 5)
  51#define FPUS_SF (1 << 6)
  52#define FPUS_SE (1 << 7)
  53#define FPUS_B  (1 << 15)
  54
  55#define FPUC_EM 0x3f
  56
  57#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
  58#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
  59#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
  60
  61static inline void fpush(CPUX86State *env)
  62{
  63    env->fpstt = (env->fpstt - 1) & 7;
  64    env->fptags[env->fpstt] = 0; /* validate stack entry */
  65}
  66
  67static inline void fpop(CPUX86State *env)
  68{
  69    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
  70    env->fpstt = (env->fpstt + 1) & 7;
  71}
  72
  73static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
  74                                   uintptr_t retaddr)
  75{
  76    CPU_LDoubleU temp;
  77
  78    temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
  79    temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
  80    return temp.d;
  81}
  82
  83static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
  84                               uintptr_t retaddr)
  85{
  86    CPU_LDoubleU temp;
  87
  88    temp.d = f;
  89    cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
  90    cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
  91}
  92
  93/* x87 FPU helpers */
  94
  95static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
  96{
  97    union {
  98        float64 f64;
  99        double d;
 100    } u;
 101
 102    u.f64 = floatx80_to_float64(a, &env->fp_status);
 103    return u.d;
 104}
 105
 106static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
 107{
 108    union {
 109        float64 f64;
 110        double d;
 111    } u;
 112
 113    u.d = a;
 114    return float64_to_floatx80(u.f64, &env->fp_status);
 115}
 116
 117static void fpu_set_exception(CPUX86State *env, int mask)
 118{
 119    env->fpus |= mask;
 120    if (env->fpus & (~env->fpuc & FPUC_EM)) {
 121        env->fpus |= FPUS_SE | FPUS_B;
 122    }
 123}
 124
 125static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
 126{
 127    if (floatx80_is_zero(b)) {
 128        fpu_set_exception(env, FPUS_ZE);
 129    }
 130    return floatx80_div(a, b, &env->fp_status);
 131}
 132
 133static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
 134{
 135    if (env->cr[0] & CR0_NE_MASK) {
 136        raise_exception_ra(env, EXCP10_COPR, retaddr);
 137    }
 138#if !defined(CONFIG_USER_ONLY)
 139    else {
 140        cpu_set_ferr(env);
 141    }
 142#endif
 143}
 144
 145void helper_flds_FT0(CPUX86State *env, uint32_t val)
 146{
 147    union {
 148        float32 f;
 149        uint32_t i;
 150    } u;
 151
 152    u.i = val;
 153    FT0 = float32_to_floatx80(u.f, &env->fp_status);
 154}
 155
 156void helper_fldl_FT0(CPUX86State *env, uint64_t val)
 157{
 158    union {
 159        float64 f;
 160        uint64_t i;
 161    } u;
 162
 163    u.i = val;
 164    FT0 = float64_to_floatx80(u.f, &env->fp_status);
 165}
 166
 167void helper_fildl_FT0(CPUX86State *env, int32_t val)
 168{
 169    FT0 = int32_to_floatx80(val, &env->fp_status);
 170}
 171
 172void helper_flds_ST0(CPUX86State *env, uint32_t val)
 173{
 174    int new_fpstt;
 175    union {
 176        float32 f;
 177        uint32_t i;
 178    } u;
 179
 180    new_fpstt = (env->fpstt - 1) & 7;
 181    u.i = val;
 182    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
 183    env->fpstt = new_fpstt;
 184    env->fptags[new_fpstt] = 0; /* validate stack entry */
 185}
 186
 187void helper_fldl_ST0(CPUX86State *env, uint64_t val)
 188{
 189    int new_fpstt;
 190    union {
 191        float64 f;
 192        uint64_t i;
 193    } u;
 194
 195    new_fpstt = (env->fpstt - 1) & 7;
 196    u.i = val;
 197    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
 198    env->fpstt = new_fpstt;
 199    env->fptags[new_fpstt] = 0; /* validate stack entry */
 200}
 201
 202void helper_fildl_ST0(CPUX86State *env, int32_t val)
 203{
 204    int new_fpstt;
 205
 206    new_fpstt = (env->fpstt - 1) & 7;
 207    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
 208    env->fpstt = new_fpstt;
 209    env->fptags[new_fpstt] = 0; /* validate stack entry */
 210}
 211
 212void helper_fildll_ST0(CPUX86State *env, int64_t val)
 213{
 214    int new_fpstt;
 215
 216    new_fpstt = (env->fpstt - 1) & 7;
 217    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
 218    env->fpstt = new_fpstt;
 219    env->fptags[new_fpstt] = 0; /* validate stack entry */
 220}
 221
 222uint32_t helper_fsts_ST0(CPUX86State *env)
 223{
 224    union {
 225        float32 f;
 226        uint32_t i;
 227    } u;
 228
 229    u.f = floatx80_to_float32(ST0, &env->fp_status);
 230    return u.i;
 231}
 232
 233uint64_t helper_fstl_ST0(CPUX86State *env)
 234{
 235    union {
 236        float64 f;
 237        uint64_t i;
 238    } u;
 239
 240    u.f = floatx80_to_float64(ST0, &env->fp_status);
 241    return u.i;
 242}
 243
 244int32_t helper_fist_ST0(CPUX86State *env)
 245{
 246    int32_t val;
 247
 248    val = floatx80_to_int32(ST0, &env->fp_status);
 249    if (val != (int16_t)val) {
 250        val = -32768;
 251    }
 252    return val;
 253}
 254
 255int32_t helper_fistl_ST0(CPUX86State *env)
 256{
 257    int32_t val;
 258    signed char old_exp_flags;
 259
 260    old_exp_flags = get_float_exception_flags(&env->fp_status);
 261    set_float_exception_flags(0, &env->fp_status);
 262
 263    val = floatx80_to_int32(ST0, &env->fp_status);
 264    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 265        val = 0x80000000;
 266    }
 267    set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 268                                | old_exp_flags, &env->fp_status);
 269    return val;
 270}
 271
 272int64_t helper_fistll_ST0(CPUX86State *env)
 273{
 274    int64_t val;
 275    signed char old_exp_flags;
 276
 277    old_exp_flags = get_float_exception_flags(&env->fp_status);
 278    set_float_exception_flags(0, &env->fp_status);
 279
 280    val = floatx80_to_int64(ST0, &env->fp_status);
 281    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
 282        val = 0x8000000000000000ULL;
 283    }
 284    set_float_exception_flags(get_float_exception_flags(&env->fp_status)
 285                                | old_exp_flags, &env->fp_status);
 286    return val;
 287}
 288
 289int32_t helper_fistt_ST0(CPUX86State *env)
 290{
 291    int32_t val;
 292
 293    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 294    if (val != (int16_t)val) {
 295        val = -32768;
 296    }
 297    return val;
 298}
 299
 300int32_t helper_fisttl_ST0(CPUX86State *env)
 301{
 302    return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
 303}
 304
 305int64_t helper_fisttll_ST0(CPUX86State *env)
 306{
 307    return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
 308}
 309
 310void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
 311{
 312    int new_fpstt;
 313
 314    new_fpstt = (env->fpstt - 1) & 7;
 315    env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
 316    env->fpstt = new_fpstt;
 317    env->fptags[new_fpstt] = 0; /* validate stack entry */
 318}
 319
 320void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
 321{
 322    helper_fstt(env, ST0, ptr, GETPC());
 323}
 324
 325void helper_fpush(CPUX86State *env)
 326{
 327    fpush(env);
 328}
 329
 330void helper_fpop(CPUX86State *env)
 331{
 332    fpop(env);
 333}
 334
 335void helper_fdecstp(CPUX86State *env)
 336{
 337    env->fpstt = (env->fpstt - 1) & 7;
 338    env->fpus &= ~0x4700;
 339}
 340
 341void helper_fincstp(CPUX86State *env)
 342{
 343    env->fpstt = (env->fpstt + 1) & 7;
 344    env->fpus &= ~0x4700;
 345}
 346
 347/* FPU move */
 348
 349void helper_ffree_STN(CPUX86State *env, int st_index)
 350{
 351    env->fptags[(env->fpstt + st_index) & 7] = 1;
 352}
 353
 354void helper_fmov_ST0_FT0(CPUX86State *env)
 355{
 356    ST0 = FT0;
 357}
 358
 359void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
 360{
 361    FT0 = ST(st_index);
 362}
 363
 364void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
 365{
 366    ST0 = ST(st_index);
 367}
 368
 369void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
 370{
 371    ST(st_index) = ST0;
 372}
 373
 374void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
 375{
 376    floatx80 tmp;
 377
 378    tmp = ST(st_index);
 379    ST(st_index) = ST0;
 380    ST0 = tmp;
 381}
 382
 383/* FPU operations */
 384
 385static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
 386
 387void helper_fcom_ST0_FT0(CPUX86State *env)
 388{
 389    int ret;
 390
 391    ret = floatx80_compare(ST0, FT0, &env->fp_status);
 392    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 393}
 394
 395void helper_fucom_ST0_FT0(CPUX86State *env)
 396{
 397    int ret;
 398
 399    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 400    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
 401}
 402
 403static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
 404
 405void helper_fcomi_ST0_FT0(CPUX86State *env)
 406{
 407    int eflags;
 408    int ret;
 409
 410    ret = floatx80_compare(ST0, FT0, &env->fp_status);
 411    eflags = cpu_cc_compute_all(env, CC_OP);
 412    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 413    CC_SRC = eflags;
 414}
 415
 416void helper_fucomi_ST0_FT0(CPUX86State *env)
 417{
 418    int eflags;
 419    int ret;
 420
 421    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
 422    eflags = cpu_cc_compute_all(env, CC_OP);
 423    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
 424    CC_SRC = eflags;
 425}
 426
 427void helper_fadd_ST0_FT0(CPUX86State *env)
 428{
 429    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
 430}
 431
 432void helper_fmul_ST0_FT0(CPUX86State *env)
 433{
 434    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
 435}
 436
 437void helper_fsub_ST0_FT0(CPUX86State *env)
 438{
 439    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
 440}
 441
 442void helper_fsubr_ST0_FT0(CPUX86State *env)
 443{
 444    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
 445}
 446
 447void helper_fdiv_ST0_FT0(CPUX86State *env)
 448{
 449    ST0 = helper_fdiv(env, ST0, FT0);
 450}
 451
 452void helper_fdivr_ST0_FT0(CPUX86State *env)
 453{
 454    ST0 = helper_fdiv(env, FT0, ST0);
 455}
 456
 457/* fp operations between STN and ST0 */
 458
 459void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
 460{
 461    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
 462}
 463
 464void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
 465{
 466    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
 467}
 468
 469void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
 470{
 471    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
 472}
 473
 474void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
 475{
 476    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
 477}
 478
 479void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
 480{
 481    floatx80 *p;
 482
 483    p = &ST(st_index);
 484    *p = helper_fdiv(env, *p, ST0);
 485}
 486
 487void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
 488{
 489    floatx80 *p;
 490
 491    p = &ST(st_index);
 492    *p = helper_fdiv(env, ST0, *p);
 493}
 494
 495/* misc FPU operations */
 496void helper_fchs_ST0(CPUX86State *env)
 497{
 498    ST0 = floatx80_chs(ST0);
 499}
 500
 501void helper_fabs_ST0(CPUX86State *env)
 502{
 503    ST0 = floatx80_abs(ST0);
 504}
 505
 506void helper_fld1_ST0(CPUX86State *env)
 507{
 508    ST0 = floatx80_one;
 509}
 510
 511void helper_fldl2t_ST0(CPUX86State *env)
 512{
 513    ST0 = floatx80_l2t;
 514}
 515
 516void helper_fldl2e_ST0(CPUX86State *env)
 517{
 518    ST0 = floatx80_l2e;
 519}
 520
 521void helper_fldpi_ST0(CPUX86State *env)
 522{
 523    ST0 = floatx80_pi;
 524}
 525
 526void helper_fldlg2_ST0(CPUX86State *env)
 527{
 528    ST0 = floatx80_lg2;
 529}
 530
 531void helper_fldln2_ST0(CPUX86State *env)
 532{
 533    ST0 = floatx80_ln2;
 534}
 535
 536void helper_fldz_ST0(CPUX86State *env)
 537{
 538    ST0 = floatx80_zero;
 539}
 540
 541void helper_fldz_FT0(CPUX86State *env)
 542{
 543    FT0 = floatx80_zero;
 544}
 545
 546uint32_t helper_fnstsw(CPUX86State *env)
 547{
 548    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 549}
 550
 551uint32_t helper_fnstcw(CPUX86State *env)
 552{
 553    return env->fpuc;
 554}
 555
 556void update_fp_status(CPUX86State *env)
 557{
 558    int rnd_type;
 559
 560    /* set rounding mode */
 561    switch (env->fpuc & FPU_RC_MASK) {
 562    default:
 563    case FPU_RC_NEAR:
 564        rnd_type = float_round_nearest_even;
 565        break;
 566    case FPU_RC_DOWN:
 567        rnd_type = float_round_down;
 568        break;
 569    case FPU_RC_UP:
 570        rnd_type = float_round_up;
 571        break;
 572    case FPU_RC_CHOP:
 573        rnd_type = float_round_to_zero;
 574        break;
 575    }
 576    set_float_rounding_mode(rnd_type, &env->fp_status);
 577    switch ((env->fpuc >> 8) & 3) {
 578    case 0:
 579        rnd_type = 32;
 580        break;
 581    case 2:
 582        rnd_type = 64;
 583        break;
 584    case 3:
 585    default:
 586        rnd_type = 80;
 587        break;
 588    }
 589    set_floatx80_rounding_precision(rnd_type, &env->fp_status);
 590}
 591
 592void helper_fldcw(CPUX86State *env, uint32_t val)
 593{
 594    cpu_set_fpuc(env, val);
 595}
 596
 597void helper_fclex(CPUX86State *env)
 598{
 599    env->fpus &= 0x7f00;
 600}
 601
 602void helper_fwait(CPUX86State *env)
 603{
 604    if (env->fpus & FPUS_SE) {
 605        fpu_raise_exception(env, GETPC());
 606    }
 607}
 608
 609void helper_fninit(CPUX86State *env)
 610{
 611    env->fpus = 0;
 612    env->fpstt = 0;
 613    cpu_set_fpuc(env, 0x37f);
 614    env->fptags[0] = 1;
 615    env->fptags[1] = 1;
 616    env->fptags[2] = 1;
 617    env->fptags[3] = 1;
 618    env->fptags[4] = 1;
 619    env->fptags[5] = 1;
 620    env->fptags[6] = 1;
 621    env->fptags[7] = 1;
 622}
 623
 624/* BCD ops */
 625
 626void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
 627{
 628    floatx80 tmp;
 629    uint64_t val;
 630    unsigned int v;
 631    int i;
 632
 633    val = 0;
 634    for (i = 8; i >= 0; i--) {
 635        v = cpu_ldub_data_ra(env, ptr + i, GETPC());
 636        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
 637    }
 638    tmp = int64_to_floatx80(val, &env->fp_status);
 639    if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
 640        tmp = floatx80_chs(tmp);
 641    }
 642    fpush(env);
 643    ST0 = tmp;
 644}
 645
 646void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
 647{
 648    int v;
 649    target_ulong mem_ref, mem_end;
 650    int64_t val;
 651
 652    val = floatx80_to_int64(ST0, &env->fp_status);
 653    mem_ref = ptr;
 654    mem_end = mem_ref + 9;
 655    if (val < 0) {
 656        cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
 657        val = -val;
 658    } else {
 659        cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
 660    }
 661    while (mem_ref < mem_end) {
 662        if (val == 0) {
 663            break;
 664        }
 665        v = val % 100;
 666        val = val / 100;
 667        v = ((v / 10) << 4) | (v % 10);
 668        cpu_stb_data_ra(env, mem_ref++, v, GETPC());
 669    }
 670    while (mem_ref < mem_end) {
 671        cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
 672    }
 673}
 674
 675void helper_f2xm1(CPUX86State *env)
 676{
 677    double val = floatx80_to_double(env, ST0);
 678
 679    val = pow(2.0, val) - 1.0;
 680    ST0 = double_to_floatx80(env, val);
 681}
 682
 683void helper_fyl2x(CPUX86State *env)
 684{
 685    double fptemp = floatx80_to_double(env, ST0);
 686
 687    if (fptemp > 0.0) {
 688        fptemp = log(fptemp) / log(2.0); /* log2(ST) */
 689        fptemp *= floatx80_to_double(env, ST1);
 690        ST1 = double_to_floatx80(env, fptemp);
 691        fpop(env);
 692    } else {
 693        env->fpus &= ~0x4700;
 694        env->fpus |= 0x400;
 695    }
 696}
 697
 698void helper_fptan(CPUX86State *env)
 699{
 700    double fptemp = floatx80_to_double(env, ST0);
 701
 702    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 703        env->fpus |= 0x400;
 704    } else {
 705        fptemp = tan(fptemp);
 706        ST0 = double_to_floatx80(env, fptemp);
 707        fpush(env);
 708        ST0 = floatx80_one;
 709        env->fpus &= ~0x400; /* C2 <-- 0 */
 710        /* the above code is for |arg| < 2**52 only */
 711    }
 712}
 713
 714void helper_fpatan(CPUX86State *env)
 715{
 716    double fptemp, fpsrcop;
 717
 718    fpsrcop = floatx80_to_double(env, ST1);
 719    fptemp = floatx80_to_double(env, ST0);
 720    ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
 721    fpop(env);
 722}
 723
 724void helper_fxtract(CPUX86State *env)
 725{
 726    CPU_LDoubleU temp;
 727
 728    temp.d = ST0;
 729
 730    if (floatx80_is_zero(ST0)) {
 731        /* Easy way to generate -inf and raising division by 0 exception */
 732        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
 733                           &env->fp_status);
 734        fpush(env);
 735        ST0 = temp.d;
 736    } else {
 737        int expdif;
 738
 739        expdif = EXPD(temp) - EXPBIAS;
 740        /* DP exponent bias */
 741        ST0 = int32_to_floatx80(expdif, &env->fp_status);
 742        fpush(env);
 743        BIASEXPONENT(temp);
 744        ST0 = temp.d;
 745    }
 746}
 747
 748void helper_fprem1(CPUX86State *env)
 749{
 750    double st0, st1, dblq, fpsrcop, fptemp;
 751    CPU_LDoubleU fpsrcop1, fptemp1;
 752    int expdif;
 753    signed long long int q;
 754
 755    st0 = floatx80_to_double(env, ST0);
 756    st1 = floatx80_to_double(env, ST1);
 757
 758    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
 759        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
 760        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 761        return;
 762    }
 763
 764    fpsrcop = st0;
 765    fptemp = st1;
 766    fpsrcop1.d = ST0;
 767    fptemp1.d = ST1;
 768    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
 769
 770    if (expdif < 0) {
 771        /* optimisation? taken from the AMD docs */
 772        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 773        /* ST0 is unchanged */
 774        return;
 775    }
 776
 777    if (expdif < 53) {
 778        dblq = fpsrcop / fptemp;
 779        /* round dblq towards nearest integer */
 780        dblq = rint(dblq);
 781        st0 = fpsrcop - fptemp * dblq;
 782
 783        /* convert dblq to q by truncating towards zero */
 784        if (dblq < 0.0) {
 785            q = (signed long long int)(-dblq);
 786        } else {
 787            q = (signed long long int)dblq;
 788        }
 789
 790        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 791        /* (C0,C3,C1) <-- (q2,q1,q0) */
 792        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
 793        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
 794        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
 795    } else {
 796        env->fpus |= 0x400;  /* C2 <-- 1 */
 797        fptemp = pow(2.0, expdif - 50);
 798        fpsrcop = (st0 / st1) / fptemp;
 799        /* fpsrcop = integer obtained by chopping */
 800        fpsrcop = (fpsrcop < 0.0) ?
 801                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
 802        st0 -= (st1 * fpsrcop * fptemp);
 803    }
 804    ST0 = double_to_floatx80(env, st0);
 805}
 806
 807void helper_fprem(CPUX86State *env)
 808{
 809    double st0, st1, dblq, fpsrcop, fptemp;
 810    CPU_LDoubleU fpsrcop1, fptemp1;
 811    int expdif;
 812    signed long long int q;
 813
 814    st0 = floatx80_to_double(env, ST0);
 815    st1 = floatx80_to_double(env, ST1);
 816
 817    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
 818        ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
 819        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 820        return;
 821    }
 822
 823    fpsrcop = st0;
 824    fptemp = st1;
 825    fpsrcop1.d = ST0;
 826    fptemp1.d = ST1;
 827    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
 828
 829    if (expdif < 0) {
 830        /* optimisation? taken from the AMD docs */
 831        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 832        /* ST0 is unchanged */
 833        return;
 834    }
 835
 836    if (expdif < 53) {
 837        dblq = fpsrcop / fptemp; /* ST0 / ST1 */
 838        /* round dblq towards zero */
 839        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
 840        st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
 841
 842        /* convert dblq to q by truncating towards zero */
 843        if (dblq < 0.0) {
 844            q = (signed long long int)(-dblq);
 845        } else {
 846            q = (signed long long int)dblq;
 847        }
 848
 849        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 850        /* (C0,C3,C1) <-- (q2,q1,q0) */
 851        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
 852        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
 853        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
 854    } else {
 855        int N = 32 + (expdif % 32); /* as per AMD docs */
 856
 857        env->fpus |= 0x400;  /* C2 <-- 1 */
 858        fptemp = pow(2.0, (double)(expdif - N));
 859        fpsrcop = (st0 / st1) / fptemp;
 860        /* fpsrcop = integer obtained by chopping */
 861        fpsrcop = (fpsrcop < 0.0) ?
 862                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
 863        st0 -= (st1 * fpsrcop * fptemp);
 864    }
 865    ST0 = double_to_floatx80(env, st0);
 866}
 867
 868void helper_fyl2xp1(CPUX86State *env)
 869{
 870    double fptemp = floatx80_to_double(env, ST0);
 871
 872    if ((fptemp + 1.0) > 0.0) {
 873        fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
 874        fptemp *= floatx80_to_double(env, ST1);
 875        ST1 = double_to_floatx80(env, fptemp);
 876        fpop(env);
 877    } else {
 878        env->fpus &= ~0x4700;
 879        env->fpus |= 0x400;
 880    }
 881}
 882
 883void helper_fsqrt(CPUX86State *env)
 884{
 885    if (floatx80_is_neg(ST0)) {
 886        env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
 887        env->fpus |= 0x400;
 888    }
 889    ST0 = floatx80_sqrt(ST0, &env->fp_status);
 890}
 891
 892void helper_fsincos(CPUX86State *env)
 893{
 894    double fptemp = floatx80_to_double(env, ST0);
 895
 896    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 897        env->fpus |= 0x400;
 898    } else {
 899        ST0 = double_to_floatx80(env, sin(fptemp));
 900        fpush(env);
 901        ST0 = double_to_floatx80(env, cos(fptemp));
 902        env->fpus &= ~0x400;  /* C2 <-- 0 */
 903        /* the above code is for |arg| < 2**63 only */
 904    }
 905}
 906
 907void helper_frndint(CPUX86State *env)
 908{
 909    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
 910}
 911
 912void helper_fscale(CPUX86State *env)
 913{
 914    if (floatx80_is_any_nan(ST1)) {
 915        ST0 = ST1;
 916    } else {
 917        int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
 918        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
 919    }
 920}
 921
 922void helper_fsin(CPUX86State *env)
 923{
 924    double fptemp = floatx80_to_double(env, ST0);
 925
 926    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 927        env->fpus |= 0x400;
 928    } else {
 929        ST0 = double_to_floatx80(env, sin(fptemp));
 930        env->fpus &= ~0x400;  /* C2 <-- 0 */
 931        /* the above code is for |arg| < 2**53 only */
 932    }
 933}
 934
 935void helper_fcos(CPUX86State *env)
 936{
 937    double fptemp = floatx80_to_double(env, ST0);
 938
 939    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
 940        env->fpus |= 0x400;
 941    } else {
 942        ST0 = double_to_floatx80(env, cos(fptemp));
 943        env->fpus &= ~0x400;  /* C2 <-- 0 */
 944        /* the above code is for |arg| < 2**63 only */
 945    }
 946}
 947
 948void helper_fxam_ST0(CPUX86State *env)
 949{
 950    CPU_LDoubleU temp;
 951    int expdif;
 952
 953    temp.d = ST0;
 954
 955    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
 956    if (SIGND(temp)) {
 957        env->fpus |= 0x200; /* C1 <-- 1 */
 958    }
 959
 960    /* XXX: test fptags too */
 961    expdif = EXPD(temp);
 962    if (expdif == MAXEXPD) {
 963        if (MANTD(temp) == 0x8000000000000000ULL) {
 964            env->fpus |= 0x500; /* Infinity */
 965        } else {
 966            env->fpus |= 0x100; /* NaN */
 967        }
 968    } else if (expdif == 0) {
 969        if (MANTD(temp) == 0) {
 970            env->fpus |=  0x4000; /* Zero */
 971        } else {
 972            env->fpus |= 0x4400; /* Denormal */
 973        }
 974    } else {
 975        env->fpus |= 0x400;
 976    }
 977}
 978
 979static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
 980                      uintptr_t retaddr)
 981{
 982    int fpus, fptag, exp, i;
 983    uint64_t mant;
 984    CPU_LDoubleU tmp;
 985
 986    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
 987    fptag = 0;
 988    for (i = 7; i >= 0; i--) {
 989        fptag <<= 2;
 990        if (env->fptags[i]) {
 991            fptag |= 3;
 992        } else {
 993            tmp.d = env->fpregs[i].d;
 994            exp = EXPD(tmp);
 995            mant = MANTD(tmp);
 996            if (exp == 0 && mant == 0) {
 997                /* zero */
 998                fptag |= 1;
 999            } else if (exp == 0 || exp == MAXEXPD
1000                       || (mant & (1LL << 63)) == 0) {
1001                /* NaNs, infinity, denormal */
1002                fptag |= 2;
1003            }
1004        }
1005    }
1006    if (data32) {
1007        /* 32 bit */
1008        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1009        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1010        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1011        cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1012        cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1013        cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1014        cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1015    } else {
1016        /* 16 bit */
1017        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1018        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1019        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1020        cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1021        cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1022        cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1023        cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1024    }
1025}
1026
1027void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1028{
1029    do_fstenv(env, ptr, data32, GETPC());
1030}
1031
1032static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1033                      uintptr_t retaddr)
1034{
1035    int i, fpus, fptag;
1036
1037    if (data32) {
1038        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1039        fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1040        fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1041    } else {
1042        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1043        fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1044        fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1045    }
1046    env->fpstt = (fpus >> 11) & 7;
1047    env->fpus = fpus & ~0x3800;
1048    for (i = 0; i < 8; i++) {
1049        env->fptags[i] = ((fptag & 3) == 3);
1050        fptag >>= 2;
1051    }
1052}
1053
1054void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1055{
1056    do_fldenv(env, ptr, data32, GETPC());
1057}
1058
1059void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1060{
1061    floatx80 tmp;
1062    int i;
1063
1064    do_fstenv(env, ptr, data32, GETPC());
1065
1066    ptr += (14 << data32);
1067    for (i = 0; i < 8; i++) {
1068        tmp = ST(i);
1069        helper_fstt(env, tmp, ptr, GETPC());
1070        ptr += 10;
1071    }
1072
1073    /* fninit */
1074    env->fpus = 0;
1075    env->fpstt = 0;
1076    cpu_set_fpuc(env, 0x37f);
1077    env->fptags[0] = 1;
1078    env->fptags[1] = 1;
1079    env->fptags[2] = 1;
1080    env->fptags[3] = 1;
1081    env->fptags[4] = 1;
1082    env->fptags[5] = 1;
1083    env->fptags[6] = 1;
1084    env->fptags[7] = 1;
1085}
1086
1087void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1088{
1089    floatx80 tmp;
1090    int i;
1091
1092    do_fldenv(env, ptr, data32, GETPC());
1093    ptr += (14 << data32);
1094
1095    for (i = 0; i < 8; i++) {
1096        tmp = helper_fldt(env, ptr, GETPC());
1097        ST(i) = tmp;
1098        ptr += 10;
1099    }
1100}
1101
#if defined(CONFIG_USER_ONLY)
/* User-mode-only entry point forwarding to helper_fsave(). */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}

/* User-mode-only entry point forwarding to helper_frstor(). */
void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
#endif
1113
1114#define XO(X)  offsetof(X86XSaveArea, X)
1115
1116static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1117{
1118    int fpus, fptag, i;
1119    target_ulong addr;
1120
1121    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1122    fptag = 0;
1123    for (i = 0; i < 8; i++) {
1124        fptag |= (env->fptags[i] << i);
1125    }
1126
1127    cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1128    cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1129    cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
1130
1131    /* In 32-bit mode this is eip, sel, dp, sel.
1132       In 64-bit mode this is rip, rdp.
1133       But in either case we don't write actual data, just zeros.  */
1134    cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1135    cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
1136
1137    addr = ptr + XO(legacy.fpregs);
1138    for (i = 0; i < 8; i++) {
1139        floatx80 tmp = ST(i);
1140        helper_fstt(env, tmp, addr, ra);
1141        addr += 16;
1142    }
1143}
1144
1145static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1146{
1147    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1148    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
1149}
1150
1151static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1152{
1153    int i, nb_xmm_regs;
1154    target_ulong addr;
1155
1156    if (env->hflags & HF_CS64_MASK) {
1157        nb_xmm_regs = 16;
1158    } else {
1159        nb_xmm_regs = 8;
1160    }
1161
1162    addr = ptr + XO(legacy.xmm_regs);
1163    for (i = 0; i < nb_xmm_regs; i++) {
1164        cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1165        cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1166        addr += 16;
1167    }
1168}
1169
1170static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1171{
1172    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1173    int i;
1174
1175    for (i = 0; i < 4; i++, addr += 16) {
1176        cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1177        cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1178    }
1179}
1180
1181static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1182{
1183    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1184                    env->bndcs_regs.cfgu, ra);
1185    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1186                    env->bndcs_regs.sts, ra);
1187}
1188
1189static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1190{
1191    cpu_stq_data_ra(env, ptr, env->pkru, ra);
1192}
1193
1194void helper_fxsave(CPUX86State *env, target_ulong ptr)
1195{
1196    uintptr_t ra = GETPC();
1197
1198    /* The operand must be 16 byte aligned */
1199    if (ptr & 0xf) {
1200        raise_exception_ra(env, EXCP0D_GPF, ra);
1201    }
1202
1203    do_xsave_fpu(env, ptr, ra);
1204
1205    if (env->cr[4] & CR4_OSFXSR_MASK) {
1206        do_xsave_mxcsr(env, ptr, ra);
1207        /* Fast FXSAVE leaves out the XMM registers */
1208        if (!(env->efer & MSR_EFER_FFXSR)
1209            || (env->hflags & HF_CPL_MASK)
1210            || !(env->hflags & HF_LMA_MASK)) {
1211            do_xsave_sse(env, ptr, ra);
1212        }
1213    }
1214}
1215
1216static uint64_t get_xinuse(CPUX86State *env)
1217{
1218    uint64_t inuse = -1;
1219
1220    /* For the most part, we don't track XINUSE.  We could calculate it
1221       here for all components, but it's probably less work to simply
1222       indicate in use.  That said, the state of BNDREGS is important
1223       enough to track in HFLAGS, so we might as well use that here.  */
1224    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1225       inuse &= ~XSTATE_BNDREGS_MASK;
1226    }
1227    return inuse;
1228}
1229
1230static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1231                     uint64_t inuse, uint64_t opt, uintptr_t ra)
1232{
1233    uint64_t old_bv, new_bv;
1234
1235    /* The OS must have enabled XSAVE.  */
1236    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1237        raise_exception_ra(env, EXCP06_ILLOP, ra);
1238    }
1239
1240    /* The operand must be 64 byte aligned.  */
1241    if (ptr & 63) {
1242        raise_exception_ra(env, EXCP0D_GPF, ra);
1243    }
1244
1245    /* Never save anything not enabled by XCR0.  */
1246    rfbm &= env->xcr0;
1247    opt &= rfbm;
1248
1249    if (opt & XSTATE_FP_MASK) {
1250        do_xsave_fpu(env, ptr, ra);
1251    }
1252    if (rfbm & XSTATE_SSE_MASK) {
1253        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
1254        do_xsave_mxcsr(env, ptr, ra);
1255    }
1256    if (opt & XSTATE_SSE_MASK) {
1257        do_xsave_sse(env, ptr, ra);
1258    }
1259    if (opt & XSTATE_BNDREGS_MASK) {
1260        do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1261    }
1262    if (opt & XSTATE_BNDCSR_MASK) {
1263        do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1264    }
1265    if (opt & XSTATE_PKRU_MASK) {
1266        do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1267    }
1268
1269    /* Update the XSTATE_BV field.  */
1270    old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1271    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1272    cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1273}
1274
1275void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1276{
1277    do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1278}
1279
1280void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1281{
1282    uint64_t inuse = get_xinuse(env);
1283    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1284}
1285
1286static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1287{
1288    int i, fpuc, fpus, fptag;
1289    target_ulong addr;
1290
1291    fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1292    fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1293    fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1294    cpu_set_fpuc(env, fpuc);
1295    env->fpstt = (fpus >> 11) & 7;
1296    env->fpus = fpus & ~0x3800;
1297    fptag ^= 0xff;
1298    for (i = 0; i < 8; i++) {
1299        env->fptags[i] = ((fptag >> i) & 1);
1300    }
1301
1302    addr = ptr + XO(legacy.fpregs);
1303    for (i = 0; i < 8; i++) {
1304        floatx80 tmp = helper_fldt(env, addr, ra);
1305        ST(i) = tmp;
1306        addr += 16;
1307    }
1308}
1309
1310static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1311{
1312    cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1313}
1314
1315static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1316{
1317    int i, nb_xmm_regs;
1318    target_ulong addr;
1319
1320    if (env->hflags & HF_CS64_MASK) {
1321        nb_xmm_regs = 16;
1322    } else {
1323        nb_xmm_regs = 8;
1324    }
1325
1326    addr = ptr + XO(legacy.xmm_regs);
1327    for (i = 0; i < nb_xmm_regs; i++) {
1328        env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1329        env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1330        addr += 16;
1331    }
1332}
1333
1334static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1335{
1336    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1337    int i;
1338
1339    for (i = 0; i < 4; i++, addr += 16) {
1340        env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1341        env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1342    }
1343}
1344
1345static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1346{
1347    /* FIXME: Extend highest implemented bit of linear address.  */
1348    env->bndcs_regs.cfgu
1349        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1350    env->bndcs_regs.sts
1351        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1352}
1353
1354static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1355{
1356    env->pkru = cpu_ldq_data_ra(env, ptr, ra);
1357}
1358
1359void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1360{
1361    uintptr_t ra = GETPC();
1362
1363    /* The operand must be 16 byte aligned */
1364    if (ptr & 0xf) {
1365        raise_exception_ra(env, EXCP0D_GPF, ra);
1366    }
1367
1368    do_xrstor_fpu(env, ptr, ra);
1369
1370    if (env->cr[4] & CR4_OSFXSR_MASK) {
1371        do_xrstor_mxcsr(env, ptr, ra);
1372        /* Fast FXRSTOR leaves out the XMM registers */
1373        if (!(env->efer & MSR_EFER_FFXSR)
1374            || (env->hflags & HF_CPL_MASK)
1375            || !(env->hflags & HF_LMA_MASK)) {
1376            do_xrstor_sse(env, ptr, ra);
1377        }
1378    }
1379}
1380
1381#if defined(CONFIG_USER_ONLY)
/*
 * Non-helper entry point (compiled only when CONFIG_USER_ONLY is defined):
 * forwards its arguments unchanged to helper_fxsave().
 */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    helper_fxsave(env, ptr);
}
1386
/*
 * Non-helper entry point (compiled only when CONFIG_USER_ONLY is defined):
 * forwards its arguments unchanged to helper_fxrstor().
 */
void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    helper_fxrstor(env, ptr);
}
1391#endif
1392
1393void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1394{
1395    uintptr_t ra = GETPC();
1396    uint64_t xstate_bv, xcomp_bv, reserve0;
1397
1398    rfbm &= env->xcr0;
1399
1400    /* The OS must have enabled XSAVE.  */
1401    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1402        raise_exception_ra(env, EXCP06_ILLOP, ra);
1403    }
1404
1405    /* The operand must be 64 byte aligned.  */
1406    if (ptr & 63) {
1407        raise_exception_ra(env, EXCP0D_GPF, ra);
1408    }
1409
1410    xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1411
1412    if ((int64_t)xstate_bv < 0) {
1413        /* FIXME: Compact form.  */
1414        raise_exception_ra(env, EXCP0D_GPF, ra);
1415    }
1416
1417    /* Standard form.  */
1418
1419    /* The XSTATE_BV field must not set bits not present in XCR0.  */
1420    if (xstate_bv & ~env->xcr0) {
1421        raise_exception_ra(env, EXCP0D_GPF, ra);
1422    }
1423
1424    /* The XCOMP_BV field must be zero.  Note that, as of the April 2016
1425       revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
1426       describes only XCOMP_BV, but the description of the standard form
1427       of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
1428       includes the next 64-bit field.  */
1429    xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
1430    reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
1431    if (xcomp_bv || reserve0) {
1432        raise_exception_ra(env, EXCP0D_GPF, ra);
1433    }
1434
1435    if (rfbm & XSTATE_FP_MASK) {
1436        if (xstate_bv & XSTATE_FP_MASK) {
1437            do_xrstor_fpu(env, ptr, ra);
1438        } else {
1439            helper_fninit(env);
1440            memset(env->fpregs, 0, sizeof(env->fpregs));
1441        }
1442    }
1443    if (rfbm & XSTATE_SSE_MASK) {
1444        /* Note that the standard form of XRSTOR loads MXCSR from memory
1445           whether or not the XSTATE_BV bit is set.  */
1446        do_xrstor_mxcsr(env, ptr, ra);
1447        if (xstate_bv & XSTATE_SSE_MASK) {
1448            do_xrstor_sse(env, ptr, ra);
1449        } else {
1450            /* ??? When AVX is implemented, we may have to be more
1451               selective in the clearing.  */
1452            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1453        }
1454    }
1455    if (rfbm & XSTATE_BNDREGS_MASK) {
1456        if (xstate_bv & XSTATE_BNDREGS_MASK) {
1457            do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
1458            env->hflags |= HF_MPX_IU_MASK;
1459        } else {
1460            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1461            env->hflags &= ~HF_MPX_IU_MASK;
1462        }
1463    }
1464    if (rfbm & XSTATE_BNDCSR_MASK) {
1465        if (xstate_bv & XSTATE_BNDCSR_MASK) {
1466            do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
1467        } else {
1468            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1469        }
1470        cpu_sync_bndcs_hflags(env);
1471    }
1472    if (rfbm & XSTATE_PKRU_MASK) {
1473        uint64_t old_pkru = env->pkru;
1474        if (xstate_bv & XSTATE_PKRU_MASK) {
1475            do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
1476        } else {
1477            env->pkru = 0;
1478        }
1479        if (env->pkru != old_pkru) {
1480            CPUState *cs = CPU(x86_env_get_cpu(env));
1481            tlb_flush(cs);
1482        }
1483    }
1484}
1485
1486#undef XO
1487
1488uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1489{
1490    /* The OS must have enabled XSAVE.  */
1491    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1492        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1493    }
1494
1495    switch (ecx) {
1496    case 0:
1497        return env->xcr0;
1498    case 1:
1499        if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1500            return env->xcr0 & get_xinuse(env);
1501        }
1502        break;
1503    }
1504    raise_exception_ra(env, EXCP0D_GPF, GETPC());
1505}
1506
1507void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1508{
1509    uint32_t dummy, ena_lo, ena_hi;
1510    uint64_t ena;
1511
1512    /* The OS must have enabled XSAVE.  */
1513    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1514        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1515    }
1516
1517    /* Only XCR0 is defined at present; the FPU may not be disabled.  */
1518    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1519        goto do_gpf;
1520    }
1521
1522    /* Disallow enabling unimplemented features.  */
1523    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1524    ena = ((uint64_t)ena_hi << 32) | ena_lo;
1525    if (mask & ~ena) {
1526        goto do_gpf;
1527    }
1528
1529    /* Disallow enabling only half of MPX.  */
1530    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1531        & XSTATE_BNDCSR_MASK) {
1532        goto do_gpf;
1533    }
1534
1535    env->xcr0 = mask;
1536    cpu_sync_bndcs_hflags(env);
1537    return;
1538
1539 do_gpf:
1540    raise_exception_ra(env, EXCP0D_GPF, GETPC());
1541}
1542
1543/* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
1545
/* MXCSR control bits decoded by update_mxcsr_status() */
#define SSE_DAZ             0x0040  /* denormals are zero */
#define SSE_RC_MASK         0x6000  /* rounding-control field */
#define SSE_RC_NEAR         0x0000  /*   round to nearest even */
#define SSE_RC_DOWN         0x2000  /*   round down */
#define SSE_RC_UP           0x4000  /*   round up */
#define SSE_RC_CHOP         0x6000  /*   round toward zero */
#define SSE_FZ              0x8000  /* flush to zero */
1553
1554void update_mxcsr_status(CPUX86State *env)
1555{
1556    uint32_t mxcsr = env->mxcsr;
1557    int rnd_type;
1558
1559    /* set rounding mode */
1560    switch (mxcsr & SSE_RC_MASK) {
1561    default:
1562    case SSE_RC_NEAR:
1563        rnd_type = float_round_nearest_even;
1564        break;
1565    case SSE_RC_DOWN:
1566        rnd_type = float_round_down;
1567        break;
1568    case SSE_RC_UP:
1569        rnd_type = float_round_up;
1570        break;
1571    case SSE_RC_CHOP:
1572        rnd_type = float_round_to_zero;
1573        break;
1574    }
1575    set_float_rounding_mode(rnd_type, &env->sse_status);
1576
1577    /* set denormals are zero */
1578    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1579
1580    /* set flush to zero */
1581    set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
1582}
1583
/* LDMXCSR helper: installs @val as the new MXCSR through cpu_set_mxcsr(). */
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}
1588
1589void helper_enter_mmx(CPUX86State *env)
1590{
1591    env->fpstt = 0;
1592    *(uint32_t *)(env->fptags) = 0;
1593    *(uint32_t *)(env->fptags + 4) = 0;
1594}
1595
1596void helper_emms(CPUX86State *env)
1597{
1598    /* set to empty state */
1599    *(uint32_t *)(env->fptags) = 0x01010101;
1600    *(uint32_t *)(env->fptags + 4) = 0x01010101;
1601}
1602
1603/* XXX: suppress */
1604void helper_movq(CPUX86State *env, void *d, void *s)
1605{
1606    *(uint64_t *)d = *(uint64_t *)s;
1607}
1608
1609#define SHIFT 0
1610#include "ops_sse.h"
1611
1612#define SHIFT 1
1613#include "ops_sse.h"
1614