qemu/tests/tcg/test-i386.c
<<
>>
Prefs
   1/*
   2 *  x86 CPU test
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 *  This program is free software; you can redistribute it and/or modify
   7 *  it under the terms of the GNU General Public License as published by
   8 *  the Free Software Foundation; either version 2 of the License, or
   9 *  (at your option) any later version.
  10 *
  11 *  This program is distributed in the hope that it will be useful,
  12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 *  GNU General Public License for more details.
  15 *
  16 *  You should have received a copy of the GNU General Public License
  17 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#define _GNU_SOURCE
  20#include "compiler.h"
  21#include <stdlib.h>
  22#include <stdio.h>
  23#include <string.h>
  24#include <inttypes.h>
  25#include <math.h>
  26#include <signal.h>
  27#include <setjmp.h>
  28#include <errno.h>
  29#include <sys/ucontext.h>
  30#include <sys/mman.h>
  31
  32#if !defined(__x86_64__)
  33//#define TEST_VM86
  34#define TEST_SEGS
  35#endif
  36//#define LINUX_VM86_IOPL_FIX
  37//#define TEST_P4_FLAGS
  38#ifdef __SSE__
  39#define TEST_SSE
  40#define TEST_CMOV  1
  41#define TEST_FCOMI 1
  42#else
  43#undef TEST_SSE
  44#define TEST_CMOV  1
  45#define TEST_FCOMI 1
  46#endif
  47
  48#if defined(__x86_64__)
  49#define FMT64X "%016lx"
  50#define FMTLX "%016lx"
  51#define X86_64_ONLY(x) x
  52#else
  53#define FMT64X "%016" PRIx64
  54#define FMTLX "%08lx"
  55#define X86_64_ONLY(x)
  56#endif
  57
  58#ifdef TEST_VM86
  59#include <asm/vm86.h>
  60#endif
  61
  62#define xglue(x, y) x ## y
  63#define glue(x, y) xglue(x, y)
  64#define stringify(s)    tostring(s)
  65#define tostring(s)     #s
  66
  67#define CC_C    0x0001
  68#define CC_P    0x0004
  69#define CC_A    0x0010
  70#define CC_Z    0x0040
  71#define CC_S    0x0080
  72#define CC_O    0x0800
  73
  74#define __init_call     __attribute__ ((unused,__section__ ("initcall")))
  75
  76#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
  77
  78#if defined(__x86_64__)
  79static inline long i2l(long v)
  80{
  81    return v | ((v ^ 0xabcd) << 32);
  82}
  83#else
  84static inline long i2l(long v)
  85{
  86    return v;
  87}
  88#endif
  89
  90#define OP add
  91#include "test-i386.h"
  92
  93#define OP sub
  94#include "test-i386.h"
  95
  96#define OP xor
  97#include "test-i386.h"
  98
  99#define OP and
 100#include "test-i386.h"
 101
 102#define OP or
 103#include "test-i386.h"
 104
 105#define OP cmp
 106#include "test-i386.h"
 107
 108#define OP adc
 109#define OP_CC
 110#include "test-i386.h"
 111
 112#define OP sbb
 113#define OP_CC
 114#include "test-i386.h"
 115
 116#define OP inc
 117#define OP_CC
 118#define OP1
 119#include "test-i386.h"
 120
 121#define OP dec
 122#define OP_CC
 123#define OP1
 124#include "test-i386.h"
 125
 126#define OP neg
 127#define OP_CC
 128#define OP1
 129#include "test-i386.h"
 130
 131#define OP not
 132#define OP_CC
 133#define OP1
 134#include "test-i386.h"
 135
 136#undef CC_MASK
 137#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O)
 138
 139#define OP shl
 140#include "test-i386-shift.h"
 141
 142#define OP shr
 143#include "test-i386-shift.h"
 144
 145#define OP sar
 146#include "test-i386-shift.h"
 147
 148#define OP rol
 149#include "test-i386-shift.h"
 150
 151#define OP ror
 152#include "test-i386-shift.h"
 153
 154#define OP rcr
 155#define OP_CC
 156#include "test-i386-shift.h"
 157
 158#define OP rcl
 159#define OP_CC
 160#include "test-i386-shift.h"
 161
 162#define OP shld
 163#define OP_SHIFTD
 164#define OP_NOBYTE
 165#include "test-i386-shift.h"
 166
 167#define OP shrd
 168#define OP_SHIFTD
 169#define OP_NOBYTE
 170#include "test-i386-shift.h"
 171
 172/* XXX: should be more precise ? */
 173#undef CC_MASK
 174#define CC_MASK (CC_C)
 175
 176#define OP bt
 177#define OP_NOBYTE
 178#include "test-i386-shift.h"
 179
 180#define OP bts
 181#define OP_NOBYTE
 182#include "test-i386-shift.h"
 183
 184#define OP btr
 185#define OP_NOBYTE
 186#include "test-i386-shift.h"
 187
 188#define OP btc
 189#define OP_NOBYTE
 190#include "test-i386-shift.h"
 191
 192/* lea test (modrm support) */
 193#define TEST_LEAQ(STR)\
 194{\
 195    asm("lea " STR ", %0"\
 196        : "=r" (res)\
 197        : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
 198    printf("lea %s = " FMTLX "\n", STR, res);\
 199}
 200
 201#define TEST_LEA(STR)\
 202{\
 203    asm("lea " STR ", %0"\
 204        : "=r" (res)\
 205        : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
 206    printf("lea %s = " FMTLX "\n", STR, res);\
 207}
 208
 209#define TEST_LEA16(STR)\
 210{\
 211    asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\
 212        : "=wq" (res)\
 213        : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\
 214    printf("lea %s = %08lx\n", STR, res);\
 215}
 216
 217
 218void test_lea(void)
 219{
 220    long eax, ebx, ecx, edx, esi, edi, res;
 221    eax = i2l(0x0001);
 222    ebx = i2l(0x0002);
 223    ecx = i2l(0x0004);
 224    edx = i2l(0x0008);
 225    esi = i2l(0x0010);
 226    edi = i2l(0x0020);
 227
 228    TEST_LEA("0x4000");
 229
 230    TEST_LEA("(%%eax)");
 231    TEST_LEA("(%%ebx)");
 232    TEST_LEA("(%%ecx)");
 233    TEST_LEA("(%%edx)");
 234    TEST_LEA("(%%esi)");
 235    TEST_LEA("(%%edi)");
 236
 237    TEST_LEA("0x40(%%eax)");
 238    TEST_LEA("0x40(%%ebx)");
 239    TEST_LEA("0x40(%%ecx)");
 240    TEST_LEA("0x40(%%edx)");
 241    TEST_LEA("0x40(%%esi)");
 242    TEST_LEA("0x40(%%edi)");
 243
 244    TEST_LEA("0x4000(%%eax)");
 245    TEST_LEA("0x4000(%%ebx)");
 246    TEST_LEA("0x4000(%%ecx)");
 247    TEST_LEA("0x4000(%%edx)");
 248    TEST_LEA("0x4000(%%esi)");
 249    TEST_LEA("0x4000(%%edi)");
 250
 251    TEST_LEA("(%%eax, %%ecx)");
 252    TEST_LEA("(%%ebx, %%edx)");
 253    TEST_LEA("(%%ecx, %%ecx)");
 254    TEST_LEA("(%%edx, %%ecx)");
 255    TEST_LEA("(%%esi, %%ecx)");
 256    TEST_LEA("(%%edi, %%ecx)");
 257
 258    TEST_LEA("0x40(%%eax, %%ecx)");
 259    TEST_LEA("0x4000(%%ebx, %%edx)");
 260
 261    TEST_LEA("(%%ecx, %%ecx, 2)");
 262    TEST_LEA("(%%edx, %%ecx, 4)");
 263    TEST_LEA("(%%esi, %%ecx, 8)");
 264
 265    TEST_LEA("(,%%eax, 2)");
 266    TEST_LEA("(,%%ebx, 4)");
 267    TEST_LEA("(,%%ecx, 8)");
 268
 269    TEST_LEA("0x40(,%%eax, 2)");
 270    TEST_LEA("0x40(,%%ebx, 4)");
 271    TEST_LEA("0x40(,%%ecx, 8)");
 272
 273
 274    TEST_LEA("-10(%%ecx, %%ecx, 2)");
 275    TEST_LEA("-10(%%edx, %%ecx, 4)");
 276    TEST_LEA("-10(%%esi, %%ecx, 8)");
 277
 278    TEST_LEA("0x4000(%%ecx, %%ecx, 2)");
 279    TEST_LEA("0x4000(%%edx, %%ecx, 4)");
 280    TEST_LEA("0x4000(%%esi, %%ecx, 8)");
 281
 282#if defined(__x86_64__)
 283    TEST_LEAQ("0x4000");
 284    TEST_LEAQ("0x4000(%%rip)");
 285
 286    TEST_LEAQ("(%%rax)");
 287    TEST_LEAQ("(%%rbx)");
 288    TEST_LEAQ("(%%rcx)");
 289    TEST_LEAQ("(%%rdx)");
 290    TEST_LEAQ("(%%rsi)");
 291    TEST_LEAQ("(%%rdi)");
 292
 293    TEST_LEAQ("0x40(%%rax)");
 294    TEST_LEAQ("0x40(%%rbx)");
 295    TEST_LEAQ("0x40(%%rcx)");
 296    TEST_LEAQ("0x40(%%rdx)");
 297    TEST_LEAQ("0x40(%%rsi)");
 298    TEST_LEAQ("0x40(%%rdi)");
 299
 300    TEST_LEAQ("0x4000(%%rax)");
 301    TEST_LEAQ("0x4000(%%rbx)");
 302    TEST_LEAQ("0x4000(%%rcx)");
 303    TEST_LEAQ("0x4000(%%rdx)");
 304    TEST_LEAQ("0x4000(%%rsi)");
 305    TEST_LEAQ("0x4000(%%rdi)");
 306
 307    TEST_LEAQ("(%%rax, %%rcx)");
 308    TEST_LEAQ("(%%rbx, %%rdx)");
 309    TEST_LEAQ("(%%rcx, %%rcx)");
 310    TEST_LEAQ("(%%rdx, %%rcx)");
 311    TEST_LEAQ("(%%rsi, %%rcx)");
 312    TEST_LEAQ("(%%rdi, %%rcx)");
 313
 314    TEST_LEAQ("0x40(%%rax, %%rcx)");
 315    TEST_LEAQ("0x4000(%%rbx, %%rdx)");
 316
 317    TEST_LEAQ("(%%rcx, %%rcx, 2)");
 318    TEST_LEAQ("(%%rdx, %%rcx, 4)");
 319    TEST_LEAQ("(%%rsi, %%rcx, 8)");
 320
 321    TEST_LEAQ("(,%%rax, 2)");
 322    TEST_LEAQ("(,%%rbx, 4)");
 323    TEST_LEAQ("(,%%rcx, 8)");
 324
 325    TEST_LEAQ("0x40(,%%rax, 2)");
 326    TEST_LEAQ("0x40(,%%rbx, 4)");
 327    TEST_LEAQ("0x40(,%%rcx, 8)");
 328
 329
 330    TEST_LEAQ("-10(%%rcx, %%rcx, 2)");
 331    TEST_LEAQ("-10(%%rdx, %%rcx, 4)");
 332    TEST_LEAQ("-10(%%rsi, %%rcx, 8)");
 333
 334    TEST_LEAQ("0x4000(%%rcx, %%rcx, 2)");
 335    TEST_LEAQ("0x4000(%%rdx, %%rcx, 4)");
 336    TEST_LEAQ("0x4000(%%rsi, %%rcx, 8)");
 337#else
 338    /* limited 16 bit addressing test */
 339    TEST_LEA16("0x4000");
 340    TEST_LEA16("(%%bx)");
 341    TEST_LEA16("(%%si)");
 342    TEST_LEA16("(%%di)");
 343    TEST_LEA16("0x40(%%bx)");
 344    TEST_LEA16("0x40(%%si)");
 345    TEST_LEA16("0x40(%%di)");
 346    TEST_LEA16("0x4000(%%bx)");
 347    TEST_LEA16("0x4000(%%si)");
 348    TEST_LEA16("(%%bx,%%si)");
 349    TEST_LEA16("(%%bx,%%di)");
 350    TEST_LEA16("0x40(%%bx,%%si)");
 351    TEST_LEA16("0x40(%%bx,%%di)");
 352    TEST_LEA16("0x4000(%%bx,%%si)");
 353    TEST_LEA16("0x4000(%%bx,%%di)");
 354#endif
 355}
 356
 357#define TEST_JCC(JCC, v1, v2)\
 358{\
 359    int res;\
 360    asm("movl $1, %0\n\t"\
 361        "cmpl %2, %1\n\t"\
 362        "j" JCC " 1f\n\t"\
 363        "movl $0, %0\n\t"\
 364        "1:\n\t"\
 365        : "=r" (res)\
 366        : "r" (v1), "r" (v2));\
 367    printf("%-10s %d\n", "j" JCC, res);\
 368\
 369    asm("movl $0, %0\n\t"\
 370        "cmpl %2, %1\n\t"\
 371        "set" JCC " %b0\n\t"\
 372        : "=r" (res)\
 373        : "r" (v1), "r" (v2));\
 374    printf("%-10s %d\n", "set" JCC, res);\
 375 if (TEST_CMOV) {\
 376    long val = i2l(1);\
 377    long res = i2l(0x12345678);\
 378X86_64_ONLY(\
 379    asm("cmpl %2, %1\n\t"\
 380        "cmov" JCC "q %3, %0\n\t"\
 381        : "=r" (res)\
 382        : "r" (v1), "r" (v2), "m" (val), "0" (res));\
 383        printf("%-10s R=" FMTLX "\n", "cmov" JCC "q", res);)\
 384    asm("cmpl %2, %1\n\t"\
 385        "cmov" JCC "l %k3, %k0\n\t"\
 386        : "=r" (res)\
 387        : "r" (v1), "r" (v2), "m" (val), "0" (res));\
 388        printf("%-10s R=" FMTLX "\n", "cmov" JCC "l", res);\
 389    asm("cmpl %2, %1\n\t"\
 390        "cmov" JCC "w %w3, %w0\n\t"\
 391        : "=r" (res)\
 392        : "r" (v1), "r" (v2), "r" (1), "0" (res));\
 393        printf("%-10s R=" FMTLX "\n", "cmov" JCC "w", res);\
 394 } \
 395}
 396
 397/* various jump tests */
 398void test_jcc(void)
 399{
 400    TEST_JCC("ne", 1, 1);
 401    TEST_JCC("ne", 1, 0);
 402
 403    TEST_JCC("e", 1, 1);
 404    TEST_JCC("e", 1, 0);
 405
 406    TEST_JCC("l", 1, 1);
 407    TEST_JCC("l", 1, 0);
 408    TEST_JCC("l", 1, -1);
 409
 410    TEST_JCC("le", 1, 1);
 411    TEST_JCC("le", 1, 0);
 412    TEST_JCC("le", 1, -1);
 413
 414    TEST_JCC("ge", 1, 1);
 415    TEST_JCC("ge", 1, 0);
 416    TEST_JCC("ge", -1, 1);
 417
 418    TEST_JCC("g", 1, 1);
 419    TEST_JCC("g", 1, 0);
 420    TEST_JCC("g", 1, -1);
 421
 422    TEST_JCC("b", 1, 1);
 423    TEST_JCC("b", 1, 0);
 424    TEST_JCC("b", 1, -1);
 425
 426    TEST_JCC("be", 1, 1);
 427    TEST_JCC("be", 1, 0);
 428    TEST_JCC("be", 1, -1);
 429
 430    TEST_JCC("ae", 1, 1);
 431    TEST_JCC("ae", 1, 0);
 432    TEST_JCC("ae", 1, -1);
 433
 434    TEST_JCC("a", 1, 1);
 435    TEST_JCC("a", 1, 0);
 436    TEST_JCC("a", 1, -1);
 437
 438
 439    TEST_JCC("p", 1, 1);
 440    TEST_JCC("p", 1, 0);
 441
 442    TEST_JCC("np", 1, 1);
 443    TEST_JCC("np", 1, 0);
 444
 445    TEST_JCC("o", 0x7fffffff, 0);
 446    TEST_JCC("o", 0x7fffffff, -1);
 447
 448    TEST_JCC("no", 0x7fffffff, 0);
 449    TEST_JCC("no", 0x7fffffff, -1);
 450
 451    TEST_JCC("s", 0, 1);
 452    TEST_JCC("s", 0, -1);
 453    TEST_JCC("s", 0, 0);
 454
 455    TEST_JCC("ns", 0, 1);
 456    TEST_JCC("ns", 0, -1);
 457    TEST_JCC("ns", 0, 0);
 458}
 459
 460#define TEST_LOOP(insn) \
 461{\
 462    for(i = 0; i < sizeof(ecx_vals) / sizeof(long); i++) {\
 463        ecx = ecx_vals[i];\
 464        for(zf = 0; zf < 2; zf++) {\
 465    asm("test %2, %2\n\t"\
 466        "movl $1, %0\n\t"\
 467          insn " 1f\n\t" \
 468        "movl $0, %0\n\t"\
 469        "1:\n\t"\
 470        : "=a" (res)\
 471        : "c" (ecx), "b" (!zf)); \
 472    printf("%-10s ECX=" FMTLX " ZF=%ld r=%d\n", insn, ecx, zf, res);      \
 473        }\
 474   }\
 475}
 476
 477void test_loop(void)
 478{
 479    long ecx, zf;
 480    const long ecx_vals[] = {
 481        0,
 482        1,
 483        0x10000,
 484        0x10001,
 485#if defined(__x86_64__)
 486        0x100000000L,
 487        0x100000001L,
 488#endif
 489    };
 490    int i, res;
 491
 492#if !defined(__x86_64__)
 493    TEST_LOOP("jcxz");
 494    TEST_LOOP("loopw");
 495    TEST_LOOP("loopzw");
 496    TEST_LOOP("loopnzw");
 497#endif
 498
 499    TEST_LOOP("jecxz");
 500    TEST_LOOP("loopl");
 501    TEST_LOOP("loopzl");
 502    TEST_LOOP("loopnzl");
 503}
 504
 505#undef CC_MASK
 506#ifdef TEST_P4_FLAGS
 507#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)
 508#else
 509#define CC_MASK (CC_O | CC_C)
 510#endif
 511
 512#define OP mul
 513#include "test-i386-muldiv.h"
 514
 515#define OP imul
 516#include "test-i386-muldiv.h"
 517
 518void test_imulw2(long op0, long op1)
 519{
 520    long res, s1, s0, flags;
 521    s0 = op0;
 522    s1 = op1;
 523    res = s0;
 524    flags = 0;
 525    asm volatile ("push %4\n\t"
 526         "popf\n\t"
 527         "imulw %w2, %w0\n\t"
 528         "pushf\n\t"
 529         "pop %1\n\t"
 530         : "=q" (res), "=g" (flags)
 531         : "q" (s1), "0" (res), "1" (flags));
 532    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",
 533           "imulw", s0, s1, res, flags & CC_MASK);
 534}
 535
 536void test_imull2(long op0, long op1)
 537{
 538    long res, s1, s0, flags;
 539    s0 = op0;
 540    s1 = op1;
 541    res = s0;
 542    flags = 0;
 543    asm volatile ("push %4\n\t"
 544         "popf\n\t"
 545         "imull %k2, %k0\n\t"
 546         "pushf\n\t"
 547         "pop %1\n\t"
 548         : "=q" (res), "=g" (flags)
 549         : "q" (s1), "0" (res), "1" (flags));
 550    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",
 551           "imull", s0, s1, res, flags & CC_MASK);
 552}
 553
 554#if defined(__x86_64__)
 555void test_imulq2(long op0, long op1)
 556{
 557    long res, s1, s0, flags;
 558    s0 = op0;
 559    s1 = op1;
 560    res = s0;
 561    flags = 0;
 562    asm volatile ("push %4\n\t"
 563         "popf\n\t"
 564         "imulq %2, %0\n\t"
 565         "pushf\n\t"
 566         "pop %1\n\t"
 567         : "=q" (res), "=g" (flags)
 568         : "q" (s1), "0" (res), "1" (flags));
 569    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",
 570           "imulq", s0, s1, res, flags & CC_MASK);
 571}
 572#endif
 573
 574#define TEST_IMUL_IM(size, rsize, op0, op1)\
 575{\
 576    long res, flags, s1;\
 577    flags = 0;\
 578    res = 0;\
 579    s1 = op1;\
 580    asm volatile ("push %3\n\t"\
 581         "popf\n\t"\
 582         "imul" size " $" #op0 ", %" rsize "2, %" rsize "0\n\t" \
 583         "pushf\n\t"\
 584         "pop %1\n\t"\
 585         : "=r" (res), "=g" (flags)\
 586         : "r" (s1), "1" (flags), "0" (res));\
 587    printf("%-10s A=" FMTLX " B=" FMTLX " R=" FMTLX " CC=%04lx\n",\
 588           "imul" size " im", (long)op0, (long)op1, res, flags & CC_MASK);\
 589}
 590
 591
 592#undef CC_MASK
 593#define CC_MASK (0)
 594
 595#define OP div
 596#include "test-i386-muldiv.h"
 597
 598#define OP idiv
 599#include "test-i386-muldiv.h"
 600
 601void test_mul(void)
 602{
 603    test_imulb(0x1234561d, 4);
 604    test_imulb(3, -4);
 605    test_imulb(0x80, 0x80);
 606    test_imulb(0x10, 0x10);
 607
 608    test_imulw(0, 0x1234001d, 45);
 609    test_imulw(0, 23, -45);
 610    test_imulw(0, 0x8000, 0x8000);
 611    test_imulw(0, 0x100, 0x100);
 612
 613    test_imull(0, 0x1234001d, 45);
 614    test_imull(0, 23, -45);
 615    test_imull(0, 0x80000000, 0x80000000);
 616    test_imull(0, 0x10000, 0x10000);
 617
 618    test_mulb(0x1234561d, 4);
 619    test_mulb(3, -4);
 620    test_mulb(0x80, 0x80);
 621    test_mulb(0x10, 0x10);
 622
 623    test_mulw(0, 0x1234001d, 45);
 624    test_mulw(0, 23, -45);
 625    test_mulw(0, 0x8000, 0x8000);
 626    test_mulw(0, 0x100, 0x100);
 627
 628    test_mull(0, 0x1234001d, 45);
 629    test_mull(0, 23, -45);
 630    test_mull(0, 0x80000000, 0x80000000);
 631    test_mull(0, 0x10000, 0x10000);
 632
 633    test_imulw2(0x1234001d, 45);
 634    test_imulw2(23, -45);
 635    test_imulw2(0x8000, 0x8000);
 636    test_imulw2(0x100, 0x100);
 637
 638    test_imull2(0x1234001d, 45);
 639    test_imull2(23, -45);
 640    test_imull2(0x80000000, 0x80000000);
 641    test_imull2(0x10000, 0x10000);
 642
 643    TEST_IMUL_IM("w", "w", 45, 0x1234);
 644    TEST_IMUL_IM("w", "w", -45, 23);
 645    TEST_IMUL_IM("w", "w", 0x8000, 0x80000000);
 646    TEST_IMUL_IM("w", "w", 0x7fff, 0x1000);
 647
 648    TEST_IMUL_IM("l", "k", 45, 0x1234);
 649    TEST_IMUL_IM("l", "k", -45, 23);
 650    TEST_IMUL_IM("l", "k", 0x8000, 0x80000000);
 651    TEST_IMUL_IM("l", "k", 0x7fff, 0x1000);
 652
 653    test_idivb(0x12341678, 0x127e);
 654    test_idivb(0x43210123, -5);
 655    test_idivb(0x12340004, -1);
 656
 657    test_idivw(0, 0x12345678, 12347);
 658    test_idivw(0, -23223, -45);
 659    test_idivw(0, 0x12348000, -1);
 660    test_idivw(0x12343, 0x12345678, 0x81238567);
 661
 662    test_idivl(0, 0x12345678, 12347);
 663    test_idivl(0, -233223, -45);
 664    test_idivl(0, 0x80000000, -1);
 665    test_idivl(0x12343, 0x12345678, 0x81234567);
 666
 667    test_divb(0x12341678, 0x127e);
 668    test_divb(0x43210123, -5);
 669    test_divb(0x12340004, -1);
 670
 671    test_divw(0, 0x12345678, 12347);
 672    test_divw(0, -23223, -45);
 673    test_divw(0, 0x12348000, -1);
 674    test_divw(0x12343, 0x12345678, 0x81238567);
 675
 676    test_divl(0, 0x12345678, 12347);
 677    test_divl(0, -233223, -45);
 678    test_divl(0, 0x80000000, -1);
 679    test_divl(0x12343, 0x12345678, 0x81234567);
 680
 681#if defined(__x86_64__)
 682    test_imulq(0, 0x1234001d1234001d, 45);
 683    test_imulq(0, 23, -45);
 684    test_imulq(0, 0x8000000000000000, 0x8000000000000000);
 685    test_imulq(0, 0x100000000, 0x100000000);
 686
 687    test_mulq(0, 0x1234001d1234001d, 45);
 688    test_mulq(0, 23, -45);
 689    test_mulq(0, 0x8000000000000000, 0x8000000000000000);
 690    test_mulq(0, 0x100000000, 0x100000000);
 691
 692    test_imulq2(0x1234001d1234001d, 45);
 693    test_imulq2(23, -45);
 694    test_imulq2(0x8000000000000000, 0x8000000000000000);
 695    test_imulq2(0x100000000, 0x100000000);
 696
 697    TEST_IMUL_IM("q", "", 45, 0x12341234);
 698    TEST_IMUL_IM("q", "", -45, 23);
 699    TEST_IMUL_IM("q", "", 0x8000, 0x8000000000000000);
 700    TEST_IMUL_IM("q", "", 0x7fff, 0x10000000);
 701
 702    test_idivq(0, 0x12345678abcdef, 12347);
 703    test_idivq(0, -233223, -45);
 704    test_idivq(0, 0x8000000000000000, -1);
 705    test_idivq(0x12343, 0x12345678, 0x81234567);
 706
 707    test_divq(0, 0x12345678abcdef, 12347);
 708    test_divq(0, -233223, -45);
 709    test_divq(0, 0x8000000000000000, -1);
 710    test_divq(0x12343, 0x12345678, 0x81234567);
 711#endif
 712}
 713
 714#define TEST_BSX(op, size, op0)\
 715{\
 716    long res, val, resz;\
 717    val = op0;\
 718    asm("xor %1, %1\n"\
 719        "mov $0x12345678, %0\n"\
 720        #op " %" size "2, %" size "0 ; setz %b1" \
 721        : "=&r" (res), "=&q" (resz)\
 722        : "r" (val));\
 723    printf("%-10s A=" FMTLX " R=" FMTLX " %ld\n", #op, val, res, resz);\
 724}
 725
 726void test_bsx(void)
 727{
 728    TEST_BSX(bsrw, "w", 0);
 729    TEST_BSX(bsrw, "w", 0x12340128);
 730    TEST_BSX(bsfw, "w", 0);
 731    TEST_BSX(bsfw, "w", 0x12340128);
 732    TEST_BSX(bsrl, "k", 0);
 733    TEST_BSX(bsrl, "k", 0x00340128);
 734    TEST_BSX(bsfl, "k", 0);
 735    TEST_BSX(bsfl, "k", 0x00340128);
 736#if defined(__x86_64__)
 737    TEST_BSX(bsrq, "", 0);
 738    TEST_BSX(bsrq, "", 0x003401281234);
 739    TEST_BSX(bsfq, "", 0);
 740    TEST_BSX(bsfq, "", 0x003401281234);
 741#endif
 742}
 743
 744/**********************************************/
 745
 746union float64u {
 747    double d;
 748    uint64_t l;
 749};
 750
 751union float64u q_nan = { .l = 0xFFF8000000000000LL };
 752union float64u s_nan = { .l = 0xFFF0000000000000LL };
 753
 754void test_fops(double a, double b)
 755{
 756    printf("a=%f b=%f a+b=%f\n", a, b, a + b);
 757    printf("a=%f b=%f a-b=%f\n", a, b, a - b);
 758    printf("a=%f b=%f a*b=%f\n", a, b, a * b);
 759    printf("a=%f b=%f a/b=%f\n", a, b, a / b);
 760    printf("a=%f b=%f fmod(a, b)=%f\n", a, b, fmod(a, b));
 761    printf("a=%f sqrt(a)=%f\n", a, sqrt(a));
 762    printf("a=%f sin(a)=%f\n", a, sin(a));
 763    printf("a=%f cos(a)=%f\n", a, cos(a));
 764    printf("a=%f tan(a)=%f\n", a, tan(a));
 765    printf("a=%f log(a)=%f\n", a, log(a));
 766    printf("a=%f exp(a)=%f\n", a, exp(a));
 767    printf("a=%f b=%f atan2(a, b)=%f\n", a, b, atan2(a, b));
 768    /* just to test some op combining */
 769    printf("a=%f asin(sin(a))=%f\n", a, asin(sin(a)));
 770    printf("a=%f acos(cos(a))=%f\n", a, acos(cos(a)));
 771    printf("a=%f atan(tan(a))=%f\n", a, atan(tan(a)));
 772
 773}
 774
 775void fpu_clear_exceptions(void)
 776{
 777    struct QEMU_PACKED {
 778        uint16_t fpuc;
 779        uint16_t dummy1;
 780        uint16_t fpus;
 781        uint16_t dummy2;
 782        uint16_t fptag;
 783        uint16_t dummy3;
 784        uint32_t ignored[4];
 785        long double fpregs[8];
 786    } float_env32;
 787
 788    asm volatile ("fnstenv %0\n" : "=m" (float_env32));
 789    float_env32.fpus &= ~0x7f;
 790    asm volatile ("fldenv %0\n" : : "m" (float_env32));
 791}
 792
 793/* XXX: display exception bits when supported */
 794#define FPUS_EMASK 0x0000
 795//#define FPUS_EMASK 0x007f
 796
 797void test_fcmp(double a, double b)
 798{
 799    long eflags, fpus;
 800
 801    fpu_clear_exceptions();
 802    asm("fcom %2\n"
 803        "fstsw %%ax\n"
 804        : "=a" (fpus)
 805        : "t" (a), "u" (b));
 806    printf("fcom(%f %f)=%04lx\n",
 807           a, b, fpus & (0x4500 | FPUS_EMASK));
 808    fpu_clear_exceptions();
 809    asm("fucom %2\n"
 810        "fstsw %%ax\n"
 811        : "=a" (fpus)
 812        : "t" (a), "u" (b));
 813    printf("fucom(%f %f)=%04lx\n",
 814           a, b, fpus & (0x4500 | FPUS_EMASK));
 815    if (TEST_FCOMI) {
 816        /* test f(u)comi instruction */
 817        fpu_clear_exceptions();
 818        asm("fcomi %3, %2\n"
 819            "fstsw %%ax\n"
 820            "pushf\n"
 821            "pop %0\n"
 822            : "=r" (eflags), "=a" (fpus)
 823            : "t" (a), "u" (b));
 824        printf("fcomi(%f %f)=%04lx %02lx\n",
 825               a, b, fpus & FPUS_EMASK, eflags & (CC_Z | CC_P | CC_C));
 826        fpu_clear_exceptions();
 827        asm("fucomi %3, %2\n"
 828            "fstsw %%ax\n"
 829            "pushf\n"
 830            "pop %0\n"
 831            : "=r" (eflags), "=a" (fpus)
 832            : "t" (a), "u" (b));
 833        printf("fucomi(%f %f)=%04lx %02lx\n",
 834               a, b, fpus & FPUS_EMASK, eflags & (CC_Z | CC_P | CC_C));
 835    }
 836    fpu_clear_exceptions();
 837    asm volatile("fxam\n"
 838                 "fstsw %%ax\n"
 839                 : "=a" (fpus)
 840                 : "t" (a));
 841    printf("fxam(%f)=%04lx\n", a, fpus & 0x4700);
 842    fpu_clear_exceptions();
 843}
 844
 845void test_fcvt(double a)
 846{
 847    float fa;
 848    long double la;
 849    int16_t fpuc;
 850    int i;
 851    int64_t lla;
 852    int ia;
 853    int16_t wa;
 854    double ra;
 855
 856    fa = a;
 857    la = a;
 858    printf("(float)%f = %f\n", a, fa);
 859    printf("(long double)%f = %Lf\n", a, la);
 860    printf("a=" FMT64X "\n", *(uint64_t *)&a);
 861    printf("la=" FMT64X " %04x\n", *(uint64_t *)&la,
 862           *(unsigned short *)((char *)(&la) + 8));
 863
 864    /* test all roundings */
 865    asm volatile ("fstcw %0" : "=m" (fpuc));
 866    for(i=0;i<4;i++) {
 867        uint16_t val16;
 868        val16 = (fpuc & ~0x0c00) | (i << 10);
 869        asm volatile ("fldcw %0" : : "m" (val16));
 870        asm volatile ("fist %0" : "=m" (wa) : "t" (a));
 871        asm volatile ("fistl %0" : "=m" (ia) : "t" (a));
 872        asm volatile ("fistpll %0" : "=m" (lla) : "t" (a) : "st");
 873        asm volatile ("frndint ; fstl %0" : "=m" (ra) : "t" (a));
 874        asm volatile ("fldcw %0" : : "m" (fpuc));
 875        printf("(short)a = %d\n", wa);
 876        printf("(int)a = %d\n", ia);
 877        printf("(int64_t)a = " FMT64X "\n", lla);
 878        printf("rint(a) = %f\n", ra);
 879    }
 880}
 881
 882#define TEST(N) \
 883    asm("fld" #N : "=t" (a)); \
 884    printf("fld" #N "= %f\n", a);
 885
 886void test_fconst(void)
 887{
 888    double a;
 889    TEST(1);
 890    TEST(l2t);
 891    TEST(l2e);
 892    TEST(pi);
 893    TEST(lg2);
 894    TEST(ln2);
 895    TEST(z);
 896}
 897
 898void test_fbcd(double a)
 899{
 900    unsigned short bcd[5];
 901    double b;
 902
 903    asm("fbstp %0" : "=m" (bcd[0]) : "t" (a) : "st");
 904    asm("fbld %1" : "=t" (b) : "m" (bcd[0]));
 905    printf("a=%f bcd=%04x%04x%04x%04x%04x b=%f\n",
 906           a, bcd[4], bcd[3], bcd[2], bcd[1], bcd[0], b);
 907}
 908
 909#define TEST_ENV(env, save, restore)\
 910{\
 911    memset((env), 0xaa, sizeof(*(env)));\
 912    for(i=0;i<5;i++)\
 913        asm volatile ("fldl %0" : : "m" (dtab[i]));\
 914    asm volatile (save " %0\n" : : "m" (*(env)));\
 915    asm volatile (restore " %0\n": : "m" (*(env)));\
 916    for(i=0;i<5;i++)\
 917        asm volatile ("fstpl %0" : "=m" (rtab[i]));\
 918    for(i=0;i<5;i++)\
 919        printf("res[%d]=%f\n", i, rtab[i]);\
 920    printf("fpuc=%04x fpus=%04x fptag=%04x\n",\
 921           (env)->fpuc,\
 922           (env)->fpus & 0xff00,\
 923           (env)->fptag);\
 924}
 925
 926void test_fenv(void)
 927{
 928    struct QEMU_PACKED {
 929        uint16_t fpuc;
 930        uint16_t dummy1;
 931        uint16_t fpus;
 932        uint16_t dummy2;
 933        uint16_t fptag;
 934        uint16_t dummy3;
 935        uint32_t ignored[4];
 936        long double fpregs[8];
 937    } float_env32;
 938    struct QEMU_PACKED {
 939        uint16_t fpuc;
 940        uint16_t fpus;
 941        uint16_t fptag;
 942        uint16_t ignored[4];
 943        long double fpregs[8];
 944    } float_env16;
 945    double dtab[8];
 946    double rtab[8];
 947    int i;
 948
 949    for(i=0;i<8;i++)
 950        dtab[i] = i + 1;
 951
 952    TEST_ENV(&float_env16, "data16 fnstenv", "data16 fldenv");
 953    TEST_ENV(&float_env16, "data16 fnsave", "data16 frstor");
 954    TEST_ENV(&float_env32, "fnstenv", "fldenv");
 955    TEST_ENV(&float_env32, "fnsave", "frstor");
 956
 957    /* test for ffree */
 958    for(i=0;i<5;i++)
 959        asm volatile ("fldl %0" : : "m" (dtab[i]));
 960    asm volatile("ffree %st(2)");
 961    asm volatile ("fnstenv %0\n" : : "m" (float_env32));
 962    asm volatile ("fninit");
 963    printf("fptag=%04x\n", float_env32.fptag);
 964}
 965
 966
 967#define TEST_FCMOV(a, b, eflags, CC)\
 968{\
 969    double res;\
 970    asm("push %3\n"\
 971        "popf\n"\
 972        "fcmov" CC " %2, %0\n"\
 973        : "=t" (res)\
 974        : "0" (a), "u" (b), "g" (eflags));\
 975    printf("fcmov%s eflags=0x%04lx-> %f\n", \
 976           CC, (long)eflags, res);\
 977}
 978
 979void test_fcmov(void)
 980{
 981    double a, b;
 982    long eflags, i;
 983
 984    a = 1.0;
 985    b = 2.0;
 986    for(i = 0; i < 4; i++) {
 987        eflags = 0;
 988        if (i & 1)
 989            eflags |= CC_C;
 990        if (i & 2)
 991            eflags |= CC_Z;
 992        TEST_FCMOV(a, b, eflags, "b");
 993        TEST_FCMOV(a, b, eflags, "e");
 994        TEST_FCMOV(a, b, eflags, "be");
 995        TEST_FCMOV(a, b, eflags, "nb");
 996        TEST_FCMOV(a, b, eflags, "ne");
 997        TEST_FCMOV(a, b, eflags, "nbe");
 998    }
 999    TEST_FCMOV(a, b, 0, "u");
1000    TEST_FCMOV(a, b, CC_P, "u");
1001    TEST_FCMOV(a, b, 0, "nu");
1002    TEST_FCMOV(a, b, CC_P, "nu");
1003}
1004
1005void test_floats(void)
1006{
1007    test_fops(2, 3);
1008    test_fops(1.4, -5);
1009    test_fcmp(2, -1);
1010    test_fcmp(2, 2);
1011    test_fcmp(2, 3);
1012    test_fcmp(2, q_nan.d);
1013    test_fcmp(q_nan.d, -1);
1014    test_fcmp(-1.0/0.0, -1);
1015    test_fcmp(1.0/0.0, -1);
1016    test_fcvt(0.5);
1017    test_fcvt(-0.5);
1018    test_fcvt(1.0/7.0);
1019    test_fcvt(-1.0/9.0);
1020    test_fcvt(32768);
1021    test_fcvt(-1e20);
1022    test_fcvt(-1.0/0.0);
1023    test_fcvt(1.0/0.0);
1024    test_fcvt(q_nan.d);
1025    test_fconst();
1026    test_fbcd(1234567890123456.0);
1027    test_fbcd(-123451234567890.0);
1028    test_fenv();
1029    if (TEST_CMOV) {
1030        test_fcmov();
1031    }
1032}
1033
1034/**********************************************/
1035#if !defined(__x86_64__)
1036
/*
 * Test one BCD adjust instruction.
 *
 * op      - mnemonic token, stringified into the template;
 * op0     - initial EAX value;
 * cc_in   - EFLAGS value loaded before the instruction;
 * cc_mask - flag bits to print from the resulting EFLAGS.
 *
 * Prints the input, the resulting EAX, the input flags and the masked
 * output flags.
 */
#define TEST_BCD(op, op0, cc_in, cc_mask)\
{\
    int res = op0;\
    int flags = cc_in;\
    asm ("push %3\n\t"\
         "popf\n\t"\
         #op "\n\t"\
         "pushf\n\t"\
         "pop %1\n\t"\
         : "=a" (res), "=g" (flags)\
         : "0" (res), "1" (flags));\
    printf("%-10s A=%08x R=%08x CCIN=%04x CC=%04x\n",\
           #op, op0, res, cc_in, flags & cc_mask);\
}
1052
1053void test_bcd(void)
1054{
1055    TEST_BCD(daa, 0x12340503, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1056    TEST_BCD(daa, 0x12340506, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1057    TEST_BCD(daa, 0x12340507, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1058    TEST_BCD(daa, 0x12340559, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1059    TEST_BCD(daa, 0x12340560, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1060    TEST_BCD(daa, 0x1234059f, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1061    TEST_BCD(daa, 0x123405a0, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1062    TEST_BCD(daa, 0x12340503, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1063    TEST_BCD(daa, 0x12340506, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1064    TEST_BCD(daa, 0x12340503, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1065    TEST_BCD(daa, 0x12340506, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1066    TEST_BCD(daa, 0x12340503, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1067    TEST_BCD(daa, 0x12340506, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1068
1069    TEST_BCD(das, 0x12340503, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1070    TEST_BCD(das, 0x12340506, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1071    TEST_BCD(das, 0x12340507, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1072    TEST_BCD(das, 0x12340559, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1073    TEST_BCD(das, 0x12340560, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1074    TEST_BCD(das, 0x1234059f, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1075    TEST_BCD(das, 0x123405a0, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1076    TEST_BCD(das, 0x12340503, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1077    TEST_BCD(das, 0x12340506, 0, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1078    TEST_BCD(das, 0x12340503, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1079    TEST_BCD(das, 0x12340506, CC_C, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1080    TEST_BCD(das, 0x12340503, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1081    TEST_BCD(das, 0x12340506, CC_C | CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_A));
1082
1083    TEST_BCD(aaa, 0x12340205, CC_A, (CC_C | CC_A));
1084    TEST_BCD(aaa, 0x12340306, CC_A, (CC_C | CC_A));
1085    TEST_BCD(aaa, 0x1234040a, CC_A, (CC_C | CC_A));
1086    TEST_BCD(aaa, 0x123405fa, CC_A, (CC_C | CC_A));
1087    TEST_BCD(aaa, 0x12340205, 0, (CC_C | CC_A));
1088    TEST_BCD(aaa, 0x12340306, 0, (CC_C | CC_A));
1089    TEST_BCD(aaa, 0x1234040a, 0, (CC_C | CC_A));
1090    TEST_BCD(aaa, 0x123405fa, 0, (CC_C | CC_A));
1091
1092    TEST_BCD(aas, 0x12340205, CC_A, (CC_C | CC_A));
1093    TEST_BCD(aas, 0x12340306, CC_A, (CC_C | CC_A));
1094    TEST_BCD(aas, 0x1234040a, CC_A, (CC_C | CC_A));
1095    TEST_BCD(aas, 0x123405fa, CC_A, (CC_C | CC_A));
1096    TEST_BCD(aas, 0x12340205, 0, (CC_C | CC_A));
1097    TEST_BCD(aas, 0x12340306, 0, (CC_C | CC_A));
1098    TEST_BCD(aas, 0x1234040a, 0, (CC_C | CC_A));
1099    TEST_BCD(aas, 0x123405fa, 0, (CC_C | CC_A));
1100
1101    TEST_BCD(aam, 0x12340547, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));
1102    TEST_BCD(aad, 0x12340407, CC_A, (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));
1103}
1104#endif
1105
/*
 * TEST_XCHG(op, size, opconst): execute one two-operand exchange-style
 * instruction and print both operands afterwards.
 *
 *   op      - instruction mnemonic; stringified with #op into the template
 *   size    - operand modifier text placed between '%' and the operand
 *             number in the template ("k", "w", "b", or "" for none)
 *   opconst - constraint string for the second operand (op1), e.g. "+q"
 *             for a register or "+m" for a memory operand
 *
 * op0 starts as i2l(0x12345678) and is tied to output 0 via the "0" input
 * constraint; op1 starts as i2l(0xfbca7654).
 */
1106#define TEST_XCHG(op, size, opconst)\
1107{\
1108    long op0, op1;\
1109    op0 = i2l(0x12345678);\
1110    op1 = i2l(0xfbca7654);\
1111    asm(#op " %" size "0, %" size "1" \
1112        : "=q" (op0), opconst (op1) \
1113        : "0" (op0));\
1114    printf("%-10s A=" FMTLX " B=" FMTLX "\n",\
1115           #op, op0, op1);\
1116}
1117
/*
 * TEST_CMPXCHG(op, size, opconst, eax): like TEST_XCHG, but additionally
 * loads i2l(eax) into the accumulator register through the "a" constraint
 * before executing the instruction, then prints the accumulator variable
 * (op2) followed by op0 and op1.
 *
 *   op, size, opconst - same meaning as for TEST_XCHG
 *   eax               - value placed in the accumulator for the comparison
 *
 * NOTE(review): op2 is an input-only operand, so the printed "EAX=" value is
 * the C variable as it was before the asm; if the instruction rewrites the
 * accumulator that change is neither declared nor shown - confirm this is
 * intended.
 */
1118#define TEST_CMPXCHG(op, size, opconst, eax)\
1119{\
1120    long op0, op1, op2;\
1121    op0 = i2l(0x12345678);\
1122    op1 = i2l(0xfbca7654);\
1123    op2 = i2l(eax);\
1124    asm(#op " %" size "0, %" size "1" \
1125        : "=q" (op0), opconst (op1) \
1126        : "0" (op0), "a" (op2));\
1127    printf("%-10s EAX=" FMTLX " A=" FMTLX " C=" FMTLX "\n",\
1128           #op, op2, op0, op1);\
1129}
1130
/*
 * Exercise xchg, xadd, cmpxchg and cmpxchg8b and print the results.
 *
 * Each instruction family is run for every operand size (the 64-bit forms
 * only when building for x86_64), once with a register second operand ("+q")
 * and once with a memory second operand ("+m").  The cmpxchg tests are run
 * with an accumulator that equals the second operand's initial value
 * (0xfbca7654) and with one that does not (0xfffefdfc).
 *
 * The second operand is always declared read-write ("+..."): it is
 * initialised in C and read by the instruction, so an output-only "=m"
 * would allow the compiler to skip storing the initial value.
 */
1131void test_xchg(void)
1132{
1133#if defined(__x86_64__)
1134    TEST_XCHG(xchgq, "", "+q");
1135#endif
1136    TEST_XCHG(xchgl, "k", "+q");
1137    TEST_XCHG(xchgw, "w", "+q");
1138    TEST_XCHG(xchgb, "b", "+q");
1139
1140#if defined(__x86_64__)
1141    TEST_XCHG(xchgq, "", "+m");
1142#endif
1143    TEST_XCHG(xchgl, "k", "+m");
1144    TEST_XCHG(xchgw, "w", "+m");
1145    TEST_XCHG(xchgb, "b", "+m");
1146
1147#if defined(__x86_64__)
1148    TEST_XCHG(xaddq, "", "+q");
1149#endif
1150    TEST_XCHG(xaddl, "k", "+q");
1151    TEST_XCHG(xaddw, "w", "+q");
1152    TEST_XCHG(xaddb, "b", "+q");
1153
    /* xadd with the same register used as both source and destination */
1154    {
1155        int res;
1156        res = 0x12345678;
1157        asm("xaddl %1, %0" : "=r" (res) : "0" (res));
1158        printf("xaddl same res=%08x\n", res);
1159    }
1160
1161#if defined(__x86_64__)
1162    TEST_XCHG(xaddq, "", "+m");
1163#endif
1164    TEST_XCHG(xaddl, "k", "+m");
1165    TEST_XCHG(xaddw, "w", "+m");
1166    TEST_XCHG(xaddb, "b", "+m");
1167
1168#if defined(__x86_64__)
1169    TEST_CMPXCHG(cmpxchgq, "", "+q", 0xfbca7654);
1170#endif
1171    TEST_CMPXCHG(cmpxchgl, "k", "+q", 0xfbca7654);
1172    TEST_CMPXCHG(cmpxchgw, "w", "+q", 0xfbca7654);
1173    TEST_CMPXCHG(cmpxchgb, "b", "+q", 0xfbca7654);
1174
1175#if defined(__x86_64__)
1176    TEST_CMPXCHG(cmpxchgq, "", "+q", 0xfffefdfc);
1177#endif
1178    TEST_CMPXCHG(cmpxchgl, "k", "+q", 0xfffefdfc);
1179    TEST_CMPXCHG(cmpxchgw, "w", "+q", 0xfffefdfc);
1180    TEST_CMPXCHG(cmpxchgb, "b", "+q", 0xfffefdfc);
1181
1182#if defined(__x86_64__)
1183    TEST_CMPXCHG(cmpxchgq, "", "+m", 0xfbca7654);
1184#endif
1185    TEST_CMPXCHG(cmpxchgl, "k", "+m", 0xfbca7654);
1186    TEST_CMPXCHG(cmpxchgw, "w", "+m", 0xfbca7654);
1187    TEST_CMPXCHG(cmpxchgb, "b", "+m", 0xfbca7654);
1188
1189#if defined(__x86_64__)
1190    TEST_CMPXCHG(cmpxchgq, "", "+m", 0xfffefdfc);
1191#endif
1192    TEST_CMPXCHG(cmpxchgl, "k", "+m", 0xfffefdfc);
1193    TEST_CMPXCHG(cmpxchgw, "w", "+m", 0xfffefdfc);
1194    TEST_CMPXCHG(cmpxchgb, "b", "+m", 0xfffefdfc);
1195
    /*
     * cmpxchg8b: edx:eax holds op0 split into two 32-bit halves and ecx:ebx
     * holds op2.  Iteration 0 uses a memory operand that differs from op0,
     * iteration 1 one that equals it.  The flags are captured with
     * pushf/pop and only the Z bit is printed.
     */
1196    {
1197        uint64_t op0, op1, op2;
1198        long eax, edx;
1199        long i, eflags;
1200
1201        for(i = 0; i < 2; i++) {
1202            op0 = 0x123456789abcdLL;
1203            eax = i2l(op0 & 0xffffffff);
1204            edx = i2l(op0 >> 32);
1205            if (i == 0)
1206                op1 = 0xfbca765423456LL;
1207            else
1208                op1 = op0;
1209            op2 = 0x6532432432434LL;
1210            asm("cmpxchg8b %2\n"
1211                "pushf\n"
1212                "pop %3\n"
1213                : "=a" (eax), "=d" (edx), "=m" (op1), "=g" (eflags)
1214                : "0" (eax), "1" (edx), "m" (op1), "b" ((int)op2), "c" ((int)(op2 >> 32)));
1215            printf("cmpxchg8b: eax=" FMTLX " edx=" FMTLX " op1=" FMT64X " CC=%02lx\n",
1216                   eax, edx, op1, eflags & CC_Z);
1217        }
1218    }
1219}
1220
1221#ifdef TEST_SEGS
1222/**********************************************/
1223/* segmentation tests */
1224
1225#include <sys/syscall.h>
1226#include <unistd.h>
1227#include <asm/ldt.h>
1228#include <linux/version.h>
1229
/*
 * Invoke the modify_ldt system call directly through syscall().
 *
 *   func      - operation code; this file passes 1 to write an LDT entry
 *               and 0 to read the LDT entries
 *   ptr       - buffer handed to the kernel unchanged
 *   bytecount - size of that buffer in bytes
 *
 * Returns the value returned by syscall(), converted to int.
 */
1230static inline int modify_ldt(int func, void * ptr, unsigned long bytecount)
1231{
1232    return syscall(__NR_modify_ldt, func, ptr, bytecount);
1233}
1234
/*
 * With kernel headers at version 2.5.66 or later, make the structure name
 * used by the tests below (modify_ldt_ldt_s) an alias for user_desc.
 */
1235#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 66)
1236#define modify_ldt_ldt_s user_desc
1237#endif
1238
/*
 * Build a segment selector from descriptor index n: the index is shifted
 * left by three and the low three bits are all set (in the x86 selector
 * layout that is table indicator = LDT and requested privilege level 3).
 */
1239#define MK_SEL(n) (((n) << 3) | 7)
1240
/* Buffers used as the base addresses of LDT entries 1 and 2 by the tests. */
1241uint8_t seg_data1[4096];
1242uint8_t seg_data2[4096];
1243
/*
 * TEST_LR(op, size, seg, mask): run a "load from selector" style instruction
 * (used below with lar and lsl) and print the outcome.
 *
 *   op   - instruction mnemonic as a string literal
 *   size - operand modifier text placed after '%' in the template
 *   seg  - selector value; stored in a 16-bit variable that is passed as a
 *          memory operand
 *   mask - bits cleared from the result before it is printed
 *
 * The destination register is preloaded with 0x12345678.  res2 is set to 0,
 * then overwritten with 1 unless the jnz is taken, so it reports whether the
 * zero flag was set by the instruction.
 */
1244#define TEST_LR(op, size, seg, mask)\
1245{\
1246    int res, res2;\
1247    uint16_t mseg = seg;\
1248    res = 0x12345678;\
1249    asm (op " %" size "2, %" size "0\n" \
1250         "movl $0, %1\n"\
1251         "jnz 1f\n"\
1252         "movl $1, %1\n"\
1253         "1:\n"\
1254         : "=r" (res), "=r" (res2) : "m" (mseg), "0" (res));\
1255    printf(op ": Z=%d %08x\n", res2, res & ~(mask));\
1256}
1257
/*
 * TEST_ARPL(op, size, op1, op2): run a two-register instruction (used below
 * with arpl) and print inputs, result and zero flag.
 *
 *   op   - instruction mnemonic as a string literal
 *   size - operand modifier text placed after '%' in the template; also
 *          appended to the mnemonic in the printed label
 *   op1  - initial value of the destination register (variable a)
 *   op2  - value of the source register (variable b)
 *
 * c is set to 0 and then to 1 unless the jnz is taken, i.e. it is 1 when the
 * instruction left the zero flag set.
 */
1258#define TEST_ARPL(op, size, op1, op2)\
1259{\
1260    long a, b, c;                               \
1261    a = (op1);                                  \
1262    b = (op2);                                  \
1263    asm volatile(op " %" size "3, %" size "0\n"\
1264                 "movl $0,%1\n"\
1265                 "jnz 1f\n"\
1266                 "movl $1,%1\n"\
1267                 "1:\n"\
1268                 : "=r" (a), "=r" (c) : "0" (a), "r" (b));    \
1269    printf(op size " A=" FMTLX " B=" FMTLX " R=" FMTLX " z=%ld\n",\
1270           (long)(op1), (long)(op2), a, c);\
1271}
1272
/*
 * Segment register tests.
 *
 * Installs two data segments in the LDT (entry 1 based at seg_data1, entry 2
 * based at seg_data2), then reads a marker byte through fs, gs and ds, reads
 * a stack variable through an ebp-based address, loads a far pointer with
 * lfs, and finally runs the lar/lsl/arpl macro tests.  All results are
 * printed; nothing is returned.
 */
1273/* NOTE: we use Linux modify_ldt syscall */
1274void test_segs(void)
1275{
1276    struct modify_ldt_ldt_s ldt;
1277    long long ldt_table[3];
1278    int res, res2;
1279    char tmp;
    /* 48-bit far pointer (32-bit offset followed by 16-bit selector) for lfs */
1280    struct {
1281        uint32_t offset;
1282        uint16_t seg;
1283    } QEMU_PACKED segoff;
1284
1285    ldt.entry_number = 1;
1286    ldt.base_addr = (unsigned long)&seg_data1;
1287    ldt.limit = (sizeof(seg_data1) + 0xfff) >> 12;
1288    ldt.seg_32bit = 1;
1289    ldt.contents = MODIFY_LDT_CONTENTS_DATA;
1290    ldt.read_exec_only = 0;
1291    ldt.limit_in_pages = 1;
1292    ldt.seg_not_present = 0;
1293    ldt.useable = 1;
1294    modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */
1295
1296    ldt.entry_number = 2;
1297    ldt.base_addr = (unsigned long)&seg_data2;
1298    ldt.limit = (sizeof(seg_data2) + 0xfff) >> 12;
1299    ldt.seg_32bit = 1;
1300    ldt.contents = MODIFY_LDT_CONTENTS_DATA;
1301    ldt.read_exec_only = 0;
1302    ldt.limit_in_pages = 1;
1303    ldt.seg_not_present = 0;
1304    ldt.useable = 1;
1305    modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */
1306
1307    modify_ldt(0, &ldt_table, sizeof(ldt_table)); /* read ldt entries */
1308#if 0
1309    {
1310        int i;
1311        for(i=0;i<3;i++)
1312            printf("%d: %016Lx\n", i, ldt_table[i]);
1313    }
1314#endif
1315    /* do some tests with fs or gs */
1316    asm volatile ("movl %0, %%fs" : : "r" (MK_SEL(1)));
1317
    /* distinct marker bytes so the two segments can be told apart */
1318    seg_data1[1] = 0xaa;
1319    seg_data2[1] = 0x55;
1320
1321    asm volatile ("fs movzbl 0x1, %0" : "=r" (res));
1322    printf("FS[1] = %02x\n", res);
1323
    /* gs is saved and restored around the access */
1324    asm volatile ("pushl %%gs\n"
1325                  "movl %1, %%gs\n"
1326                  "gs movzbl 0x1, %0\n"
1327                  "popl %%gs\n"
1328                  : "=r" (res)
1329                  : "r" (MK_SEL(2)));
1330    printf("GS[1] = %02x\n", res);
1331
1332    /* tests with ds/ss (implicit segment case) */
1333    tmp = 0xa5;
    /*
     * ds is temporarily replaced by selector 1 and ebp by the address of
     * tmp; the absolute access goes through ds while the ebp-based access
     * is expected to go through ss (hence the "SS[tmp]" label).
     */
1334    asm volatile ("pushl %%ebp\n\t"
1335                  "pushl %%ds\n\t"
1336                  "movl %2, %%ds\n\t"
1337                  "movl %3, %%ebp\n\t"
1338                  "movzbl 0x1, %0\n\t"
1339                  "movzbl (%%ebp), %1\n\t"
1340                  "popl %%ds\n\t"
1341                  "popl %%ebp\n\t"
1342                  : "=r" (res), "=r" (res2)
1343                  : "r" (MK_SEL(1)), "r" (&tmp));
1344    printf("DS[1] = %02x\n", res);
1345    printf("SS[tmp] = %02x\n", res2);
1346
    /* load fs and a register from the far pointer, then read fs back */
1347    segoff.seg = MK_SEL(2);
1348    segoff.offset = 0xabcdef12;
1349    asm volatile("lfs %2, %0\n\t"
1350                 "movl %%fs, %1\n\t"
1351                 : "=r" (res), "=g" (res2)
1352                 : "m" (segoff));
1353    printf("FS:reg = %04x:%08x\n", res2, res);
1354
    /* lar/lsl on the selector of LDT entry 2 */
1355    TEST_LR("larw", "w", MK_SEL(2), 0x0100);
1356    TEST_LR("larl", "", MK_SEL(2), 0x0100);
1357    TEST_LR("lslw", "w", MK_SEL(2), 0);
1358    TEST_LR("lsll", "", MK_SEL(2), 0);
1359
    /* same instructions with selector 0xfff8 */
1360    TEST_LR("larw", "w", 0xfff8, 0);
1361    TEST_LR("larl", "", 0xfff8, 0);
1362    TEST_LR("lslw", "w", 0xfff8, 0);
1363    TEST_LR("lsll", "", 0xfff8, 0);
1364
    /* arpl with the low two bits of each operand set to 3/1, 1/3 and 1/1 */
1365    TEST_ARPL("arpl", "w", 0x12345678 | 3, 0x762123c | 1);
1366    TEST_ARPL("arpl", "w", 0x12345678 | 1, 0x762123c | 3);
1367    TEST_ARPL("arpl", "w", 0x12345678 | 1, 0x762123c | 1);
1368}
1369
/*
 * The code16_* symbols are defined outside this part of the file
 * (presumably in a separate 16-bit assembly object - TODO confirm).
 * code16_start/code16_end bound the code, code16_func1..3 are entry points.
 */
1370/* 16 bit code test */
1371extern char code16_start, code16_end;
1372extern char code16_func1;
1373extern char code16_func2;
1374extern char code16_func3;
1375
/*
 * Install LDT entry 1 as a non-32-bit code segment whose base is
 * code16_start and whose byte-granular limit is the distance to code16_end,
 * then far-call the three entry points through that selector and print the
 * value each leaves in eax (and, for func2, also ecx).
 */
1376void test_code16(void)
1377{
1378    struct modify_ldt_ldt_s ldt;
1379    int res, res2;
1380
1381    /* build a code segment */
1382    ldt.entry_number = 1;
1383    ldt.base_addr = (unsigned long)&code16_start;
1384    ldt.limit = &code16_end - &code16_start;
1385    ldt.seg_32bit = 0;
1386    ldt.contents = MODIFY_LDT_CONTENTS_CODE;
1387    ldt.read_exec_only = 0;
1388    ldt.limit_in_pages = 0;
1389    ldt.seg_not_present = 0;
1390    ldt.useable = 1;
1391    modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */
1392
1393    /* call the first function */
1394    asm volatile ("lcall %1, %2"
1395                  : "=a" (res)
1396                  : "i" (MK_SEL(1)), "i" (&code16_func1): "memory", "cc");
1397    printf("func1() = 0x%08x\n", res);
    /* second function: also reports a value in ecx, printed as "spdec" */
1398    asm volatile ("lcall %2, %3"
1399                  : "=a" (res), "=c" (res2)
1400                  : "i" (MK_SEL(1)), "i" (&code16_func2): "memory", "cc");
1401    printf("func2() = 0x%08x spdec=%d\n", res, res2);
1402    asm volatile ("lcall %1, %2"
1403                  : "=a" (res)
1404                  : "i" (MK_SEL(1)), "i" (&code16_func3): "memory", "cc");
1405    printf("func3() = 0x%08x\n", res);
1406}
1407#endif
1408
/*
 * Small assembly routines used by test_misc().  Each loads a recognisable
 * constant into eax and returns with a far return (lret/lretq) or, for
 * func_iret, with iret.  func_iret is only defined in the non-x86_64 build.
 */
1409#if defined(__x86_64__)
1410asm(".globl func_lret\n"
1411    "func_lret:\n"
1412    "movl $0x87654641, %eax\n"
1413    "lretq\n");
1414#else
1415asm(".globl func_lret\n"
1416    "func_lret:\n"
1417    "movl $0x87654321, %eax\n"
1418    "lret\n"
1419
1420    ".globl func_iret\n"
1421    "func_iret:\n"
1422    "movl $0xabcd4321, %eax\n"
1423    "iret\n");
1424#endif
1425
/* C-visible names for the assembly labels above */
1426extern char func_lret;
1427extern char func_iret;
1428
/*
 * Miscellaneous instruction tests; every result is printed.
 *
 *  - xlat: table lookup through ebx with al as the index.
 *  - far return / iret: the i386 build pushes cs (and the flags for iret)
 *    and calls func_lret / func_iret.  The x86_64 variants are compiled out
 *    with "#if 0".
 *  - pop with a stack-pointer-relative memory destination, for the native
 *    operand size and, on i386, for a 16-bit pop as well.
 */
1429void test_misc(void)
1430{
1431    char table[256];
1432    long res, i;
1433
1434    for(i=0;i<256;i++) table[i] = 256 - i;
1435    res = 0x12345678;
    /*
     * xlat reads table[] through the pointer in ebx.  The "memory" clobber
     * tells the compiler the asm reads memory, so the stores that fill
     * table[] above cannot be dropped or moved after the instruction.
     */
1436    asm ("xlat" : "=a" (res) : "b" (table), "0" (res) : "memory");
1437    printf("xlat: EAX=" FMTLX "\n", res);
1438
1439#if defined(__x86_64__)
1440#if 0
1441    {
1442        /* XXX: see if Intel Core2 and AMD64 behavior really
1443           differ. Here we implemented the Intel way which is not
1444           compatible yet with QEMU. */
1445        static struct QEMU_PACKED {
1446            uint64_t offset;
1447            uint16_t seg;
1448        } desc;
1449        long cs_sel;
1450
1451        asm volatile ("mov %%cs, %0" : "=r" (cs_sel));
1452
1453        asm volatile ("push %1\n"
1454                      "call func_lret\n"
1455                      : "=a" (res)
1456                      : "r" (cs_sel) : "memory", "cc");
1457        printf("func_lret=" FMTLX "\n", res);
1458
1459        desc.offset = (long)&func_lret;
1460        desc.seg = cs_sel;
1461
1462        asm volatile ("xor %%rax, %%rax\n"
1463                      "rex64 lcall *(%%rcx)\n"
1464                      : "=a" (res)
1465                      : "c" (&desc)
1466                      : "memory", "cc");
1467        printf("func_lret2=" FMTLX "\n", res);
1468
1469        asm volatile ("push %2\n"
1470                      "mov $ 1f, %%rax\n"
1471                      "push %%rax\n"
1472                      "rex64 ljmp *(%%rcx)\n"
1473                      "1:\n"
1474                      : "=a" (res)
1475                      : "c" (&desc), "b" (cs_sel)
1476                      : "memory", "cc");
1477        printf("func_lret3=" FMTLX "\n", res);
1478    }
1479#endif
1480#else
    /* push cs first so the near call leaves a far return frame for lret */
1481    asm volatile ("push %%cs ; call %1"
1482                  : "=a" (res)
1483                  : "m" (func_lret): "memory", "cc");
1484    printf("func_lret=" FMTLX "\n", res);
1485
    /* same idea for iret, which additionally pops the flags */
1486    asm volatile ("pushf ; push %%cs ; call %1"
1487                  : "=a" (res)
1488                  : "m" (func_iret): "memory", "cc");
1489    printf("func_iret=" FMTLX "\n", res);
1490#endif
1491
1492#if defined(__x86_64__)
1493    /* specific popl test */
1494    asm volatile ("push $12345432 ; push $0x9abcdef ; pop (%%rsp) ; pop %0"
1495                  : "=g" (res));
1496    printf("popl esp=" FMTLX "\n", res);
1497#else
1498    /* specific popl test */
1499    asm volatile ("pushl $12345432 ; pushl $0x9abcdef ; popl (%%esp) ; popl %0"
1500                  : "=g" (res));
1501    printf("popl esp=" FMTLX "\n", res);
1502
1503    /* specific popw test */
1504    asm volatile ("pushl $12345432 ; pushl $0x9abcdef ; popw (%%esp) ; addl $2, %%esp ; popl %0"
1505                  : "=g" (res));
1506    printf("popw esp=" FMTLX "\n", res);
1507#endif
1508}
1509
/* Buffer the string-instruction tests read from and write to. */
1510uint8_t str_buffer[4096];
1511
/*
 * TEST_STRING1(OP, size, DF, REP): run one string instruction and print the
 * resulting register and flag state.
 *
 *   OP   - instruction mnemonic without size suffix (stringified with #OP)
 *   size - size suffix string ("b", "w", "l", "q")
 *   DF   - instruction executed beforehand to set the direction flag
 *          ("std"), or "" to leave it as cleared by the popf
 *   REP  - repeat prefix string including its trailing space, or ""
 *
 * esi points at the middle of str_buffer, edi 16 bytes beyond it, eax is
 * i2l(0x12345678) and ecx is 17.  The flags are zeroed with push $0/popf
 * before the instruction, the direction flag is cleared again with cld
 * afterwards, and the flags are then captured with pushf/pop.  Only the
 * C, P, Z, S, O and A bits are printed.
 *
 * NOTE(review): the asm declares no "memory" clobber although the string
 * instructions access str_buffer through esi/edi - confirm this is safe for
 * the compilers in use.
 */
1512#define TEST_STRING1(OP, size, DF, REP)\
1513{\
1514    long esi, edi, eax, ecx, eflags;\
1515\
1516    esi = (long)(str_buffer + sizeof(str_buffer) / 2);\
1517    edi = (long)(str_buffer + sizeof(str_buffer) / 2) + 16;\
1518    eax = i2l(0x12345678);\
1519    ecx = 17;\
1520\
1521    asm volatile ("push $0\n\t"\
1522                  "popf\n\t"\
1523                  DF "\n\t"\
1524                  REP #OP size "\n\t"\
1525                  "cld\n\t"\
1526                  "pushf\n\t"\
1527                  "pop %4\n\t"\
1528                  : "=S" (esi), "=D" (edi), "=a" (eax), "=c" (ecx), "=g" (eflags)\
1529                  : "0" (esi), "1" (edi), "2" (eax), "3" (ecx));\
1530    printf("%-10s ESI=" FMTLX " EDI=" FMTLX " EAX=" FMTLX " ECX=" FMTLX " EFL=%04x\n",\
1531           REP #OP size, esi, edi, eax, ecx,\
1532           (int)(eflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)));\
1533}
1534
/*
 * TEST_STRING(OP, REP): run TEST_STRING1 for every operand size ("b", "w",
 * "l", plus "q" when X86_64_ONLY expands its argument), first with the
 * direction flag left clear (DF = "") and then with it set (DF = "std").
 */
1535#define TEST_STRING(OP, REP)\
1536    TEST_STRING1(OP, "b", "", REP);\
1537    TEST_STRING1(OP, "w", "", REP);\
1538    TEST_STRING1(OP, "l", "", REP);\
1539    X86_64_ONLY(TEST_STRING1(OP, "q", "", REP));\
1540    TEST_STRING1(OP, "b", "std", REP);\
1541    TEST_STRING1(OP, "w", "std", REP);\
1542    TEST_STRING1(OP, "l", "std", REP);\
1543    X86_64_ONLY(TEST_STRING1(OP, "q", "std", REP))
1544
/*
 * String instruction tests.  Fills str_buffer with a byte pattern
 * (index + 0x56, truncated to 8 bits) and then runs stos, lods, movs, scas
 * and cmps through TEST_STRING, both without a prefix and with the rep,
 * repz or repnz prefixes listed below.
 */
1545void test_string(void)
1546{
1547    int i;
1548    for(i = 0;i < sizeof(str_buffer); i++)
1549        str_buffer[i] = i + 0x56;
1550   TEST_STRING(stos, "");
1551   TEST_STRING(stos, "rep ");
1552   TEST_STRING(lods, ""); /* to verify stos */
1553   TEST_STRING(lods, "rep ");
1554   TEST_STRING(movs, "");
1555   TEST_STRING(movs, "rep ");
1556   TEST_STRING(lods, ""); /* to verify stos (presumably meant "movs", which it follows) */
1557
1558   /* XXX: better tests */
1559   TEST_STRING(scas, "");
1560   TEST_STRING(scas, "repz ");
1561   TEST_STRING(scas, "repnz ");
1562   TEST_STRING(cmps, "");
1563   TEST_STRING(cmps, "repz ");
1564   TEST_STRING(cmps, "repnz ");
1565}
1566
1567#ifdef TEST_VM86
1568/* VM86 test */
1569
/*
 * Set bit number `bit` in the byte array `a`: byte index is bit / 8 and the
 * position inside that byte is bit % 8, counting from the least significant
 * bit.  Other bits are left unchanged.
 */
1570static inline void set_bit(uint8_t *a, unsigned int bit)
1571{
1572    a[bit / 8] |= (1 << (bit % 8));
1573}
1574
/*
 * Convert a segment:offset pair into a pointer: the segment value is
 * shifted left by four and added to the low 16 bits of `reg`.  The sum is
 * used directly as an address.
 */
1575static inline uint8_t *seg_to_linear(unsigned int seg, unsigned int reg)
1576{
1577    return (uint8_t *)((seg << 4) + (reg & 0xffff));
1578}
1579
/*
 * Push a 16-bit value on the stack described by the register set `r`: the
 * low 16 bits of esp are decremented by two with wrap-around (the upper
 * bits are preserved) and `val` is stored as a uint16_t at ss:esp.
 */
1580static inline void pushw(struct vm86_regs *r, int val)
1581{
1582    r->esp = (r->esp & ~0xffff) | ((r->esp - 2) & 0xffff);
1583    *(uint16_t *)seg_to_linear(r->ss, r->esp) = val;
1584}
1585
/*
 * Invoke the vm86 system call directly through syscall(), passing `func`
 * and the context pointer `v86` unchanged.  Returns syscall()'s result
 * converted to int.
 */
1586static inline int vm86(int func, struct vm86plus_struct *v86)
1587{
1588    return syscall(__NR_vm86, func, v86);
1589}
1590
/*
 * Bounds of the code image copied into the vm86 address space by
 * test_vm86(); the symbols are defined outside this part of the file.
 */
1591extern char vm86_code_start;
1592extern char vm86_code_end;
1593
/* Segment and instruction pointer at which that code is placed and started. */
1594#define VM86_CODE_CS 0x100
1595#define VM86_CODE_IP 0x100
1596
/*
 * Run the vm86 test program.
 *
 * Maps 0x110000 bytes at address 0, copies the code between
 * vm86_code_start and vm86_code_end to VM86_CODE_CS:VM86_CODE_IP, marks
 * int 0x21 as revectored and then enters vm86 mode in a loop.  Each return
 * from the vm86 call is dispatched on its type:
 *   - VM86_INTx with int 0x21 emulates a few services selected by ah:
 *     0x00 exit, 0x02 write the character in dl, 0x09 write the
 *     '$'-terminated string at ds:dx, 0xff print edx as a hex number;
 *   - VM86_SIGNAL and VM86_STI are ignored;
 *   - anything else is reported and ends the test.
 * The mapping is released before returning.
 */
1597void test_vm86(void)
1598{
1599    struct vm86plus_struct ctx;
1600    struct vm86_regs *r;
1601    uint8_t *vm86_mem;
1602    int seg, ret;
1603
1604    vm86_mem = mmap((void *)0x00000000, 0x110000,
1605                    PROT_WRITE | PROT_READ | PROT_EXEC,
1606                    MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
1607    if (vm86_mem == MAP_FAILED) {
        /* terminate the line so the message does not merge with later output */
1608        printf("ERROR: could not map vm86 memory\n");
1609        return;
1610    }
1611    memset(&ctx, 0, sizeof(ctx));
1612
1613    /* init basic registers */
1614    r = &ctx.regs;
1615    r->eip = VM86_CODE_IP;
1616    r->esp = 0xfffe;
1617    seg = VM86_CODE_CS;
1618    r->cs = seg;
1619    r->ss = seg;
1620    r->ds = seg;
1621    r->es = seg;
1622    r->fs = seg;
1623    r->gs = seg;
1624    r->eflags = VIF_MASK;
1625
1626    /* move code to proper address. We use the same layout as a .com
1627       dos program. */
1628    memcpy(vm86_mem + (VM86_CODE_CS << 4) + VM86_CODE_IP,
1629           &vm86_code_start, &vm86_code_end - &vm86_code_start);
1630
1631    /* mark int 0x21 as being emulated */
1632    set_bit((uint8_t *)&ctx.int_revectored, 0x21);
1633
1634    for(;;) {
1635        ret = vm86(VM86_ENTER, &ctx);
1636        switch(VM86_TYPE(ret)) {
1637        case VM86_INTx:
1638            {
1639                int int_num, ah, v;
1640
1641                int_num = VM86_ARG(ret);
1642                if (int_num != 0x21)
1643                    goto unknown_int;
                /* service number is in ah (bits 8-15 of eax) */
1644                ah = (r->eax >> 8) & 0xff;
1645                switch(ah) {
1646                case 0x00: /* exit */
1647                    goto the_end;
1648                case 0x02: /* write char */
1649                    {
1650                        uint8_t c = r->edx;
1651                        putchar(c);
1652                    }
1653                    break;
1654                case 0x09: /* write string */
1655                    {
1656                        uint8_t c, *ptr;
1657                        ptr = seg_to_linear(r->ds, r->edx);
1658                        for(;;) {
1659                            c = *ptr++;
1660                            if (c == '$')
1661                                break;
1662                            putchar(c);
1663                        }
                        /* al is set to '$' on return */
1664                        r->eax = (r->eax & ~0xff) | '$';
1665                    }
1666                    break;
1667                case 0xff: /* extension: write eflags number in edx */
1668                    v = (int)r->edx;
1669#ifndef LINUX_VM86_IOPL_FIX
                    /* hide bits 12-13 of the reported value */
1670                    v &= ~0x3000;
1671#endif
1672                    printf("%08x\n", v);
1673                    break;
1674                default:
1675                unknown_int:
1676                    printf("unsupported int 0x%02x\n", int_num);
1677                    goto the_end;
1678                }
1679            }
1680            break;
1681        case VM86_SIGNAL:
1682            /* a signal came, we just ignore that */
1683            break;
1684        case VM86_STI:
1685            break;
1686        default:
1687            printf("ERROR: unhandled vm86 return code (0x%x)\n", ret);
1688            goto the_end;
1689        }
1690    }
1691 the_end:
1692    printf("VM86 end\n");
1693    munmap(vm86_mem, 0x110000);
1694}
1695#endif
1696
1697/* exception tests */
/*
 * Register-index names used to index uc_mcontext.gregs[] in the signal
 * handlers.  On i386, when REG_EAX is not already defined, the REG_*
 * names are mapped onto the unprefixed names (EAX, EBX, ...) - presumably
 * for C library headers that only provide the unprefixed spelling.
 */
1698#if defined(__i386__) && !defined(REG_EAX)
1699#define REG_EAX EAX
1700#define REG_EBX EBX
1701#define REG_ECX ECX
1702#define REG_EDX EDX
1703#define REG_ESI ESI
1704#define REG_EDI EDI
1705#define REG_EBP EBP
1706#define REG_ESP ESP
1707#define REG_EIP EIP
1708#define REG_EFL EFL
1709#define REG_TRAPNO TRAPNO
1710#define REG_ERR ERR
1711#endif
1712
/* On x86_64 the instruction pointer index is REG_RIP; alias it to REG_EIP. */
1713#if defined(__x86_64__)
1714#define REG_EIP REG_RIP
1715#endif
1716
/* Context saved by setjmp() before each faulting test; sig_handler() jumps back to it. */
1717jmp_buf jmp_env;
/* Global scratch integer used by the divide-by-zero and invalid-read tests. */
1718int v1;
/* Bounds pair for the BOUND test; also the buffer for the rep outsb/insb tests. */
1719int tab[2];
1720
/*
 * SA_SIGINFO handler shared by all exception tests.
 *
 * Prints the signal number, errno, code and fault address from `info`,
 * then the trap number, error code and instruction pointer taken from the
 * saved machine context, and finally returns to the test through
 * longjmp(jmp_env, 1) instead of resuming the faulting instruction.
 *
 *   sig  - signal number (unused; info->si_signo is printed instead)
 *   info - signal details supplied by the kernel
 *   puc  - pointer to the interrupted context, accessed as ucontext_t
 *
 * The ucontext_t typedef is used rather than the "struct ucontext" tag
 * because the tag is not available with newer C library headers, while the
 * typedef exists in both old and new ones.
 */
1721void sig_handler(int sig, siginfo_t *info, void *puc)
1722{
1723    ucontext_t *uc = puc;
1724
1725    printf("si_signo=%d si_errno=%d si_code=%d",
1726           info->si_signo, info->si_errno, info->si_code);
1727    printf(" si_addr=0x%08lx",
1728           (unsigned long)info->si_addr);
1729    printf("\n");
1730
1731    printf("trapno=" FMTLX " err=" FMTLX,
1732           (long)uc->uc_mcontext.gregs[REG_TRAPNO],
1733           (long)uc->uc_mcontext.gregs[REG_ERR]);
1734    printf(" EIP=" FMTLX, (long)uc->uc_mcontext.gregs[REG_EIP]);
1735    printf("\n");
1736    longjmp(jmp_env, 1);
1737}
1738
/*
 * Exception reporting tests.
 *
 * Installs sig_handler() for SIGFPE, SIGILL, SIGSEGV, SIGBUS and SIGTRAP
 * (SA_SIGINFO | SA_NODEFER) and then triggers one fault after another.
 * Every faulting statement is wrapped in "if (setjmp(jmp_env) == 0)": the
 * handler prints the signal details and longjmps back, so execution
 * continues with the next test.
 *
 * The "STI exception" case executes sti, matching its label (it used to
 * repeat the cli test).
 */
1739void test_exceptions(void)
1740{
1741    struct sigaction act;
1742    volatile int val;
1743
1744    act.sa_sigaction = sig_handler;
1745    sigemptyset(&act.sa_mask);
1746    act.sa_flags = SA_SIGINFO | SA_NODEFER;
1747    sigaction(SIGFPE, &act, NULL);
1748    sigaction(SIGILL, &act, NULL);
1749    sigaction(SIGSEGV, &act, NULL);
1750    sigaction(SIGBUS, &act, NULL);
1751    sigaction(SIGTRAP, &act, NULL);
1752
1753    /* test division by zero reporting */
1754    printf("DIVZ exception:\n");
1755    if (setjmp(jmp_env) == 0) {
1756        /* now divide by zero */
1757        v1 = 0;
1758        v1 = 2 / v1;
1759    }
1760
1761#if !defined(__x86_64__)
1762    printf("BOUND exception:\n");
1763    if (setjmp(jmp_env) == 0) {
1764        /* bound exception */
1765        tab[0] = 1;
1766        tab[1] = 10;
1767        asm volatile ("bound %0, %1" : : "r" (11), "m" (tab[0]));
1768    }
1769#endif
1770
1771#ifdef TEST_SEGS
1772    printf("segment exceptions:\n");
1773    if (setjmp(jmp_env) == 0) {
1774        /* load an invalid segment */
1775        asm volatile ("movl %0, %%fs" : : "r" ((0x1234 << 3) | 1));
1776    }
1777    if (setjmp(jmp_env) == 0) {
1778        /* null data segment is valid */
1779        asm volatile ("movl %0, %%fs" : : "r" (3));
1780        /* null stack segment */
1781        asm volatile ("movl %0, %%ss" : : "r" (3));
1782    }
1783
    /* rewrite LDT entry 1 as "not present" and try to load it */
1784    {
1785        struct modify_ldt_ldt_s ldt;
1786        ldt.entry_number = 1;
1787        ldt.base_addr = (unsigned long)&seg_data1;
1788        ldt.limit = (sizeof(seg_data1) + 0xfff) >> 12;
1789        ldt.seg_32bit = 1;
1790        ldt.contents = MODIFY_LDT_CONTENTS_DATA;
1791        ldt.read_exec_only = 0;
1792        ldt.limit_in_pages = 1;
1793        ldt.seg_not_present = 1;
1794        ldt.useable = 1;
1795        modify_ldt(1, &ldt, sizeof(ldt)); /* write ldt entry */
1796
1797        if (setjmp(jmp_env) == 0) {
1798            /* segment not present */
1799            asm volatile ("movl %0, %%fs" : : "r" (MK_SEL(1)));
1800        }
1801    }
1802#endif
1803
1804    /* test SEGV reporting */
1805    printf("PF exception:\n");
1806    if (setjmp(jmp_env) == 0) {
1807        val = 1;
1808        /* we add a nop to test a weird PC retrieval case */
1809        asm volatile ("nop");
1810        /* now store in an invalid address */
1811        *(char *)0x1234 = 1;
1812    }
1813
1814    /* test SEGV reporting */
1815    printf("PF exception:\n");
1816    if (setjmp(jmp_env) == 0) {
1817        val = 1;
1818        /* read from an invalid address */
1819        v1 = *(char *)0x1234;
1820    }
1821
1822    /* test illegal instruction reporting */
1823    printf("UD2 exception:\n");
1824    if (setjmp(jmp_env) == 0) {
1825        /* now execute an invalid instruction */
1826        asm volatile("ud2");
1827    }
1828    printf("lock nop exception:\n");
1829    if (setjmp(jmp_env) == 0) {
1830        /* now execute an invalid instruction */
1831        asm volatile(".byte 0xf0, 0x90"); /* lock nop */
1832    }
1833
1834    printf("INT exception:\n");
1835    if (setjmp(jmp_env) == 0) {
1836        asm volatile ("int $0xfd");
1837    }
1838    if (setjmp(jmp_env) == 0) {
1839        asm volatile ("int $0x01");
1840    }
1841    if (setjmp(jmp_env) == 0) {
        /* "int $3" emitted as raw bytes 0xcd 0x03 */
1842        asm volatile (".byte 0xcd, 0x03");
1843    }
1844    if (setjmp(jmp_env) == 0) {
1845        asm volatile ("int $0x04");
1846    }
1847    if (setjmp(jmp_env) == 0) {
1848        asm volatile ("int $0x05");
1849    }
1850
1851    printf("INT3 exception:\n");
1852    if (setjmp(jmp_env) == 0) {
1853        asm volatile ("int3");
1854    }
1855
1856    printf("CLI exception:\n");
1857    if (setjmp(jmp_env) == 0) {
1858        asm volatile ("cli");
1859    }
1860
1861    printf("STI exception:\n");
1862    if (setjmp(jmp_env) == 0) {
1863        asm volatile ("sti");
1864    }
1865
1866#if !defined(__x86_64__)
1867    printf("INTO exception:\n");
1868    if (setjmp(jmp_env) == 0) {
1869        /* overflow exception */
1870        asm volatile ("addl $1, %0 ; into" : : "r" (0x7fffffff));
1871    }
1872#endif
1873
1874    printf("OUTB exception:\n");
1875    if (setjmp(jmp_env) == 0) {
1876        asm volatile ("outb %%al, %%dx" : : "d" (0x4321), "a" (0));
1877    }
1878
1879    printf("INB exception:\n");
1880    if (setjmp(jmp_env) == 0) {
1881        asm volatile ("inb %%dx, %%al" : "=a" (val) : "d" (0x4321));
1882    }
1883
1884    printf("REP OUTSB exception:\n");
1885    if (setjmp(jmp_env) == 0) {
1886        asm volatile ("rep outsb" : : "d" (0x4321), "S" (tab), "c" (1));
1887    }
1888
1889    printf("REP INSB exception:\n");
1890    if (setjmp(jmp_env) == 0) {
1891        asm volatile ("rep insb" : : "d" (0x4321), "D" (tab), "c" (1));
1892    }
1893
1894    printf("HLT exception:\n");
1895    if (setjmp(jmp_env) == 0) {
1896        asm volatile ("hlt");
1897    }
1898
1899    printf("single step exception:\n");
1900    val = 0;
1901    if (setjmp(jmp_env) == 0) {
        /*
         * Set bit 0x100 in the saved flags image and reload it; the two
         * stores to val that follow show how far execution got before the
         * handler jumped back.
         */
1902        asm volatile ("pushf\n"
1903                      "orl $0x00100, (%%esp)\n"
1904                      "popf\n"
1905                      "movl $0xabcd, %0\n"
1906                      "movl $0x0, %0\n" : "=m" (val) : : "cc", "memory");
1907    }
1908    printf("val=0x%x\n", val);
1909}
1910
1911#if !defined(__x86_64__)
1912/* specific precise single step test */
/*
 * SIGTRAP handler for test_single_step(): prints the instruction pointer
 * stored in the interrupted context and returns, so execution resumes where
 * it was interrupted.
 *
 *   sig, info - unused
 *   puc       - pointer to the interrupted context, accessed as ucontext_t
 *               (the typedef, since the "struct ucontext" tag is not
 *               available with newer C library headers)
 */
1913void sig_trap_handler(int sig, siginfo_t *info, void *puc)
1914{
1915    ucontext_t *uc = puc;
1916    printf("EIP=" FMTLX "\n", (long)uc->uc_mcontext.gregs[REG_EIP]);
1917}
1918
/* Source and destination buffers for the movsb/cmpsb parts of the test. */
1919const uint8_t sstep_buf1[4] = { 1, 2, 3, 4};
1920uint8_t sstep_buf2[4];
1921
/*
 * Precise single-step test (i386 build only).
 *
 * Installs sig_trap_handler() for SIGTRAP, sets bit 0x100 in the flags and
 * runs a fixed instruction sequence with it set; the handler prints the
 * instruction pointer each time it is invoked.  The sequence covers a
 * counted loop, rep movsb and rep cmpsb with several counts, an int $0x80
 * system call and three different ways of loading ss.  The flag bit is
 * cleared again at the end, and val and the contents of sstep_buf2 are
 * printed.
 */
1922void test_single_step(void)
1923{
1924    struct sigaction act;
1925    volatile int val;
1926    int i;
1927
1928    val = 0;
1929    act.sa_sigaction = sig_trap_handler;
1930    sigemptyset(&act.sa_mask);
1931    act.sa_flags = SA_SIGINFO;
1932    sigaction(SIGTRAP, &act, NULL);
1933    asm volatile ("pushf\n"
1934                  "orl $0x00100, (%%esp)\n"
1935                  "popf\n"
1936                  "movl $0xabcd, %0\n"
1937
1938                  /* jmp test */
1939                  "movl $3, %%ecx\n"
1940                  "1:\n"
1941                  "addl $1, %0\n"
1942                  "decl %%ecx\n"
1943                  "jnz 1b\n"
1944
1945                  /* movsb: the single step should stop at each movsb iteration */
1946                  "movl $sstep_buf1, %%esi\n"
1947                  "movl $sstep_buf2, %%edi\n"
1948                  "movl $0, %%ecx\n"
1949                  "rep movsb\n"
1950                  "movl $3, %%ecx\n"
1951                  "rep movsb\n"
1952                  "movl $1, %%ecx\n"
1953                  "rep movsb\n"
1954
1955                  /* cmpsb: the single step should stop at each cmpsb iteration */
1956                  "movl $sstep_buf1, %%esi\n"
1957                  "movl $sstep_buf2, %%edi\n"
1958                  "movl $0, %%ecx\n"
1959                  "rep cmpsb\n"
1960                  "movl $4, %%ecx\n"
1961                  "rep cmpsb\n"
1962
1963                  /* getpid() syscall: single step should skip one
1964                     instruction */
1965                  "movl $20, %%eax\n"
1966                  "int $0x80\n"
1967                  "movl $0, %%eax\n"
1968
1969                  /* when modifying SS, trace is not done on the next
1970                     instruction */
1971                  "movl %%ss, %%ecx\n"
1972                  "movl %%ecx, %%ss\n"
1973                  "addl $1, %0\n"
1974                  "movl $1, %%eax\n"
1975                  "movl %%ecx, %%ss\n"
1976                  "jmp 1f\n"
1977                  "addl $1, %0\n"
1978                  "1:\n"
1979                  "movl $1, %%eax\n"
1980                  "pushl %%ecx\n"
1981                  "popl %%ss\n"
1982                  "addl $1, %0\n"
1983                  "movl $1, %%eax\n"
1984
1985                  "pushf\n"
1986                  "andl $~0x00100, (%%esp)\n"
1987                  "popf\n"
1988                  : "=m" (val)
1989                  :
1990                  : "cc", "memory", "eax", "ecx", "esi", "edi");
1991    printf("val=%d\n", val);
1992    for(i = 0; i < 4; i++)
1993        printf("sstep_buf2[%d] = %d\n", i, sstep_buf2[i]);
1994}
1995
1996/* self modifying code test */
/*
 * Machine code for a tiny function that returns an immediate value.  It is
 * kept in a writable array so the test can patch the immediate (byte 1)
 * between calls.
 */
1997uint8_t code[] = {
1998    0xb8, 0x1, 0x00, 0x00, 0x00, /* movl $1, %eax */
1999    0xc3, /* ret */
2000};
2001
/*
 * smc_code2: placed in the .data section.  It loads its stack argument and
 * stores it at smc_patch_addr2 + 1, i.e. into the immediate field of the
 * "movl $1, %eax" located eight nops further on, which it then executes
 * before returning.
 */
2002asm(".section \".data\"\n"
2003    "smc_code2:\n"
2004    "movl 4(%esp), %eax\n"
2005    "movl %eax, smc_patch_addr2 + 1\n"
2006    "nop\n"
2007    "nop\n"
2008    "nop\n"
2009    "nop\n"
2010    "nop\n"
2011    "nop\n"
2012    "nop\n"
2013    "nop\n"
2014    "smc_patch_addr2:\n"
2015    "movl $1, %eax\n"
2016    "ret\n"
2017    ".previous\n"
2018    );
2019
/* Signature used to call the bytes in code[] as a function. */
2020typedef int FuncType(void);
/* Assembly routine defined in the asm block above. */
2021extern int smc_code2(int);
/*
 * Self-modifying code test.  Calls the code[] array as a function, then
 * patches its immediate byte to 2, 3 and 4 and calls it again after each
 * change, printing every result.  Afterwards calls smc_code2() with 2, 3
 * and 4, which patches an instruction immediately ahead of the one doing
 * the patching.
 */
2022void test_self_modifying_code(void)
2023{
2024    int i;
2025    printf("self modifying code:\n");
2026    printf("func1 = 0x%x\n", ((FuncType *)code)());
2027    for(i = 2; i <= 4; i++) {
2028        code[1] = i;
2029        printf("func%d = 0x%x\n", i, ((FuncType *)code)());
2030    }
2031
2032    /* more difficult test : the modified code is just after the
2033       modifying instruction. It is forbidden in Intel specs, but it
2034       is used by old DOS programs */
2035    for(i = 2; i <= 4; i++) {
2036        printf("smc_code2(%d) = %d\n", i, smc_code2(i));
2037    }
2038}
2039#endif
2040
/* Scratch memory used as a private stack by TEST_ENTER. */
2041long enter_stack[4096];
2042
/*
 * Names of the stack and frame pointer registers for the current build, as
 * strings for use in asm templates (the '%' is doubled for extended asm).
 */
2043#if defined(__x86_64__)
2044#define RSP "%%rsp"
2045#define RBP "%%rbp"
2046#else
2047#define RSP "%%esp"
2048#define RBP "%%ebp"
2049#endif
2050
/*
 * TEST_ENTER(size, stack_type, level): execute "enter<size> $8, $<level>"
 * on a private stack and print the resulting stack and frame pointers and
 * the stack contents.
 *
 *   size       - instruction size suffix string ("q", "l" or "w")
 *   stack_type - C type of one stack slot for that size
 *   level      - nesting level operand of the instruction
 *
 * In the non-x86_64 build the macro expands to nothing (see the note
 * below).  Otherwise it clears enter_stack, writes the values 1..32
 * downward from its end, saves the real stack and frame pointers, switches
 * to the private stack with the frame pointer at its top, executes the
 * instruction, reads both registers back and restores the real ones.  The
 * printed pointer values are relative to the end of the private stack.
 */
2051#if !defined(__x86_64__)
2052/* causes an infinite loop, disable it for now.  */
2053#define TEST_ENTER(size, stack_type, level)
2054#else
2055#define TEST_ENTER(size, stack_type, level)\
2056{\
2057    long esp_save, esp_val, ebp_val, ebp_save, i;\
2058    stack_type *ptr, *stack_end, *stack_ptr;\
2059    memset(enter_stack, 0, sizeof(enter_stack));\
2060    stack_end = stack_ptr = (stack_type *)(enter_stack + 4096);\
2061    ebp_val = (long)stack_ptr;\
2062    for(i=1;i<=32;i++)\
2063       *--stack_ptr = i;\
2064    esp_val = (long)stack_ptr;\
2065    asm("mov " RSP ", %[esp_save]\n"\
2066        "mov " RBP ", %[ebp_save]\n"\
2067        "mov %[esp_val], " RSP "\n"\
2068        "mov %[ebp_val], " RBP "\n"\
2069        "enter" size " $8, $" #level "\n"\
2070        "mov " RSP ", %[esp_val]\n"\
2071        "mov " RBP ", %[ebp_val]\n"\
2072        "mov %[esp_save], " RSP "\n"\
2073        "mov %[ebp_save], " RBP "\n"\
2074        : [esp_save] "=r" (esp_save),\
2075        [ebp_save] "=r" (ebp_save),\
2076        [esp_val] "=r" (esp_val),\
2077        [ebp_val] "=r" (ebp_val)\
2078        :  "[esp_val]" (esp_val),\
2079        "[ebp_val]" (ebp_val));\
2080    printf("level=%d:\n", level);\
2081    printf("esp_val=" FMTLX "\n", esp_val - (long)stack_end);\
2082    printf("ebp_val=" FMTLX "\n", ebp_val - (long)stack_end);\
2083    for(ptr = (stack_type *)esp_val; ptr < stack_end; ptr++)\
2084        printf(FMTLX "\n", (long)ptr[0]);\
2085}
2086#endif
2087
2088static void test_enter(void)
2089{
2090#if defined(__x86_64__)
2091    TEST_ENTER("q", uint64_t, 0);
2092    TEST_ENTER("q", uint64_t, 1);
2093    TEST_ENTER("q", uint64_t, 2);
2094    TEST_ENTER("q", uint64_t, 31);
2095#else
2096    TEST_ENTER("l", uint32_t, 0);
2097    TEST_ENTER("l", uint32_t, 1);
2098    TEST_ENTER("l", uint32_t, 2);
2099    TEST_ENTER("l", uint32_t, 31);
2100#endif
2101
2102    TEST_ENTER("w", uint16_t, 0);
2103    TEST_ENTER("w", uint16_t, 1);
2104    TEST_ENTER("w", uint16_t, 2);
2105    TEST_ENTER("w", uint16_t, 31);
2106}
2107
2108#ifdef TEST_SSE
2109
/* Vector types declared locally through GCC mode attributes; they serve as
   the operand types for the MMX ("y") and SSE ("x") asm constraints below. */
2110typedef int __m64 __attribute__ ((__mode__ (__V2SI__)));
2111typedef float __m128 __attribute__ ((__mode__(__V4SF__)));
2112
/* One 128-bit register image, viewable as doubles, floats, 32-bit words,
   64-bit words or a single vector value. */
2113typedef union {
2114    double d[2];      /* two doubles */
2115    float s[4];       /* four floats */
2116    uint32_t l[4];    /* four 32-bit words */
2117    uint64_t q[2];    /* two 64-bit words */
2118    __m128 dq;        /* the whole value, as passed to the asm statements */
2119} XMMReg;
2120
/* Fixed 128-bit operand patterns, 16-byte aligned. The test macros copy
   element [n][0] into q[0] and element [n][1] into q[1] of an XMMReg. */
2121static uint64_t __attribute__((aligned(16))) test_values[4][2] = {
2122    { 0x456723c698694873, 0xdc515cff944a58ec },
2123    { 0x1f297ccd58bad7ab, 0x41f21efba9e3e146 },
2124    { 0x007c62c2085427f8, 0x231be9e8cde7438d },
2125    { 0x0f76255a085427f8, 0xc233e9e8c4c9439a },
2126};
2127
/*
 * SSE_OP(op): execute "op b, a" on xmm registers and print both operands and
 * the result as 128-bit hex values (high quadword first).
 * Expects XMMReg variables a, b and r in the invoking scope; a and b must
 * already hold the operands, r receives the result.
 */
#define SSE_OP(op) \
{ \
    asm volatile (#op " %2, %0" \
                  : "=x" (r.dq) \
                  : "0" (a.dq), "x" (b.dq)); \
    printf("%-9s: a=" FMT64X FMT64X " b=" FMT64X FMT64X " r=" FMT64X FMT64X "\n", \
           #op, \
           a.q[1], a.q[0], b.q[1], b.q[0], r.q[1], r.q[0]); \
}
2137
/*
 * SSE_OP2(op): run SSE_OP(op) twice, taking a/b from test_values rows 0/1 on
 * the first pass and rows 2/3 on the second.
 * Expects XMMReg variables a, b and r in the invoking scope.
 */
#define SSE_OP2(op) \
{ \
    int pass; \
    for (pass = 0; pass < 2; pass++) { \
        a.q[0] = test_values[2 * pass][0]; \
        a.q[1] = test_values[2 * pass][1]; \
        b.q[0] = test_values[2 * pass + 1][0]; \
        b.q[1] = test_values[2 * pass + 1][1]; \
        SSE_OP(op); \
    } \
}
2149
/*
 * MMX_OP2(op): test `op` in both register files.  The 64-bit form runs on mmx
 * registers with the low quadwords of test_values rows 0/1 and then 2/3,
 * printing operands and result each time; afterwards the 128-bit form is run
 * through SSE_OP2.
 * Expects XMMReg variables a, b and r in the invoking scope.
 */
#define MMX_OP2(op) \
{ \
    int pass; \
    for (pass = 0; pass < 2; pass++) { \
        a.q[0] = test_values[2 * pass][0]; \
        b.q[0] = test_values[2 * pass + 1][0]; \
        asm volatile (#op " %2, %0" \
                      : "=y" (r.q[0]) \
                      : "0" (a.q[0]), "y" (b.q[0])); \
        printf("%-9s: a=" FMT64X " b=" FMT64X " r=" FMT64X "\n", \
               #op, a.q[0], b.q[0], r.q[0]); \
    } \
    SSE_OP2(op); \
}
2165
/*
 * SHUF_OP(op, ib): execute "op $ib, b, a" with a and b loaded from
 * test_values rows 0 and 1, then print the operands, the immediate and the
 * result.  `ib` must be a literal: it is stringified into the instruction.
 * Expects XMMReg variables a, b and r in the invoking scope.
 */
#define SHUF_OP(op, ib) \
{ \
    a.q[0] = test_values[0][0]; \
    a.q[1] = test_values[0][1]; \
    b.q[0] = test_values[1][0]; \
    b.q[1] = test_values[1][1]; \
    asm volatile (#op " $" #ib ", %2, %0" \
                  : "=x" (r.dq) \
                  : "0" (a.dq), "x" (b.dq)); \
    printf("%-9s: a=" FMT64X FMT64X " b=" FMT64X FMT64X " ib=%02x r=" FMT64X FMT64X "\n", \
           #op, \
           a.q[1], a.q[0], b.q[1], b.q[0], \
           ib, \
           r.q[1], r.q[0]); \
}
2180
/*
 * PSHUF_OP(op, ib): execute "op $ib, a, r" for a taken from test_values rows
 * 0 and 2, printing the source, the immediate and the result each time.
 * `ib` must be a literal: it is stringified into the instruction.
 * Expects XMMReg variables a and r in the invoking scope.
 */
#define PSHUF_OP(op, ib) \
{ \
    int pass; \
    for (pass = 0; pass < 2; pass++) { \
        a.q[0] = test_values[2 * pass][0]; \
        a.q[1] = test_values[2 * pass][1]; \
        asm volatile (#op " $" #ib ", %1, %0" \
                      : "=x" (r.dq) \
                      : "x" (a.dq)); \
        printf("%-9s: a=" FMT64X FMT64X " ib=%02x r=" FMT64X FMT64X "\n", \
               #op, a.q[1], a.q[0], ib, r.q[1], r.q[0]); \
    } \
}
2195
/*
 * SHIFT_IM(op, ib): execute the immediate-count form "op $ib, a" in place for
 * a taken from test_values rows 0 and 2, printing the source, the count and
 * the result each time.  `ib` must be a literal: it is stringified.
 * Expects XMMReg variables a and r in the invoking scope.
 */
#define SHIFT_IM(op, ib) \
{ \
    int pass; \
    for (pass = 0; pass < 2; pass++) { \
        a.q[0] = test_values[2 * pass][0]; \
        a.q[1] = test_values[2 * pass][1]; \
        asm volatile (#op " $" #ib ", %0" \
                      : "=x" (r.dq) \
                      : "0" (a.dq)); \
        printf("%-9s: a=" FMT64X FMT64X " ib=%02x r=" FMT64X FMT64X "\n", \
               #op, a.q[1], a.q[0], ib, r.q[1], r.q[0]); \
    } \
}
2210
/*
 * SHIFT_OP(op, ib): test a shift with both count encodings.  The immediate
 * form runs first through SHIFT_IM; then the register form "op b, a" runs
 * with the count `ib` placed in the low quadword of b (high quadword zero),
 * for a taken from test_values rows 0 and 2.
 * Expects XMMReg variables a, b and r in the invoking scope.
 */
#define SHIFT_OP(op, ib) \
{ \
    int pass; \
    SHIFT_IM(op, ib); \
    for (pass = 0; pass < 2; pass++) { \
        a.q[0] = test_values[2 * pass][0]; \
        a.q[1] = test_values[2 * pass][1]; \
        b.q[0] = ib; \
        b.q[1] = 0; \
        asm volatile (#op " %2, %0" \
                      : "=x" (r.dq) \
                      : "0" (a.dq), "x" (b.dq)); \
        printf("%-9s: a=" FMT64X FMT64X " b=" FMT64X FMT64X " r=" FMT64X FMT64X "\n", \
               #op, \
               a.q[1], a.q[0], b.q[1], b.q[0], r.q[1], r.q[0]); \
    } \
}
2228
/*
 * MOVMSK(op): execute "op a, <general register>" for a taken from test_values
 * rows 0 and 2, printing the source and the 32-bit result each time.
 * Expects an XMMReg variable a in the invoking scope.
 */
#define MOVMSK(op) \
{ \
    int pass, mask; \
    for (pass = 0; pass < 2; pass++) { \
        a.q[0] = test_values[2 * pass][0]; \
        a.q[1] = test_values[2 * pass][1]; \
        asm volatile (#op " %1, %0" \
                      : "=r" (mask) \
                      : "x" (a.dq)); \
        printf("%-9s: a=" FMT64X FMT64X " r=%08x\n", \
               #op, a.q[1], a.q[0], mask); \
    } \
}
2242
/* SSE_OPS(a): run SSE_OP on the "ps" and "ss" variants of mnemonic stem `a`. */
#define SSE_OPS(a) SSE_OP(a ## ps); SSE_OP(a ## ss);

/* SSE_OPD(a): run SSE_OP on the "pd" and "sd" variants of mnemonic stem `a`. */
#define SSE_OPD(a) SSE_OP(a ## pd); SSE_OP(a ## sd);
2250
2251#define SSE_COMI(op, field)\
2252{\
2253    unsigned int eflags;\
2254    XMMReg a, b;\
2255    a.field[0] = a1;\
2256    b.field[0] = b1;\
2257    asm volatile (#op " %2, %1\n"\
2258        "pushf\n"\
2259        "pop %0\n"\
2260        : "=m" (eflags)\
2261        : "x" (a.dq), "x" (b.dq));\
2262    printf("%-9s: a=%f b=%f cc=%04x\n",\
2263           #op, a1, b1,\
2264           eflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));\
2265}
2266
2267void test_sse_comi(double a1, double b1)
2268{
2269    SSE_COMI(ucomiss, s);
2270    SSE_COMI(ucomisd, d);
2271    SSE_COMI(comiss, s);
2272    SSE_COMI(comisd, d);
2273}
2274
/*
 * CVT_OP_XMM(op): execute the xmm-to-xmm conversion "op a, r" and print the
 * source and result as 128-bit hex values.
 * Expects XMMReg variables a and r in the invoking scope.
 */
#define CVT_OP_XMM(op) \
{ \
    asm volatile (#op " %1, %0" \
                  : "=x" (r.dq) \
                  : "x" (a.dq)); \
    printf("%-9s: a=" FMT64X FMT64X " r=" FMT64X FMT64X "\n", \
           #op, a.q[1], a.q[0], r.q[1], r.q[0]); \
}
2283
/*
 * CVT_OP_XMM2MMX(op): execute the xmm-to-mmx conversion "op a, r.q[0]", leave
 * MMX state with emms, then print the source and the 64-bit result.
 *
 * %xmm0 is listed as clobbered so the source operand is never placed in it;
 * this avoids the case where both register indexes are 0 and so tests
 * instruction decoding more extensively.
 * Expects XMMReg variables a and r in the invoking scope.
 */
#define CVT_OP_XMM2MMX(op) \
{ \
    asm volatile (#op " %1, %0" \
                  : "=y" (r.q[0]) \
                  : "x" (a.dq) \
                  : "%xmm0"); \
    asm volatile("emms\n"); \
    printf("%-9s: a=" FMT64X FMT64X " r=" FMT64X "\n", \
           #op, a.q[1], a.q[0], r.q[0]); \
}
2296
/*
 * CVT_OP_MMX2XMM(op): execute the mmx-to-xmm conversion "op a.q[0], r", leave
 * MMX state with emms, then print the 64-bit source and the 128-bit result.
 * Expects XMMReg variables a and r in the invoking scope.
 */
#define CVT_OP_MMX2XMM(op) \
{ \
    asm volatile (#op " %1, %0" \
                  : "=x" (r.dq) \
                  : "y" (a.q[0])); \
    asm volatile("emms\n"); \
    printf("%-9s: a=" FMT64X " r=" FMT64X FMT64X "\n", \
           #op, a.q[0], r.q[1], r.q[0]); \
}
2306
/*
 * CVT_OP_REG2XMM(op): execute "op a.l[0], r" with the source in a general
 * register, then print the 32-bit source and the 128-bit result.
 * Expects XMMReg variables a and r in the invoking scope.
 */
#define CVT_OP_REG2XMM(op) \
{ \
    asm volatile (#op " %1, %0" \
                  : "=x" (r.dq) \
                  : "r" (a.l[0])); \
    printf("%-9s: a=%08x r=" FMT64X FMT64X "\n", \
           #op, a.l[0], r.q[1], r.q[0]); \
}
2315
/*
 * CVT_OP_XMM2REG(op): execute "op a, r.l[0]" with the destination in a
 * general register, then print the 128-bit source and the 32-bit result.
 * Expects XMMReg variables a and r in the invoking scope.
 */
#define CVT_OP_XMM2REG(op) \
{ \
    asm volatile (#op " %1, %0" \
                  : "=r" (r.l[0]) \
                  : "x" (a.dq)); \
    printf("%-9s: a=" FMT64X FMT64X " r=%08x\n", \
           #op, a.q[1], a.q[0], r.l[0]); \
}
2324
/*
 * In-memory image written by fxsave and read by fxrstor (512 bytes).
 *
 * xmm_regs has room for 16 registers because test_fxsave dumps 16 of them on
 * x86-64; a 32-bit build only looks at the first 8.  The trailing padding is
 * sized so the structure stays exactly 512 bytes with every field at the same
 * offset as before.
 */
struct fpxstate {
    uint16_t fpuc;              /* control word, printed as "fpuc" */
    uint16_t fpus;              /* status word, printed as "fpus" */
    uint16_t fptag;             /* tag word, printed as "fptag" */
    uint16_t fop;
    uint32_t fpuip;
    uint16_t cs_sel;
    uint16_t dummy0;
    uint32_t fpudp;
    uint16_t ds_sel;
    uint16_t dummy1;
    uint32_t mxcsr;
    uint32_t mxcsr_mask;
    uint8_t fpregs1[8 * 16];    /* x87 registers, one per 16-byte slot */
    uint8_t xmm_regs[16 * 16];  /* xmm registers, one per 16-byte slot */
    uint8_t dummy2[96];         /* padding up to 512 bytes */
};

/* The two save areas used by test_fxsave; both are 16-byte aligned. */
static struct fpxstate fpx_state __attribute__((aligned(16)));
static struct fpxstate fpx_state2 __attribute__((aligned(16)));
2345
2346void test_fxsave(void)
2347{
2348    struct fpxstate *fp = &fpx_state;
2349    struct fpxstate *fp2 = &fpx_state2;
2350    int i, nb_xmm;
2351    XMMReg a, b;
2352    a.q[0] = test_values[0][0];
2353    a.q[1] = test_values[0][1];
2354    b.q[0] = test_values[1][0];
2355    b.q[1] = test_values[1][1];
2356
2357    asm("movdqa %2, %%xmm0\n"
2358        "movdqa %3, %%xmm7\n"
2359#if defined(__x86_64__)
2360        "movdqa %2, %%xmm15\n"
2361#endif
2362        " fld1\n"
2363        " fldpi\n"
2364        " fldln2\n"
2365        " fxsave %0\n"
2366        " fxrstor %0\n"
2367        " fxsave %1\n"
2368        " fninit\n"
2369        : "=m" (*(uint32_t *)fp2), "=m" (*(uint32_t *)fp)
2370        : "m" (a), "m" (b));
2371    printf("fpuc=%04x\n", fp->fpuc);
2372    printf("fpus=%04x\n", fp->fpus);
2373    printf("fptag=%04x\n", fp->fptag);
2374    for(i = 0; i < 3; i++) {
2375        printf("ST%d: " FMT64X " %04x\n",
2376               i,
2377               *(uint64_t *)&fp->fpregs1[i * 16],
2378               *(uint16_t *)&fp->fpregs1[i * 16 + 8]);
2379    }
2380    printf("mxcsr=%08x\n", fp->mxcsr & 0x1f80);
2381#if defined(__x86_64__)
2382    nb_xmm = 16;
2383#else
2384    nb_xmm = 8;
2385#endif
2386    for(i = 0; i < nb_xmm; i++) {
2387        printf("xmm%d: " FMT64X "" FMT64X "\n",
2388               i,
2389               *(uint64_t *)&fp->xmm_regs[i * 16],
2390               *(uint64_t *)&fp->xmm_regs[i * 16 + 8]);
2391    }
2392}
2393
2394void test_sse(void)
2395{
2396    XMMReg r, a, b;
2397    int i;
2398
2399    MMX_OP2(punpcklbw);
2400    MMX_OP2(punpcklwd);
2401    MMX_OP2(punpckldq);
2402    MMX_OP2(packsswb);
2403    MMX_OP2(pcmpgtb);
2404    MMX_OP2(pcmpgtw);
2405    MMX_OP2(pcmpgtd);
2406    MMX_OP2(packuswb);
2407    MMX_OP2(punpckhbw);
2408    MMX_OP2(punpckhwd);
2409    MMX_OP2(punpckhdq);
2410    MMX_OP2(packssdw);
2411    MMX_OP2(pcmpeqb);
2412    MMX_OP2(pcmpeqw);
2413    MMX_OP2(pcmpeqd);
2414
2415    MMX_OP2(paddq);
2416    MMX_OP2(pmullw);
2417    MMX_OP2(psubusb);
2418    MMX_OP2(psubusw);
2419    MMX_OP2(pminub);
2420    MMX_OP2(pand);
2421    MMX_OP2(paddusb);
2422    MMX_OP2(paddusw);
2423    MMX_OP2(pmaxub);
2424    MMX_OP2(pandn);
2425
2426    MMX_OP2(pmulhuw);
2427    MMX_OP2(pmulhw);
2428
2429    MMX_OP2(psubsb);
2430    MMX_OP2(psubsw);
2431    MMX_OP2(pminsw);
2432    MMX_OP2(por);
2433    MMX_OP2(paddsb);
2434    MMX_OP2(paddsw);
2435    MMX_OP2(pmaxsw);
2436    MMX_OP2(pxor);
2437    MMX_OP2(pmuludq);
2438    MMX_OP2(pmaddwd);
2439    MMX_OP2(psadbw);
2440    MMX_OP2(psubb);
2441    MMX_OP2(psubw);
2442    MMX_OP2(psubd);
2443    MMX_OP2(psubq);
2444    MMX_OP2(paddb);
2445    MMX_OP2(paddw);
2446    MMX_OP2(paddd);
2447
2448    MMX_OP2(pavgb);
2449    MMX_OP2(pavgw);
2450
2451    asm volatile ("pinsrw $1, %1, %0" : "=y" (r.q[0]) : "r" (0x12345678));
2452    printf("%-9s: r=" FMT64X "\n", "pinsrw", r.q[0]);
2453
2454    asm volatile ("pinsrw $5, %1, %0" : "=x" (r.dq) : "r" (0x12345678));
2455    printf("%-9s: r=" FMT64X "" FMT64X "\n", "pinsrw", r.q[1], r.q[0]);
2456
2457    a.q[0] = test_values[0][0];
2458    a.q[1] = test_values[0][1];
2459    asm volatile ("pextrw $1, %1, %0" : "=r" (r.l[0]) : "y" (a.q[0]));
2460    printf("%-9s: r=%08x\n", "pextrw", r.l[0]);
2461
2462    asm volatile ("pextrw $5, %1, %0" : "=r" (r.l[0]) : "x" (a.dq));
2463    printf("%-9s: r=%08x\n", "pextrw", r.l[0]);
2464
2465    asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "y" (a.q[0]));
2466    printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]);
2467
2468    asm volatile ("pmovmskb %1, %0" : "=r" (r.l[0]) : "x" (a.dq));
2469    printf("%-9s: r=%08x\n", "pmovmskb", r.l[0]);
2470
2471    {
2472        r.q[0] = -1;
2473        r.q[1] = -1;
2474
2475        a.q[0] = test_values[0][0];
2476        a.q[1] = test_values[0][1];
2477        b.q[0] = test_values[1][0];
2478        b.q[1] = test_values[1][1];
2479        asm volatile("maskmovq %1, %0" :
2480                     : "y" (a.q[0]), "y" (b.q[0]), "D" (&r)
2481                     : "memory");
2482        printf("%-9s: r=" FMT64X " a=" FMT64X " b=" FMT64X "\n",
2483               "maskmov",
2484               r.q[0],
2485               a.q[0],
2486               b.q[0]);
2487        asm volatile("maskmovdqu %1, %0" :
2488                     : "x" (a.dq), "x" (b.dq), "D" (&r)
2489                     : "memory");
2490        printf("%-9s: r=" FMT64X "" FMT64X " a=" FMT64X "" FMT64X " b=" FMT64X "" FMT64X "\n",
2491               "maskmov",
2492               r.q[1], r.q[0],
2493               a.q[1], a.q[0],
2494               b.q[1], b.q[0]);
2495    }
2496
2497    asm volatile ("emms");
2498
2499    SSE_OP2(punpcklqdq);
2500    SSE_OP2(punpckhqdq);
2501    SSE_OP2(andps);
2502    SSE_OP2(andpd);
2503    SSE_OP2(andnps);
2504    SSE_OP2(andnpd);
2505    SSE_OP2(orps);
2506    SSE_OP2(orpd);
2507    SSE_OP2(xorps);
2508    SSE_OP2(xorpd);
2509
2510    SSE_OP2(unpcklps);
2511    SSE_OP2(unpcklpd);
2512    SSE_OP2(unpckhps);
2513    SSE_OP2(unpckhpd);
2514
2515    SHUF_OP(shufps, 0x78);
2516    SHUF_OP(shufpd, 0x02);
2517
2518    PSHUF_OP(pshufd, 0x78);
2519    PSHUF_OP(pshuflw, 0x78);
2520    PSHUF_OP(pshufhw, 0x78);
2521
2522    SHIFT_OP(psrlw, 7);
2523    SHIFT_OP(psrlw, 16);
2524    SHIFT_OP(psraw, 7);
2525    SHIFT_OP(psraw, 16);
2526    SHIFT_OP(psllw, 7);
2527    SHIFT_OP(psllw, 16);
2528
2529    SHIFT_OP(psrld, 7);
2530    SHIFT_OP(psrld, 32);
2531    SHIFT_OP(psrad, 7);
2532    SHIFT_OP(psrad, 32);
2533    SHIFT_OP(pslld, 7);
2534    SHIFT_OP(pslld, 32);
2535
2536    SHIFT_OP(psrlq, 7);
2537    SHIFT_OP(psrlq, 32);
2538    SHIFT_OP(psllq, 7);
2539    SHIFT_OP(psllq, 32);
2540
2541    SHIFT_IM(psrldq, 16);
2542    SHIFT_IM(psrldq, 7);
2543    SHIFT_IM(pslldq, 16);
2544    SHIFT_IM(pslldq, 7);
2545
2546    MOVMSK(movmskps);
2547    MOVMSK(movmskpd);
2548
2549    /* FPU specific ops */
2550
2551    {
2552        uint32_t mxcsr;
2553        asm volatile("stmxcsr %0" : "=m" (mxcsr));
2554        printf("mxcsr=%08x\n", mxcsr & 0x1f80);
2555        asm volatile("ldmxcsr %0" : : "m" (mxcsr));
2556    }
2557
2558    test_sse_comi(2, -1);
2559    test_sse_comi(2, 2);
2560    test_sse_comi(2, 3);
2561    test_sse_comi(2, q_nan.d);
2562    test_sse_comi(q_nan.d, -1);
2563
2564    for(i = 0; i < 2; i++) {
2565        a.s[0] = 2.7;
2566        a.s[1] = 3.4;
2567        a.s[2] = 4;
2568        a.s[3] = -6.3;
2569        b.s[0] = 45.7;
2570        b.s[1] = 353.4;
2571        b.s[2] = 4;
2572        b.s[3] = 56.3;
2573        if (i == 1) {
2574            a.s[0] = q_nan.d;
2575            b.s[3] = q_nan.d;
2576        }
2577
2578        SSE_OPS(add);
2579        SSE_OPS(mul);
2580        SSE_OPS(sub);
2581        SSE_OPS(min);
2582        SSE_OPS(div);
2583        SSE_OPS(max);
2584        SSE_OPS(sqrt);
2585        SSE_OPS(cmpeq);
2586        SSE_OPS(cmplt);
2587        SSE_OPS(cmple);
2588        SSE_OPS(cmpunord);
2589        SSE_OPS(cmpneq);
2590        SSE_OPS(cmpnlt);
2591        SSE_OPS(cmpnle);
2592        SSE_OPS(cmpord);
2593
2594
2595        a.d[0] = 2.7;
2596        a.d[1] = -3.4;
2597        b.d[0] = 45.7;
2598        b.d[1] = -53.4;
2599        if (i == 1) {
2600            a.d[0] = q_nan.d;
2601            b.d[1] = q_nan.d;
2602        }
2603        SSE_OPD(add);
2604        SSE_OPD(mul);
2605        SSE_OPD(sub);
2606        SSE_OPD(min);
2607        SSE_OPD(div);
2608        SSE_OPD(max);
2609        SSE_OPD(sqrt);
2610        SSE_OPD(cmpeq);
2611        SSE_OPD(cmplt);
2612        SSE_OPD(cmple);
2613        SSE_OPD(cmpunord);
2614        SSE_OPD(cmpneq);
2615        SSE_OPD(cmpnlt);
2616        SSE_OPD(cmpnle);
2617        SSE_OPD(cmpord);
2618    }
2619
2620    /* float to float/int */
2621    a.s[0] = 2.7;
2622    a.s[1] = 3.4;
2623    a.s[2] = 4;
2624    a.s[3] = -6.3;
2625    CVT_OP_XMM(cvtps2pd);
2626    CVT_OP_XMM(cvtss2sd);
2627    CVT_OP_XMM2MMX(cvtps2pi);
2628    CVT_OP_XMM2MMX(cvttps2pi);
2629    CVT_OP_XMM2REG(cvtss2si);
2630    CVT_OP_XMM2REG(cvttss2si);
2631    CVT_OP_XMM(cvtps2dq);
2632    CVT_OP_XMM(cvttps2dq);
2633
2634    a.d[0] = 2.6;
2635    a.d[1] = -3.4;
2636    CVT_OP_XMM(cvtpd2ps);
2637    CVT_OP_XMM(cvtsd2ss);
2638    CVT_OP_XMM2MMX(cvtpd2pi);
2639    CVT_OP_XMM2MMX(cvttpd2pi);
2640    CVT_OP_XMM2REG(cvtsd2si);
2641    CVT_OP_XMM2REG(cvttsd2si);
2642    CVT_OP_XMM(cvtpd2dq);
2643    CVT_OP_XMM(cvttpd2dq);
2644
2645    /* sse/mmx moves */
2646    CVT_OP_XMM2MMX(movdq2q);
2647    CVT_OP_MMX2XMM(movq2dq);
2648
2649    /* int to float */
2650    a.l[0] = -6;
2651    a.l[1] = 2;
2652    a.l[2] = 100;
2653    a.l[3] = -60000;
2654    CVT_OP_MMX2XMM(cvtpi2ps);
2655    CVT_OP_MMX2XMM(cvtpi2pd);
2656    CVT_OP_REG2XMM(cvtsi2ss);
2657    CVT_OP_REG2XMM(cvtsi2sd);
2658    CVT_OP_XMM(cvtdq2ps);
2659    CVT_OP_XMM(cvtdq2pd);
2660
2661    /* XXX: test PNI insns */
2662#if 0
2663    SSE_OP2(movshdup);
2664#endif
2665    asm volatile ("emms");
2666}
2667
2668#endif
2669
/*
 * TEST_CONV_RAX(op): execute the operand-less instruction `op` with the
 * accumulator preloaded with i2l(0x8234a6f8), then print the value before and
 * after.
 */
#define TEST_CONV_RAX(op) \
{ \
    unsigned long acc_before = i2l(0x8234a6f8); \
    unsigned long acc_after = acc_before; \
    asm volatile(#op : "=a" (acc_after) : "0" (acc_after)); \
    printf("%-10s A=" FMTLX " R=" FMTLX "\n", #op, acc_before, acc_after); \
}
2678
/*
 * TEST_CONV_RAX_RDX(op): execute the operand-less instruction `op` with the
 * accumulator preloaded with i2l(0x8234a6f8) and the data register with
 * i2l(0x8345a1f2), then print the original accumulator followed by both
 * registers after the instruction.
 */
#define TEST_CONV_RAX_RDX(op) \
{ \
    unsigned long acc_before = i2l(0x8234a6f8); \
    unsigned long data_before = i2l(0x8345a1f2); \
    unsigned long acc_after = acc_before; \
    unsigned long data_after = data_before; \
    asm volatile(#op \
                 : "=a" (acc_after), "=d" (data_after) \
                 : "0" (acc_after), "1" (data_after)); \
    printf("%-10s A=" FMTLX " R=" FMTLX ":" FMTLX "\n", \
           #op, acc_before, acc_after, data_after); \
}
2689
2690void test_conv(void)
2691{
2692    TEST_CONV_RAX(cbw);
2693    TEST_CONV_RAX(cwde);
2694#if defined(__x86_64__)
2695    TEST_CONV_RAX(cdqe);
2696#endif
2697
2698    TEST_CONV_RAX_RDX(cwd);
2699    TEST_CONV_RAX_RDX(cdq);
2700#if defined(__x86_64__)
2701    TEST_CONV_RAX_RDX(cqo);
2702#endif
2703
2704    {
2705        unsigned long a, r;
2706        a = i2l(0x12345678);
2707        asm volatile("bswapl %k0" : "=r" (r) : "0" (a));
2708        printf("%-10s: A=" FMTLX " R=" FMTLX "\n", "bswapl", a, r);
2709    }
2710#if defined(__x86_64__)
2711    {
2712        unsigned long a, r;
2713        a = i2l(0x12345678);
2714        asm volatile("bswapq %0" : "=r" (r) : "0" (a));
2715        printf("%-10s: A=" FMTLX " R=" FMTLX "\n", "bswapq", a, r);
2716    }
2717#endif
2718}
2719
/* Bounds of the table of initialisation function pointers walked by main(). */
extern void *__start_initcall;
extern void *__stop_initcall;


/*
 * Call every function in the initcall table, then run the individual test
 * groups in a fixed order.  Groups that are not built for the current target
 * or configuration are compiled out.  Always returns 0.
 */
int main(int argc, char **argv)
{
    void **entry;

    for (entry = &__start_initcall; entry != &__stop_initcall; entry++) {
        void (*init_fn)(void) = *entry;

        init_fn();
    }

    test_bsx();
    test_mul();
    test_jcc();
    test_loop();
    test_floats();
#ifndef __x86_64__
    test_bcd();
#endif
    test_xchg();
    test_string();
    test_misc();
    test_lea();
#ifdef TEST_SEGS
    test_segs();
    test_code16();
#endif
#ifdef TEST_VM86
    test_vm86();
#endif
#ifndef __x86_64__
    test_exceptions();
    test_self_modifying_code();
    test_single_step();
#endif
    test_enter();
    test_conv();
#ifdef TEST_SSE
    test_sse();
    test_fxsave();
#endif
    return 0;
}
2766