qemu/tests/tcg/i386/test-avx.c
<<
>>
Prefs
   1#include <stdio.h>
   2#include <stdint.h>
   3#include <stdlib.h>
   4#include <string.h>
   5
/* Pointer to a test stub: takes no arguments and returns nothing. */
typedef void (*testfn)(void);
   7
   8typedef struct {
   9    uint64_t q0, q1, q2, q3;
  10} __attribute__((aligned(32))) v4di;
  11
  12typedef struct {
  13    uint64_t mm[8];
  14    v4di ymm[16];
  15    uint64_t r[16];
  16    uint64_t flags;
  17    uint32_t ff;
  18    uint64_t pad;
  19    v4di mem[4];
  20    v4di mem0[4];
  21} reg_state;
  22
  23typedef struct {
  24    int n;
  25    testfn fn;
  26    const char *s;
  27    reg_state *init;
  28} TestDef;
  29
  30reg_state initI;
  31reg_state initF16;
  32reg_state initF32;
  33reg_state initF64;
  34
  35static void dump_ymm(const char *name, int n, const v4di *r, int ff)
  36{
  37    printf("%s%d = %016lx %016lx %016lx %016lx\n",
  38           name, n, r->q3, r->q2, r->q1, r->q0);
  39    if (ff == 64) {
  40        double v[4];
  41        memcpy(v, r, sizeof(v));
  42        printf("        %16g %16g %16g %16g\n",
  43                v[3], v[2], v[1], v[0]);
  44    } else if (ff == 32) {
  45        float v[8];
  46        memcpy(v, r, sizeof(v));
  47        printf(" %8g %8g %8g %8g %8g %8g %8g %8g\n",
  48                v[7], v[6], v[5], v[4], v[3], v[2], v[1], v[0]);
  49    }
  50}
  51
  52static void dump_regs(reg_state *s)
  53{
  54    int i;
  55
  56    for (i = 0; i < 16; i++) {
  57        dump_ymm("ymm", i, &s->ymm[i], 0);
  58    }
  59    for (i = 0; i < 4; i++) {
  60        dump_ymm("mem", i, &s->mem0[i], 0);
  61    }
  62}
  63
  64static void compare_state(const reg_state *a, const reg_state *b)
  65{
  66    int i;
  67    for (i = 0; i < 8; i++) {
  68        if (a->mm[i] != b->mm[i]) {
  69            printf("MM%d = %016lx\n", i, b->mm[i]);
  70        }
  71    }
  72    for (i = 0; i < 16; i++) {
  73        if (a->r[i] != b->r[i]) {
  74            printf("r%d = %016lx\n", i, b->r[i]);
  75        }
  76    }
  77    for (i = 0; i < 16; i++) {
  78        if (memcmp(&a->ymm[i], &b->ymm[i], 32)) {
  79            dump_ymm("ymm", i, &b->ymm[i], a->ff);
  80        }
  81    }
  82    for (i = 0; i < 4; i++) {
  83        if (memcmp(&a->mem0[i], &a->mem[i], 32)) {
  84            dump_ymm("mem", i, &a->mem[i], a->ff);
  85        }
  86    }
  87    if (a->flags != b->flags) {
  88        printf("FLAGS = %016lx\n", b->flags);
  89    }
  90}
  91
  92#define LOADMM(r, o) "movq " #r ", " #o "[%0]\n\t"
  93#define LOADYMM(r, o) "vmovdqa " #r ", " #o "[%0]\n\t"
  94#define STOREMM(r, o) "movq " #o "[%1], " #r "\n\t"
  95#define STOREYMM(r, o) "vmovdqa " #o "[%1], " #r "\n\t"
  96#define MMREG(F) \
  97    F(mm0, 0x00) \
  98    F(mm1, 0x08) \
  99    F(mm2, 0x10) \
 100    F(mm3, 0x18) \
 101    F(mm4, 0x20) \
 102    F(mm5, 0x28) \
 103    F(mm6, 0x30) \
 104    F(mm7, 0x38)
 105#define YMMREG(F) \
 106    F(ymm0, 0x040) \
 107    F(ymm1, 0x060) \
 108    F(ymm2, 0x080) \
 109    F(ymm3, 0x0a0) \
 110    F(ymm4, 0x0c0) \
 111    F(ymm5, 0x0e0) \
 112    F(ymm6, 0x100) \
 113    F(ymm7, 0x120) \
 114    F(ymm8, 0x140) \
 115    F(ymm9, 0x160) \
 116    F(ymm10, 0x180) \
 117    F(ymm11, 0x1a0) \
 118    F(ymm12, 0x1c0) \
 119    F(ymm13, 0x1e0) \
 120    F(ymm14, 0x200) \
 121    F(ymm15, 0x220)
 122#define LOADREG(r, o) "mov " #r ", " #o "[rax]\n\t"
 123#define STOREREG(r, o) "mov " #o "[rax], " #r "\n\t"
 124#define REG(F) \
 125    F(rbx, 0x248) \
 126    F(rcx, 0x250) \
 127    F(rdx, 0x258) \
 128    F(rsi, 0x260) \
 129    F(rdi, 0x268) \
 130    F(r8, 0x280) \
 131    F(r9, 0x288) \
 132    F(r10, 0x290) \
 133    F(r11, 0x298) \
 134    F(r12, 0x2a0) \
 135    F(r13, 0x2a8) \
 136    F(r14, 0x2b0) \
 137    F(r15, 0x2b8) \
 138
/*
 * Run one test and print whatever it changed.
 *
 * The test's initial state (t->init) is loaded into the mm, ymm, flags and
 * general-purpose registers, t->fn is called, and the registers it leaves
 * behind are captured into a local reg_state.  compare_state() then prints
 * the differences between the initial state and the captured one.
 */
static void run_test(const TestDef *t)
{
    reg_state result;
    reg_state *init = t->init;
    /* Start every test from a fresh copy of the clean memory image. */
    memcpy(init->mem, init->mem0, sizeof(init->mem));
    printf("%5d %s\n", t->n, t->s);
    /* Asm operands: %0 = init, %1 = &result, %2 = t->fn. */
    asm volatile(
            /* Load mm0..mm7 and ymm0..ymm15 from *init. */
            MMREG(LOADMM)
            YMMREG(LOADYMM)
            /*
             * Step over 128 bytes of stack (presumably the red zone - confirm),
             * then save rax..rdx, which are not in the clobber list.
             */
            "sub rsp, 128\n\t"
            "push rax\n\t"
            "push rbx\n\t"
            "push rcx\n\t"
            "push rdx\n\t"
            /* Keep the operands on the stack: 8[rsp] = &result, [rsp] = t->fn. */
            "push %1\n\t"
            "push %2\n\t"
            "mov rax, %0\n\t"
            /*
             * Build the flags for the test: current flags with the low byte
             * replaced by the low byte of init->flags (offset 0x2c0).
             */
            "pushf\n\t"
            "pop rbx\n\t"
            "shr rbx, 8\n\t"
            "shl rbx, 8\n\t"
            "mov rcx, 0x2c0[rax]\n\t"
            "and rcx, 0xff\n\t"
            "or rbx, rcx\n\t"
            "push rbx\n\t"
            "popf\n\t"
            /* Load the general-purpose registers; rax goes last, it is the base. */
            REG(LOADREG)
            "mov rax, 0x240[rax]\n\t"
            /* Call t->fn through its stack slot. */
            "call [rsp]\n\t"
            /* Park the test's rax in that slot and point rax at result. */
            "mov [rsp], rax\n\t"
            "mov rax, 8[rsp]\n\t"
            REG(STOREREG)
            /* result.r[0] = the rax value the test left behind. */
            "mov rbx, [rsp]\n\t"
            "mov 0x240[rax], rbx\n\t"
            /* Zero the two r[] slots (0x270, 0x278) that REG() does not cover. */
            "mov rbx, 0\n\t"
            "mov 0x270[rax], rbx\n\t"
            "mov 0x278[rax], rbx\n\t"
            /* result.flags = low byte of the flags register. */
            "pushf\n\t"
            "pop rbx\n\t"
            "and rbx, 0xff\n\t"
            "mov 0x2c0[rax], rbx\n\t"
            /* Drop the two operand slots, then restore rdx..rax and the stack. */
            "add rsp, 16\n\t"
            "pop rdx\n\t"
            "pop rcx\n\t"
            "pop rbx\n\t"
            "pop rax\n\t"
            "add rsp, 128\n\t"
            /* Capture mm0..mm7 and ymm0..ymm15 into result. */
            MMREG(STOREMM)
            YMMREG(STOREYMM)
            : : "r"(init), "r"(&result), "r"(t->fn)
            : "memory", "cc",
            "rsi", "rdi",
            "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
            "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
            "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5",
            "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11",
            "ymm12", "ymm13", "ymm14", "ymm15"
            );
    compare_state(init, &result);
}
 199
 200#define TEST(n, cmd, type) \
 201static void __attribute__((naked)) test_##n(void) \
 202{ \
 203    asm volatile(cmd); \
 204    asm volatile("ret"); \
 205}
 206#include "test-avx.h"
 207
 208
 209static const TestDef test_table[] = {
 210#define TEST(n, cmd, type) {n, test_##n, cmd, &init##type},
 211#include "test-avx.h"
 212    {-1, NULL, "", NULL}
 213};
 214
 215static void run_all(void)
 216{
 217    const TestDef *t;
 218    for (t = test_table; t->fn; t++) {
 219        run_test(t);
 220    }
 221}
 222
 223#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
 224
 225uint16_t val_f16[] = { 0x4000, 0xbc00, 0x44cd, 0x3a66, 0x4200, 0x7a1a, 0x4780, 0x4826 };
 226float val_f32[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5, 8.3};
 227double val_f64[] = {2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5};
 228v4di val_i64[] = {
 229    {0x3d6b3b6a9e4118f2lu, 0x355ae76d2774d78clu,
 230     0xac3ff76c4daa4b28lu, 0xe7fabd204cb54083lu},
 231    {0xd851c54a56bf1f29lu, 0x4a84d1d50bf4c4fflu,
 232     0x56621e553d52b56clu, 0xd0069553da8f584alu},
 233    {0x5826475e2c5fd799lu, 0xfd32edc01243f5e9lu,
 234     0x738ba2c66d3fe126lu, 0x5707219c6e6c26b4lu},
 235};
 236
 237v4di deadbeef = {0xa5a5a5a5deadbeefull, 0xa5a5a5a5deadbeefull,
 238                 0xa5a5a5a5deadbeefull, 0xa5a5a5a5deadbeefull};
 239v4di indexq = {0x000000000000001full, 0x000000000000008full,
 240               0xffffffffffffffffull, 0xffffffffffffff5full};
 241v4di indexd = {0x00000002000000efull, 0xfffffff500000010ull,
 242               0x0000000afffffff0ull, 0x000000000000000eull};
 243
 244v4di gather_mem[0x20];
 245
 246void init_f16reg(v4di *r)
 247{
 248    memset(r, 0, sizeof(*r));
 249    memcpy(r, val_f16, sizeof(val_f16));
 250}
 251
 252void init_f32reg(v4di *r)
 253{
 254    static int n;
 255    float v[8];
 256    int i;
 257    for (i = 0; i < 8; i++) {
 258        v[i] = val_f32[n++];
 259        if (n == ARRAY_LEN(val_f32)) {
 260            n = 0;
 261        }
 262    }
 263    memcpy(r, v, sizeof(*r));
 264}
 265
 266void init_f64reg(v4di *r)
 267{
 268    static int n;
 269    double v[4];
 270    int i;
 271    for (i = 0; i < 4; i++) {
 272        v[i] = val_f64[n++];
 273        if (n == ARRAY_LEN(val_f64)) {
 274            n = 0;
 275        }
 276    }
 277    memcpy(r, v, sizeof(*r));
 278}
 279
 280void init_intreg(v4di *r)
 281{
 282    static uint64_t mask;
 283    static int n;
 284
 285    r->q0 = val_i64[n].q0 ^ mask;
 286    r->q1 = val_i64[n].q1 ^ mask;
 287    r->q2 = val_i64[n].q2 ^ mask;
 288    r->q3 = val_i64[n].q3 ^ mask;
 289    n++;
 290    if (n == ARRAY_LEN(val_i64)) {
 291        n = 0;
 292        mask *= 0x104C11DB7;
 293    }
 294}
 295
 296static void init_all(reg_state *s)
 297{
 298    int i;
 299
 300    s->r[3] = (uint64_t)&s->mem[0]; /* rdx */
 301    s->r[4] = (uint64_t)&gather_mem[ARRAY_LEN(gather_mem) / 2]; /* rsi */
 302    s->r[5] = (uint64_t)&s->mem[2]; /* rdi */
 303    s->flags = 2;
 304    for (i = 0; i < 16; i++) {
 305        s->ymm[i] = deadbeef;
 306    }
 307    s->ymm[13] = indexd;
 308    s->ymm[14] = indexq;
 309    for (i = 0; i < 4; i++) {
 310        s->mem0[i] = deadbeef;
 311    }
 312}
 313
 314int main(int argc, char *argv[])
 315{
 316    int i;
 317
 318    init_all(&initI);
 319    init_intreg(&initI.ymm[10]);
 320    init_intreg(&initI.ymm[11]);
 321    init_intreg(&initI.ymm[12]);
 322    init_intreg(&initI.mem0[1]);
 323    printf("Int:\n");
 324    dump_regs(&initI);
 325
 326    init_all(&initF16);
 327    init_f16reg(&initF16.ymm[10]);
 328    init_f16reg(&initF16.ymm[11]);
 329    init_f16reg(&initF16.ymm[12]);
 330    init_f16reg(&initF16.mem0[1]);
 331    initF16.ff = 16;
 332    printf("F16:\n");
 333    dump_regs(&initF16);
 334
 335    init_all(&initF32);
 336    init_f32reg(&initF32.ymm[10]);
 337    init_f32reg(&initF32.ymm[11]);
 338    init_f32reg(&initF32.ymm[12]);
 339    init_f32reg(&initF32.mem0[1]);
 340    initF32.ff = 32;
 341    printf("F32:\n");
 342    dump_regs(&initF32);
 343
 344    init_all(&initF64);
 345    init_f64reg(&initF64.ymm[10]);
 346    init_f64reg(&initF64.ymm[11]);
 347    init_f64reg(&initF64.ymm[12]);
 348    init_f64reg(&initF64.mem0[1]);
 349    initF64.ff = 64;
 350    printf("F64:\n");
 351    dump_regs(&initF64);
 352
 353    for (i = 0; i < ARRAY_LEN(gather_mem); i++) {
 354        init_intreg(&gather_mem[i]);
 355    }
 356
 357    if (argc > 1) {
 358        int n = atoi(argv[1]);
 359        run_test(&test_table[n]);
 360    } else {
 361        run_all();
 362    }
 363    return 0;
 364}
 365