1#include <stdio.h>
2#include <stdint.h>
3#include <stdlib.h>
4#include <string.h>
5
/* Header that supplies the TEST(n, cmd, type) entries; it is included twice
 * further down.  A build may pre-define TEST_FILE to select another list. */
#if !defined(TEST_FILE)
#define TEST_FILE "test-mmx.h"
#endif

/* Instruction text pasted into run_test()'s asm right after the MMX registers
 * have been stored.  A build may pre-define EMMS to substitute another one. */
#if !defined(EMMS)
#define EMMS "emms"
#endif
12
/* Entry point of one generated test routine. */
typedef void (*testfn)(void);

/* A 16-byte, 16-byte-aligned value held as two 64-bit halves; q0 sits at the
 * lower address. */
typedef struct {
    uint64_t q0;
    uint64_t q1;
} __attribute__((aligned(16))) v2di;

/*
 * Register/memory snapshot used both as test input and as test output.
 *
 * run_test()'s inline asm addresses mm, xmm, r and flags through hard-coded
 * byte offsets, so the order and size of the leading members must not change.
 */
typedef struct {
    uint64_t mm[8];     /* 0x000: mm0..mm7 */
    v2di xmm[8];        /* 0x040: xmm0..xmm7 */
    uint64_t r[16];     /* 0x0c0: rax, rbx, rcx, rdx, rsi, rdi, then two slots
                         * that are never loaded into a register, then r8..r15 */
    uint64_t flags;     /* 0x140: only the low byte is loaded and captured */
    uint32_t ff;        /* 32 makes the dump helpers also print float lanes */
    uint64_t pad;
    v2di mem[4];        /* memory handed to the test; refreshed from mem0 */
    v2di mem0[4];       /* reference contents copied into mem before each test */
} reg_state;

/* One row of the test table. */
typedef struct {
    int n;              /* number printed by run_test() */
    testfn fn;          /* routine to call; NULL marks the end of the table */
    const char *s;      /* instruction text printed next to n */
    reg_state *init;    /* state loaded before fn is called */
} TestDef;

/* Initial states, one per operand type named in the TEST() entries. */
reg_state initI, initF32, initF64;
40
/*
 * Print MMX register number n, whose 64-bit contents are at *r, as hex.
 * When ff is 32 the same bits are additionally printed as two floats,
 * higher-addressed lane first.
 */
static void dump_mmx(int n, const uint64_t *r, int ff)
{
    float lanes[2];

    if (ff != 32) {
        printf("MM%d = %016lx\n", n, *r);
        return;
    }

    /* Reinterpret the raw bits without violating aliasing rules. */
    memcpy(lanes, r, sizeof(lanes));
    printf("MM%d = %016lx %8g %8g\n", n, *r, lanes[1], lanes[0]);
}
51
52static void dump_xmm(const char *name, int n, const v2di *r, int ff)
53{
54 printf("%s%d = %016lx %016lx\n",
55 name, n, r->q1, r->q0);
56 if (ff == 32) {
57 float v[4];
58 memcpy(v, r, sizeof(v));
59 printf(" %8g %8g %8g %8g\n",
60 v[3], v[2], v[1], v[0]);
61 }
62}
63
64static void dump_regs(reg_state *s, int ff)
65{
66 int i;
67
68 for (i = 0; i < 8; i++) {
69 dump_mmx(i, &s->mm[i], ff);
70 }
71 for (i = 0; i < 4; i++) {
72 dump_xmm("mem", i, &s->mem0[i], 0);
73 }
74}
75
76static void compare_state(const reg_state *a, const reg_state *b)
77{
78 int i;
79 for (i = 0; i < 8; i++) {
80 if (a->mm[i] != b->mm[i]) {
81 printf("MM%d = %016lx\n", i, b->mm[i]);
82 }
83 }
84 for (i = 0; i < 16; i++) {
85 if (a->r[i] != b->r[i]) {
86 printf("r%d = %016lx\n", i, b->r[i]);
87 }
88 }
89 for (i = 0; i < 8; i++) {
90 if (memcmp(&a->xmm[i], &b->xmm[i], 8)) {
91 dump_xmm("xmm", i, &b->xmm[i], a->ff);
92 }
93 }
94 for (i = 0; i < 4; i++) {
95 if (memcmp(&a->mem0[i], &a->mem[i], 16)) {
96 dump_xmm("mem", i, &a->mem[i], a->ff);
97 }
98 }
99 if (a->flags != b->flags) {
100 printf("FLAGS = %016lx\n", b->flags);
101 }
102}
103
/*
 * Building blocks for run_test()'s asm text.  Each per-register macro takes a
 * register name and the byte offset of its slot inside reg_state and expands
 * to one instruction string.  Loads read relative to asm operand %0 (or rax
 * for the general-purpose registers); stores write relative to operand %1
 * (or rax).
 */
#define LOADMM(reg, off)   "movq " #reg ", " #off "[%0]\n\t"
#define LOADXMM(reg, off)  "movdqa " #reg ", " #off "[%0]\n\t"
#define STOREMM(reg, off)  "movq " #off "[%1], " #reg "\n\t"
#define STOREXMM(reg, off) "movdqa " #off "[%1], " #reg "\n\t"

/* Apply EMIT to every MMX register and its reg_state offset. */
#define MMREG(EMIT)  \
    EMIT(mm0, 0x00)  \
    EMIT(mm1, 0x08)  \
    EMIT(mm2, 0x10)  \
    EMIT(mm3, 0x18)  \
    EMIT(mm4, 0x20)  \
    EMIT(mm5, 0x28)  \
    EMIT(mm6, 0x30)  \
    EMIT(mm7, 0x38)

/* Apply EMIT to xmm0..xmm7 and their reg_state offsets. */
#define XMMREG(EMIT)   \
    EMIT(xmm0, 0x040)  \
    EMIT(xmm1, 0x050)  \
    EMIT(xmm2, 0x060)  \
    EMIT(xmm3, 0x070)  \
    EMIT(xmm4, 0x080)  \
    EMIT(xmm5, 0x090)  \
    EMIT(xmm6, 0x0a0)  \
    EMIT(xmm7, 0x0b0)

#define LOADREG(reg, off)  "mov " #reg ", " #off "[rax]\n\t"
#define STOREREG(reg, off) "mov " #off "[rax], " #reg "\n\t"

/*
 * Apply EMIT to the general-purpose registers handled generically.  rax is
 * absent because it serves as the base pointer, and the slots at 0xf0/0xf8
 * have no entry here.
 */
#define REG(EMIT)     \
    EMIT(rbx, 0xc8)   \
    EMIT(rcx, 0xd0)   \
    EMIT(rdx, 0xd8)   \
    EMIT(rsi, 0xe0)   \
    EMIT(rdi, 0xe8)   \
    EMIT(r8, 0x100)   \
    EMIT(r9, 0x108)   \
    EMIT(r10, 0x110)  \
    EMIT(r11, 0x118)  \
    EMIT(r12, 0x120)  \
    EMIT(r13, 0x128)  \
    EMIT(r14, 0x130)  \
    EMIT(r15, 0x138)

143static void run_test(const TestDef *t)
144{
145 reg_state result;
146 reg_state *init = t->init;
147 memcpy(init->mem, init->mem0, sizeof(init->mem));
148 printf("%5d %s\n", t->n, t->s);
149 asm volatile(
150 MMREG(LOADMM)
151 XMMREG(LOADXMM)
152 "sub rsp, 128\n\t"
153 "push rax\n\t"
154 "push rbx\n\t"
155 "push rcx\n\t"
156 "push rdx\n\t"
157 "push %1\n\t"
158 "push %2\n\t"
159 "mov rax, %0\n\t"
160 "pushf\n\t"
161 "pop rbx\n\t"
162 "shr rbx, 8\n\t"
163 "shl rbx, 8\n\t"
164 "mov rcx, 0x140[rax]\n\t"
165 "and rcx, 0xff\n\t"
166 "or rbx, rcx\n\t"
167 "push rbx\n\t"
168 "popf\n\t"
169 REG(LOADREG)
170 "mov rax, 0xc0[rax]\n\t"
171 "call [rsp]\n\t"
172 "mov [rsp], rax\n\t"
173 "mov rax, 8[rsp]\n\t"
174 REG(STOREREG)
175 "mov rbx, [rsp]\n\t"
176 "mov 0xc0[rax], rbx\n\t"
177 "mov rbx, 0\n\t"
178 "mov 0xf0[rax], rbx\n\t"
179 "mov 0xf8[rax], rbx\n\t"
180 "pushf\n\t"
181 "pop rbx\n\t"
182 "and rbx, 0xff\n\t"
183 "mov 0x140[rax], rbx\n\t"
184 "add rsp, 16\n\t"
185 "pop rdx\n\t"
186 "pop rcx\n\t"
187 "pop rbx\n\t"
188 "pop rax\n\t"
189 "add rsp, 128\n\t"
190 MMREG(STOREMM)
191 EMMS "\n\t"
192 XMMREG(STOREXMM)
193 : : "r"(init), "r"(&result), "r"(t->fn)
194 : "memory", "cc",
195 "rsi", "rdi",
196 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
197 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
198 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
199 "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11",
200 "xmm12", "xmm13", "xmm14", "xmm15"
201 );
202 compare_state(init, &result);
203}
204
205#define TEST(n, cmd, type) \
206static void __attribute__((naked)) test_##n(void) \
207{ \
208 asm volatile(cmd); \
209 asm volatile("ret"); \
210}
211#include TEST_FILE
212
213
214static const TestDef test_table[] = {
215#define TEST(n, cmd, type) {n, test_##n, cmd, &init##type},
216#include TEST_FILE
217 {-1, NULL, "", NULL}
218};
219
220static void run_all(void)
221{
222 const TestDef *t;
223 for (t = test_table; t->fn; t++) {
224 run_test(t);
225 }
226}
227
/* Number of elements in the array x.  x must be a real array, not a pointer.
 * The argument is parenthesised so that non-trivial expressions index
 * correctly. */
#define ARRAY_LEN(x) (sizeof(x) / sizeof((x)[0]))

/* Float values handed out in rotation by init_f32reg(). */
float val_f32[] = {
    2.0, -1.0, 4.8, 0.8, 3, -42.0, 5e6, 7.5, 8.3,
};

/* 64-bit values handed out in rotation by init_intreg(). */
uint64_t val_i64[] = {
    UINT64_C(0x3d6b3b6a9e4118f2),
    UINT64_C(0x355ae76d2774d78c),
    UINT64_C(0xd851c54a56bf1f29),
    UINT64_C(0x4a84d1d50bf4c4ff),
    UINT64_C(0x5826475e2c5fd799),
    UINT64_C(0xfd32edc01243f5e9),
};
236
237v2di deadbeef = {0xa5a5a5a5deadbeefull, 0xa5a5a5a5deadbeefull};
238
239void init_f32reg(uint64_t *r)
240{
241 static int n;
242 float v[2];
243 int i;
244 for (i = 0; i < 2; i++) {
245 v[i] = val_f32[n++];
246 if (n == ARRAY_LEN(val_f32)) {
247 n = 0;
248 }
249 }
250 memcpy(r, v, sizeof(*r));
251}
252
253void init_intreg(uint64_t *r)
254{
255 static uint64_t mask;
256 static int n;
257
258 *r = val_i64[n] ^ mask;
259 n++;
260 if (n == ARRAY_LEN(val_i64)) {
261 n = 0;
262 mask *= 0x104C11DB7;
263 }
264}
265
266static void init_all(reg_state *s)
267{
268 int i;
269
270 for (i = 0; i < 16; i++) {
271 init_intreg(&s->r[i]);
272 }
273 s->r[3] = (uint64_t)&s->mem[0];
274 s->r[5] = (uint64_t)&s->mem[2];
275 s->r[6] = 0;
276 s->r[7] = 0;
277 s->flags = 2;
278 for (i = 0; i < 8; i++) {
279 s->xmm[i] = deadbeef;
280 memcpy(&s->mm[i], &s->xmm[i], sizeof(s->mm[i]));
281 }
282 for (i = 0; i < 2; i++) {
283 s->mem0[i] = deadbeef;
284 }
285}
286
287int main(int argc, char *argv[])
288{
289 init_all(&initI);
290 init_intreg(&initI.mm[5]);
291 init_intreg(&initI.mm[6]);
292 init_intreg(&initI.mm[7]);
293 init_intreg(&initI.mem0[1].q0);
294 init_intreg(&initI.mem0[1].q1);
295 printf("Int:\n");
296 dump_regs(&initI, 0);
297
298 init_all(&initF32);
299 init_f32reg(&initF32.mm[5]);
300 init_f32reg(&initF32.mm[6]);
301 init_f32reg(&initF32.mm[7]);
302 init_f32reg(&initF32.mem0[1].q0);
303 init_f32reg(&initF32.mem0[1].q1);
304 initF32.ff = 32;
305 printf("F32:\n");
306 dump_regs(&initF32, 32);
307
308 if (argc > 1) {
309 int n = atoi(argv[1]);
310 run_test(&test_table[n]);
311 } else {
312 run_all();
313 }
314 return 0;
315}
316