1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18#include <stdio.h>
19
20
21
22
23
24
25
26
/*
 * MEM_NOSHUF32 - define a static inline function NAME() that issues one
 * store and one load inside a single packet tagged :mem_noshuf and returns
 * the loaded value as an unsigned int.
 *
 *   NAME     name of the generated function
 *   ST_TYPE  C type of the stored value and of the store pointer target
 *   LD_TYPE  C type of the load pointer target
 *   ST_OP    store mnemonic, pasted verbatim into the asm (e.g. memb)
 *   LD_OP    load mnemonic, pasted verbatim into the asm (e.g. memub)
 *
 * The generated function stores x through p and loads through q.  The
 * "memory" clobber tells the compiler that memory is modified by the asm.
 */
#define MEM_NOSHUF32(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
static inline unsigned int NAME(ST_TYPE *p, LD_TYPE *q, ST_TYPE x) \
{ \
    unsigned int loaded; \
    asm volatile("{\n\t" \
                 "    " #ST_OP "(%[st_addr]) = %[st_val]\n\t" \
                 "    %[ld_val] = " #LD_OP "(%[ld_addr])\n\t" \
                 "}:mem_noshuf\n" \
                 : [ld_val] "=r"(loaded) \
                 : [st_addr] "r"(p), [ld_addr] "r"(q), [st_val] "r"(x) \
                 : "memory"); \
    return loaded; \
}
40
/*
 * MEM_NOSHUF64 - define a static inline function NAME() that issues one
 * store and one load inside a single packet tagged :mem_noshuf and returns
 * the loaded value as an unsigned long long.
 *
 *   NAME     name of the generated function
 *   ST_TYPE  C type of the stored value and of the store pointer target
 *   LD_TYPE  C type of the load pointer target
 *   ST_OP    store mnemonic, pasted verbatim into the asm (e.g. memw)
 *   LD_OP    load mnemonic, pasted verbatim into the asm (e.g. memd)
 *
 * The generated function stores x through p and loads through q.  The
 * "memory" clobber tells the compiler that memory is modified by the asm.
 */
#define MEM_NOSHUF64(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
static inline unsigned long long NAME(ST_TYPE *p, LD_TYPE *q, ST_TYPE x) \
{ \
    unsigned long long loaded; \
    asm volatile("{\n\t" \
                 "    " #ST_OP "(%[st_addr]) = %[st_val]\n\t" \
                 "    %[ld_val] = " #LD_OP "(%[ld_addr])\n\t" \
                 "}:mem_noshuf\n" \
                 : [ld_val] "=r"(loaded) \
                 : [st_addr] "r"(p), [ld_addr] "r"(q), [st_val] "r"(x) \
                 : "memory"); \
    return loaded; \
}
54
55
56MEM_NOSHUF32(mem_noshuf_sb_lb, signed char, signed char, memb, memb)
57MEM_NOSHUF32(mem_noshuf_sb_lub, signed char, unsigned char, memb, memub)
58MEM_NOSHUF32(mem_noshuf_sb_lh, signed char, signed short, memb, memh)
59MEM_NOSHUF32(mem_noshuf_sb_luh, signed char, unsigned short, memb, memuh)
60MEM_NOSHUF32(mem_noshuf_sb_lw, signed char, signed int, memb, memw)
61MEM_NOSHUF64(mem_noshuf_sb_ld, signed char, signed long long, memb, memd)
62
63
64MEM_NOSHUF32(mem_noshuf_sh_lb, signed short, signed char, memh, memb)
65MEM_NOSHUF32(mem_noshuf_sh_lub, signed short, unsigned char, memh, memub)
66MEM_NOSHUF32(mem_noshuf_sh_lh, signed short, signed short, memh, memh)
67MEM_NOSHUF32(mem_noshuf_sh_luh, signed short, unsigned short, memh, memuh)
68MEM_NOSHUF32(mem_noshuf_sh_lw, signed short, signed int, memh, memw)
69MEM_NOSHUF64(mem_noshuf_sh_ld, signed short, signed long long, memh, memd)
70
71
72MEM_NOSHUF32(mem_noshuf_sw_lb, signed int, signed char, memw, memb)
73MEM_NOSHUF32(mem_noshuf_sw_lub, signed int, unsigned char, memw, memub)
74MEM_NOSHUF32(mem_noshuf_sw_lh, signed int, signed short, memw, memh)
75MEM_NOSHUF32(mem_noshuf_sw_luh, signed int, unsigned short, memw, memuh)
76MEM_NOSHUF32(mem_noshuf_sw_lw, signed int, signed int, memw, memw)
77MEM_NOSHUF64(mem_noshuf_sw_ld, signed int, signed long long, memw, memd)
78
79
80MEM_NOSHUF32(mem_noshuf_sd_lb, long long, signed char, memd, memb)
81MEM_NOSHUF32(mem_noshuf_sd_lub, long long, unsigned char, memd, memub)
82MEM_NOSHUF32(mem_noshuf_sd_lh, long long, signed short, memd, memh)
83MEM_NOSHUF32(mem_noshuf_sd_luh, long long, unsigned short, memd, memuh)
84MEM_NOSHUF32(mem_noshuf_sd_lw, long long, signed int, memd, memw)
85MEM_NOSHUF64(mem_noshuf_sd_ld, long long, signed long long, memd, memd)
86
87static inline int pred_lw_sw(int pred, int *p, int *q, int x, int y)
88{
89 int ret;
90 asm volatile("p0 = cmp.eq(%5, #0)\n\t"
91 "%0 = %3\n\t"
92 "{\n\t"
93 " memw(%1) = %4\n\t"
94 " if (!p0) %0 = memw(%2)\n\t"
95 "}:mem_noshuf\n"
96 : "=&r"(ret)
97 : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
98 : "p0", "memory");
99 return ret;
100}
101
102static inline int pred_lw_sw_pi(int pred, int *p, int *q, int x, int y)
103{
104 int ret;
105 asm volatile("p0 = cmp.eq(%5, #0)\n\t"
106 "%0 = %3\n\t"
107 "r7 = %2\n\t"
108 "{\n\t"
109 " memw(%1) = %4\n\t"
110 " if (!p0) %0 = memw(r7++#4)\n\t"
111 "}:mem_noshuf\n"
112 : "=&r"(ret)
113 : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
114 : "r7", "p0", "memory");
115 return ret;
116}
117
118static inline long long pred_ld_sd(int pred, long long *p, long long *q,
119 long long x, long long y)
120{
121 unsigned long long ret;
122 asm volatile("p0 = cmp.eq(%5, #0)\n\t"
123 "%0 = %3\n\t"
124 "{\n\t"
125 " memd(%1) = %4\n\t"
126 " if (!p0) %0 = memd(%2)\n\t"
127 "}:mem_noshuf\n"
128 : "=&r"(ret)
129 : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
130 : "p0", "memory");
131 return ret;
132}
133
134static inline long long pred_ld_sd_pi(int pred, long long *p, long long *q,
135 long long x, long long y)
136{
137 long long ret;
138 asm volatile("p0 = cmp.eq(%5, #0)\n\t"
139 "%0 = %3\n\t"
140 "r7 = %2\n\t"
141 "{\n\t"
142 " memd(%1) = %4\n\t"
143 " if (!p0) %0 = memd(r7++#8)\n\t"
144 "}:mem_noshuf\n"
145 : "=&r"(ret)
146 : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
147 : "p0", "memory");
148 return ret;
149}
150
151static inline unsigned int cancel_sw_lb(int pred, int *p, signed char *q, int x)
152{
153 unsigned int ret;
154 asm volatile("p0 = cmp.eq(%4, #0)\n\t"
155 "{\n\t"
156 " if (!p0) memw(%1) = %3\n\t"
157 " %0 = memb(%2)\n\t"
158 "}:mem_noshuf\n"
159 : "=r"(ret)
160 : "r"(p), "r"(q), "r"(x), "r"(pred)
161 : "p0", "memory");
162 return ret;
163}
164
165static inline
166unsigned long long cancel_sw_ld(int pred, int *p, long long *q, int x)
167{
168 long long ret;
169 asm volatile("p0 = cmp.eq(%4, #0)\n\t"
170 "{\n\t"
171 " if (!p0) memw(%1) = %3\n\t"
172 " %0 = memd(%2)\n\t"
173 "}:mem_noshuf\n"
174 : "=r"(ret)
175 : "r"(p), "r"(q), "r"(x), "r"(pred)
176 : "p0", "memory");
177 return ret;
178}
179
/*
 * Scratch buffer that can be addressed as doublewords, words, halfwords or
 * bytes, in signed and unsigned flavours.  All members overlay the same
 * storage, so a store through one view can be read back through another.
 */
typedef union {
    long long          d[2];    /* signed doublewords   */
    unsigned long long ud[2];   /* unsigned doublewords */
    int                w[4];    /* signed words         */
    unsigned int       uw[4];   /* unsigned words       */
    short              h[8];    /* signed halfwords     */
    unsigned short     uh[8];   /* unsigned halfwords   */
    signed char        b[16];   /* signed bytes         */
    unsigned char      ub[16];  /* unsigned bytes       */
} Memory;
190
/* Number of failed checks so far. */
int err;

/* Compare two 32-bit values; a mismatch is reported with the caller's line. */
#define check32(n, expect) check32_(n, expect, __LINE__)

/*
 * Report and count a mismatch between n and expect.
 * line is the source line printed in the error message.
 */
static void check32_(int n, int expect, int line)
{
    if (n == expect) {
        return;
    }
    printf("ERROR: 0x%08x != 0x%08x, line %d\n", n, expect, line);
    err++;
}

/* Compare two 64-bit values; a mismatch is reported with the caller's line. */
#define check64(n, expect) check64_(n, expect, __LINE__)

/*
 * Report and count a mismatch between n and expect.
 * line is the source line printed in the error message.
 */
static void check64_(long long n, long long expect, int line)
{
    if (n == expect) {
        return;
    }
    printf("ERROR: 0x%08llx != 0x%08llx, line %d\n", n, expect, line);
    err++;
}
212
213int main()
214{
215 Memory n;
216 unsigned int res32;
217 unsigned long long res64;
218
219
220
221
222 n.w[0] = ~0;
223 res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[0], 0x87);
224 check32(res32, 0xffffff87);
225
226 n.w[0] = ~0;
227 res32 = mem_noshuf_sb_lub(&n.b[0], &n.ub[0], 0x87);
228 check32(res32, 0x00000087);
229
230 n.w[0] = ~0;
231 res32 = mem_noshuf_sb_lh(&n.b[0], &n.h[0], 0x87);
232 check32(res32, 0xffffff87);
233
234 n.w[0] = ~0;
235 res32 = mem_noshuf_sb_luh(&n.b[0], &n.uh[0], 0x87);
236 check32(res32, 0x0000ff87);
237
238 n.w[0] = ~0;
239 res32 = mem_noshuf_sb_lw(&n.b[0], &n.w[0], 0x87);
240 check32(res32, 0xffffff87);
241
242 n.d[0] = ~0LL;
243 res64 = mem_noshuf_sb_ld(&n.b[0], &n.d[0], 0x87);
244 check64(res64, 0xffffffffffffff87LL);
245
246
247
248
249 n.w[0] = ~0;
250 res32 = mem_noshuf_sh_lb(&n.h[0], &n.b[0], 0x8787);
251 check32(res32, 0xffffff87);
252
253 n.w[0] = ~0;
254 res32 = mem_noshuf_sh_lub(&n.h[0], &n.ub[1], 0x8f87);
255 check32(res32, 0x0000008f);
256
257 n.w[0] = ~0;
258 res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[0], 0x8a87);
259 check32(res32, 0xffff8a87);
260
261 n.w[0] = ~0;
262 res32 = mem_noshuf_sh_luh(&n.h[0], &n.uh[0], 0x8a87);
263 check32(res32, 0x8a87);
264
265 n.w[0] = ~0;
266 res32 = mem_noshuf_sh_lw(&n.h[1], &n.w[0], 0x8a87);
267 check32(res32, 0x8a87ffff);
268
269 n.w[0] = ~0;
270 res64 = mem_noshuf_sh_ld(&n.h[1], &n.d[0], 0x8a87);
271 check64(res64, 0xffffffff8a87ffffLL);
272
273
274
275
276 n.w[0] = ~0;
277 res32 = mem_noshuf_sw_lb(&n.w[0], &n.b[0], 0x12345687);
278 check32(res32, 0xffffff87);
279
280 n.w[0] = ~0;
281 res32 = mem_noshuf_sw_lub(&n.w[0], &n.ub[0], 0x12345687);
282 check32(res32, 0x00000087);
283
284 n.w[0] = ~0;
285 res32 = mem_noshuf_sw_lh(&n.w[0], &n.h[0], 0x1234f678);
286 check32(res32, 0xfffff678);
287
288 n.w[0] = ~0;
289 res32 = mem_noshuf_sw_luh(&n.w[0], &n.uh[0], 0x12345678);
290 check32(res32, 0x00005678);
291
292 n.w[0] = ~0;
293 res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[0], 0x12345678);
294 check32(res32, 0x12345678);
295
296 n.d[0] = ~0LL;
297 res64 = mem_noshuf_sw_ld(&n.w[0], &n.d[0], 0x12345678);
298 check64(res64, 0xffffffff12345678LL);
299
300
301
302
303 n.d[0] = ~0LL;
304 res32 = mem_noshuf_sd_lb(&n.d[0], &n.b[1], 0x123456789abcdef0);
305 check32(res32, 0xffffffde);
306
307 n.d[0] = ~0LL;
308 res32 = mem_noshuf_sd_lub(&n.d[0], &n.ub[1], 0x123456789abcdef0);
309 check32(res32, 0x000000de);
310
311 n.d[0] = ~0LL;
312 res32 = mem_noshuf_sd_lh(&n.d[0], &n.h[1], 0x123456789abcdef0);
313 check32(res32, 0xffff9abc);
314
315 n.d[0] = ~0LL;
316 res32 = mem_noshuf_sd_luh(&n.d[0], &n.uh[1], 0x123456789abcdef0);
317 check32(res32, 0x00009abc);
318
319 n.d[0] = ~0LL;
320 res32 = mem_noshuf_sd_lw(&n.d[0], &n.w[1], 0x123456789abcdef0);
321 check32(res32, 0x12345678);
322
323 n.d[0] = ~0LL;
324 res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[0], 0x123456789abcdef0);
325 check64(res64, 0x123456789abcdef0LL);
326
327
328
329
330 n.w[0] = ~0;
331 res32 = cancel_sw_lb(0, &n.w[0], &n.b[0], 0x12345678);
332 check32(res32, 0xffffffff);
333
334 n.w[0] = ~0;
335 res32 = cancel_sw_lb(1, &n.w[0], &n.b[0], 0x12345687);
336 check32(res32, 0xffffff87);
337
338
339
340
341 n.d[0] = ~0LL;
342 res64 = cancel_sw_ld(0, &n.w[0], &n.d[0], 0x12345678);
343 check64(res64, 0xffffffffffffffffLL);
344
345 n.d[0] = ~0LL;
346 res64 = cancel_sw_ld(1, &n.w[0], &n.d[0], 0x12345678);
347 check64(res64, 0xffffffff12345678LL);
348
349 n.d[0] = ~0LL;
350 res64 = cancel_sw_ld(0, &n.w[1], &n.d[0], 0x12345678);
351 check64(res64, 0xffffffffffffffffLL);
352
353 n.d[0] = ~0LL;
354 res64 = cancel_sw_ld(1, &n.w[1], &n.d[0], 0x12345678);
355 check64(res64, 0x12345678ffffffffLL);
356
357
358
359
360 n.w[0] = ~0;
361 res32 = mem_noshuf_sb_lb(&n.b[1], &n.b[0], 0x87);
362 check32(res32, 0xffffffff);
363
364 n.w[0] = ~0;
365 res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[1], 0x87);
366 check32(res32, 0xffffffff);
367
368 n.w[0] = ~0;
369 res32 = mem_noshuf_sh_lh(&n.h[1], &n.h[0], 0x8787);
370 check32(res32, 0xffffffff);
371
372 n.w[0] = ~0;
373 res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[1], 0x8787);
374 check32(res32, 0xffffffff);
375
376 n.d[0] = ~0LL;
377 res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[1], 0x12345678);
378 check32(res32, 0xffffffff);
379
380 n.d[0] = ~0LL;
381 res32 = mem_noshuf_sw_lw(&n.w[1], &n.w[0], 0x12345678);
382 check32(res32, 0xffffffff);
383
384 n.d[0] = ~0LL;
385 n.d[1] = ~0LL;
386 res64 = mem_noshuf_sd_ld(&n.d[1], &n.d[0], 0x123456789abcdef0LL);
387 check64(res64, 0xffffffffffffffffLL);
388
389 n.d[0] = ~0LL;
390 n.d[1] = ~0LL;
391 res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[1], 0x123456789abcdef0LL);
392 check64(res64, 0xffffffffffffffffLL);
393
394 n.w[0] = ~0;
395 res32 = pred_lw_sw(0, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
396 check32(res32, 0x12345678);
397 check32(n.w[0], 0xc0ffeeda);
398
399 n.w[0] = ~0;
400 res32 = pred_lw_sw(1, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
401 check32(res32, 0xc0ffeeda);
402 check32(n.w[0], 0xc0ffeeda);
403
404 n.w[0] = ~0;
405 res32 = pred_lw_sw_pi(0, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
406 check32(res32, 0x12345678);
407 check32(n.w[0], 0xc0ffeeda);
408
409 n.w[0] = ~0;
410 res32 = pred_lw_sw_pi(1, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
411 check32(res32, 0xc0ffeeda);
412 check32(n.w[0], 0xc0ffeeda);
413
414 n.d[0] = ~0LL;
415 res64 = pred_ld_sd(0, &n.d[0], &n.d[0],
416 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
417 check64(res64, 0x1234567812345678LL);
418 check64(n.d[0], 0xc0ffeedac0ffeedaLL);
419
420 n.d[0] = ~0LL;
421 res64 = pred_ld_sd(1, &n.d[0], &n.d[0],
422 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
423 check64(res64, 0xc0ffeedac0ffeedaLL);
424 check64(n.d[0], 0xc0ffeedac0ffeedaLL);
425
426 n.d[0] = ~0LL;
427 res64 = pred_ld_sd_pi(0, &n.d[0], &n.d[0],
428 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
429 check64(res64, 0x1234567812345678LL);
430 check64(n.d[0], 0xc0ffeedac0ffeedaLL);
431
432 n.d[0] = ~0LL;
433 res64 = pred_ld_sd_pi(1, &n.d[0], &n.d[0],
434 0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
435 check64(res64, 0xc0ffeedac0ffeedaLL);
436 check64(n.d[0], 0xc0ffeedac0ffeedaLL);
437
438 puts(err ? "FAIL" : "PASS");
439 return err;
440}
441