1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18#include <stdio.h>
19
20
21
22
23
24
25
26
/*
 * MEM_NOSHUF32 - generate NAME(), a store + load pair in one packet.
 *
 * NAME(p, q, x) issues a single packet tagged :mem_noshuf that
 *   - stores x through p using the ST_OP store form, and
 *   - loads through q using the LD_OP load form into a 32-bit register.
 * The loaded register is returned as an unsigned int.
 *
 * The checks in main() expect the load to observe the value written by the
 * store whenever the two addresses overlap.
 *
 * The "memory" clobber stops the compiler from caching or reordering
 * accesses to the tested memory around the asm statement.
 */
#define MEM_NOSHUF32(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
static inline unsigned int NAME(ST_TYPE *p, LD_TYPE *q, ST_TYPE x) \
{ \
    unsigned int loaded; \
    asm volatile("{\n\t" \
                 "    " #ST_OP "(%[st_addr]) = %[st_val]\n\t" \
                 "    %[dst] = " #LD_OP "(%[ld_addr])\n\t" \
                 "}:mem_noshuf\n" \
                 : [dst] "=r"(loaded) \
                 : [st_addr] "r"(p), [ld_addr] "r"(q), [st_val] "r"(x) \
                 : "memory"); \
    return loaded; \
}
40
/*
 * MEM_NOSHUF64 - generate NAME(), a store + 64-bit load pair in one packet.
 *
 * NAME(p, q, x) issues a single packet tagged :mem_noshuf that
 *   - stores x through p using the ST_OP store form, and
 *   - loads through q using the LD_OP load form into a 64-bit destination.
 * The loaded value is returned as an unsigned long long.
 *
 * The checks in main() expect the load to observe the value written by the
 * store whenever the two addresses overlap.
 *
 * The "memory" clobber stops the compiler from caching or reordering
 * accesses to the tested memory around the asm statement.
 */
#define MEM_NOSHUF64(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
static inline unsigned long long NAME(ST_TYPE *p, LD_TYPE *q, ST_TYPE x) \
{ \
    unsigned long long loaded; \
    asm volatile("{\n\t" \
                 "    " #ST_OP "(%[st_addr]) = %[st_val]\n\t" \
                 "    %[dst] = " #LD_OP "(%[ld_addr])\n\t" \
                 "}:mem_noshuf\n" \
                 : [dst] "=r"(loaded) \
                 : [st_addr] "r"(p), [ld_addr] "r"(q), [st_val] "r"(x) \
                 : "memory"); \
    return loaded; \
}
54
55
56MEM_NOSHUF32(mem_noshuf_sb_lb, signed char, signed char, memb, memb)
57MEM_NOSHUF32(mem_noshuf_sb_lub, signed char, unsigned char, memb, memub)
58MEM_NOSHUF32(mem_noshuf_sb_lh, signed char, signed short, memb, memh)
59MEM_NOSHUF32(mem_noshuf_sb_luh, signed char, unsigned short, memb, memuh)
60MEM_NOSHUF32(mem_noshuf_sb_lw, signed char, signed int, memb, memw)
61MEM_NOSHUF64(mem_noshuf_sb_ld, signed char, signed long long, memb, memd)
62
63
64MEM_NOSHUF32(mem_noshuf_sh_lb, signed short, signed char, memh, memb)
65MEM_NOSHUF32(mem_noshuf_sh_lub, signed short, unsigned char, memh, memub)
66MEM_NOSHUF32(mem_noshuf_sh_lh, signed short, signed short, memh, memh)
67MEM_NOSHUF32(mem_noshuf_sh_luh, signed short, unsigned short, memh, memuh)
68MEM_NOSHUF32(mem_noshuf_sh_lw, signed short, signed int, memh, memw)
69MEM_NOSHUF64(mem_noshuf_sh_ld, signed short, signed long long, memh, memd)
70
71
72MEM_NOSHUF32(mem_noshuf_sw_lb, signed int, signed char, memw, memb)
73MEM_NOSHUF32(mem_noshuf_sw_lub, signed int, unsigned char, memw, memub)
74MEM_NOSHUF32(mem_noshuf_sw_lh, signed int, signed short, memw, memh)
75MEM_NOSHUF32(mem_noshuf_sw_luh, signed int, unsigned short, memw, memuh)
76MEM_NOSHUF32(mem_noshuf_sw_lw, signed int, signed int, memw, memw)
77MEM_NOSHUF64(mem_noshuf_sw_ld, signed int, signed long long, memw, memd)
78
79
80MEM_NOSHUF32(mem_noshuf_sd_lb, long long, signed char, memd, memb)
81MEM_NOSHUF32(mem_noshuf_sd_lub, long long, unsigned char, memd, memub)
82MEM_NOSHUF32(mem_noshuf_sd_lh, long long, signed short, memd, memh)
83MEM_NOSHUF32(mem_noshuf_sd_luh, long long, unsigned short, memd, memuh)
84MEM_NOSHUF32(mem_noshuf_sd_lw, long long, signed int, memd, memw)
85MEM_NOSHUF64(mem_noshuf_sd_ld, long long, signed long long, memd, memd)
86
87static inline unsigned int cancel_sw_lb(int pred, int *p, signed char *q, int x)
88{
89 unsigned int ret;
90 asm volatile("p0 = cmp.eq(%4, #0)\n\t"
91 "{\n\t"
92 " if (!p0) memw(%1) = %3\n\t"
93 " %0 = memb(%2)\n\t"
94 "}:mem_noshuf\n"
95 : "=r"(ret)
96 : "r"(p), "r"(q), "r"(x), "r"(pred)
97 : "p0", "memory");
98 return ret;
99}
100
101static inline
102unsigned long long cancel_sw_ld(int pred, int *p, long long *q, int x)
103{
104 long long ret;
105 asm volatile("p0 = cmp.eq(%4, #0)\n\t"
106 "{\n\t"
107 " if (!p0) memw(%1) = %3\n\t"
108 " %0 = memd(%2)\n\t"
109 "}:mem_noshuf\n"
110 : "=r"(ret)
111 : "r"(p), "r"(q), "r"(x), "r"(pred)
112 : "p0", "memory");
113 return ret;
114}
115
/*
 * Scratch memory for the tests.  Every member overlays the same storage, so
 * a value stored through one view can be read back through another view of
 * a different width or signedness.  With 1/2/4/8-byte char/short/int/
 * long long each view spans the same 16 bytes.
 */
typedef union {
    /* byte views */
    signed char b[16];
    unsigned char ub[16];
    /* half-word views */
    short h[8];
    unsigned short uh[8];
    /* word views */
    int w[4];
    unsigned int uw[4];
    /* double-word views */
    long long d[2];
    unsigned long long ud[2];
} Memory;
126
/* Number of failed checks so far; main() uses it as the exit status. */
int err;

/*
 * Compare a 32-bit result against its expected value.  On mismatch, print
 * both values in hex and increment the global error counter.
 */
static void check32(int n, int expect)
{
    if (n != expect) {
        /* %x consumes an unsigned int, so convert the arguments to match */
        printf("ERROR: 0x%08x != 0x%08x\n",
               (unsigned int)n, (unsigned int)expect);
        err++;
    }
}

/*
 * Compare a 64-bit result against its expected value.  On mismatch, print
 * both values in hex and increment the global error counter.
 */
static void check64(long long n, long long expect)
{
    if (n != expect) {
        /* %llx consumes an unsigned long long, so convert the arguments */
        printf("ERROR: 0x%08llx != 0x%08llx\n",
               (unsigned long long)n, (unsigned long long)expect);
        err++;
    }
}
144
145int main()
146{
147 Memory n;
148 unsigned int res32;
149 unsigned long long res64;
150
151
152
153
154 n.w[0] = ~0;
155 res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[0], 0x87);
156 check32(res32, 0xffffff87);
157
158 n.w[0] = ~0;
159 res32 = mem_noshuf_sb_lub(&n.b[0], &n.ub[0], 0x87);
160 check32(res32, 0x00000087);
161
162 n.w[0] = ~0;
163 res32 = mem_noshuf_sb_lh(&n.b[0], &n.h[0], 0x87);
164 check32(res32, 0xffffff87);
165
166 n.w[0] = ~0;
167 res32 = mem_noshuf_sb_luh(&n.b[0], &n.uh[0], 0x87);
168 check32(res32, 0x0000ff87);
169
170 n.w[0] = ~0;
171 res32 = mem_noshuf_sb_lw(&n.b[0], &n.w[0], 0x87);
172 check32(res32, 0xffffff87);
173
174 n.d[0] = ~0LL;
175 res64 = mem_noshuf_sb_ld(&n.b[0], &n.d[0], 0x87);
176 check64(res64, 0xffffffffffffff87LL);
177
178
179
180
181 n.w[0] = ~0;
182 res32 = mem_noshuf_sh_lb(&n.h[0], &n.b[0], 0x8787);
183 check32(res32, 0xffffff87);
184
185 n.w[0] = ~0;
186 res32 = mem_noshuf_sh_lub(&n.h[0], &n.ub[1], 0x8f87);
187 check32(res32, 0x0000008f);
188
189 n.w[0] = ~0;
190 res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[0], 0x8a87);
191 check32(res32, 0xffff8a87);
192
193 n.w[0] = ~0;
194 res32 = mem_noshuf_sh_luh(&n.h[0], &n.uh[0], 0x8a87);
195 check32(res32, 0x8a87);
196
197 n.w[0] = ~0;
198 res32 = mem_noshuf_sh_lw(&n.h[1], &n.w[0], 0x8a87);
199 check32(res32, 0x8a87ffff);
200
201 n.w[0] = ~0;
202 res64 = mem_noshuf_sh_ld(&n.h[1], &n.d[0], 0x8a87);
203 check64(res64, 0xffffffff8a87ffffLL);
204
205
206
207
208 n.w[0] = ~0;
209 res32 = mem_noshuf_sw_lb(&n.w[0], &n.b[0], 0x12345687);
210 check32(res32, 0xffffff87);
211
212 n.w[0] = ~0;
213 res32 = mem_noshuf_sw_lub(&n.w[0], &n.ub[0], 0x12345687);
214 check32(res32, 0x00000087);
215
216 n.w[0] = ~0;
217 res32 = mem_noshuf_sw_lh(&n.w[0], &n.h[0], 0x1234f678);
218 check32(res32, 0xfffff678);
219
220 n.w[0] = ~0;
221 res32 = mem_noshuf_sw_luh(&n.w[0], &n.uh[0], 0x12345678);
222 check32(res32, 0x00005678);
223
224 n.w[0] = ~0;
225 res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[0], 0x12345678);
226 check32(res32, 0x12345678);
227
228 n.d[0] = ~0LL;
229 res64 = mem_noshuf_sw_ld(&n.w[0], &n.d[0], 0x12345678);
230 check64(res64, 0xffffffff12345678LL);
231
232
233
234
235 n.d[0] = ~0LL;
236 res32 = mem_noshuf_sd_lb(&n.d[0], &n.b[1], 0x123456789abcdef0);
237 check32(res32, 0xffffffde);
238
239 n.d[0] = ~0LL;
240 res32 = mem_noshuf_sd_lub(&n.d[0], &n.ub[1], 0x123456789abcdef0);
241 check32(res32, 0x000000de);
242
243 n.d[0] = ~0LL;
244 res32 = mem_noshuf_sd_lh(&n.d[0], &n.h[1], 0x123456789abcdef0);
245 check32(res32, 0xffff9abc);
246
247 n.d[0] = ~0LL;
248 res32 = mem_noshuf_sd_luh(&n.d[0], &n.uh[1], 0x123456789abcdef0);
249 check32(res32, 0x00009abc);
250
251 n.d[0] = ~0LL;
252 res32 = mem_noshuf_sd_lw(&n.d[0], &n.w[1], 0x123456789abcdef0);
253 check32(res32, 0x12345678);
254
255 n.d[0] = ~0LL;
256 res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[0], 0x123456789abcdef0);
257 check64(res64, 0x123456789abcdef0LL);
258
259
260
261
262 n.w[0] = ~0;
263 res32 = cancel_sw_lb(0, &n.w[0], &n.b[0], 0x12345678);
264 check32(res32, 0xffffffff);
265
266 n.w[0] = ~0;
267 res32 = cancel_sw_lb(1, &n.w[0], &n.b[0], 0x12345687);
268 check32(res32, 0xffffff87);
269
270
271
272
273 n.d[0] = ~0LL;
274 res64 = cancel_sw_ld(0, &n.w[0], &n.d[0], 0x12345678);
275 check64(res64, 0xffffffffffffffffLL);
276
277 n.d[0] = ~0LL;
278 res64 = cancel_sw_ld(1, &n.w[0], &n.d[0], 0x12345678);
279 check64(res64, 0xffffffff12345678LL);
280
281 n.d[0] = ~0LL;
282 res64 = cancel_sw_ld(0, &n.w[1], &n.d[0], 0x12345678);
283 check64(res64, 0xffffffffffffffffLL);
284
285 n.d[0] = ~0LL;
286 res64 = cancel_sw_ld(1, &n.w[1], &n.d[0], 0x12345678);
287 check64(res64, 0x12345678ffffffffLL);
288
289
290
291
292 n.w[0] = ~0;
293 res32 = mem_noshuf_sb_lb(&n.b[1], &n.b[0], 0x87);
294 check32(res32, 0xffffffff);
295
296 n.w[0] = ~0;
297 res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[1], 0x87);
298 check32(res32, 0xffffffff);
299
300 n.w[0] = ~0;
301 res32 = mem_noshuf_sh_lh(&n.h[1], &n.h[0], 0x8787);
302 check32(res32, 0xffffffff);
303
304 n.w[0] = ~0;
305 res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[1], 0x8787);
306 check32(res32, 0xffffffff);
307
308 n.d[0] = ~0LL;
309 res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[1], 0x12345678);
310 check32(res32, 0xffffffff);
311
312 n.d[0] = ~0LL;
313 res32 = mem_noshuf_sw_lw(&n.w[1], &n.w[0], 0x12345678);
314 check32(res32, 0xffffffff);
315
316 n.d[0] = ~0LL;
317 n.d[1] = ~0LL;
318 res64 = mem_noshuf_sd_ld(&n.d[1], &n.d[0], 0x123456789abcdef0LL);
319 check64(res64, 0xffffffffffffffffLL);
320
321 n.d[0] = ~0LL;
322 n.d[1] = ~0LL;
323 res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[1], 0x123456789abcdef0LL);
324 check64(res64, 0xffffffffffffffffLL);
325
326 puts(err ? "FAIL" : "PASS");
327 return err;
328}
329