1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20#include "qemu/osdep.h"
21#include "qemu/host-utils.h"
22#include "cpu.h"
23#include "exec/helper-proto-common.h"
24#include "tcg/tcg-gvec-desc.h"
25
26
/*
 * Zero the bytes of @d from offset @oprsz up to simd_maxsz(@desc),
 * writing one uint64_t at a time.  Does nothing when the maximum size
 * does not exceed @oprsz.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    const intptr_t limit = simd_maxsz(desc);

    if (unlikely(limit > oprsz)) {
        intptr_t off = oprsz;

        /* limit > oprsz guarantees at least one store. */
        do {
            *(uint64_t *)(d + off) = 0;
            off += sizeof(uint64_t);
        } while (off < limit);
    }
}
38
39void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
40{
41 intptr_t oprsz = simd_oprsz(desc);
42 intptr_t i;
43
44 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
45 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
46 }
47 clear_high(d, oprsz, desc);
48}
49
50void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
51{
52 intptr_t oprsz = simd_oprsz(desc);
53 intptr_t i;
54
55 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
56 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
57 }
58 clear_high(d, oprsz, desc);
59}
60
61void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
62{
63 intptr_t oprsz = simd_oprsz(desc);
64 intptr_t i;
65
66 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
67 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
68 }
69 clear_high(d, oprsz, desc);
70}
71
72void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
73{
74 intptr_t oprsz = simd_oprsz(desc);
75 intptr_t i;
76
77 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
78 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
79 }
80 clear_high(d, oprsz, desc);
81}
82
83void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
84{
85 intptr_t oprsz = simd_oprsz(desc);
86 intptr_t i;
87
88 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
89 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b;
90 }
91 clear_high(d, oprsz, desc);
92}
93
94void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
95{
96 intptr_t oprsz = simd_oprsz(desc);
97 intptr_t i;
98
99 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
100 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b;
101 }
102 clear_high(d, oprsz, desc);
103}
104
105void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
106{
107 intptr_t oprsz = simd_oprsz(desc);
108 intptr_t i;
109
110 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
111 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b;
112 }
113 clear_high(d, oprsz, desc);
114}
115
116void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
117{
118 intptr_t oprsz = simd_oprsz(desc);
119 intptr_t i;
120
121 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
122 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b;
123 }
124 clear_high(d, oprsz, desc);
125}
126
127void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
128{
129 intptr_t oprsz = simd_oprsz(desc);
130 intptr_t i;
131
132 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
133 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
134 }
135 clear_high(d, oprsz, desc);
136}
137
138void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
139{
140 intptr_t oprsz = simd_oprsz(desc);
141 intptr_t i;
142
143 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
144 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
145 }
146 clear_high(d, oprsz, desc);
147}
148
149void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
150{
151 intptr_t oprsz = simd_oprsz(desc);
152 intptr_t i;
153
154 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
155 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
156 }
157 clear_high(d, oprsz, desc);
158}
159
160void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
161{
162 intptr_t oprsz = simd_oprsz(desc);
163 intptr_t i;
164
165 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
166 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
167 }
168 clear_high(d, oprsz, desc);
169}
170
171void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
172{
173 intptr_t oprsz = simd_oprsz(desc);
174 intptr_t i;
175
176 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
177 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b;
178 }
179 clear_high(d, oprsz, desc);
180}
181
182void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
183{
184 intptr_t oprsz = simd_oprsz(desc);
185 intptr_t i;
186
187 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
188 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b;
189 }
190 clear_high(d, oprsz, desc);
191}
192
193void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
194{
195 intptr_t oprsz = simd_oprsz(desc);
196 intptr_t i;
197
198 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
199 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b;
200 }
201 clear_high(d, oprsz, desc);
202}
203
204void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
205{
206 intptr_t oprsz = simd_oprsz(desc);
207 intptr_t i;
208
209 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
210 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b;
211 }
212 clear_high(d, oprsz, desc);
213}
214
215void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
216{
217 intptr_t oprsz = simd_oprsz(desc);
218 intptr_t i;
219
220 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
221 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
222 }
223 clear_high(d, oprsz, desc);
224}
225
226void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
227{
228 intptr_t oprsz = simd_oprsz(desc);
229 intptr_t i;
230
231 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
232 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
233 }
234 clear_high(d, oprsz, desc);
235}
236
237void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
238{
239 intptr_t oprsz = simd_oprsz(desc);
240 intptr_t i;
241
242 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
243 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
244 }
245 clear_high(d, oprsz, desc);
246}
247
248void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
249{
250 intptr_t oprsz = simd_oprsz(desc);
251 intptr_t i;
252
253 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
254 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
255 }
256 clear_high(d, oprsz, desc);
257}
258
259void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
260{
261 intptr_t oprsz = simd_oprsz(desc);
262 intptr_t i;
263
264 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
265 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b;
266 }
267 clear_high(d, oprsz, desc);
268}
269
270void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
271{
272 intptr_t oprsz = simd_oprsz(desc);
273 intptr_t i;
274
275 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
276 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b;
277 }
278 clear_high(d, oprsz, desc);
279}
280
281void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
282{
283 intptr_t oprsz = simd_oprsz(desc);
284 intptr_t i;
285
286 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
287 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b;
288 }
289 clear_high(d, oprsz, desc);
290}
291
292void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
293{
294 intptr_t oprsz = simd_oprsz(desc);
295 intptr_t i;
296
297 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
298 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b;
299 }
300 clear_high(d, oprsz, desc);
301}
302
303void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
304{
305 intptr_t oprsz = simd_oprsz(desc);
306 intptr_t i;
307
308 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
309 *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
310 }
311 clear_high(d, oprsz, desc);
312}
313
314void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
315{
316 intptr_t oprsz = simd_oprsz(desc);
317 intptr_t i;
318
319 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
320 *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
321 }
322 clear_high(d, oprsz, desc);
323}
324
325void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
326{
327 intptr_t oprsz = simd_oprsz(desc);
328 intptr_t i;
329
330 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
331 *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
332 }
333 clear_high(d, oprsz, desc);
334}
335
336void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
337{
338 intptr_t oprsz = simd_oprsz(desc);
339 intptr_t i;
340
341 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
342 *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
343 }
344 clear_high(d, oprsz, desc);
345}
346
347void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
348{
349 intptr_t oprsz = simd_oprsz(desc);
350 intptr_t i;
351
352 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
353 int8_t aa = *(int8_t *)(a + i);
354 *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
355 }
356 clear_high(d, oprsz, desc);
357}
358
359void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
360{
361 intptr_t oprsz = simd_oprsz(desc);
362 intptr_t i;
363
364 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
365 int16_t aa = *(int16_t *)(a + i);
366 *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
367 }
368 clear_high(d, oprsz, desc);
369}
370
371void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
372{
373 intptr_t oprsz = simd_oprsz(desc);
374 intptr_t i;
375
376 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
377 int32_t aa = *(int32_t *)(a + i);
378 *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
379 }
380 clear_high(d, oprsz, desc);
381}
382
383void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
384{
385 intptr_t oprsz = simd_oprsz(desc);
386 intptr_t i;
387
388 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
389 int64_t aa = *(int64_t *)(a + i);
390 *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
391 }
392 clear_high(d, oprsz, desc);
393}
394
395void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
396{
397 intptr_t oprsz = simd_oprsz(desc);
398
399 memcpy(d, a, oprsz);
400 clear_high(d, oprsz, desc);
401}
402
403void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
404{
405 intptr_t oprsz = simd_oprsz(desc);
406 intptr_t i;
407
408 if (c == 0) {
409 oprsz = 0;
410 } else {
411 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
412 *(uint64_t *)(d + i) = c;
413 }
414 }
415 clear_high(d, oprsz, desc);
416}
417
418void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
419{
420 intptr_t oprsz = simd_oprsz(desc);
421 intptr_t i;
422
423 if (c == 0) {
424 oprsz = 0;
425 } else {
426 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
427 *(uint32_t *)(d + i) = c;
428 }
429 }
430 clear_high(d, oprsz, desc);
431}
432
433void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
434{
435 HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
436}
437
438void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
439{
440 HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
441}
442
443void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
444{
445 intptr_t oprsz = simd_oprsz(desc);
446 intptr_t i;
447
448 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
449 *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
450 }
451 clear_high(d, oprsz, desc);
452}
453
454void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
455{
456 intptr_t oprsz = simd_oprsz(desc);
457 intptr_t i;
458
459 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
460 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
461 }
462 clear_high(d, oprsz, desc);
463}
464
465void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
466{
467 intptr_t oprsz = simd_oprsz(desc);
468 intptr_t i;
469
470 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
471 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
472 }
473 clear_high(d, oprsz, desc);
474}
475
476void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
477{
478 intptr_t oprsz = simd_oprsz(desc);
479 intptr_t i;
480
481 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
482 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
483 }
484 clear_high(d, oprsz, desc);
485}
486
487void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
488{
489 intptr_t oprsz = simd_oprsz(desc);
490 intptr_t i;
491
492 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
493 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
494 }
495 clear_high(d, oprsz, desc);
496}
497
498void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
499{
500 intptr_t oprsz = simd_oprsz(desc);
501 intptr_t i;
502
503 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
504 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
505 }
506 clear_high(d, oprsz, desc);
507}
508
509void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
510{
511 intptr_t oprsz = simd_oprsz(desc);
512 intptr_t i;
513
514 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
515 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
516 }
517 clear_high(d, oprsz, desc);
518}
519
520void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
521{
522 intptr_t oprsz = simd_oprsz(desc);
523 intptr_t i;
524
525 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
526 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
527 }
528 clear_high(d, oprsz, desc);
529}
530
531void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
532{
533 intptr_t oprsz = simd_oprsz(desc);
534 intptr_t i;
535
536 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
537 *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
538 }
539 clear_high(d, oprsz, desc);
540}
541
542void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
543{
544 intptr_t oprsz = simd_oprsz(desc);
545 intptr_t i;
546
547 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
548 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b;
549 }
550 clear_high(d, oprsz, desc);
551}
552
553void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc)
554{
555 intptr_t oprsz = simd_oprsz(desc);
556 intptr_t i;
557
558 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
559 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b;
560 }
561 clear_high(d, oprsz, desc);
562}
563
564void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
565{
566 intptr_t oprsz = simd_oprsz(desc);
567 intptr_t i;
568
569 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
570 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b;
571 }
572 clear_high(d, oprsz, desc);
573}
574
575void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
576{
577 intptr_t oprsz = simd_oprsz(desc);
578 intptr_t i;
579
580 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
581 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b;
582 }
583 clear_high(d, oprsz, desc);
584}
585
586void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
587{
588 intptr_t oprsz = simd_oprsz(desc);
589 int shift = simd_data(desc);
590 intptr_t i;
591
592 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
593 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
594 }
595 clear_high(d, oprsz, desc);
596}
597
598void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
599{
600 intptr_t oprsz = simd_oprsz(desc);
601 int shift = simd_data(desc);
602 intptr_t i;
603
604 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
605 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
606 }
607 clear_high(d, oprsz, desc);
608}
609
610void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
611{
612 intptr_t oprsz = simd_oprsz(desc);
613 int shift = simd_data(desc);
614 intptr_t i;
615
616 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
617 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
618 }
619 clear_high(d, oprsz, desc);
620}
621
622void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
623{
624 intptr_t oprsz = simd_oprsz(desc);
625 int shift = simd_data(desc);
626 intptr_t i;
627
628 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
629 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
630 }
631 clear_high(d, oprsz, desc);
632}
633
634void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
635{
636 intptr_t oprsz = simd_oprsz(desc);
637 int shift = simd_data(desc);
638 intptr_t i;
639
640 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
641 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
642 }
643 clear_high(d, oprsz, desc);
644}
645
646void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
647{
648 intptr_t oprsz = simd_oprsz(desc);
649 int shift = simd_data(desc);
650 intptr_t i;
651
652 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
653 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
654 }
655 clear_high(d, oprsz, desc);
656}
657
658void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
659{
660 intptr_t oprsz = simd_oprsz(desc);
661 int shift = simd_data(desc);
662 intptr_t i;
663
664 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
665 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
666 }
667 clear_high(d, oprsz, desc);
668}
669
670void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
671{
672 intptr_t oprsz = simd_oprsz(desc);
673 int shift = simd_data(desc);
674 intptr_t i;
675
676 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
677 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
678 }
679 clear_high(d, oprsz, desc);
680}
681
682void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
683{
684 intptr_t oprsz = simd_oprsz(desc);
685 int shift = simd_data(desc);
686 intptr_t i;
687
688 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
689 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
690 }
691 clear_high(d, oprsz, desc);
692}
693
694void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
695{
696 intptr_t oprsz = simd_oprsz(desc);
697 int shift = simd_data(desc);
698 intptr_t i;
699
700 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
701 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
702 }
703 clear_high(d, oprsz, desc);
704}
705
706void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
707{
708 intptr_t oprsz = simd_oprsz(desc);
709 int shift = simd_data(desc);
710 intptr_t i;
711
712 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
713 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
714 }
715 clear_high(d, oprsz, desc);
716}
717
718void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
719{
720 intptr_t oprsz = simd_oprsz(desc);
721 int shift = simd_data(desc);
722 intptr_t i;
723
724 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
725 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
726 }
727 clear_high(d, oprsz, desc);
728}
729
730void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
731{
732 intptr_t oprsz = simd_oprsz(desc);
733 int shift = simd_data(desc);
734 intptr_t i;
735
736 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
737 *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
738 }
739 clear_high(d, oprsz, desc);
740}
741
742void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
743{
744 intptr_t oprsz = simd_oprsz(desc);
745 int shift = simd_data(desc);
746 intptr_t i;
747
748 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
749 *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
750 }
751 clear_high(d, oprsz, desc);
752}
753
754void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
755{
756 intptr_t oprsz = simd_oprsz(desc);
757 int shift = simd_data(desc);
758 intptr_t i;
759
760 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
761 *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
762 }
763 clear_high(d, oprsz, desc);
764}
765
766void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
767{
768 intptr_t oprsz = simd_oprsz(desc);
769 int shift = simd_data(desc);
770 intptr_t i;
771
772 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
773 *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
774 }
775 clear_high(d, oprsz, desc);
776}
777
778void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
779{
780 intptr_t oprsz = simd_oprsz(desc);
781 intptr_t i;
782
783 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
784 uint8_t sh = *(uint8_t *)(b + i) & 7;
785 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
786 }
787 clear_high(d, oprsz, desc);
788}
789
790void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
791{
792 intptr_t oprsz = simd_oprsz(desc);
793 intptr_t i;
794
795 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
796 uint8_t sh = *(uint16_t *)(b + i) & 15;
797 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
798 }
799 clear_high(d, oprsz, desc);
800}
801
802void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
803{
804 intptr_t oprsz = simd_oprsz(desc);
805 intptr_t i;
806
807 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
808 uint8_t sh = *(uint32_t *)(b + i) & 31;
809 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
810 }
811 clear_high(d, oprsz, desc);
812}
813
814void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
815{
816 intptr_t oprsz = simd_oprsz(desc);
817 intptr_t i;
818
819 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
820 uint8_t sh = *(uint64_t *)(b + i) & 63;
821 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
822 }
823 clear_high(d, oprsz, desc);
824}
825
826void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
827{
828 intptr_t oprsz = simd_oprsz(desc);
829 intptr_t i;
830
831 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
832 uint8_t sh = *(uint8_t *)(b + i) & 7;
833 *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
834 }
835 clear_high(d, oprsz, desc);
836}
837
838void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
839{
840 intptr_t oprsz = simd_oprsz(desc);
841 intptr_t i;
842
843 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
844 uint8_t sh = *(uint16_t *)(b + i) & 15;
845 *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
846 }
847 clear_high(d, oprsz, desc);
848}
849
850void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
851{
852 intptr_t oprsz = simd_oprsz(desc);
853 intptr_t i;
854
855 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
856 uint8_t sh = *(uint32_t *)(b + i) & 31;
857 *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
858 }
859 clear_high(d, oprsz, desc);
860}
861
862void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
863{
864 intptr_t oprsz = simd_oprsz(desc);
865 intptr_t i;
866
867 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
868 uint8_t sh = *(uint64_t *)(b + i) & 63;
869 *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
870 }
871 clear_high(d, oprsz, desc);
872}
873
874void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
875{
876 intptr_t oprsz = simd_oprsz(desc);
877 intptr_t i;
878
879 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
880 uint8_t sh = *(uint8_t *)(b + i) & 7;
881 *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
882 }
883 clear_high(d, oprsz, desc);
884}
885
886void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
887{
888 intptr_t oprsz = simd_oprsz(desc);
889 intptr_t i;
890
891 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
892 uint8_t sh = *(uint16_t *)(b + i) & 15;
893 *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
894 }
895 clear_high(d, oprsz, desc);
896}
897
898void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
899{
900 intptr_t oprsz = simd_oprsz(desc);
901 intptr_t i;
902
903 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
904 uint8_t sh = *(uint32_t *)(b + i) & 31;
905 *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
906 }
907 clear_high(d, oprsz, desc);
908}
909
910void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
911{
912 intptr_t oprsz = simd_oprsz(desc);
913 intptr_t i;
914
915 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
916 uint8_t sh = *(uint64_t *)(b + i) & 63;
917 *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
918 }
919 clear_high(d, oprsz, desc);
920}
921
922void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc)
923{
924 intptr_t oprsz = simd_oprsz(desc);
925 intptr_t i;
926
927 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
928 uint8_t sh = *(uint8_t *)(b + i) & 7;
929 *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh);
930 }
931 clear_high(d, oprsz, desc);
932}
933
934void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc)
935{
936 intptr_t oprsz = simd_oprsz(desc);
937 intptr_t i;
938
939 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
940 uint8_t sh = *(uint16_t *)(b + i) & 15;
941 *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh);
942 }
943 clear_high(d, oprsz, desc);
944}
945
946void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc)
947{
948 intptr_t oprsz = simd_oprsz(desc);
949 intptr_t i;
950
951 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
952 uint8_t sh = *(uint32_t *)(b + i) & 31;
953 *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh);
954 }
955 clear_high(d, oprsz, desc);
956}
957
958void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc)
959{
960 intptr_t oprsz = simd_oprsz(desc);
961 intptr_t i;
962
963 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
964 uint8_t sh = *(uint64_t *)(b + i) & 63;
965 *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh);
966 }
967 clear_high(d, oprsz, desc);
968}
969
970void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc)
971{
972 intptr_t oprsz = simd_oprsz(desc);
973 intptr_t i;
974
975 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
976 uint8_t sh = *(uint8_t *)(b + i) & 7;
977 *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh);
978 }
979 clear_high(d, oprsz, desc);
980}
981
982void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc)
983{
984 intptr_t oprsz = simd_oprsz(desc);
985 intptr_t i;
986
987 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
988 uint8_t sh = *(uint16_t *)(b + i) & 15;
989 *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh);
990 }
991 clear_high(d, oprsz, desc);
992}
993
994void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc)
995{
996 intptr_t oprsz = simd_oprsz(desc);
997 intptr_t i;
998
999 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1000 uint8_t sh = *(uint32_t *)(b + i) & 31;
1001 *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh);
1002 }
1003 clear_high(d, oprsz, desc);
1004}
1005
1006void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc)
1007{
1008 intptr_t oprsz = simd_oprsz(desc);
1009 intptr_t i;
1010
1011 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1012 uint8_t sh = *(uint64_t *)(b + i) & 63;
1013 *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh);
1014 }
1015 clear_high(d, oprsz, desc);
1016}
1017
1018#define DO_CMP1(NAME, TYPE, OP) \
1019void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
1020{ \
1021 intptr_t oprsz = simd_oprsz(desc); \
1022 intptr_t i; \
1023 for (i = 0; i < oprsz; i += sizeof(TYPE)) { \
1024 *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
1025 } \
1026 clear_high(d, oprsz, desc); \
1027}
1028
1029#define DO_CMP2(SZ) \
1030 DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==) \
1031 DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=) \
1032 DO_CMP1(gvec_lt##SZ, int##SZ##_t, <) \
1033 DO_CMP1(gvec_le##SZ, int##SZ##_t, <=) \
1034 DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <) \
1035 DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
1036
1037DO_CMP2(8)
1038DO_CMP2(16)
1039DO_CMP2(32)
1040DO_CMP2(64)
1041
1042#undef DO_CMP1
1043#undef DO_CMP2
1044
1045void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
1046{
1047 intptr_t oprsz = simd_oprsz(desc);
1048 intptr_t i;
1049
1050 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1051 int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
1052 if (r > INT8_MAX) {
1053 r = INT8_MAX;
1054 } else if (r < INT8_MIN) {
1055 r = INT8_MIN;
1056 }
1057 *(int8_t *)(d + i) = r;
1058 }
1059 clear_high(d, oprsz, desc);
1060}
1061
1062void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
1063{
1064 intptr_t oprsz = simd_oprsz(desc);
1065 intptr_t i;
1066
1067 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1068 int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
1069 if (r > INT16_MAX) {
1070 r = INT16_MAX;
1071 } else if (r < INT16_MIN) {
1072 r = INT16_MIN;
1073 }
1074 *(int16_t *)(d + i) = r;
1075 }
1076 clear_high(d, oprsz, desc);
1077}
1078
1079void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
1080{
1081 intptr_t oprsz = simd_oprsz(desc);
1082 intptr_t i;
1083
1084 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1085 int32_t ai = *(int32_t *)(a + i);
1086 int32_t bi = *(int32_t *)(b + i);
1087 int32_t di;
1088 if (sadd32_overflow(ai, bi, &di)) {
1089 di = (di < 0 ? INT32_MAX : INT32_MIN);
1090 }
1091 *(int32_t *)(d + i) = di;
1092 }
1093 clear_high(d, oprsz, desc);
1094}
1095
1096void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
1097{
1098 intptr_t oprsz = simd_oprsz(desc);
1099 intptr_t i;
1100
1101 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1102 int64_t ai = *(int64_t *)(a + i);
1103 int64_t bi = *(int64_t *)(b + i);
1104 int64_t di;
1105 if (sadd64_overflow(ai, bi, &di)) {
1106 di = (di < 0 ? INT64_MAX : INT64_MIN);
1107 }
1108 *(int64_t *)(d + i) = di;
1109 }
1110 clear_high(d, oprsz, desc);
1111}
1112
1113void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
1114{
1115 intptr_t oprsz = simd_oprsz(desc);
1116 intptr_t i;
1117
1118 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1119 int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
1120 if (r > INT8_MAX) {
1121 r = INT8_MAX;
1122 } else if (r < INT8_MIN) {
1123 r = INT8_MIN;
1124 }
1125 *(uint8_t *)(d + i) = r;
1126 }
1127 clear_high(d, oprsz, desc);
1128}
1129
1130void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
1131{
1132 intptr_t oprsz = simd_oprsz(desc);
1133 intptr_t i;
1134
1135 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1136 int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
1137 if (r > INT16_MAX) {
1138 r = INT16_MAX;
1139 } else if (r < INT16_MIN) {
1140 r = INT16_MIN;
1141 }
1142 *(int16_t *)(d + i) = r;
1143 }
1144 clear_high(d, oprsz, desc);
1145}
1146
1147void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
1148{
1149 intptr_t oprsz = simd_oprsz(desc);
1150 intptr_t i;
1151
1152 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1153 int32_t ai = *(int32_t *)(a + i);
1154 int32_t bi = *(int32_t *)(b + i);
1155 int32_t di;
1156 if (ssub32_overflow(ai, bi, &di)) {
1157 di = (di < 0 ? INT32_MAX : INT32_MIN);
1158 }
1159 *(int32_t *)(d + i) = di;
1160 }
1161 clear_high(d, oprsz, desc);
1162}
1163
1164void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
1165{
1166 intptr_t oprsz = simd_oprsz(desc);
1167 intptr_t i;
1168
1169 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1170 int64_t ai = *(int64_t *)(a + i);
1171 int64_t bi = *(int64_t *)(b + i);
1172 int64_t di;
1173 if (ssub64_overflow(ai, bi, &di)) {
1174 di = (di < 0 ? INT64_MAX : INT64_MIN);
1175 }
1176 *(int64_t *)(d + i) = di;
1177 }
1178 clear_high(d, oprsz, desc);
1179}
1180
1181void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
1182{
1183 intptr_t oprsz = simd_oprsz(desc);
1184 intptr_t i;
1185
1186 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1187 unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
1188 if (r > UINT8_MAX) {
1189 r = UINT8_MAX;
1190 }
1191 *(uint8_t *)(d + i) = r;
1192 }
1193 clear_high(d, oprsz, desc);
1194}
1195
1196void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
1197{
1198 intptr_t oprsz = simd_oprsz(desc);
1199 intptr_t i;
1200
1201 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1202 unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
1203 if (r > UINT16_MAX) {
1204 r = UINT16_MAX;
1205 }
1206 *(uint16_t *)(d + i) = r;
1207 }
1208 clear_high(d, oprsz, desc);
1209}
1210
1211void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
1212{
1213 intptr_t oprsz = simd_oprsz(desc);
1214 intptr_t i;
1215
1216 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1217 uint32_t ai = *(uint32_t *)(a + i);
1218 uint32_t bi = *(uint32_t *)(b + i);
1219 uint32_t di;
1220 if (uadd32_overflow(ai, bi, &di)) {
1221 di = UINT32_MAX;
1222 }
1223 *(uint32_t *)(d + i) = di;
1224 }
1225 clear_high(d, oprsz, desc);
1226}
1227
1228void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
1229{
1230 intptr_t oprsz = simd_oprsz(desc);
1231 intptr_t i;
1232
1233 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1234 uint64_t ai = *(uint64_t *)(a + i);
1235 uint64_t bi = *(uint64_t *)(b + i);
1236 uint64_t di;
1237 if (uadd64_overflow(ai, bi, &di)) {
1238 di = UINT64_MAX;
1239 }
1240 *(uint64_t *)(d + i) = di;
1241 }
1242 clear_high(d, oprsz, desc);
1243}
1244
1245void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
1246{
1247 intptr_t oprsz = simd_oprsz(desc);
1248 intptr_t i;
1249
1250 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1251 int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
1252 if (r < 0) {
1253 r = 0;
1254 }
1255 *(uint8_t *)(d + i) = r;
1256 }
1257 clear_high(d, oprsz, desc);
1258}
1259
1260void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
1261{
1262 intptr_t oprsz = simd_oprsz(desc);
1263 intptr_t i;
1264
1265 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1266 int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
1267 if (r < 0) {
1268 r = 0;
1269 }
1270 *(uint16_t *)(d + i) = r;
1271 }
1272 clear_high(d, oprsz, desc);
1273}
1274
1275void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
1276{
1277 intptr_t oprsz = simd_oprsz(desc);
1278 intptr_t i;
1279
1280 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1281 uint32_t ai = *(uint32_t *)(a + i);
1282 uint32_t bi = *(uint32_t *)(b + i);
1283 uint32_t di;
1284 if (usub32_overflow(ai, bi, &di)) {
1285 di = 0;
1286 }
1287 *(uint32_t *)(d + i) = di;
1288 }
1289 clear_high(d, oprsz, desc);
1290}
1291
1292void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
1293{
1294 intptr_t oprsz = simd_oprsz(desc);
1295 intptr_t i;
1296
1297 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1298 uint64_t ai = *(uint64_t *)(a + i);
1299 uint64_t bi = *(uint64_t *)(b + i);
1300 uint64_t di;
1301 if (usub64_overflow(ai, bi, &di)) {
1302 di = 0;
1303 }
1304 *(uint64_t *)(d + i) = di;
1305 }
1306 clear_high(d, oprsz, desc);
1307}
1308
1309void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
1310{
1311 intptr_t oprsz = simd_oprsz(desc);
1312 intptr_t i;
1313
1314 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1315 int8_t aa = *(int8_t *)(a + i);
1316 int8_t bb = *(int8_t *)(b + i);
1317 int8_t dd = aa < bb ? aa : bb;
1318 *(int8_t *)(d + i) = dd;
1319 }
1320 clear_high(d, oprsz, desc);
1321}
1322
1323void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
1324{
1325 intptr_t oprsz = simd_oprsz(desc);
1326 intptr_t i;
1327
1328 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1329 int16_t aa = *(int16_t *)(a + i);
1330 int16_t bb = *(int16_t *)(b + i);
1331 int16_t dd = aa < bb ? aa : bb;
1332 *(int16_t *)(d + i) = dd;
1333 }
1334 clear_high(d, oprsz, desc);
1335}
1336
1337void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
1338{
1339 intptr_t oprsz = simd_oprsz(desc);
1340 intptr_t i;
1341
1342 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1343 int32_t aa = *(int32_t *)(a + i);
1344 int32_t bb = *(int32_t *)(b + i);
1345 int32_t dd = aa < bb ? aa : bb;
1346 *(int32_t *)(d + i) = dd;
1347 }
1348 clear_high(d, oprsz, desc);
1349}
1350
1351void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
1352{
1353 intptr_t oprsz = simd_oprsz(desc);
1354 intptr_t i;
1355
1356 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1357 int64_t aa = *(int64_t *)(a + i);
1358 int64_t bb = *(int64_t *)(b + i);
1359 int64_t dd = aa < bb ? aa : bb;
1360 *(int64_t *)(d + i) = dd;
1361 }
1362 clear_high(d, oprsz, desc);
1363}
1364
1365void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
1366{
1367 intptr_t oprsz = simd_oprsz(desc);
1368 intptr_t i;
1369
1370 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1371 int8_t aa = *(int8_t *)(a + i);
1372 int8_t bb = *(int8_t *)(b + i);
1373 int8_t dd = aa > bb ? aa : bb;
1374 *(int8_t *)(d + i) = dd;
1375 }
1376 clear_high(d, oprsz, desc);
1377}
1378
1379void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
1380{
1381 intptr_t oprsz = simd_oprsz(desc);
1382 intptr_t i;
1383
1384 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1385 int16_t aa = *(int16_t *)(a + i);
1386 int16_t bb = *(int16_t *)(b + i);
1387 int16_t dd = aa > bb ? aa : bb;
1388 *(int16_t *)(d + i) = dd;
1389 }
1390 clear_high(d, oprsz, desc);
1391}
1392
1393void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
1394{
1395 intptr_t oprsz = simd_oprsz(desc);
1396 intptr_t i;
1397
1398 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1399 int32_t aa = *(int32_t *)(a + i);
1400 int32_t bb = *(int32_t *)(b + i);
1401 int32_t dd = aa > bb ? aa : bb;
1402 *(int32_t *)(d + i) = dd;
1403 }
1404 clear_high(d, oprsz, desc);
1405}
1406
1407void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
1408{
1409 intptr_t oprsz = simd_oprsz(desc);
1410 intptr_t i;
1411
1412 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1413 int64_t aa = *(int64_t *)(a + i);
1414 int64_t bb = *(int64_t *)(b + i);
1415 int64_t dd = aa > bb ? aa : bb;
1416 *(int64_t *)(d + i) = dd;
1417 }
1418 clear_high(d, oprsz, desc);
1419}
1420
1421void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
1422{
1423 intptr_t oprsz = simd_oprsz(desc);
1424 intptr_t i;
1425
1426 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1427 uint8_t aa = *(uint8_t *)(a + i);
1428 uint8_t bb = *(uint8_t *)(b + i);
1429 uint8_t dd = aa < bb ? aa : bb;
1430 *(uint8_t *)(d + i) = dd;
1431 }
1432 clear_high(d, oprsz, desc);
1433}
1434
1435void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
1436{
1437 intptr_t oprsz = simd_oprsz(desc);
1438 intptr_t i;
1439
1440 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1441 uint16_t aa = *(uint16_t *)(a + i);
1442 uint16_t bb = *(uint16_t *)(b + i);
1443 uint16_t dd = aa < bb ? aa : bb;
1444 *(uint16_t *)(d + i) = dd;
1445 }
1446 clear_high(d, oprsz, desc);
1447}
1448
1449void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
1450{
1451 intptr_t oprsz = simd_oprsz(desc);
1452 intptr_t i;
1453
1454 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1455 uint32_t aa = *(uint32_t *)(a + i);
1456 uint32_t bb = *(uint32_t *)(b + i);
1457 uint32_t dd = aa < bb ? aa : bb;
1458 *(uint32_t *)(d + i) = dd;
1459 }
1460 clear_high(d, oprsz, desc);
1461}
1462
1463void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
1464{
1465 intptr_t oprsz = simd_oprsz(desc);
1466 intptr_t i;
1467
1468 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1469 uint64_t aa = *(uint64_t *)(a + i);
1470 uint64_t bb = *(uint64_t *)(b + i);
1471 uint64_t dd = aa < bb ? aa : bb;
1472 *(uint64_t *)(d + i) = dd;
1473 }
1474 clear_high(d, oprsz, desc);
1475}
1476
1477void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
1478{
1479 intptr_t oprsz = simd_oprsz(desc);
1480 intptr_t i;
1481
1482 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1483 uint8_t aa = *(uint8_t *)(a + i);
1484 uint8_t bb = *(uint8_t *)(b + i);
1485 uint8_t dd = aa > bb ? aa : bb;
1486 *(uint8_t *)(d + i) = dd;
1487 }
1488 clear_high(d, oprsz, desc);
1489}
1490
1491void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
1492{
1493 intptr_t oprsz = simd_oprsz(desc);
1494 intptr_t i;
1495
1496 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1497 uint16_t aa = *(uint16_t *)(a + i);
1498 uint16_t bb = *(uint16_t *)(b + i);
1499 uint16_t dd = aa > bb ? aa : bb;
1500 *(uint16_t *)(d + i) = dd;
1501 }
1502 clear_high(d, oprsz, desc);
1503}
1504
1505void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
1506{
1507 intptr_t oprsz = simd_oprsz(desc);
1508 intptr_t i;
1509
1510 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1511 uint32_t aa = *(uint32_t *)(a + i);
1512 uint32_t bb = *(uint32_t *)(b + i);
1513 uint32_t dd = aa > bb ? aa : bb;
1514 *(uint32_t *)(d + i) = dd;
1515 }
1516 clear_high(d, oprsz, desc);
1517}
1518
1519void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
1520{
1521 intptr_t oprsz = simd_oprsz(desc);
1522 intptr_t i;
1523
1524 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1525 uint64_t aa = *(uint64_t *)(a + i);
1526 uint64_t bb = *(uint64_t *)(b + i);
1527 uint64_t dd = aa > bb ? aa : bb;
1528 *(uint64_t *)(d + i) = dd;
1529 }
1530 clear_high(d, oprsz, desc);
1531}
1532
1533void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
1534{
1535 intptr_t oprsz = simd_oprsz(desc);
1536 intptr_t i;
1537
1538 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1539 uint64_t aa = *(uint64_t *)(a + i);
1540 uint64_t bb = *(uint64_t *)(b + i);
1541 uint64_t cc = *(uint64_t *)(c + i);
1542 *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
1543 }
1544 clear_high(d, oprsz, desc);
1545}
1546