/*
 * Generic vectorized operation runtime
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
20#include "qemu/osdep.h"
21#include "qemu/host-utils.h"
22#include "cpu.h"
23#include "exec/helper-proto.h"
24#include "tcg-gvec-desc.h"
25
26
27
28
29
30
31
32
33
34
35
36
#ifdef CONFIG_VECTOR16
/* With compiler vector extensions, process 16 bytes per loop iteration. */
typedef uint8_t vec8 __attribute__((vector_size(16)));
typedef uint16_t vec16 __attribute__((vector_size(16)));
typedef uint32_t vec32 __attribute__((vector_size(16)));
typedef uint64_t vec64 __attribute__((vector_size(16)));

typedef int8_t svec8 __attribute__((vector_size(16)));
typedef int16_t svec16 __attribute__((vector_size(16)));
typedef int32_t svec32 __attribute__((vector_size(16)));
typedef int64_t svec64 __attribute__((vector_size(16)));

/* DUPn: initializer that replicates a scalar across all n vector lanes. */
#define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X }
#define DUP8(X) { X, X, X, X, X, X, X, X }
#define DUP4(X) { X, X, X, X }
#define DUP2(X) { X, X }
#else
/* Fallback: "vectors" are single elements and DUPn is the identity,
 * so the same loop bodies degrade to element-at-a-time processing. */
typedef uint8_t vec8;
typedef uint16_t vec16;
typedef uint32_t vec32;
typedef uint64_t vec64;

typedef int8_t svec8;
typedef int16_t svec16;
typedef int32_t svec32;
typedef int64_t svec64;

#define DUP16(X) X
#define DUP8(X) X
#define DUP4(X) X
#define DUP2(X) X
#endif
68
69static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
70{
71 intptr_t maxsz = simd_maxsz(desc);
72 intptr_t i;
73
74 if (unlikely(maxsz > oprsz)) {
75 for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) {
76 *(uint64_t *)(d + i) = 0;
77 }
78 }
79}
80
81void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
82{
83 intptr_t oprsz = simd_oprsz(desc);
84 intptr_t i;
85
86 for (i = 0; i < oprsz; i += sizeof(vec8)) {
87 *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i);
88 }
89 clear_high(d, oprsz, desc);
90}
91
92void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
93{
94 intptr_t oprsz = simd_oprsz(desc);
95 intptr_t i;
96
97 for (i = 0; i < oprsz; i += sizeof(vec16)) {
98 *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i);
99 }
100 clear_high(d, oprsz, desc);
101}
102
103void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
104{
105 intptr_t oprsz = simd_oprsz(desc);
106 intptr_t i;
107
108 for (i = 0; i < oprsz; i += sizeof(vec32)) {
109 *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i);
110 }
111 clear_high(d, oprsz, desc);
112}
113
114void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
115{
116 intptr_t oprsz = simd_oprsz(desc);
117 intptr_t i;
118
119 for (i = 0; i < oprsz; i += sizeof(vec64)) {
120 *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i);
121 }
122 clear_high(d, oprsz, desc);
123}
124
125void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
126{
127 intptr_t oprsz = simd_oprsz(desc);
128 vec8 vecb = (vec8)DUP16(b);
129 intptr_t i;
130
131 for (i = 0; i < oprsz; i += sizeof(vec8)) {
132 *(vec8 *)(d + i) = *(vec8 *)(a + i) + vecb;
133 }
134 clear_high(d, oprsz, desc);
135}
136
137void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
138{
139 intptr_t oprsz = simd_oprsz(desc);
140 vec16 vecb = (vec16)DUP8(b);
141 intptr_t i;
142
143 for (i = 0; i < oprsz; i += sizeof(vec16)) {
144 *(vec16 *)(d + i) = *(vec16 *)(a + i) + vecb;
145 }
146 clear_high(d, oprsz, desc);
147}
148
149void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
150{
151 intptr_t oprsz = simd_oprsz(desc);
152 vec32 vecb = (vec32)DUP4(b);
153 intptr_t i;
154
155 for (i = 0; i < oprsz; i += sizeof(vec32)) {
156 *(vec32 *)(d + i) = *(vec32 *)(a + i) + vecb;
157 }
158 clear_high(d, oprsz, desc);
159}
160
161void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
162{
163 intptr_t oprsz = simd_oprsz(desc);
164 vec64 vecb = (vec64)DUP2(b);
165 intptr_t i;
166
167 for (i = 0; i < oprsz; i += sizeof(vec64)) {
168 *(vec64 *)(d + i) = *(vec64 *)(a + i) + vecb;
169 }
170 clear_high(d, oprsz, desc);
171}
172
173void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
174{
175 intptr_t oprsz = simd_oprsz(desc);
176 intptr_t i;
177
178 for (i = 0; i < oprsz; i += sizeof(vec8)) {
179 *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i);
180 }
181 clear_high(d, oprsz, desc);
182}
183
184void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
185{
186 intptr_t oprsz = simd_oprsz(desc);
187 intptr_t i;
188
189 for (i = 0; i < oprsz; i += sizeof(vec16)) {
190 *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i);
191 }
192 clear_high(d, oprsz, desc);
193}
194
195void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
196{
197 intptr_t oprsz = simd_oprsz(desc);
198 intptr_t i;
199
200 for (i = 0; i < oprsz; i += sizeof(vec32)) {
201 *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i);
202 }
203 clear_high(d, oprsz, desc);
204}
205
206void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
207{
208 intptr_t oprsz = simd_oprsz(desc);
209 intptr_t i;
210
211 for (i = 0; i < oprsz; i += sizeof(vec64)) {
212 *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i);
213 }
214 clear_high(d, oprsz, desc);
215}
216
217void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
218{
219 intptr_t oprsz = simd_oprsz(desc);
220 vec8 vecb = (vec8)DUP16(b);
221 intptr_t i;
222
223 for (i = 0; i < oprsz; i += sizeof(vec8)) {
224 *(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb;
225 }
226 clear_high(d, oprsz, desc);
227}
228
229void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
230{
231 intptr_t oprsz = simd_oprsz(desc);
232 vec16 vecb = (vec16)DUP8(b);
233 intptr_t i;
234
235 for (i = 0; i < oprsz; i += sizeof(vec16)) {
236 *(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb;
237 }
238 clear_high(d, oprsz, desc);
239}
240
241void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
242{
243 intptr_t oprsz = simd_oprsz(desc);
244 vec32 vecb = (vec32)DUP4(b);
245 intptr_t i;
246
247 for (i = 0; i < oprsz; i += sizeof(vec32)) {
248 *(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb;
249 }
250 clear_high(d, oprsz, desc);
251}
252
253void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
254{
255 intptr_t oprsz = simd_oprsz(desc);
256 vec64 vecb = (vec64)DUP2(b);
257 intptr_t i;
258
259 for (i = 0; i < oprsz; i += sizeof(vec64)) {
260 *(vec64 *)(d + i) = *(vec64 *)(a + i) - vecb;
261 }
262 clear_high(d, oprsz, desc);
263}
264
265void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
266{
267 intptr_t oprsz = simd_oprsz(desc);
268 intptr_t i;
269
270 for (i = 0; i < oprsz; i += sizeof(vec8)) {
271 *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i);
272 }
273 clear_high(d, oprsz, desc);
274}
275
276void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
277{
278 intptr_t oprsz = simd_oprsz(desc);
279 intptr_t i;
280
281 for (i = 0; i < oprsz; i += sizeof(vec16)) {
282 *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i);
283 }
284 clear_high(d, oprsz, desc);
285}
286
287void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
288{
289 intptr_t oprsz = simd_oprsz(desc);
290 intptr_t i;
291
292 for (i = 0; i < oprsz; i += sizeof(vec32)) {
293 *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i);
294 }
295 clear_high(d, oprsz, desc);
296}
297
298void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
299{
300 intptr_t oprsz = simd_oprsz(desc);
301 intptr_t i;
302
303 for (i = 0; i < oprsz; i += sizeof(vec64)) {
304 *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i);
305 }
306 clear_high(d, oprsz, desc);
307}
308
309void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
310{
311 intptr_t oprsz = simd_oprsz(desc);
312 vec8 vecb = (vec8)DUP16(b);
313 intptr_t i;
314
315 for (i = 0; i < oprsz; i += sizeof(vec8)) {
316 *(vec8 *)(d + i) = *(vec8 *)(a + i) * vecb;
317 }
318 clear_high(d, oprsz, desc);
319}
320
321void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
322{
323 intptr_t oprsz = simd_oprsz(desc);
324 vec16 vecb = (vec16)DUP8(b);
325 intptr_t i;
326
327 for (i = 0; i < oprsz; i += sizeof(vec16)) {
328 *(vec16 *)(d + i) = *(vec16 *)(a + i) * vecb;
329 }
330 clear_high(d, oprsz, desc);
331}
332
333void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
334{
335 intptr_t oprsz = simd_oprsz(desc);
336 vec32 vecb = (vec32)DUP4(b);
337 intptr_t i;
338
339 for (i = 0; i < oprsz; i += sizeof(vec32)) {
340 *(vec32 *)(d + i) = *(vec32 *)(a + i) * vecb;
341 }
342 clear_high(d, oprsz, desc);
343}
344
345void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
346{
347 intptr_t oprsz = simd_oprsz(desc);
348 vec64 vecb = (vec64)DUP2(b);
349 intptr_t i;
350
351 for (i = 0; i < oprsz; i += sizeof(vec64)) {
352 *(vec64 *)(d + i) = *(vec64 *)(a + i) * vecb;
353 }
354 clear_high(d, oprsz, desc);
355}
356
357void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
358{
359 intptr_t oprsz = simd_oprsz(desc);
360 intptr_t i;
361
362 for (i = 0; i < oprsz; i += sizeof(vec8)) {
363 *(vec8 *)(d + i) = -*(vec8 *)(a + i);
364 }
365 clear_high(d, oprsz, desc);
366}
367
368void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
369{
370 intptr_t oprsz = simd_oprsz(desc);
371 intptr_t i;
372
373 for (i = 0; i < oprsz; i += sizeof(vec16)) {
374 *(vec16 *)(d + i) = -*(vec16 *)(a + i);
375 }
376 clear_high(d, oprsz, desc);
377}
378
379void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
380{
381 intptr_t oprsz = simd_oprsz(desc);
382 intptr_t i;
383
384 for (i = 0; i < oprsz; i += sizeof(vec32)) {
385 *(vec32 *)(d + i) = -*(vec32 *)(a + i);
386 }
387 clear_high(d, oprsz, desc);
388}
389
390void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
391{
392 intptr_t oprsz = simd_oprsz(desc);
393 intptr_t i;
394
395 for (i = 0; i < oprsz; i += sizeof(vec64)) {
396 *(vec64 *)(d + i) = -*(vec64 *)(a + i);
397 }
398 clear_high(d, oprsz, desc);
399}
400
401void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
402{
403 intptr_t oprsz = simd_oprsz(desc);
404
405 memcpy(d, a, oprsz);
406 clear_high(d, oprsz, desc);
407}
408
409void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
410{
411 intptr_t oprsz = simd_oprsz(desc);
412 intptr_t i;
413
414 if (c == 0) {
415 oprsz = 0;
416 } else {
417 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
418 *(uint64_t *)(d + i) = c;
419 }
420 }
421 clear_high(d, oprsz, desc);
422}
423
424void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
425{
426 intptr_t oprsz = simd_oprsz(desc);
427 intptr_t i;
428
429 if (c == 0) {
430 oprsz = 0;
431 } else {
432 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
433 *(uint32_t *)(d + i) = c;
434 }
435 }
436 clear_high(d, oprsz, desc);
437}
438
439void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
440{
441 HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
442}
443
444void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
445{
446 HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
447}
448
449void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
450{
451 intptr_t oprsz = simd_oprsz(desc);
452 intptr_t i;
453
454 for (i = 0; i < oprsz; i += sizeof(vec64)) {
455 *(vec64 *)(d + i) = ~*(vec64 *)(a + i);
456 }
457 clear_high(d, oprsz, desc);
458}
459
460void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
461{
462 intptr_t oprsz = simd_oprsz(desc);
463 intptr_t i;
464
465 for (i = 0; i < oprsz; i += sizeof(vec64)) {
466 *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i);
467 }
468 clear_high(d, oprsz, desc);
469}
470
471void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
472{
473 intptr_t oprsz = simd_oprsz(desc);
474 intptr_t i;
475
476 for (i = 0; i < oprsz; i += sizeof(vec64)) {
477 *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i);
478 }
479 clear_high(d, oprsz, desc);
480}
481
482void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
483{
484 intptr_t oprsz = simd_oprsz(desc);
485 intptr_t i;
486
487 for (i = 0; i < oprsz; i += sizeof(vec64)) {
488 *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i);
489 }
490 clear_high(d, oprsz, desc);
491}
492
493void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
494{
495 intptr_t oprsz = simd_oprsz(desc);
496 intptr_t i;
497
498 for (i = 0; i < oprsz; i += sizeof(vec64)) {
499 *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i);
500 }
501 clear_high(d, oprsz, desc);
502}
503
504void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
505{
506 intptr_t oprsz = simd_oprsz(desc);
507 intptr_t i;
508
509 for (i = 0; i < oprsz; i += sizeof(vec64)) {
510 *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i);
511 }
512 clear_high(d, oprsz, desc);
513}
514
515void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
516{
517 intptr_t oprsz = simd_oprsz(desc);
518 vec64 vecb = (vec64)DUP2(b);
519 intptr_t i;
520
521 for (i = 0; i < oprsz; i += sizeof(vec64)) {
522 *(vec64 *)(d + i) = *(vec64 *)(a + i) & vecb;
523 }
524 clear_high(d, oprsz, desc);
525}
526
527void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
528{
529 intptr_t oprsz = simd_oprsz(desc);
530 vec64 vecb = (vec64)DUP2(b);
531 intptr_t i;
532
533 for (i = 0; i < oprsz; i += sizeof(vec64)) {
534 *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ vecb;
535 }
536 clear_high(d, oprsz, desc);
537}
538
539void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
540{
541 intptr_t oprsz = simd_oprsz(desc);
542 vec64 vecb = (vec64)DUP2(b);
543 intptr_t i;
544
545 for (i = 0; i < oprsz; i += sizeof(vec64)) {
546 *(vec64 *)(d + i) = *(vec64 *)(a + i) | vecb;
547 }
548 clear_high(d, oprsz, desc);
549}
550
551void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
552{
553 intptr_t oprsz = simd_oprsz(desc);
554 int shift = simd_data(desc);
555 intptr_t i;
556
557 for (i = 0; i < oprsz; i += sizeof(vec8)) {
558 *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift;
559 }
560 clear_high(d, oprsz, desc);
561}
562
563void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
564{
565 intptr_t oprsz = simd_oprsz(desc);
566 int shift = simd_data(desc);
567 intptr_t i;
568
569 for (i = 0; i < oprsz; i += sizeof(vec16)) {
570 *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift;
571 }
572 clear_high(d, oprsz, desc);
573}
574
575void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
576{
577 intptr_t oprsz = simd_oprsz(desc);
578 int shift = simd_data(desc);
579 intptr_t i;
580
581 for (i = 0; i < oprsz; i += sizeof(vec32)) {
582 *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift;
583 }
584 clear_high(d, oprsz, desc);
585}
586
587void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
588{
589 intptr_t oprsz = simd_oprsz(desc);
590 int shift = simd_data(desc);
591 intptr_t i;
592
593 for (i = 0; i < oprsz; i += sizeof(vec64)) {
594 *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift;
595 }
596 clear_high(d, oprsz, desc);
597}
598
599void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
600{
601 intptr_t oprsz = simd_oprsz(desc);
602 int shift = simd_data(desc);
603 intptr_t i;
604
605 for (i = 0; i < oprsz; i += sizeof(vec8)) {
606 *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift;
607 }
608 clear_high(d, oprsz, desc);
609}
610
611void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
612{
613 intptr_t oprsz = simd_oprsz(desc);
614 int shift = simd_data(desc);
615 intptr_t i;
616
617 for (i = 0; i < oprsz; i += sizeof(vec16)) {
618 *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift;
619 }
620 clear_high(d, oprsz, desc);
621}
622
623void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
624{
625 intptr_t oprsz = simd_oprsz(desc);
626 int shift = simd_data(desc);
627 intptr_t i;
628
629 for (i = 0; i < oprsz; i += sizeof(vec32)) {
630 *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift;
631 }
632 clear_high(d, oprsz, desc);
633}
634
635void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
636{
637 intptr_t oprsz = simd_oprsz(desc);
638 int shift = simd_data(desc);
639 intptr_t i;
640
641 for (i = 0; i < oprsz; i += sizeof(vec64)) {
642 *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift;
643 }
644 clear_high(d, oprsz, desc);
645}
646
647void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
648{
649 intptr_t oprsz = simd_oprsz(desc);
650 int shift = simd_data(desc);
651 intptr_t i;
652
653 for (i = 0; i < oprsz; i += sizeof(vec8)) {
654 *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift;
655 }
656 clear_high(d, oprsz, desc);
657}
658
659void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
660{
661 intptr_t oprsz = simd_oprsz(desc);
662 int shift = simd_data(desc);
663 intptr_t i;
664
665 for (i = 0; i < oprsz; i += sizeof(vec16)) {
666 *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift;
667 }
668 clear_high(d, oprsz, desc);
669}
670
671void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
672{
673 intptr_t oprsz = simd_oprsz(desc);
674 int shift = simd_data(desc);
675 intptr_t i;
676
677 for (i = 0; i < oprsz; i += sizeof(vec32)) {
678 *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift;
679 }
680 clear_high(d, oprsz, desc);
681}
682
683void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
684{
685 intptr_t oprsz = simd_oprsz(desc);
686 int shift = simd_data(desc);
687 intptr_t i;
688
689 for (i = 0; i < oprsz; i += sizeof(vec64)) {
690 *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift;
691 }
692 clear_high(d, oprsz, desc);
693}
694
695
696
#ifdef CONFIG_VECTOR16
/* Vector-extension comparisons already yield 0 / -1 per lane. */
# define DO_CMP0(X) X
#else
/* Scalar fallback: negate the 0/1 comparison result to get 0 / -1. */
# define DO_CMP0(X) -(X)
#endif

/* Define one comparison helper: d[i] = (a[i] OP b[i]) ? -1 : 0. */
#define DO_CMP1(NAME, TYPE, OP) \
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
{ \
    intptr_t oprsz = simd_oprsz(desc); \
    intptr_t i; \
    for (i = 0; i < oprsz; i += sizeof(TYPE)) { \
        *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
    } \
    clear_high(d, oprsz, desc); \
}

/* Instantiate eq/ne plus signed and unsigned lt/le for one element size.
 * Signed comparisons use the svec types, unsigned use the vec types. */
#define DO_CMP2(SZ) \
    DO_CMP1(gvec_eq##SZ, vec##SZ, ==) \
    DO_CMP1(gvec_ne##SZ, vec##SZ, !=) \
    DO_CMP1(gvec_lt##SZ, svec##SZ, <) \
    DO_CMP1(gvec_le##SZ, svec##SZ, <=) \
    DO_CMP1(gvec_ltu##SZ, vec##SZ, <) \
    DO_CMP1(gvec_leu##SZ, vec##SZ, <=)

DO_CMP2(8)
DO_CMP2(16)
DO_CMP2(32)
DO_CMP2(64)

#undef DO_CMP0
#undef DO_CMP1
#undef DO_CMP2
730
731void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
732{
733 intptr_t oprsz = simd_oprsz(desc);
734 intptr_t i;
735
736 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
737 int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
738 if (r > INT8_MAX) {
739 r = INT8_MAX;
740 } else if (r < INT8_MIN) {
741 r = INT8_MIN;
742 }
743 *(int8_t *)(d + i) = r;
744 }
745 clear_high(d, oprsz, desc);
746}
747
748void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
749{
750 intptr_t oprsz = simd_oprsz(desc);
751 intptr_t i;
752
753 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
754 int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
755 if (r > INT16_MAX) {
756 r = INT16_MAX;
757 } else if (r < INT16_MIN) {
758 r = INT16_MIN;
759 }
760 *(int16_t *)(d + i) = r;
761 }
762 clear_high(d, oprsz, desc);
763}
764
765void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
766{
767 intptr_t oprsz = simd_oprsz(desc);
768 intptr_t i;
769
770 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
771 int32_t ai = *(int32_t *)(a + i);
772 int32_t bi = *(int32_t *)(b + i);
773 int32_t di = ai + bi;
774 if (((di ^ ai) &~ (ai ^ bi)) < 0) {
775
776 di = (di < 0 ? INT32_MAX : INT32_MIN);
777 }
778 *(int32_t *)(d + i) = di;
779 }
780 clear_high(d, oprsz, desc);
781}
782
783void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
784{
785 intptr_t oprsz = simd_oprsz(desc);
786 intptr_t i;
787
788 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
789 int64_t ai = *(int64_t *)(a + i);
790 int64_t bi = *(int64_t *)(b + i);
791 int64_t di = ai + bi;
792 if (((di ^ ai) &~ (ai ^ bi)) < 0) {
793
794 di = (di < 0 ? INT64_MAX : INT64_MIN);
795 }
796 *(int64_t *)(d + i) = di;
797 }
798 clear_high(d, oprsz, desc);
799}
800
801void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
802{
803 intptr_t oprsz = simd_oprsz(desc);
804 intptr_t i;
805
806 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
807 int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
808 if (r > INT8_MAX) {
809 r = INT8_MAX;
810 } else if (r < INT8_MIN) {
811 r = INT8_MIN;
812 }
813 *(uint8_t *)(d + i) = r;
814 }
815 clear_high(d, oprsz, desc);
816}
817
818void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
819{
820 intptr_t oprsz = simd_oprsz(desc);
821 intptr_t i;
822
823 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
824 int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
825 if (r > INT16_MAX) {
826 r = INT16_MAX;
827 } else if (r < INT16_MIN) {
828 r = INT16_MIN;
829 }
830 *(int16_t *)(d + i) = r;
831 }
832 clear_high(d, oprsz, desc);
833}
834
835void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
836{
837 intptr_t oprsz = simd_oprsz(desc);
838 intptr_t i;
839
840 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
841 int32_t ai = *(int32_t *)(a + i);
842 int32_t bi = *(int32_t *)(b + i);
843 int32_t di = ai - bi;
844 if (((di ^ ai) & (ai ^ bi)) < 0) {
845
846 di = (di < 0 ? INT32_MAX : INT32_MIN);
847 }
848 *(int32_t *)(d + i) = di;
849 }
850 clear_high(d, oprsz, desc);
851}
852
853void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
854{
855 intptr_t oprsz = simd_oprsz(desc);
856 intptr_t i;
857
858 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
859 int64_t ai = *(int64_t *)(a + i);
860 int64_t bi = *(int64_t *)(b + i);
861 int64_t di = ai - bi;
862 if (((di ^ ai) & (ai ^ bi)) < 0) {
863
864 di = (di < 0 ? INT64_MAX : INT64_MIN);
865 }
866 *(int64_t *)(d + i) = di;
867 }
868 clear_high(d, oprsz, desc);
869}
870
871void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
872{
873 intptr_t oprsz = simd_oprsz(desc);
874 intptr_t i;
875
876 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
877 unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
878 if (r > UINT8_MAX) {
879 r = UINT8_MAX;
880 }
881 *(uint8_t *)(d + i) = r;
882 }
883 clear_high(d, oprsz, desc);
884}
885
886void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
887{
888 intptr_t oprsz = simd_oprsz(desc);
889 intptr_t i;
890
891 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
892 unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
893 if (r > UINT16_MAX) {
894 r = UINT16_MAX;
895 }
896 *(uint16_t *)(d + i) = r;
897 }
898 clear_high(d, oprsz, desc);
899}
900
901void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
902{
903 intptr_t oprsz = simd_oprsz(desc);
904 intptr_t i;
905
906 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
907 uint32_t ai = *(uint32_t *)(a + i);
908 uint32_t bi = *(uint32_t *)(b + i);
909 uint32_t di = ai + bi;
910 if (di < ai) {
911 di = UINT32_MAX;
912 }
913 *(uint32_t *)(d + i) = di;
914 }
915 clear_high(d, oprsz, desc);
916}
917
918void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
919{
920 intptr_t oprsz = simd_oprsz(desc);
921 intptr_t i;
922
923 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
924 uint64_t ai = *(uint64_t *)(a + i);
925 uint64_t bi = *(uint64_t *)(b + i);
926 uint64_t di = ai + bi;
927 if (di < ai) {
928 di = UINT64_MAX;
929 }
930 *(uint64_t *)(d + i) = di;
931 }
932 clear_high(d, oprsz, desc);
933}
934
935void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
936{
937 intptr_t oprsz = simd_oprsz(desc);
938 intptr_t i;
939
940 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
941 int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
942 if (r < 0) {
943 r = 0;
944 }
945 *(uint8_t *)(d + i) = r;
946 }
947 clear_high(d, oprsz, desc);
948}
949
950void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
951{
952 intptr_t oprsz = simd_oprsz(desc);
953 intptr_t i;
954
955 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
956 int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
957 if (r < 0) {
958 r = 0;
959 }
960 *(uint16_t *)(d + i) = r;
961 }
962 clear_high(d, oprsz, desc);
963}
964
965void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
966{
967 intptr_t oprsz = simd_oprsz(desc);
968 intptr_t i;
969
970 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
971 uint32_t ai = *(uint32_t *)(a + i);
972 uint32_t bi = *(uint32_t *)(b + i);
973 uint32_t di = ai - bi;
974 if (ai < bi) {
975 di = 0;
976 }
977 *(uint32_t *)(d + i) = di;
978 }
979 clear_high(d, oprsz, desc);
980}
981
982void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
983{
984 intptr_t oprsz = simd_oprsz(desc);
985 intptr_t i;
986
987 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
988 uint64_t ai = *(uint64_t *)(a + i);
989 uint64_t bi = *(uint64_t *)(b + i);
990 uint64_t di = ai - bi;
991 if (ai < bi) {
992 di = 0;
993 }
994 *(uint64_t *)(d + i) = di;
995 }
996 clear_high(d, oprsz, desc);
997}
998