/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that needs a host-endian fixup.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif
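
/*
 * Example: on a big-endian host, the byte element at logical index 0 of a
 * 64-bit chunk is at host byte offset H1(0) == 7, and a 16-bit element at
 * logical index 0 is at host offset H2(0) == 3 (in 16-bit units); on a
 * little-endian host all H macros are the identity.
 */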

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}
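
/*
 * e.g. vlmul == 0b101 sign-extends to lmul == -3, i.e. LMUL == 1/8, so
 * sextract32() turns the 3-bit vtype encoding directly into log2(LMUL).
 */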

/*
 * Get the maximum number of elements that can be operated on.
 *
 * esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
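
/*
 * Worked example: vlenb == 16 (VLEN = 128), LMUL == 2 (lmul == 1) and
 * 32-bit elements (esz == 2) give scale == -1, so the result is
 * 16 >> 1 == 8 elements, matching VLMAX = VLEN / SEW * LMUL = 128 / 32 * 2.
 */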

static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check. In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}
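
/*
 * Mask registers are thus a packed little-endian bit array: element i's
 * mask bit is bit (i % 64) of the (i / 64)-th host uint64_t, so e.g.
 * element 70 maps to bit 6 of word 1.
 */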

/* elements operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            target_ulong addr = base + stride * i + (k << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}
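
/*
 * For a segment access, field k of element i comes from guest address
 * base + i * stride + k * (1 << esz) and lands in register slot
 * i + k * max_elems, i.e. consecutive fields of one segment go to
 * consecutive registers of the destination group.
 */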

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
             uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

/*
 * masked unit-stride load and store operation is a special case of
 * stride, with stride = NF * sizeof(ETYPE)
 */
#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,          \
                         CPURISCVState *env, uint32_t desc)              \
{                                                                        \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));              \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);      \
}                                                                        \
                                                                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)

/*
 *** unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint32_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC(), MMU_DATA_LOAD);
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint32_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
                 0, evl, GETPC(), MMU_DATA_STORE);
}
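
/*
 * e.g. with vl == 65, vlm.v/vsm.v transfer evl == ceil(65 / 8) == 9 bytes,
 * since the mask occupies one bit per element.
 */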

/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}
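
/*
 * The index vector vs2 is read with the EEW of the index (idx_b..idx_d),
 * while the data element width comes from the instruction itself, which is
 * why each vlxei/vsxei variant below pairs an index width with a data width.
 */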

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
                  void *vs2, CPURISCVState *env, uint32_t desc)            \
{                                                                          \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC(), MMU_DATA_STORE);                    \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << esz));
        if (i == 0) {
            probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}
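
/*
 * Fault-only-first semantics: element 0 must fault normally, but a fault on
 * any later element merely truncates vl to the number of elements that can
 * be accessed, so software can e.g. scan a NUL-terminated string without
 * knowing its length in advance.
 */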

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/* Unsigned min/max */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)

/*
 *** load and store whole register instructions
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
                MMUAccessType access_type)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
    uint32_t max_elems = vlenb >> esz;

    k = env->vstart / max_elems;
    off = env->vstart % max_elems;

    if (off) {
        /* load/store rest of elements of current segment pointed by vstart */
        for (pos = off; pos < max_elems; pos++, env->vstart++) {
            target_ulong addr = base + ((pos + k * max_elems) << esz);
            ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
        }
        k++;
    }

    /* load/store elements for rest of segments */
    for (; k < nf; k++) {
        for (i = 0; i < max_elems; i++, env->vstart++) {
            target_ulong addr = base + ((i + k * max_elems) << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
        }
    }

    env->vstart = 0;
}
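
/*
 * vstart resumption: k selects the register within the group and off the
 * element within it, so a trapped vl<nf>re<eew>.v can restart mid-register
 * and still touch each remaining element exactly once.
 */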

#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
                    ctzl(sizeof(ETYPE)), GETPC(),    \
                    MMU_DATA_LOAD);                  \
}

GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)

#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
                    ctzl(sizeof(ETYPE)), GETPC(),    \
                    MMU_DATA_STORE);                 \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)

/*
 *** Vector Integer Arithmetic Instructions
 */

/* expand macro args before macro */
#define RVVCALL(macro, ...)  macro(__VA_ARGS__)

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

/* operation of two vector elements */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
{                                                               \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
}
#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)
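
/*
 * Note the operand order: OP(s2, s1) makes vd[i] = vs2[i] OP vs1[i], so
 * DO_RSUB(N, M) == M - N yields x[rs1] - vs2[i] for the vrsub.vx helpers
 * below, as the spec defines reversed subtraction.
 */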

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       uint32_t esz, uint32_t dsz,
                       opivv2_fn *fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, vs1, vs2, i);
    }
    env->vstart = 0;
}

/* generate the helpers for OPIVV */
#define GEN_VEXT_VV(NAME, ESZ, DSZ)                       \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
               do_##NAME);                                \
}

GEN_VEXT_VV(vadd_vv_b, 1, 1)
GEN_VEXT_VV(vadd_vv_h, 2, 2)
GEN_VEXT_VV(vadd_vv_w, 4, 4)
GEN_VEXT_VV(vadd_vv_d, 8, 8)
GEN_VEXT_VV(vsub_vv_b, 1, 1)
GEN_VEXT_VV(vsub_vv_h, 2, 2)
GEN_VEXT_VV(vsub_vv_w, 4, 4)
GEN_VEXT_VV(vsub_vv_d, 8, 8)

typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);

/*
 * (T1)s1 gives the real operator type.
 * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
 */
#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
}
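
/*
 * e.g. for vwadd_vx_b (instantiated below with WOP_SSS_B), T1 = int8_t
 * truncates the scalar to SEW bits and TX1 = int16_t sign-extends it
 * again, giving the 2*SEW arithmetic the spec requires.
 */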

RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       uint32_t esz, uint32_t dsz,
                       opivx2_fn fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, s1, vs2, i);
    }
    env->vstart = 0;
}

/* generate the helpers for OPIVX */
#define GEN_VEXT_VX(NAME, ESZ, DSZ)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,      \
               do_##NAME);                                \
}

GEN_VEXT_VX(vadd_vx_b, 1, 1)
GEN_VEXT_VX(vadd_vx_h, 2, 2)
GEN_VEXT_VX(vadd_vx_w, 4, 4)
GEN_VEXT_VX(vadd_vx_d, 8, 8)
GEN_VEXT_VX(vsub_vx_b, 1, 1)
GEN_VEXT_VX(vsub_vx_h, 2, 2)
GEN_VEXT_VX(vsub_vx_w, 4, 4)
GEN_VEXT_VX(vsub_vx_d, 8, 8)
GEN_VEXT_VX(vrsub_vx_b, 1, 1)
GEN_VEXT_VX(vrsub_vx_h, 2, 2)
GEN_VEXT_VX(vrsub_vx_w, 4, 4)
GEN_VEXT_VX(vrsub_vx_d, 8, 8)

void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
GEN_VEXT_VV(vwadd_vv_b, 1, 2)
GEN_VEXT_VV(vwadd_vv_h, 2, 4)
GEN_VEXT_VV(vwadd_vv_w, 4, 8)
GEN_VEXT_VV(vwsub_vv_b, 1, 2)
GEN_VEXT_VV(vwsub_vv_h, 2, 4)
GEN_VEXT_VV(vwsub_vv_w, 4, 8)
GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
GEN_VEXT_VV(vwadd_wv_b, 1, 2)
GEN_VEXT_VV(vwadd_wv_h, 2, 4)
GEN_VEXT_VV(vwadd_wv_w, 4, 8)
GEN_VEXT_VV(vwsub_wv_b, 1, 2)
GEN_VEXT_VV(vwsub_wv_h, 2, 4)
GEN_VEXT_VV(vwsub_wv_w, 4, 8)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
GEN_VEXT_VX(vwadd_vx_b, 1, 2)
GEN_VEXT_VX(vwadd_vx_h, 2, 4)
GEN_VEXT_VX(vwadd_vx_w, 4, 8)
GEN_VEXT_VX(vwsub_vx_b, 1, 2)
GEN_VEXT_VX(vwsub_vx_h, 2, 4)
GEN_VEXT_VX(vwsub_vx_w, 4, 8)
GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
GEN_VEXT_VX(vwadd_wx_b, 1, 2)
GEN_VEXT_VX(vwadd_wx_h, 2, 4)
GEN_VEXT_VX(vwadd_wx_w, 4, 8)
GEN_VEXT_VX(vwsub_wx_b, 1, 2)
GEN_VEXT_VX(vwsub_wx_h, 2, 4)
GEN_VEXT_VX(vwsub_wx_w, 4, 8)

/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = vext_elem_mask(v0, i);                  \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
    env->vstart = 0;                                          \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t i;                                                          \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
        ETYPE carry = vext_elem_mask(v0, i);                             \
                                                                         \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                    \
    env->vstart = 0;                                                     \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
                          (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
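
/*
 * Carry-out detection via wrap-around, e.g. for uint8_t: 0xff + 0x00 + 1
 * wraps to 0x00 <= 0xff, so DO_MADC reports a carry; DO_MSBC likewise
 * reports a borrow exactly when N - M - C would underflow.
 */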

#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    uint32_t vl = env->vl;                                      \
    uint32_t vm = vext_vm(desc);                                \
    uint32_t i;                                                 \
                                                                \
    for (i = env->vstart; i < vl; i++) {                        \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
        ETYPE carry = !vm && vext_elem_mask(v0, i);             \
        vext_set_elem_mask(vd, i,                               \
                DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
    }                                                           \
    env->vstart = 0;                                            \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1, 1)
GEN_VEXT_VV(vand_vv_h, 2, 2)
GEN_VEXT_VV(vand_vv_w, 4, 4)
GEN_VEXT_VV(vand_vv_d, 8, 8)
GEN_VEXT_VV(vor_vv_b, 1, 1)
GEN_VEXT_VV(vor_vv_h, 2, 2)
GEN_VEXT_VV(vor_vv_w, 4, 4)
GEN_VEXT_VV(vor_vv_d, 8, 8)
GEN_VEXT_VV(vxor_vv_b, 1, 1)
GEN_VEXT_VV(vxor_vv_h, 2, 2)
GEN_VEXT_VV(vxor_vv_w, 4, 4)
GEN_VEXT_VV(vxor_vv_d, 8, 8)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1, 1)
GEN_VEXT_VX(vand_vx_h, 2, 2)
GEN_VEXT_VX(vand_vx_w, 4, 4)
GEN_VEXT_VX(vand_vx_d, 8, 8)
GEN_VEXT_VX(vor_vx_b, 1, 1)
GEN_VEXT_VX(vor_vx_h, 2, 2)
GEN_VEXT_VX(vor_vx_w, 4, 4)
GEN_VEXT_VX(vor_vx_d, 8, 8)
GEN_VEXT_VX(vxor_vx_b, 1, 1)
GEN_VEXT_VX(vxor_vx_h, 2, 2)
GEN_VEXT_VX(vxor_vx_w, 4, 4)
GEN_VEXT_VX(vxor_vx_d, 8, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M)  (N << (M))
#define DO_SRL(N, M)  (N >> (M))

/* generate the helpers for shift instructions with two vector operators */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
                  void *vs2, CPURISCVState *env, uint32_t desc)           \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
    }                                                                     \
    env->vstart = 0;                                                      \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* generate the helpers for shift instructions with one vector and one scalar */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK)     \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    uint32_t vm = vext_vm(desc);                                \
    uint32_t vl = env->vl;                                      \
    uint32_t i;                                                 \
                                                                \
    for (i = env->vstart; i < vl; i++) {                        \
        if (!vm && !vext_elem_mask(v0, i)) {                    \
            continue;                                           \
        }                                                       \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                        \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);                \
    }                                                           \
    env->vstart = 0;                                            \
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
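
/*
 * Narrowing shifts read 2*SEW-wide elements from vs2 (HS2 steps in the
 * wide type) and write SEW-wide results, so the shift-amount mask covers
 * 0..2*SEW-1, e.g. 0xf for the byte-result variants.
 */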

/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    env->vstart = 0;                                          \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t i;                                                     \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                DO_OP(s2, (ETYPE)(target_long)s1));                 \
    }                                                               \
    env->vstart = 0;                                                \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)

/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1, 1)
GEN_VEXT_VV(vminu_vv_h, 2, 2)
GEN_VEXT_VV(vminu_vv_w, 4, 4)
GEN_VEXT_VV(vminu_vv_d, 8, 8)
GEN_VEXT_VV(vmin_vv_b, 1, 1)
GEN_VEXT_VV(vmin_vv_h, 2, 2)
GEN_VEXT_VV(vmin_vv_w, 4, 4)
GEN_VEXT_VV(vmin_vv_d, 8, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
GEN_VEXT_VV(vmax_vv_b, 1, 1)
GEN_VEXT_VV(vmax_vv_h, 2, 2)
GEN_VEXT_VV(vmax_vv_w, 4, 4)
GEN_VEXT_VV(vmax_vv_d, 8, 8)

RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b, 1, 1)
GEN_VEXT_VX(vminu_vx_h, 2, 2)
GEN_VEXT_VX(vminu_vx_w, 4, 4)
GEN_VEXT_VX(vminu_vx_d, 8, 8)
GEN_VEXT_VX(vmin_vx_b, 1, 1)
GEN_VEXT_VX(vmin_vx_h, 2, 2)
GEN_VEXT_VX(vmin_vx_w, 4, 4)
GEN_VEXT_VX(vmin_vx_d, 8, 8)
GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
GEN_VEXT_VX(vmax_vx_b, 1, 1)
GEN_VEXT_VX(vmax_vx_h, 2, 2)
GEN_VEXT_VX(vmax_vx_w, 4, 4)
GEN_VEXT_VX(vmax_vx_d, 8, 8)

/* Vector Single-Width Integer Multiply Instructions */
#define DO_MUL(N, M) (N * M)
RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
GEN_VEXT_VV(vmul_vv_b, 1, 1)
GEN_VEXT_VV(vmul_vv_h, 2, 2)
GEN_VEXT_VV(vmul_vv_w, 4, 4)
GEN_VEXT_VV(vmul_vv_d, 8, 8)

static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    return (int16_t)s2 * (int16_t)s1 >> 8;
}

static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    return (int32_t)s2 * (int32_t)s1 >> 16;
}

static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    return (int64_t)s2 * (int64_t)s1 >> 32;
}

static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t hi_64, lo_64;

    muls64(&lo_64, &hi_64, s1, s2);
    return hi_64;
}

static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    return (uint16_t)s2 * (uint16_t)s1 >> 8;
}

static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    return (uint32_t)s2 * (uint32_t)s1 >> 16;
}

static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    return (uint64_t)s2 * (uint64_t)s1 >> 32;
}

static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);
    return hi_64;
}
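
/*
 * For 8/16/32-bit mulh* the full product fits in a wider C type, so a
 * widening multiply plus shift suffices; only the 64-bit variants need
 * the 128-bit muls64/mulu64 helpers from qemu/host-utils.h.
 */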

static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    return (int16_t)s2 * (uint16_t)s1 >> 8;
}

static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    return (int32_t)s2 * (uint32_t)s1 >> 16;
}

static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    return (int64_t)s2 * (uint64_t)s1 >> 32;
}

/*
 * Let  A = signed operand,
 *      B = unsigned operand
 *      P = mulu64(A, B), unsigned product
 *
 * LET  X = 2 ** 64 - A, 2's complement of A
 *      SP = signed product
 * THEN
 *      IF A < 0
 *          SP = -X * B
 *             = -(2 ** 64 - A) * B
 *             = A * B - 2 ** 64 * B
 *             = P - 2 ** 64 * B
 *      ELSE
 *          SP = P
 * THEN
 *      HI_P -= (A < 0 ? B : 0)
 */
static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);

    hi_64 -= s2 < 0 ? s1 : 0;
    return hi_64;
}

RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
GEN_VEXT_VV(vmulh_vv_b, 1, 1)
GEN_VEXT_VV(vmulh_vv_h, 2, 2)
GEN_VEXT_VV(vmulh_vv_w, 4, 4)
GEN_VEXT_VV(vmulh_vv_d, 8, 8)
GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)

RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
GEN_VEXT_VX(vmul_vx_b, 1, 1)
GEN_VEXT_VX(vmul_vx_h, 2, 2)
GEN_VEXT_VX(vmul_vx_w, 4, 4)
GEN_VEXT_VX(vmul_vx_d, 8, 8)
GEN_VEXT_VX(vmulh_vx_b, 1, 1)
GEN_VEXT_VX(vmulh_vx_h, 2, 2)
GEN_VEXT_VX(vmulh_vx_w, 4, 4)
GEN_VEXT_VX(vmulh_vx_d, 8, 8)
GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)

/* Vector Integer Divide Instructions */
#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
#define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) :\
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
#define DO_REM(N, M)  (unlikely(M == 0) ? N :\
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
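
/*
 * (N == -N) is only true for 0 and the minimum signed value, so combined
 * with M == -1 it catches the INT_MIN / -1 overflow case, which per the
 * spec must return N for division and 0 for remainder.
 */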

RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
GEN_VEXT_VV(vdivu_vv_b, 1, 1)
GEN_VEXT_VV(vdivu_vv_h, 2, 2)
GEN_VEXT_VV(vdivu_vv_w, 4, 4)
GEN_VEXT_VV(vdivu_vv_d, 8, 8)
GEN_VEXT_VV(vdiv_vv_b, 1, 1)
GEN_VEXT_VV(vdiv_vv_h, 2, 2)
GEN_VEXT_VV(vdiv_vv_w, 4, 4)
GEN_VEXT_VV(vdiv_vv_d, 8, 8)
GEN_VEXT_VV(vremu_vv_b, 1, 1)
GEN_VEXT_VV(vremu_vv_h, 2, 2)
GEN_VEXT_VV(vremu_vv_w, 4, 4)
GEN_VEXT_VV(vremu_vv_d, 8, 8)
GEN_VEXT_VV(vrem_vv_b, 1, 1)
GEN_VEXT_VV(vrem_vv_h, 2, 2)
GEN_VEXT_VV(vrem_vv_w, 4, 4)
GEN_VEXT_VV(vrem_vv_d, 8, 8)

RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
GEN_VEXT_VX(vdivu_vx_b, 1, 1)
GEN_VEXT_VX(vdivu_vx_h, 2, 2)
GEN_VEXT_VX(vdivu_vx_w, 4, 4)
GEN_VEXT_VX(vdivu_vx_d, 8, 8)
GEN_VEXT_VX(vdiv_vx_b, 1, 1)
GEN_VEXT_VX(vdiv_vx_h, 2, 2)
GEN_VEXT_VX(vdiv_vx_w, 4, 4)
GEN_VEXT_VX(vdiv_vx_d, 8, 8)
GEN_VEXT_VX(vremu_vx_b, 1, 1)
GEN_VEXT_VX(vremu_vx_h, 2, 2)
GEN_VEXT_VX(vremu_vx_w, 4, 4)
GEN_VEXT_VX(vremu_vx_d, 8, 8)
GEN_VEXT_VX(vrem_vx_b, 1, 1)
GEN_VEXT_VX(vrem_vx_h, 2, 2)
GEN_VEXT_VX(vrem_vx_w, 4, 4)
GEN_VEXT_VX(vrem_vx_d, 8, 8)

/* Vector Widening Integer Multiply Instructions */
RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
GEN_VEXT_VV(vwmul_vv_b, 1, 2)
GEN_VEXT_VV(vwmul_vv_h, 2, 4)
GEN_VEXT_VV(vwmul_vv_w, 4, 8)
GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)

RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
GEN_VEXT_VX(vwmul_vx_b, 1, 2)
GEN_VEXT_VX(vwmul_vx_h, 2, 4)
GEN_VEXT_VX(vwmul_vx_w, 4, 8)
GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
1672
1673
1674#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1675static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1676{ \
1677 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1678 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1679 TD d = *((TD *)vd + HD(i)); \
1680 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1681}
1682
1683#define DO_MACC(N, M, D) (M * N + D)
1684#define DO_NMSAC(N, M, D) (-(M * N) + D)
1685#define DO_MADD(N, M, D) (M * D + N)
1686#define DO_NMSUB(N, M, D) (-(M * D) + N)
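/*
 * For reference, a sketch of what one of the lines below expands to
 * (assuming RVVCALL(macro, ...) simply forwards to macro(...), as defined
 * earlier in this file, and OP_SSS_B selects int8_t for all of
 * TD/T1/T2/TX1/TX2):
 *
 *     RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
 *
 * becomes roughly:
 *
 *     static void do_vmacc_vv_b(void *vd, void *vs1, void *vs2, int i)
 *     {
 *         int8_t s1 = *((int8_t *)vs1 + H1(i));
 *         int8_t s2 = *((int8_t *)vs2 + H1(i));
 *         int8_t d = *((int8_t *)vd + H1(i));
 *         *((int8_t *)vd + H1(i)) = s1 * s2 + d;
 *     }
 *
 * i.e. vmacc computes vd[i] = vs1[i] * vs2[i] + vd[i], while vmadd
 * (DO_MADD) multiplies by the destination: vd[i] = vs1[i] * vd[i] + vs2[i].
 */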
RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
GEN_VEXT_VV(vmacc_vv_b, 1, 1)
GEN_VEXT_VV(vmacc_vv_h, 2, 2)
GEN_VEXT_VV(vmacc_vv_w, 4, 4)
GEN_VEXT_VV(vmacc_vv_d, 8, 8)
GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
GEN_VEXT_VV(vmadd_vv_b, 1, 1)
GEN_VEXT_VV(vmadd_vv_h, 2, 2)
GEN_VEXT_VV(vmadd_vv_w, 4, 4)
GEN_VEXT_VV(vmadd_vv_d, 8, 8)
GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
GEN_VEXT_VV(vnmsub_vv_d, 8, 8)

#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    TD d = *((TD *)vd + HD(i));                                     \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
}

RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
GEN_VEXT_VX(vmacc_vx_b, 1, 1)
GEN_VEXT_VX(vmacc_vx_h, 2, 2)
GEN_VEXT_VX(vmacc_vx_w, 4, 4)
GEN_VEXT_VX(vmacc_vx_d, 8, 8)
GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
GEN_VEXT_VX(vmadd_vx_b, 1, 1)
GEN_VEXT_VX(vmadd_vx_h, 2, 2)
GEN_VEXT_VX(vmadd_vx_w, 4, 4)
GEN_VEXT_VX(vmadd_vx_d, 8, 8)
GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
GEN_VEXT_VX(vnmsub_vx_d, 8, 8)

/* Vector Widening Integer Multiply-Add Instructions */
RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)

RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)

/* Vector Integer Merge and Move Instructions */
#define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
                  uint32_t desc)                                     \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t i;                                                      \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
        *((ETYPE *)vd + H(i)) = s1;                                  \
    }                                                                \
    env->vstart = 0;                                                 \
}

GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)

#define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
                  uint32_t desc)                                     \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t i;                                                      \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
    }                                                                \
    env->vstart = 0;                                                 \
}

GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)

#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
                  CPURISCVState *env, uint32_t desc)                 \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t i;                                                      \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
        *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
    }                                                                \
    env->vstart = 0;                                                 \
}

GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)

#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
                  void *vs2, CPURISCVState *env, uint32_t desc)      \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t i;                                                      \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
        ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
                   (ETYPE)(target_long)s1);                          \
        *((ETYPE *)vd + H(i)) = d;                                   \
    }                                                                \
    env->vstart = 0;                                                 \
}

GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
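/*
 * Example of the merge semantics implemented above: with vl = 4,
 * v0 mask bits {1, 0, 1, 0}, vs1 = {10, 11, 12, 13} and
 * vs2 = {20, 21, 22, 23}, vmerge.vvm produces vd = {10, 21, 12, 23}:
 * masked-on elements come from vs1 (or from the scalar for the .vxm
 * form), masked-off elements come from vs2.
 */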

/*
 * Vector Fixed-Point Arithmetic Instructions
 *
 * The fixed-point helpers below take the current rounding mode (vxrm)
 * as an extra argument and set env->vxsat when a result saturates.
 */

/* Vector Single-Width Saturating Add and Subtract */

typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
                          CPURISCVState *env, int vxrm);

#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
static inline void                                                  \
do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
          CPURISCVState *env, int vxrm)                             \
{                                                                   \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
}

static inline void
vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
             CPURISCVState *env,
             uint32_t vl, uint32_t vm, int vxrm,
             opivv2_rm_fn *fn)
{
    for (uint32_t i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, vs1, vs2, i, env, vxrm);
    }
    env->vstart = 0;
}

static inline void
vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
             CPURISCVState *env,
             uint32_t desc, uint32_t esz, uint32_t dsz,
             opivv2_rm_fn *fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;

    switch (env->vxrm) {
    case 0:
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 0, fn);
        break;
    case 1:
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 1, fn);
        break;
    case 2:
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 2, fn);
        break;
    default:
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 3, fn);
        break;
    }
}
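/*
 * The switch above dispatches on env->vxrm so that each call to
 * vext_vv_rm_1() sees a constant rounding mode; this lets the compiler
 * specialize (and potentially inline) the per-element function for each
 * of the four fixed-point rounding modes instead of reloading env->vxrm
 * on every element.
 */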

/* generate helpers for fixed point instructions with OPIVV format */
#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ)                          \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)            \
{                                                               \
    vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,         \
                 do_##NAME);                                    \
}

static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
{
    uint8_t res = a + b;
    if (res < a) {
        res = UINT8_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
                               uint16_t b)
{
    uint16_t res = a + b;
    if (res < a) {
        res = UINT16_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
                               uint32_t b)
{
    uint32_t res = a + b;
    if (res < a) {
        res = UINT32_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
                               uint64_t b)
{
    uint64_t res = a + b;
    if (res < a) {
        res = UINT64_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)

typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
                          CPURISCVState *env, int vxrm);

#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
static inline void                                                  \
do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
          CPURISCVState *env, int vxrm)                             \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
}

static inline void
vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
             CPURISCVState *env,
             uint32_t vl, uint32_t vm, int vxrm,
             opivx2_rm_fn *fn)
{
    for (uint32_t i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, s1, vs2, i, env, vxrm);
    }
    env->vstart = 0;
}

static inline void
vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
             CPURISCVState *env,
             uint32_t desc, uint32_t esz, uint32_t dsz,
             opivx2_rm_fn *fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;

    switch (env->vxrm) {
    case 0:
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 0, fn);
        break;
    case 1:
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 1, fn);
        break;
    case 2:
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 2, fn);
        break;
    default:
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 3, fn);
        break;
    }
}

/* generate helpers for fixed point instructions with OPIVX format */
#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,        \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                             \
    vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ,        \
                 do_##NAME);                                  \
}

RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)

static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
    int8_t res = a + b;
    if ((res ^ a) & (res ^ b) & INT8_MIN) {
        res = a > 0 ? INT8_MAX : INT8_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
{
    int16_t res = a + b;
    if ((res ^ a) & (res ^ b) & INT16_MIN) {
        res = a > 0 ? INT16_MAX : INT16_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    int32_t res = a + b;
    if ((res ^ a) & (res ^ b) & INT32_MIN) {
        res = a > 0 ? INT32_MAX : INT32_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    int64_t res = a + b;
    if ((res ^ a) & (res ^ b) & INT64_MIN) {
        res = a > 0 ? INT64_MAX : INT64_MIN;
        env->vxsat = 0x1;
    }
    return res;
}
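/*
 * The signed-overflow test above uses the usual two's-complement identity:
 * a + b overflows iff the operands have the same sign and the result has
 * the opposite sign, i.e. the sign bit of (res ^ a) & (res ^ b) is set.
 * For example (SEW=8): a = 100, b = 100 wraps to res = -56; both
 * (res ^ a) and (res ^ b) have bit 7 set, so the sum saturates to
 * INT8_MAX and vxsat is raised.
 */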

RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)

static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
{
    uint8_t res = a - b;
    if (res > a) {
        res = 0;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
                               uint16_t b)
{
    uint16_t res = a - b;
    if (res > a) {
        res = 0;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
                               uint32_t b)
{
    uint32_t res = a - b;
    if (res > a) {
        res = 0;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
                               uint64_t b)
{
    uint64_t res = a - b;
    if (res > a) {
        res = 0;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)

static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
    int8_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT8_MIN) {
        res = a >= 0 ? INT8_MAX : INT8_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
{
    int16_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT16_MIN) {
        res = a >= 0 ? INT16_MAX : INT16_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    int32_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT32_MIN) {
        res = a >= 0 ? INT32_MAX : INT32_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    int64_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT64_MIN) {
        res = a >= 0 ? INT64_MAX : INT64_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)

/* Vector Single-Width Averaging Add and Subtract */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t d = extract64(v, shift, 1);
    uint8_t d1;
    uint64_t D1, D2;

    if (shift == 0 || shift > 64) {
        return 0;
    }

    d1 = extract64(v, shift - 1, 1);
    D1 = extract64(v, 0, shift);
    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
        return d1;
    } else if (vxrm == 1) { /* round-to-nearest-even */
        if (shift > 1) {
            D2 = extract64(v, 0, shift - 1);
            return d1 & ((D2 != 0) | d);
        } else {
            return d1 & d;
        }
    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
        return !d & (D1 != 0);
    }
    return 0; /* round-down (truncate) */
}
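/*
 * Worked example of get_round(): v = 0b10110, shift = 2, so the two bits
 * shifted out are "10" (d1 = 1, remaining lower bits D2 = 0) and the new
 * LSB after the shift is d = 1:
 *   vxrm 0 (rnu): returns d1                  -> 1
 *   vxrm 1 (rne): d1 & ((D2 != 0) | d)        -> 1
 *   vxrm 2 (rdn): returns 0                   -> 0
 *   vxrm 3 (rod): !d & (D1 != 0)              -> 0
 * The caller adds the returned bit to (v >> shift).
 */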

static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    int64_t res = (int64_t)a + b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    int64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63. */
    return ((res >> 1) ^ over) + round;
}

RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)

static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
                               uint32_t a, uint32_t b)
{
    uint64_t res = (uint64_t)a + b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
                               uint64_t a, uint64_t b)
{
    uint64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    uint64_t over = (uint64_t)(res < a) << 63;

    return ((res >> 1) | over) + round;
}

RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)

static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    int64_t res = (int64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    int64_t res = (int64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);
    int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63. */
    return ((res >> 1) ^ over) + round;
}

RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)

static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
                               uint32_t a, uint32_t b)
{
    int64_t res = (int64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
                               uint64_t a, uint64_t b)
{
    uint64_t res = (uint64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);
    uint64_t over = (uint64_t)(res > a) << 63;

    return ((res >> 1) | over) + round;
}

RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)

/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
    uint8_t round;
    int16_t res;

    res = (int16_t)a * (int16_t)b;
    round = get_round(vxrm, res, 7);
    res = (res >> 7) + round;

    if (res > INT8_MAX) {
        env->vxsat = 0x1;
        return INT8_MAX;
    } else if (res < INT8_MIN) {
        env->vxsat = 0x1;
        return INT8_MIN;
    } else {
        return res;
    }
}

static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
{
    uint8_t round;
    int32_t res;

    res = (int32_t)a * (int32_t)b;
    round = get_round(vxrm, res, 15);
    res = (res >> 15) + round;

    if (res > INT16_MAX) {
        env->vxsat = 0x1;
        return INT16_MAX;
    } else if (res < INT16_MIN) {
        env->vxsat = 0x1;
        return INT16_MIN;
    } else {
        return res;
    }
}

static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    uint8_t round;
    int64_t res;

    res = (int64_t)a * (int64_t)b;
    round = get_round(vxrm, res, 31);
    res = (res >> 31) + round;

    if (res > INT32_MAX) {
        env->vxsat = 0x1;
        return INT32_MAX;
    } else if (res < INT32_MIN) {
        env->vxsat = 0x1;
        return INT32_MIN;
    } else {
        return res;
    }
}

static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    uint8_t round;
    uint64_t hi_64, lo_64;
    int64_t res;

    if (a == INT64_MIN && b == INT64_MIN) {
        env->vxsat = 1;
        return INT64_MAX;
    }

    muls64(&lo_64, &hi_64, a, b);
    round = get_round(vxrm, lo_64, 63);
    /*
     * Cannot overflow, as there are always
     * 2 sign bits after multiply.
     */
    res = (hi_64 << 1) | (lo_64 >> 63);
    if (round) {
        if (res == INT64_MAX) {
            env->vxsat = 1;
        } else {
            res += 1;
        }
    }
    return res;
}
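/*
 * vsmul computes a signed fractional (Q-format) multiply:
 * roundoff_signed(vs1 * vs2, SEW-1), saturating only for the one
 * overflowing case, -1.0 * -1.0. Example with SEW=8 (Q7 format):
 * a = b = 0x40 (+0.5) gives the 16-bit product 0x1000; shifting right by
 * 7 yields 0x20 (+0.25) with no rounding bits lost. a = b = 0x80 (-1.0)
 * is the sole saturating case and returns 0x7f (INT8_MAX) with vxsat set.
 */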

RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)

/* Vector Single-Width Scaling Shift Instructions */
static inline uint8_t
vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
{
    uint8_t round, shift = b & 0x7;
    uint8_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    return res;
}

static inline uint16_t
vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
{
    uint8_t round, shift = b & 0xf;
    uint16_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    return res;
}

static inline uint32_t
vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
{
    uint8_t round, shift = b & 0x1f;
    uint32_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    return res;
}

static inline uint64_t
vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
{
    uint8_t round, shift = b & 0x3f;
    uint64_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    return res;
}

RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
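/*
 * Unlike the narrowing clips below, the scaling shifts only round; the
 * result always fits, so vxsat is never set. E.g. vssrl8(0x97, 1) under
 * rnu rounding is (0x97 >> 1) + 1 = 0x4c.
 */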

static inline int8_t
vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
    uint8_t round, shift = b & 0x7;
    int8_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    return res;
}

static inline int16_t
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
{
    uint8_t round, shift = b & 0xf;
    int16_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    return res;
}

static inline int32_t
vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    uint8_t round, shift = b & 0x1f;
    int32_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    return res;
}

static inline int64_t
vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    uint8_t round, shift = b & 0x3f;
    int64_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    return res;
}

RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)

/* Vector Narrowing Fixed-Point Clip Instructions */
static inline int8_t
vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
{
    uint8_t round, shift = b & 0xf;
    int16_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > INT8_MAX) {
        env->vxsat = 0x1;
        return INT8_MAX;
    } else if (res < INT8_MIN) {
        env->vxsat = 0x1;
        return INT8_MIN;
    } else {
        return res;
    }
}

static inline int16_t
vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
{
    uint8_t round, shift = b & 0x1f;
    int32_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > INT16_MAX) {
        env->vxsat = 0x1;
        return INT16_MAX;
    } else if (res < INT16_MIN) {
        env->vxsat = 0x1;
        return INT16_MIN;
    } else {
        return res;
    }
}

static inline int32_t
vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
{
    uint8_t round, shift = b & 0x3f;
    int64_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > INT32_MAX) {
        env->vxsat = 0x1;
        return INT32_MAX;
    } else if (res < INT32_MIN) {
        env->vxsat = 0x1;
        return INT32_MIN;
    } else {
        return res;
    }
}

RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1)
GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2)
GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4)

RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)

static inline uint8_t
vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
{
    uint8_t round, shift = b & 0xf;
    uint16_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT8_MAX) {
        env->vxsat = 0x1;
        return UINT8_MAX;
    } else {
        return res;
    }
}

static inline uint16_t
vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
{
    uint8_t round, shift = b & 0x1f;
    uint32_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT16_MAX) {
        env->vxsat = 0x1;
        return UINT16_MAX;
    } else {
        return res;
    }
}

static inline uint32_t
vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
{
    uint8_t round, shift = b & 0x3f;
    uint64_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT32_MAX) {
        env->vxsat = 0x1;
        return UINT32_MAX;
    } else {
        return res;
    }
}

RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)

RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)
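/*
 * Worked example for the narrowing clips: vnclipu8 with a 16-bit source
 * a = 0x1234 and shift b = 4 computes 0x123 plus a rounding bit, which
 * exceeds UINT8_MAX, so the result saturates to 0xff and vxsat is set;
 * with b = 8 it yields 0x12 plus rounding, which fits in the narrow type.
 */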

/*
 * Vector Floating-Point Arithmetic Instructions
 */

/* Vector Single-Width Floating-Point Add/Subtract Instructions */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
}

#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t i;                                           \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, vs1, vs2, i, env);                  \
    }                                                     \
    env->vstart = 0;                                      \
}

RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)

#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
}

#define GEN_VEXT_VF(NAME, ESZ, DSZ)                       \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t i;                                           \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, s1, vs2, i, env);                   \
    }                                                     \
    env->vstart = 0;                                      \
}

RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
GEN_VEXT_VF(vfadd_vf_h, 2, 2)
GEN_VEXT_VF(vfadd_vf_w, 4, 4)
GEN_VEXT_VF(vfadd_vf_d, 8, 8)

RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
GEN_VEXT_VF(vfsub_vf_h, 2, 2)
GEN_VEXT_VF(vfsub_vf_w, 4, 4)
GEN_VEXT_VF(vfsub_vf_d, 8, 8)

static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
{
    return float16_sub(b, a, s);
}

static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
{
    return float32_sub(b, a, s);
}

static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
{
    return float64_sub(b, a, s);
}

RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
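/*
 * vfrsub.vf reverses the operand order: since OPFVF2 invokes
 * OP(vector element, scalar), the *_rsub wrappers above swap their
 * arguments so the result is f[rs1] - vs2[i] rather than vs2[i] - f[rs1].
 */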

/* Vector Widening Floating-Point Add/Subtract Instructions */
static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
{
    return float32_add(float16_to_float32(a, true, s),
                       float16_to_float32(b, true, s), s);
}

static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
{
    return float64_add(float32_to_float64(a, s),
                       float32_to_float64(b, s), s);
}

RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
GEN_VEXT_VF(vfwadd_vf_w, 4, 8)

static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
{
    return float32_sub(float16_to_float32(a, true, s),
                       float16_to_float32(b, true, s), s);
}

static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
{
    return float64_sub(float32_to_float64(a, s),
                       float32_to_float64(b, s), s);
}

RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
GEN_VEXT_VF(vfwsub_vf_w, 4, 8)

static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
{
    return float32_add(a, float16_to_float32(b, true, s), s);
}

static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
{
    return float64_add(a, float32_to_float64(b, s), s);
}

RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
GEN_VEXT_VF(vfwadd_wf_w, 4, 8)

static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
{
    return float32_sub(a, float16_to_float32(b, true, s), s);
}

static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
{
    return float64_sub(a, float32_to_float64(b, s), s);
}

RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
GEN_VEXT_VF(vfwsub_wf_w, 4, 8)

/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
GEN_VEXT_VF(vfmul_vf_h, 2, 2)
GEN_VEXT_VF(vfmul_vf_w, 4, 4)
GEN_VEXT_VF(vfmul_vf_d, 8, 8)

RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
GEN_VEXT_VF(vfdiv_vf_d, 8, 8)

static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
{
    return float16_div(b, a, s);
}

static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
{
    return float32_div(b, a, s);
}

static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
{
    return float64_div(b, a, s);
}

RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)

/* Vector Widening Floating-Point Multiply */
static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
{
    return float32_mul(float16_to_float32(a, true, s),
                       float16_to_float32(b, true, s), s);
}

static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
{
    return float64_mul(float32_to_float64(a, s),
                       float32_to_float64(b, s), s);
}

RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
GEN_VEXT_VF(vfwmul_vf_w, 4, 8)

/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    TD d = *((TD *)vd + HD(i));                                \
    *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);      \
}

static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, 0, s);
}

static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, 0, s);
}

static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, 0, s);
}
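/*
 * float*_muladd(a, b, d, flags, s) computes (a * b) + d with a single
 * rounding step. The flag variants used below follow from that:
 * float_muladd_negate_c negates the addend d, and
 * float_muladd_negate_product negates the a * b product, so e.g.
 * fnmacc computes -(a * b) - d and fmsac computes (a * b) - d.
 */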
3081
3082RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3083RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3084RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3085GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
3086GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
3087GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
3088
3089#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3090static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3091 CPURISCVState *env) \
3092{ \
3093 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3094 TD d = *((TD *)vd + HD(i)); \
3095 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3096}
3097
3098RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3099RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3100RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3101GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
3102GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
3103GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
3104
3105static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3106{
3107 return float16_muladd(a, b, d,
3108 float_muladd_negate_c | float_muladd_negate_product, s);
3109}
3110
3111static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3112{
3113 return float32_muladd(a, b, d,
3114 float_muladd_negate_c | float_muladd_negate_product, s);
3115}
3116
3117static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3118{
3119 return float64_muladd(a, b, d,
3120 float_muladd_negate_c | float_muladd_negate_product, s);
3121}
3122
3123RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3124RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3125RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3126GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
3127GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
3128GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
3129RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3130RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3131RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3132GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
3133GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
3134GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
3135
3136static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3137{
3138 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3139}
3140
3141static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3142{
3143 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3144}
3145
3146static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3147{
3148 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3149}
3150
3151RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3152RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3153RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3154GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
3155GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
3156GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
3157RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3158RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3159RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3160GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
3161GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
3162GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
3163
3164static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3165{
3166 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3167}
3168
3169static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3170{
3171 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3172}
3173
3174static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3175{
3176 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3177}
3178
3179RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3180RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3181RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3182GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
3183GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
3184GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
3185RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3186RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3187RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3188GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
3189GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
3190GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
3191
3192static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3193{
3194 return float16_muladd(d, b, a, 0, s);
3195}
3196
3197static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3198{
3199 return float32_muladd(d, b, a, 0, s);
3200}
3201
3202static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3203{
3204 return float64_muladd(d, b, a, 0, s);
3205}
3206
3207RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3208RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3209RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3210GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
3211GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
3212GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
3213RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3214RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3215RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3216GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
3217GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
3218GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
3219
3220static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3221{
3222 return float16_muladd(d, b, a,
3223 float_muladd_negate_c | float_muladd_negate_product, s);
3224}
3225
3226static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3227{
3228 return float32_muladd(d, b, a,
3229 float_muladd_negate_c | float_muladd_negate_product, s);
3230}
3231
3232static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3233{
3234 return float64_muladd(d, b, a,
3235 float_muladd_negate_c | float_muladd_negate_product, s);
3236}
3237
3238RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3239RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3240RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3241GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
3242GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
3243GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
3244RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3245RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3246RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3247GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
3248GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
3249GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
3250
3251static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3252{
3253 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3254}
3255
3256static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3257{
3258 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3259}
3260
3261static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3262{
3263 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3264}
3265
3266RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3267RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3268RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3269GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
3270GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
3271GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
3272RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3273RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3274RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3275GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
3276GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
3277GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
3278
3279static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3280{
3281 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3282}
3283
3284static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3285{
3286 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3287}
3288
3289static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3290{
3291 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3292}
3293
3294RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3295RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3296RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3297GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
3298GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
3299GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
3300RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3301RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3302RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3303GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
3304GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
3305GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
3306
3307
3308static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3309{
3310 return float32_muladd(float16_to_float32(a, true, s),
3311 float16_to_float32(b, true, s), d, 0, s);
3312}
3313
3314static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3315{
3316 return float64_muladd(float32_to_float64(a, s),
3317 float32_to_float64(b, s), d, 0, s);
3318}

RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)

static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_c | float_muladd_negate_product,
                          s);
}

static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d,
                          float_muladd_negate_c | float_muladd_negate_product,
                          s);
}

RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)

static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_c, s);
}

static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d,
                          float_muladd_negate_c, s);
}

RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)

static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_product, s);
}

static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d,
                          float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)

/* Vector Floating-Point Square-Root Instruction */
/* (TD, T2, TX2) */
#define OP_UU_H uint16_t, uint16_t, uint16_t
#define OP_UU_W uint32_t, uint32_t, uint32_t
#define OP_UU_D uint64_t, uint64_t, uint64_t

#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, void *vs2, int i,     \
                      CPURISCVState *env)             \
{                                                     \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                   \
    *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);    \
}

#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ)                \
void HELPER(NAME)(void *vd, void *v0, void *vs2,      \
                  CPURISCVState *env, uint32_t desc)  \
{                                                     \
    uint32_t vm = vext_vm(desc);                      \
    uint32_t vl = env->vl;                            \
    uint32_t i;                                       \
                                                      \
    if (vl == 0) {                                    \
        return;                                       \
    }                                                 \
    for (i = env->vstart; i < vl; i++) {              \
        if (!vm && !vext_elem_mask(v0, i)) {          \
            continue;                                 \
        }                                             \
        do_##NAME(vd, vs2, i, env);                   \
    }                                                 \
    env->vstart = 0;                                  \
}

RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)

/*
 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
 *
 * Adapted from riscv-v-spec recip.c:
 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
 */
static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
{
    uint64_t sign = extract64(f, frac_size + exp_size, 1);
    uint64_t exp = extract64(f, frac_size, exp_size);
    uint64_t frac = extract64(f, 0, frac_size);

    const uint8_t lookup_table[] = {
        52, 51, 50, 48, 47, 46, 44, 43,
        42, 41, 40, 39, 38, 36, 35, 34,
        33, 32, 31, 30, 30, 29, 28, 27,
        26, 25, 24, 23, 23, 22, 21, 20,
        19, 19, 18, 17, 16, 16, 15, 14,
        14, 13, 12, 12, 11, 10, 10, 9,
        9, 8, 7, 7, 6, 6, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0,
        127, 125, 123, 121, 119, 118, 116, 114,
        113, 111, 109, 108, 106, 105, 103, 102,
        100, 99, 97, 96, 95, 93, 92, 91,
        90, 88, 87, 86, 85, 84, 83, 82,
        80, 79, 78, 77, 76, 75, 74, 73,
        72, 71, 70, 70, 69, 68, 67, 66,
        65, 64, 63, 63, 62, 61, 60, 59,
        59, 58, 57, 56, 56, 55, 54, 53
    };
    const int precision = 7;

    if (exp == 0 && frac != 0) { /* subnormal */
        /* normalize the subnormal */
        while (extract64(frac, frac_size - 1, 1) == 0) {
            exp--;
            frac <<= 1;
        }

        frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
    }

    int idx = ((exp & 1) << (precision - 1)) |
              (frac >> (frac_size - precision + 1));
    uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
                        (frac_size - precision);
    uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;

    uint64_t val = 0;
    val = deposit64(val, 0, frac_size, out_frac);
    val = deposit64(val, frac_size, exp_size, out_exp);
    val = deposit64(val, frac_size + exp_size, 1, sign);
    return val;
}
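
/*
 * How the 7-bit estimate above is formed: the table index combines the
 * low exponent bit with the top six fraction bits, and the output
 * exponent is roughly half the negated biased exponent. Since
 * MAKE_64BIT_MASK(0, exp_size - 1) equals the exponent bias and
 * ~exp == -exp - 1 in two's complement, out_exp = (3 * bias - 1 - exp) / 2.
 */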

static float16 frsqrt7_h(float16 f, float_status *s)
{
    int exp_size = 5, frac_size = 10;
    bool sign = float16_is_neg(f);

    /*
     * frsqrt7(sNaN) = canonical NaN
     * frsqrt7(-inf) = canonical NaN
     * frsqrt7(-normal) = canonical NaN
     * frsqrt7(-subnormal) = canonical NaN
     */
    if (float16_is_signaling_nan(f, s) ||
        (float16_is_infinity(f) && sign) ||
        (float16_is_normal(f) && sign) ||
        (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
        s->float_exception_flags |= float_flag_invalid;
        return float16_default_nan(s);
    }

    /* frsqrt7(qNaN) = canonical NaN */
    if (float16_is_quiet_nan(f, s)) {
        return float16_default_nan(s);
    }

    /* frsqrt7(+-0) = +-inf */
    if (float16_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float16_set_sign(float16_infinity, sign);
    }

    /* frsqrt7(+inf) = +0 */
    if (float16_is_infinity(f) && !sign) {
        return float16_set_sign(float16_zero, sign);
    }

    /* +normal, +subnormal */
    uint64_t val = frsqrt7(f, exp_size, frac_size);
    return make_float16(val);
}

static float32 frsqrt7_s(float32 f, float_status *s)
{
    int exp_size = 8, frac_size = 23;
    bool sign = float32_is_neg(f);

    /*
     * frsqrt7(sNaN) = canonical NaN
     * frsqrt7(-inf) = canonical NaN
     * frsqrt7(-normal) = canonical NaN
     * frsqrt7(-subnormal) = canonical NaN
     */
    if (float32_is_signaling_nan(f, s) ||
        (float32_is_infinity(f) && sign) ||
        (float32_is_normal(f) && sign) ||
        (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
        s->float_exception_flags |= float_flag_invalid;
        return float32_default_nan(s);
    }

    /* frsqrt7(qNaN) = canonical NaN */
    if (float32_is_quiet_nan(f, s)) {
        return float32_default_nan(s);
    }

    /* frsqrt7(+-0) = +-inf */
    if (float32_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float32_set_sign(float32_infinity, sign);
    }

    /* frsqrt7(+inf) = +0 */
    if (float32_is_infinity(f) && !sign) {
        return float32_set_sign(float32_zero, sign);
    }

    /* +normal, +subnormal */
    uint64_t val = frsqrt7(f, exp_size, frac_size);
    return make_float32(val);
}

static float64 frsqrt7_d(float64 f, float_status *s)
{
    int exp_size = 11, frac_size = 52;
    bool sign = float64_is_neg(f);

    /*
     * frsqrt7(sNaN) = canonical NaN
     * frsqrt7(-inf) = canonical NaN
     * frsqrt7(-normal) = canonical NaN
     * frsqrt7(-subnormal) = canonical NaN
     */
    if (float64_is_signaling_nan(f, s) ||
        (float64_is_infinity(f) && sign) ||
        (float64_is_normal(f) && sign) ||
        (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
        s->float_exception_flags |= float_flag_invalid;
        return float64_default_nan(s);
    }

    /* frsqrt7(qNaN) = canonical NaN */
    if (float64_is_quiet_nan(f, s)) {
        return float64_default_nan(s);
    }

    /* frsqrt7(+-0) = +-inf */
    if (float64_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float64_set_sign(float64_infinity, sign);
    }

    /* frsqrt7(+inf) = +0 */
    if (float64_is_infinity(f) && !sign) {
        return float64_set_sign(float64_zero, sign);
    }

    /* +normal, +subnormal */
    uint64_t val = frsqrt7(f, exp_size, frac_size);
    return make_float64(val);
}

RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2)
GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4)
GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8)

/*
 * Vector Floating-Point Reciprocal Estimate Instruction
 *
 * Adapted from riscv-v-spec recip.c:
 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
 */
static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
                      float_status *s)
{
    uint64_t sign = extract64(f, frac_size + exp_size, 1);
    uint64_t exp = extract64(f, frac_size, exp_size);
    uint64_t frac = extract64(f, 0, frac_size);

    const uint8_t lookup_table[] = {
        127, 125, 123, 121, 119, 117, 116, 114,
        112, 110, 109, 107, 105, 104, 102, 100,
        99, 97, 96, 94, 93, 91, 90, 88,
        87, 85, 84, 83, 81, 80, 79, 77,
        76, 75, 74, 72, 71, 70, 69, 68,
        66, 65, 64, 63, 62, 61, 60, 59,
        58, 57, 56, 55, 54, 53, 52, 51,
        50, 49, 48, 47, 46, 45, 44, 43,
        42, 41, 40, 40, 39, 38, 37, 36,
        35, 35, 34, 33, 32, 31, 31, 30,
        29, 28, 28, 27, 26, 25, 25, 24,
        23, 23, 22, 21, 21, 20, 19, 19,
        18, 17, 17, 16, 15, 15, 14, 14,
        13, 12, 12, 11, 11, 10, 9, 9,
        8, 8, 7, 7, 6, 5, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0
    };
    const int precision = 7;

    if (exp == 0 && frac != 0) { /* subnormal */
        /* normalize the subnormal */
        while (extract64(frac, frac_size - 1, 1) == 0) {
            exp--;
            frac <<= 1;
        }

        frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);

        if (exp != 0 && exp != UINT64_MAX) {
            /*
             * Overflow to inf or max value of same sign,
             * depending on sign and rounding mode.
             */
            s->float_exception_flags |= (float_flag_inexact |
                                         float_flag_overflow);

            if ((s->float_rounding_mode == float_round_to_zero) ||
                ((s->float_rounding_mode == float_round_down) && !sign) ||
                ((s->float_rounding_mode == float_round_up) && sign)) {
                /* Return greatest/largest normal value of same sign. */
                return (sign << (exp_size + frac_size)) |
                       (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
            } else {
                /* Return +-inf. */
                return (sign << (exp_size + frac_size)) |
                       MAKE_64BIT_MASK(frac_size, exp_size);
            }
        }
    }

    int idx = frac >> (frac_size - precision);
    uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
                        (frac_size - precision);
    uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;

    if (out_exp == 0 || out_exp == UINT64_MAX) {
        /*
         * The result is subnormal, but don't raise the underflow
         * exception, because there's no additional loss of precision.
         */
        out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
        if (out_exp == UINT64_MAX) {
            out_frac >>= 1;
            out_exp = 0;
        }
    }

    uint64_t val = 0;
    val = deposit64(val, 0, frac_size, out_frac);
    val = deposit64(val, frac_size, exp_size, out_exp);
    val = deposit64(val, frac_size + exp_size, 1, sign);
    return val;
}
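
/*
 * Worked example (float32): for f = 2.0, exp = 128 and frac = 0, so
 * idx = 0 and lookup_table[0] = 127.  out_exp = 2 * 127 + ~128 = 125,
 * giving 2^-2 * (1 + 127/128) = 0.498046875, i.e. 1/2 to 7 bits.
 */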

static float16 frec7_h(float16 f, float_status *s)
{
    int exp_size = 5, frac_size = 10;
    bool sign = float16_is_neg(f);

    /* frec7(+-inf) = +-0 */
    if (float16_is_infinity(f)) {
        return float16_set_sign(float16_zero, sign);
    }

    /* frec7(+-0) = +-inf */
    if (float16_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float16_set_sign(float16_infinity, sign);
    }

    /* frec7(sNaN) = canonical NaN */
    if (float16_is_signaling_nan(f, s)) {
        s->float_exception_flags |= float_flag_invalid;
        return float16_default_nan(s);
    }

    /* frec7(qNaN) = canonical NaN */
    if (float16_is_quiet_nan(f, s)) {
        return float16_default_nan(s);
    }

    /* +-normal, +-subnormal */
    uint64_t val = frec7(f, exp_size, frac_size, s);
    return make_float16(val);
}

static float32 frec7_s(float32 f, float_status *s)
{
    int exp_size = 8, frac_size = 23;
    bool sign = float32_is_neg(f);

    /* frec7(+-inf) = +-0 */
    if (float32_is_infinity(f)) {
        return float32_set_sign(float32_zero, sign);
    }

    /* frec7(+-0) = +-inf */
    if (float32_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float32_set_sign(float32_infinity, sign);
    }

    /* frec7(sNaN) = canonical NaN */
    if (float32_is_signaling_nan(f, s)) {
        s->float_exception_flags |= float_flag_invalid;
        return float32_default_nan(s);
    }

    /* frec7(qNaN) = canonical NaN */
    if (float32_is_quiet_nan(f, s)) {
        return float32_default_nan(s);
    }

    /* +-normal, +-subnormal */
    uint64_t val = frec7(f, exp_size, frac_size, s);
    return make_float32(val);
}

static float64 frec7_d(float64 f, float_status *s)
{
    int exp_size = 11, frac_size = 52;
    bool sign = float64_is_neg(f);

    /* frec7(+-inf) = +-0 */
    if (float64_is_infinity(f)) {
        return float64_set_sign(float64_zero, sign);
    }

    /* frec7(+-0) = +-inf */
    if (float64_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float64_set_sign(float64_infinity, sign);
    }

    /* frec7(sNaN) = canonical NaN */
    if (float64_is_signaling_nan(f, s)) {
        s->float_exception_flags |= float_flag_invalid;
        return float64_default_nan(s);
    }

    /* frec7(qNaN) = canonical NaN */
    if (float64_is_quiet_nan(f, s)) {
        return float64_default_nan(s);
    }

    /* +-normal, +-subnormal */
    uint64_t val = frec7(f, exp_size, frac_size, s);
    return make_float64(val);
}

RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2)
GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4)
GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8)

/* Vector Floating-Point MIN/MAX Instructions */
RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
GEN_VEXT_VF(vfmin_vf_h, 2, 2)
GEN_VEXT_VF(vfmin_vf_w, 4, 4)
GEN_VEXT_VF(vfmin_vf_d, 8, 8)

RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
GEN_VEXT_VF(vfmax_vf_h, 2, 2)
GEN_VEXT_VF(vfmax_vf_w, 4, 4)
GEN_VEXT_VF(vfmax_vf_d, 8, 8)

/* Vector Floating-Point Sign-Injection Instructions */
static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
{
    return deposit64(b, 0, 15, a);
}

static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
{
    return deposit64(b, 0, 31, a);
}

static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
{
    return deposit64(b, 0, 63, a);
}

RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)

static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
{
    return deposit64(~b, 0, 15, a);
}

static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
{
    return deposit64(~b, 0, 31, a);
}

static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
{
    return deposit64(~b, 0, 63, a);
}

RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)

static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
{
    return deposit64(b ^ a, 0, 15, a);
}

static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
{
    return deposit64(b ^ a, 0, 31, a);
}

static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
{
    return deposit64(b ^ a, 0, 63, a);
}

RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)

/* Vector Floating-Point Compare Instructions */
#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i,                             \
                           DO_OP(s2, s1, &env->fp_status));   \
    }                                                         \
    env->vstart = 0;                                          \
}

GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)

#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                     \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,      \
                  CPURISCVState *env, uint32_t desc)               \
{                                                                  \
    uint32_t vm = vext_vm(desc);                                   \
    uint32_t vl = env->vl;                                         \
    uint32_t i;                                                    \
                                                                   \
    for (i = env->vstart; i < vl; i++) {                           \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                         \
        if (!vm && !vext_elem_mask(v0, i)) {                       \
            continue;                                              \
        }                                                          \
        vext_set_elem_mask(vd, i,                                  \
                           DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
    }                                                              \
    env->vstart = 0;                                               \
}

GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)

static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)

GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)

GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)

static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare(a, b, s);
    return compare == float_relation_greater;
}

static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare(a, b, s);
    return compare == float_relation_greater;
}

static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare(a, b, s);
    return compare == float_relation_greater;
}

GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)

static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)

/* Vector Floating-Point Classify Instruction */
#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)       \
static void do_##NAME(void *vd, void *vs2, int i)    \
{                                                    \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                  \
    *((TD *)vd + HD(i)) = OP(s2);                    \
}

#define GEN_VEXT_V(NAME, ESZ, DSZ)                   \
void HELPER(NAME)(void *vd, void *v0, void *vs2,     \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    uint32_t vm = vext_vm(desc);                     \
    uint32_t vl = env->vl;                           \
    uint32_t i;                                      \
                                                     \
    for (i = env->vstart; i < vl; i++) {             \
        if (!vm && !vext_elem_mask(v0, i)) {         \
            continue;                                \
        }                                            \
        do_##NAME(vd, vs2, i);                       \
    }                                                \
    env->vstart = 0;                                 \
}
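
/*
 * The fclass result is a one-hot mask following the RISC-V FCLASS
 * encoding: bit 0 = -inf, 1 = -normal, 2 = -subnormal, 3 = -zero,
 * 4 = +zero, 5 = +subnormal, 6 = +normal, 7 = +inf, 8 = sNaN, 9 = qNaN.
 */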
target_ulong fclass_h(uint64_t frs1)
{
    float16 f = frs1;
    bool sign = float16_is_neg(f);

    if (float16_is_infinity(f)) {
        return sign ? 1 << 0 : 1 << 7;
    } else if (float16_is_zero(f)) {
        return sign ? 1 << 3 : 1 << 4;
    } else if (float16_is_zero_or_denormal(f)) {
        return sign ? 1 << 2 : 1 << 5;
    } else if (float16_is_any_nan(f)) {
        float_status s = { };
        return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
    } else {
        return sign ? 1 << 1 : 1 << 6;
    }
}

target_ulong fclass_s(uint64_t frs1)
{
    float32 f = frs1;
    bool sign = float32_is_neg(f);

    if (float32_is_infinity(f)) {
        return sign ? 1 << 0 : 1 << 7;
    } else if (float32_is_zero(f)) {
        return sign ? 1 << 3 : 1 << 4;
    } else if (float32_is_zero_or_denormal(f)) {
        return sign ? 1 << 2 : 1 << 5;
    } else if (float32_is_any_nan(f)) {
        float_status s = { };
        return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
    } else {
        return sign ? 1 << 1 : 1 << 6;
    }
}

target_ulong fclass_d(uint64_t frs1)
{
    float64 f = frs1;
    bool sign = float64_is_neg(f);

    if (float64_is_infinity(f)) {
        return sign ? 1 << 0 : 1 << 7;
    } else if (float64_is_zero(f)) {
        return sign ? 1 << 3 : 1 << 4;
    } else if (float64_is_zero_or_denormal(f)) {
        return sign ? 1 << 2 : 1 << 5;
    } else if (float64_is_any_nan(f)) {
        float_status s = { };
        return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
    } else {
        return sign ? 1 << 1 : 1 << 6;
    }
}

RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
GEN_VEXT_V(vfclass_v_h, 2, 2)
GEN_VEXT_V(vfclass_v_w, 4, 4)
GEN_VEXT_V(vfclass_v_d, 8, 8)

/* Vector Floating-Point Merge Instruction */
#define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        *((ETYPE *)vd + H(i))                                 \
            = (!vm && !vext_elem_mask(v0, i) ? s2 : s1);      \
    }                                                         \
    env->vstart = 0;                                          \
}

GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)

/* Single-Width Floating-Point/Integer Type-Convert Instructions */
/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)

/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)

/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)

/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)

/* Widening Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
#define WOP_UU_B uint16_t, uint8_t, uint8_t
#define WOP_UU_H uint32_t, uint16_t, uint16_t
#define WOP_UU_W uint64_t, uint32_t, uint32_t
/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned int. */
RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)

/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)

/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)

/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)

/*
 * vfwcvt.f.f.v vd, vs2, vm
 * Convert single-width float to double-width float.
 */
static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
{
    return float16_to_float32(a, true, s);
}

RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)

/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
#define NOP_UU_B uint8_t, uint16_t, uint32_t
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.w vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4)

/* vfncvt.x.f.w vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1)
GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4)

/* vfncvt.f.xu.w vd, vs2, vm # Convert double-width unsigned integer to float */
RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4)

/* vfncvt.f.x.w vd, vs2, vm # Convert double-width integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4)

/* vfncvt.f.f.w vd, vs2, vm # Convert double float to single-width float. */
static uint16_t vfncvtffv16(uint32_t a, float_status *s)
{
    return float32_to_float16(a, true, s);
}

RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4)

/*
 *** Vector Reduction Operations
 */
/* Vector Single-Width Integer Reduction Instructions */
#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    uint32_t vm = vext_vm(desc);                                \
    uint32_t vl = env->vl;                                      \
    uint32_t i;                                                 \
    TD s1 = *((TD *)vs1 + HD(0));                               \
                                                                \
    for (i = env->vstart; i < vl; i++) {                        \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                        \
        if (!vm && !vext_elem_mask(v0, i)) {                    \
            continue;                                           \
        }                                                       \
        s1 = OP(s1, (TD)s2);                                    \
    }                                                           \
    *((TD *)vd + HD(0)) = s1;                                   \
    env->vstart = 0;                                            \
}
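
/*
 * The reduction accumulator starts from vs1[0], folds in every active
 * vs2 element below vl, and writes only element 0 of vd; masked-off
 * elements are skipped rather than contributing an identity value.
 */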

/* vd[0] = sum(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)

/* vd[0] = maxu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)

/* vd[0] = max(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)

/* vd[0] = minu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)

/* vd[0] = min(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)

/* vd[0] = and(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)

/* vd[0] = or(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)

/* vd[0] = xor(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)

/* Vector Widening Integer Reduction Instructions */
/* signed sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)

/* Unsigned sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)

/* Vector Single-Width Floating-Point Reduction Instructions */
#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
                  void *vs2, CPURISCVState *env,           \
                  uint32_t desc)                           \
{                                                          \
    uint32_t vm = vext_vm(desc);                           \
    uint32_t vl = env->vl;                                 \
    uint32_t i;                                            \
    TD s1 = *((TD *)vs1 + HD(0));                          \
                                                           \
    for (i = env->vstart; i < vl; i++) {                   \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
        if (!vm && !vext_elem_mask(v0, i)) {               \
            continue;                                      \
        }                                                  \
        s1 = OP(s1, (TD)s2, &env->fp_status);              \
    }                                                      \
    *((TD *)vd + HD(0)) = s1;                              \
    env->vstart = 0;                                       \
}

/* Unordered sum */
GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Maximum value */
GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)

/* Minimum value */
GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)

/* Vector Widening Floating-Point Reduction Instructions */
/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
                            void *vs2, CPURISCVState *env, uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;
    uint32_t s1 = *((uint32_t *)vs1 + H4(0));

    for (i = env->vstart; i < vl; i++) {
        uint16_t s2 = *((uint16_t *)vs2 + H2(i));
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
                         &env->fp_status);
    }
    *((uint32_t *)vd + H4(0)) = s1;
    env->vstart = 0;
}

void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
                            void *vs2, CPURISCVState *env, uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;
    uint64_t s1 = *((uint64_t *)vs1);

    for (i = env->vstart; i < vl; i++) {
        uint32_t s2 = *((uint32_t *)vs2 + H4(i));
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
                         &env->fp_status);
    }
    *((uint64_t *)vd) = s1;
    env->vstart = 0;
}

/*
 *** Vector Mask Operations
 */
/* Vector Mask-Register Logical Instructions */
#define GEN_VEXT_MASK_VV(NAME, OP)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vl = env->vl;                                \
    uint32_t i;                                           \
    int a, b;                                             \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        a = vext_elem_mask(vs1, i);                       \
        b = vext_elem_mask(vs2, i);                       \
        vext_set_elem_mask(vd, i, OP(b, a));              \
    }                                                     \
    env->vstart = 0;                                      \
}

#define DO_NAND(N, M) (!(N & M))
#define DO_ANDNOT(N, M) (N & !M)
#define DO_NOR(N, M) (!(N | M))
#define DO_ORNOT(N, M) (N | !M)
#define DO_XNOR(N, M) (!(N ^ M))
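
/*
 * The DO_* helpers above rely on vext_elem_mask() returning exactly
 * 0 or 1: mixing logical '!' with bitwise '&'/'|' is only correct for
 * 0/1 operands (e.g. DO_ANDNOT(2, 0) would wrongly yield 0 otherwise).
 */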

GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)

/* Vector count population in mask vcpop */
target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
                             uint32_t desc)
{
    target_ulong cnt = 0;
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    for (i = env->vstart; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                cnt++;
            }
        }
    }
    env->vstart = 0;
    return cnt;
}

/* vfirst find-first-set mask bit */
target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
                              uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    for (i = env->vstart; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                return i;
            }
        }
    }
    env->vstart = 0;
    return -1LL;
}

enum set_mask_type {
    ONLY_FIRST = 1,
    INCLUDE_FIRST,
    BEFORE_FIRST,
};
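
/*
 * vmsbf/vmsif/vmsof set, respectively, the bits strictly before the
 * first set bit of vs2, up to and including it, or only it.  E.g. for
 * vs2 = 0b00010100 (with vm = 1): vmsbf -> 0b00000011,
 * vmsif -> 0b00000111, vmsof -> 0b00000100.
 */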

static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;
    bool first_mask_bit = false;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            first_mask_bit = true;
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        } else {
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        }
    }
    env->vstart = 0;
}

void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}

/* Vector Iota Instruction */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
                  uint32_t desc)                                     \
{                                                                    \
    uint32_t vm = vext_vm(desc);                                     \
    uint32_t vl = env->vl;                                           \
    uint32_t sum = 0;                                                \
    int i;                                                           \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        if (!vm && !vext_elem_mask(v0, i)) {                         \
            continue;                                                \
        }                                                            \
        *((ETYPE *)vd + H(i)) = sum;                                 \
        if (vext_elem_mask(vs2, i)) {                                \
            sum++;                                                   \
        }                                                            \
    }                                                                \
    env->vstart = 0;                                                 \
}
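
/*
 * Example (vm = 1): for mask bits vs2 = 1,0,1,0 the iota result is
 * vd = 0,1,1,2 - each element receives the count of set mask bits at
 * lower indices.
 */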

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)

/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                   \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
{                                                                        \
    uint32_t vm = vext_vm(desc);                                         \
    uint32_t vl = env->vl;                                               \
    int i;                                                               \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        if (!vm && !vext_elem_mask(v0, i)) {                             \
            continue;                                                    \
        }                                                                \
        *((ETYPE *)vd + H(i)) = i;                                       \
    }                                                                    \
    env->vstart = 0;                                                     \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)

/*
 *** Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    uint32_t vm = vext_vm(desc);                                  \
    uint32_t vl = env->vl;                                        \
    target_ulong offset = s1, i_min, i;                           \
                                                                  \
    i_min = MAX(env->vstart, offset);                             \
    for (i = i_min; i < vl; i++) {                                \
        if (!vm && !vext_elem_mask(v0, i)) {                      \
            continue;                                             \
        }                                                         \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));  \
    }                                                             \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));     \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    target_ulong i_max, i;                                          \
                                                                    \
    i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \
    for (i = env->vstart; i < i_max; ++i) {                         \
        if (vm || vext_elem_mask(v0, i)) {                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));    \
        }                                                           \
    }                                                               \
                                                                    \
    for (i = i_max; i < vl; ++i) {                                  \
        if (vm || vext_elem_mask(v0, i)) {                          \
            *((ETYPE *)vd + H(i)) = 0;                              \
        }                                                           \
    }                                                               \
                                                                    \
    env->vstart = 0;                                                \
}
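
/*
 * In the slidedown loop above, i_max clamps the copy so that i + s1
 * never indexes past vlmax; elements from i_max up to vl fall outside
 * the source register group and are written as zero instead.
 */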

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIE1UP(ESZ, H)                                           \
static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
                            CPURISCVState *env, uint32_t desc)              \
{                                                                           \
    typedef uint##ESZ##_t ETYPE;                                            \
    uint32_t vm = vext_vm(desc);                                            \
    uint32_t vl = env->vl;                                                  \
    uint32_t i;                                                             \
                                                                            \
    for (i = env->vstart; i < vl; i++) {                                    \
        if (!vm && !vext_elem_mask(v0, i)) {                                \
            continue;                                                       \
        }                                                                   \
        if (i == 0) {                                                       \
            *((ETYPE *)vd + H(i)) = s1;                                     \
        } else {                                                            \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));             \
        }                                                                   \
    }                                                                       \
    env->vstart = 0;                                                        \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ)                          \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);                  \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(ESZ, H)                                        \
static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1,          \
                              void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                                           \
    typedef uint##ESZ##_t ETYPE;                                            \
    uint32_t vm = vext_vm(desc);                                            \
    uint32_t vl = env->vl;                                                  \
    uint32_t i;                                                             \
                                                                            \
    for (i = env->vstart; i < vl; i++) {                                    \
        if (!vm && !vext_elem_mask(v0, i)) {                                \
            continue;                                                       \
        }                                                                   \
        if (i == vl - 1) {                                                  \
            *((ETYPE *)vd + H(i)) = s1;                                     \
        } else {                                                            \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));             \
        }                                                                   \
    }                                                                       \
    env->vstart = 0;                                                        \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);                \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ)                     \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);              \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ)                   \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);            \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)

/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)            \
{                                                               \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));   \
    uint32_t vm = vext_vm(desc);                                \
    uint32_t vl = env->vl;                                      \
    uint64_t index;                                             \
    uint32_t i;                                                 \
                                                                \
    for (i = env->vstart; i < vl; i++) {                        \
        if (!vm && !vext_elem_mask(v0, i)) {                    \
            continue;                                           \
        }                                                       \
        index = *((TS1 *)vs1 + HS1(i));                         \
        if (index >= vlmax) {                                   \
            *((TS2 *)vd + HS2(i)) = 0;                          \
        } else {                                                \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
        }                                                       \
    }                                                           \
    env->vstart = 0;                                            \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));   \
    uint32_t vm = vext_vm(desc);                                  \
    uint32_t vl = env->vl;                                        \
    uint64_t index = s1;                                          \
    uint32_t i;                                                   \
                                                                  \
    for (i = env->vstart; i < vl; i++) {                          \
        if (!vm && !vext_elem_mask(v0, i)) {                      \
            continue;                                             \
        }                                                         \
        if (index >= vlmax) {                                     \
            *((ETYPE *)vd + H(i)) = 0;                            \
        } else {                                                  \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));   \
        }                                                         \
    }                                                             \
    env->vstart = 0;                                              \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)               \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)        \
{                                                           \
    uint32_t vl = env->vl;                                  \
    uint32_t num = 0, i;                                    \
                                                            \
    for (i = env->vstart; i < vl; i++) {                    \
        if (!vext_elem_mask(vs1, i)) {                      \
            continue;                                       \
        }                                                   \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));   \
        num++;                                              \
    }                                                       \
    env->vstart = 0;                                        \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

/* Vector Whole Register Move */
#define GEN_VEXT_VMV_WHOLE(NAME, LEN)                      \
void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \
                  uint32_t desc)                           \
{                                                          \
    /* EEW = 8 */                                          \
    uint32_t maxsz = simd_maxsz(desc);                     \
    uint32_t i = env->vstart;                              \
                                                           \
    memcpy((uint8_t *)vd + H1(i),                          \
           (uint8_t *)vs2 + H1(i),                         \
           maxsz - env->vstart);                           \
                                                           \
    env->vstart = 0;                                       \
}
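
/*
 * LEN is deliberately unused above: simd_maxsz(desc) already encodes
 * the total byte size of the register group, so one byte-wise memcpy
 * serves vmv1r..vmv8r alike, resuming from a nonzero vstart if needed.
 */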

GEN_VEXT_VMV_WHOLE(vmv1r_v, 1)
GEN_VEXT_VMV_WHOLE(vmv2r_v, 2)
GEN_VEXT_VMV_WHOLE(vmv4r_v, 4)
GEN_VEXT_VMV_WHOLE(vmv8r_v, 8)

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)      \
void HELPER(NAME)(void *vd, void *v0, void *vs2,           \
                  CPURISCVState *env, uint32_t desc)       \
{                                                          \
    uint32_t vl = env->vl;                                 \
    uint32_t vm = vext_vm(desc);                           \
    uint32_t i;                                            \
                                                           \
    for (i = env->vstart; i < vl; i++) {                   \
        if (!vm && !vext_elem_mask(v0, i)) {               \
            continue;                                      \
        }                                                  \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
    }                                                      \
    env->vstart = 0;                                       \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)