/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "accel/tcg/cpu-ldst.h"
#include "accel/tcg/probe.h"
#include "exec/page-protection.h"
#include "exec/helper-proto.h"
#include "exec/tlb-flags.h"
#include "exec/target_page.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include "vector_internals.h"
#include <math.h>

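/*
 * vsetvl/vsetvli helper: validate the requested vtype in @s2, clamp the
 * requested AVL in @s1 to VLMAX, and update the vl/vtype/vstart state.
 */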
target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2, target_ulong x0)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
    uint16_t sew = 8 << vsew;
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);
    uint16_t vlen = cpu->cfg.vlenb << 3;
    int8_t lmul;

    if (vlmul & 4) {
        /*
         * Fractional LMUL, check:
         *
         * VLEN * LMUL >= SEW
         * VLEN >> (8 - lmul) >= sew
         * => vlen >> (8 - vlmul) >= sew
         */
        if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    /* lmul encoded as in DisasContext::lmul */
    lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
    vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
    if (s1 <= vlmax) {
        vl = s1;
    } else if (s1 < 2 * vlmax && cpu->cfg.rvv_vl_half_avl) {
        vl = (s1 + 1) >> 1;
    } else {
        vl = vlmax;
    }

    if (cpu->cfg.rvv_vsetvl_x0_vill && x0 && (env->vl != vl)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In system mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 *
 * This function can also be used when direct access to probe_access_flags is
 * needed in order to access the flags. If a pointer to a flags operand is
 * provided, the function calls probe_access_flags instead, honouring
 * @nonfault and updating *host and *flags.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr, target_ulong len,
                        uintptr_t ra, MMUAccessType access_type, int mmu_index,
                        void **host, int *flags, bool nonfault)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    if (flags != NULL) {
        *flags = probe_access_flags(env, adjust_addr(env, addr), curlen,
                                    access_type, mmu_index, nonfault, host, ra);
    } else {
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     mmu_index, ra);
    }

    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        if (flags != NULL) {
            *flags = probe_access_flags(env, adjust_addr(env, addr), curlen,
                                        access_type, mmu_index, nonfault,
                                        host, ra);
        } else {
            probe_access(env, adjust_addr(env, addr), curlen, access_type,
                         mmu_index, ra);
        }
    }
}

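/*
 * Write one element of a mask register: mask registers hold one bit per
 * element, packed LSB-first into host uint64_t words.
 */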
static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/* elements operations for load and store */
typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr,
                                   uint32_t idx, void *vd, uintptr_t retaddr);
typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host);

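/*
 * Generate per-element load accessors: a *_tlb variant that goes through
 * cpu_ld*_data_ra() and may fault, and a *_host variant that reads directly
 * through a host pointer obtained from a prior page probe.
 */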
#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static inline QEMU_ALWAYS_INLINE                           \
void NAME##_tlb(CPURISCVState *env, abi_ptr addr,          \
                uint32_t idx, void *vd, uintptr_t retaddr) \
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \
                                                           \
static inline QEMU_ALWAYS_INLINE                           \
void NAME##_host(void *vd, uint32_t idx, void *host)       \
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = (ETYPE)LDSUF##_p(host);                         \
}

GEN_VEXT_LD_ELEM(lde_b, uint8_t,  H1, ldub)
GEN_VEXT_LD_ELEM(lde_h, uint16_t, H2, lduw)
GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq)
199
200#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
201static inline QEMU_ALWAYS_INLINE \
202void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \
203 uint32_t idx, void *vd, uintptr_t retaddr) \
204{ \
205 ETYPE data = *((ETYPE *)vd + H(idx)); \
206 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
207} \
208 \
209static inline QEMU_ALWAYS_INLINE \
210void NAME##_host(void *vd, uint32_t idx, void *host) \
211{ \
212 ETYPE data = *((ETYPE *)vd + H(idx)); \
213 STSUF##_p(host, data); \
214}
215
216GEN_VEXT_ST_ELEM(ste_b, uint8_t, H1, stb)
217GEN_VEXT_ST_ELEM(ste_h, uint16_t, H2, stw)
218GEN_VEXT_ST_ELEM(ste_w, uint32_t, H4, stl)
219GEN_VEXT_ST_ELEM(ste_d, uint64_t, H8, stq)
220
221static inline QEMU_ALWAYS_INLINE void
222vext_continuous_ldst_tlb(CPURISCVState *env, vext_ldst_elem_fn_tlb *ldst_tlb,
223 void *vd, uint32_t evl, target_ulong addr,
224 uint32_t reg_start, uintptr_t ra, uint32_t esz,
225 bool is_load)
226{
227 uint32_t i;
228 for (i = env->vstart; i < evl; env->vstart = ++i, addr += esz) {
229 ldst_tlb(env, adjust_addr(env, addr), i, vd, ra);
230 }
231}
232
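/*
 * Byte-sized elements have no endianness concerns, so on little-endian
 * hosts a run of them can be coalesced into a single memcpy(); all other
 * cases go through the per-element host accessors.
 */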
static inline QEMU_ALWAYS_INLINE void
vext_continuous_ldst_host(CPURISCVState *env, vext_ldst_elem_fn_host *ldst_host,
                          void *vd, uint32_t evl, uint32_t reg_start, void *host,
                          uint32_t esz, bool is_load)
{
#if HOST_BIG_ENDIAN
    for (; reg_start < evl; reg_start++, host += esz) {
        ldst_host(vd, reg_start, host);
    }
#else
    if (esz == 1) {
        uint32_t byte_offset = reg_start * esz;
        uint32_t size = (evl - reg_start) * esz;

        if (is_load) {
            memcpy(vd + byte_offset, host, size);
        } else {
            memcpy(host, vd + byte_offset, size);
        }
    } else {
        for (; reg_start < evl; reg_start++, host += esz) {
            ldst_host(vd, reg_start, host);
        }
    }
#endif
}

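/*
 * Apply the tail-agnostic policy: fill the tail of each of the @nf register
 * groups with all-1s when vta is set.
 */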
static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
                                   uint32_t desc, uint32_t nf,
                                   uint32_t esz, uint32_t max_elems)
{
    uint32_t vta = vext_vta(desc);
    int k;

    if (vta == 0) {
        return;
    }

    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
}

/*
 * stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride,
                 CPURISCVState *env, uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn_tlb *ldst_elem, uint32_t log2_esz,
                 uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    VSTART_CHECK_EARLY_EXIT(env, env->vl);

    for (i = env->vstart; i < env->vl; env->vstart = ++i) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + stride * i + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                   \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,           \
                  target_ulong stride, CPURISCVState *env,         \
                  uint32_t desc)                                   \
{                                                                  \
    uint32_t vm = vext_vm(desc);                                   \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
                     ctzl(sizeof(ETYPE)), GETPC());                \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b_tlb)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h_tlb)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w_tlb)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d_tlb)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                   \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,            \
                  target_ulong stride, CPURISCVState *env,          \
                  uint32_t desc)                                    \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
                     ctzl(sizeof(ETYPE)), GETPC());                 \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b_tlb)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h_tlb)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w_tlb)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb)

/*
 * unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static inline QEMU_ALWAYS_INLINE void
vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr,
                  uint32_t elems, uint32_t nf, uint32_t max_elems,
                  uint32_t log2_esz, bool is_load, int mmu_index,
                  vext_ldst_elem_fn_tlb *ldst_tlb,
                  vext_ldst_elem_fn_host *ldst_host, uintptr_t ra)
{
    void *host;
    int i, k, flags;
    uint32_t esz = 1 << log2_esz;
    uint32_t size = (elems * nf) << log2_esz;
    uint32_t evl = env->vstart + elems;
    MMUAccessType access_type = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;

    /* Check page permission/pmp/watchpoint/etc. */
    probe_pages(env, addr, size, ra, access_type, mmu_index, &host, &flags,
                true);

    if (flags == 0) {
        if (nf == 1) {
            vext_continuous_ldst_host(env, ldst_host, vd, evl, env->vstart,
                                      host, esz, is_load);
        } else {
            for (i = env->vstart; i < evl; ++i) {
                k = 0;
                while (k < nf) {
                    ldst_host(vd, i + k * max_elems, host);
                    host += esz;
                    k++;
                }
            }
        }
        env->vstart += elems;
    } else {
        if (nf == 1) {
            vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart,
                                     ra, esz, is_load);
        } else {
            /* load bytes from guest memory */
            for (i = env->vstart; i < evl; env->vstart = ++i) {
                k = 0;
                while (k < nf) {
                    ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
                             vd, ra);
                    addr += esz;
                    k++;
                }
            }
        }
    }
}

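/*
 * Unit-stride access, split at the first page boundary so that the bulk of
 * the elements can use the fast single-page path above.
 */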
static inline QEMU_ALWAYS_INLINE void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn_tlb *ldst_tlb,
             vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
             uint32_t evl, uintptr_t ra, bool is_load)
{
    uint32_t k;
    target_ulong page_split, elems, addr;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t msize = nf * esz;
    int mmu_index = riscv_env_mmu_index(env, false);

    VSTART_CHECK_EARLY_EXIT(env, evl);

#if defined(CONFIG_USER_ONLY)
    /*
     * For data sizes <= 6 bytes we get better performance by simply calling
     * vext_continuous_ldst_tlb
     */
    if (nf == 1 && (evl << log2_esz) <= 6) {
        addr = base + (env->vstart << log2_esz);
        vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart, ra,
                                 esz, is_load);

        env->vstart = 0;
        vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
        return;
    }
#endif

    /* Calculate the page range of the first page */
    addr = base + ((env->vstart * nf) << log2_esz);
    page_split = -(addr | TARGET_PAGE_MASK);
    /* Get number of elements */
    elems = page_split / msize;
    if (unlikely(env->vstart + elems >= evl)) {
        elems = evl - env->vstart;
    }

    /* Load/store elements in the first page */
    if (likely(elems)) {
        vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
                          is_load, mmu_index, ldst_tlb, ldst_host, ra);
    }

    /* Load/store elements in the second page */
    if (unlikely(env->vstart < evl)) {
        /* Cross page element */
        if (unlikely(page_split % msize)) {
            for (k = 0; k < nf; k++) {
                addr = base + ((env->vstart * nf + k) << log2_esz);
                ldst_tlb(env, adjust_addr(env, addr),
                         env->vstart + k * max_elems, vd, ra);
            }
            env->vstart++;
        }

        addr = base + ((env->vstart * nf) << log2_esz);
        /* Get number of elements */
        elems = evl - env->vstart;

        /* Load/store elements in the second page */
        vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz,
                          is_load, mmu_index, ldst_tlb, ldst_host, ra);
    }

    env->vstart = 0;
    vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
}

/*
 * masked unit-stride load and store operation will be a special case of
 * stride, stride = NF * sizeof (ETYPE)
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST)   \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,  \
                         CPURISCVState *env, uint32_t desc)      \
{                                                                \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));      \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false,     \
                     LOAD_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \
}                                                                \
                                                                 \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  CPURISCVState *env, uint32_t desc)             \
{                                                                \
    vext_ldst_us(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), true);   \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d_tlb, lde_d_host)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST)   \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,    \
                         CPURISCVState *env, uint32_t desc)        \
{                                                                  \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false,       \
                     STORE_FN_TLB, ctzl(sizeof(ETYPE)), GETPC());  \
}                                                                  \
                                                                   \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,           \
                  CPURISCVState *env, uint32_t desc)               \
{                                                                  \
    vext_ldst_us(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), false);    \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b_tlb, ste_b_host)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h_tlb, ste_h_host)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w_tlb, ste_w_host)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d_tlb, ste_d_host)

/*
 * unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b_tlb, lde_b_host,
                 0, evl, GETPC(), true);
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b_tlb, ste_b_host,
                 0, evl, GETPC(), false);
}

/*
 * index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)   \
static target_ulong NAME(target_ulong base,       \
                         uint32_t idx, void *vs2) \
{                                                 \
    return (base + *((ETYPE *)vs2 + H(idx)));     \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn_tlb *ldst_elem,
                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    VSTART_CHECK_EARLY_EXIT(env, env->vl);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; env->vstart = ++i) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,        \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,     \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());     \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d_tlb)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b_tlb)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h_tlb)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w_tlb)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d_tlb)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)      \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,        \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,     \
                    STORE_FN, ctzl(sizeof(ETYPE)),              \
                    GETPC());                                   \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d_tlb)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b_tlb)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h_tlb)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w_tlb)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d_tlb)

/*
 * unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env,
          uint32_t desc, vext_ldst_elem_fn_tlb *ldst_tlb,
          vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t msize = nf * esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, addr_probe, addr_i, offset, remain, page_split, elems;
    int mmu_index = riscv_env_mmu_index(env, false);
    int flags, probe_flags;
    void *host;

    VSTART_CHECK_EARLY_EXIT(env, env->vl);

    addr = base + ((env->vstart * nf) << log2_esz);
    page_split = -(addr | TARGET_PAGE_MASK);
    /* Get number of elements */
    elems = page_split / msize;
    if (unlikely(env->vstart + elems >= env->vl)) {
        elems = env->vl - env->vstart;
    }

    /* Check page permission/pmp/watchpoint/etc. */
    probe_pages(env, addr, elems * msize, ra, MMU_DATA_LOAD, mmu_index, &host,
                &flags, true);

    /* If we are crossing a page check also the second page. */
    if (env->vl > elems) {
        addr_probe = addr + (elems << log2_esz);
        probe_pages(env, addr_probe, elems * msize, ra, MMU_DATA_LOAD,
                    mmu_index, &host, &probe_flags, true);
        flags |= probe_flags;
    }

    if (flags & ~TLB_WATCHPOINT) {
        /* probe every access */
        for (i = env->vstart; i < env->vl; i++) {
            if (!vm && !vext_elem_mask(v0, i)) {
                continue;
            }
            addr_i = adjust_addr(env, base + i * (nf << log2_esz));
            if (i == 0) {
                /* Allow fault on first element. */
                probe_pages(env, addr_i, nf << log2_esz, ra, MMU_DATA_LOAD,
                            mmu_index, &host, NULL, false);
            } else {
                remain = nf << log2_esz;
                while (remain > 0) {
                    offset = -(addr_i | TARGET_PAGE_MASK);

                    /* Probe nonfault on subsequent elements. */
                    probe_pages(env, addr_i, offset, 0, MMU_DATA_LOAD,
                                mmu_index, &host, &flags, true);

                    /*
                     * Stop if invalid (unmapped) or mmio (transaction may
                     * fail). Do not stop if watchpoint, as the spec says that
                     * first-fault should continue to access the same
                     * memory elements in order to satisfy debug requirements,
                     * not to fail the transaction.
                     */
                    if (flags & ~TLB_WATCHPOINT) {
                        vl = i;
                        goto ProbeSuccess;
                    }
                    if (remain <= offset) {
                        break;
                    }
                    remain -= offset;
                    addr_i = adjust_addr(env, addr_i + offset);
                }
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }

    if (env->vstart < env->vl) {
        if (vm) {
            /* Load/store elements in the first page */
            if (likely(elems)) {
                vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
                                  log2_esz, true, mmu_index, ldst_tlb,
                                  ldst_host, ra);
            }

            /* Load/store elements in the second page */
            if (unlikely(env->vstart < env->vl)) {
                /* Cross page element */
                if (unlikely(page_split % msize)) {
                    for (k = 0; k < nf; k++) {
                        addr = base + ((env->vstart * nf + k) << log2_esz);
                        ldst_tlb(env, adjust_addr(env, addr),
                                 env->vstart + k * max_elems, vd, ra);
                    }
                    env->vstart++;
                }

                addr = base + ((env->vstart * nf) << log2_esz);
                /* Get number of elements */
                elems = env->vl - env->vstart;

                /* Load/store elements in the second page */
                vext_page_ldst_us(env, vd, addr, elems, nf, max_elems,
                                  log2_esz, true, mmu_index, ldst_tlb,
                                  ldst_host, ra);
            }
        } else {
            for (i = env->vstart; i < env->vl; i++) {
                k = 0;
                while (k < nf) {
                    if (!vext_elem_mask(v0, i)) {
                        /* set masked-off elements to 1s */
                        vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                          (i + k * max_elems + 1) * esz);
                        k++;
                        continue;
                    }
                    addr = base + ((i * nf + k) << log2_esz);
                    ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems,
                             vd, ra);
                    k++;
                }
            }
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,      \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN_TLB,           \
              LOAD_FN_HOST, ctzl(sizeof(ETYPE)), GETPC());    \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb, lde_d_host)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/*
 * load and store whole register instructions
 */
static inline QEMU_ALWAYS_INLINE void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn_tlb *ldst_tlb,
                vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
                uintptr_t ra, bool is_load)
{
    target_ulong page_split, elems, addr;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
    uint32_t max_elems = vlenb >> log2_esz;
    uint32_t evl = nf * max_elems;
    uint32_t esz = 1 << log2_esz;
    int mmu_index = riscv_env_mmu_index(env, false);

    /* Calculate the page range of the first page */
    addr = base + (env->vstart << log2_esz);
    page_split = -(addr | TARGET_PAGE_MASK);
    /* Get number of elements */
    elems = page_split / esz;
    if (unlikely(env->vstart + elems >= evl)) {
        elems = evl - env->vstart;
    }

    /* Load/store elements in the first page */
    if (likely(elems)) {
        vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
                          is_load, mmu_index, ldst_tlb, ldst_host, ra);
    }

    /* Load/store elements in the second page */
    if (unlikely(env->vstart < evl)) {
        /* Cross page element */
        if (unlikely(page_split % esz)) {
            addr = base + (env->vstart << log2_esz);
            ldst_tlb(env, adjust_addr(env, addr), env->vstart, vd, ra);
            env->vstart++;
        }

        addr = base + (env->vstart << log2_esz);
        /* Get number of elements */
        elems = evl - env->vstart;

        /* Load/store elements in the second page */
        vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz,
                          is_load, mmu_index, ldst_tlb, ldst_host, ra);
    }

    env->vstart = 0;
}

#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST)     \
void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env,    \
                  uint32_t desc)                                      \
{                                                                     \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST,   \
                    ctzl(sizeof(ETYPE)), GETPC(), true);              \
}

GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d_tlb, lde_d_host)
GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d_tlb, lde_d_host)
GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d_tlb, lde_d_host)
GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b_tlb, lde_b_host)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h_tlb, lde_h_host)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w_tlb, lde_w_host)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d_tlb, lde_d_host)

#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST)   \
void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env,    \
                  uint32_t desc)                                      \
{                                                                     \
    vext_ldst_whole(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \
                    ctzl(sizeof(ETYPE)), GETPC(), false);             \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b_tlb, ste_b_host)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b_tlb, ste_b_host)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b_tlb, ste_b_host)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b_tlb, ste_b_host)

/*
 * Vector Integer Arithmetic Instructions
 */

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

GEN_VEXT_VV(vadd_vv_b, 1)
GEN_VEXT_VV(vadd_vv_h, 2)
GEN_VEXT_VV(vadd_vv_w, 4)
GEN_VEXT_VV(vadd_vv_d, 8)
GEN_VEXT_VV(vsub_vv_b, 1)
GEN_VEXT_VV(vsub_vv_h, 2)
GEN_VEXT_VV(vsub_vv_w, 4)
GEN_VEXT_VV(vsub_vv_d, 8)

RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

GEN_VEXT_VX(vadd_vx_b, 1)
GEN_VEXT_VX(vadd_vx_h, 2)
GEN_VEXT_VX(vadd_vx_w, 4)
GEN_VEXT_VX(vadd_vx_d, 8)
GEN_VEXT_VX(vsub_vx_b, 1)
GEN_VEXT_VX(vsub_vx_h, 2)
GEN_VEXT_VX(vsub_vx_w, 4)
GEN_VEXT_VX(vsub_vx_d, 8)
GEN_VEXT_VX(vrsub_vx_b, 1)
GEN_VEXT_VX(vrsub_vx_h, 2)
GEN_VEXT_VX(vrsub_vx_w, 4)
GEN_VEXT_VX(vrsub_vx_d, 8)

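/*
 * gvec-style helpers for reverse-subtract with a scalar operand:
 * d[i] = b - a[i] for each element of the flattened operand buffer.
 */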
void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 2)
GEN_VEXT_VV(vwaddu_vv_h, 4)
GEN_VEXT_VV(vwaddu_vv_w, 8)
GEN_VEXT_VV(vwsubu_vv_b, 2)
GEN_VEXT_VV(vwsubu_vv_h, 4)
GEN_VEXT_VV(vwsubu_vv_w, 8)
GEN_VEXT_VV(vwadd_vv_b, 2)
GEN_VEXT_VV(vwadd_vv_h, 4)
GEN_VEXT_VV(vwadd_vv_w, 8)
GEN_VEXT_VV(vwsub_vv_b, 2)
GEN_VEXT_VV(vwsub_vv_h, 4)
GEN_VEXT_VV(vwsub_vv_w, 8)
GEN_VEXT_VV(vwaddu_wv_b, 2)
GEN_VEXT_VV(vwaddu_wv_h, 4)
GEN_VEXT_VV(vwaddu_wv_w, 8)
GEN_VEXT_VV(vwsubu_wv_b, 2)
GEN_VEXT_VV(vwsubu_wv_h, 4)
GEN_VEXT_VV(vwsubu_wv_w, 8)
GEN_VEXT_VV(vwadd_wv_b, 2)
GEN_VEXT_VV(vwadd_wv_h, 4)
GEN_VEXT_VV(vwadd_wv_w, 8)
GEN_VEXT_VV(vwsub_wv_b, 2)
GEN_VEXT_VV(vwsub_wv_h, 4)
GEN_VEXT_VV(vwsub_wv_w, 8)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 2)
GEN_VEXT_VX(vwaddu_vx_h, 4)
GEN_VEXT_VX(vwaddu_vx_w, 8)
GEN_VEXT_VX(vwsubu_vx_b, 2)
GEN_VEXT_VX(vwsubu_vx_h, 4)
GEN_VEXT_VX(vwsubu_vx_w, 8)
GEN_VEXT_VX(vwadd_vx_b, 2)
GEN_VEXT_VX(vwadd_vx_h, 4)
GEN_VEXT_VX(vwadd_vx_w, 8)
GEN_VEXT_VX(vwsub_vx_b, 2)
GEN_VEXT_VX(vwsub_vx_h, 4)
GEN_VEXT_VX(vwsub_vx_w, 8)
GEN_VEXT_VX(vwaddu_wx_b, 2)
GEN_VEXT_VX(vwaddu_wx_h, 4)
GEN_VEXT_VX(vwaddu_wx_w, 8)
GEN_VEXT_VX(vwsubu_wx_b, 2)
GEN_VEXT_VX(vwsubu_wx_h, 4)
GEN_VEXT_VX(vwsubu_wx_w, 8)
GEN_VEXT_VX(vwadd_wx_b, 2)
GEN_VEXT_VX(vwadd_wx_h, 4)
GEN_VEXT_VX(vwadd_wx_w, 8)
GEN_VEXT_VX(vwsub_wx_b, 2)
GEN_VEXT_VX(vwsub_wx_h, 4)
GEN_VEXT_VX(vwsub_wx_w, 8)

/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

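/*
 * vadc/vsbc compute vd[i] = vs2[i] +/- vs1[i] +/- carry-in, where the
 * carry-in for element i is bit i of mask register v0.
 */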
#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t esz = sizeof(ETYPE);                             \
    uint32_t total_elems =                                    \
        vext_get_total_elems(env, desc, esz);                 \
    uint32_t vta = vext_vta(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env, vl);                         \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = vext_elem_mask(v0, i);                  \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
    env->vstart = 0;                                          \
                                                              \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t esz = sizeof(ETYPE);                                        \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);         \
    uint32_t vta = vext_vta(desc);                                       \
    uint32_t i;                                                          \
                                                                         \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                    \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
        ETYPE carry = vext_elem_mask(v0, i);                             \
                                                                         \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                    \
    env->vstart = 0;                                                     \
                                                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);             \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

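/*
 * Carry/borrow-out computed without a wider type: with a carry-in, the sum
 * wraps to a value <= N exactly when a carry-out occurs; without one, it
 * wraps to a value < N.
 */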
#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
                          (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)

#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env, vl);                         \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination register are always tail-agnostic
     * its tail elements are all 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)                 \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,            \
                  void *vs2, CPURISCVState *env, uint32_t desc)   \
{                                                                 \
    uint32_t vl = env->vl;                                        \
    uint32_t vm = vext_vm(desc);                                  \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;        \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                  \
    uint32_t i;                                                   \
                                                                  \
    VSTART_CHECK_EARLY_EXIT(env, vl);                             \
                                                                  \
    for (i = env->vstart; i < vl; i++) {                          \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                        \
        ETYPE carry = !vm && vext_elem_mask(v0, i);               \
        vext_set_elem_mask(vd, i,                                 \
                           DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
    }                                                             \
    env->vstart = 0;                                              \
    /*
     * mask destination register are always tail-agnostic
     * its tail elements are all 1s
     */                                                           \
    if (vta_all_1s) {                                             \
        for (; i < total_elems; i++) {                            \
            vext_set_elem_mask(vd, i, 1);                         \
        }                                                         \
    }                                                             \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1)
GEN_VEXT_VV(vand_vv_h, 2)
GEN_VEXT_VV(vand_vv_w, 4)
GEN_VEXT_VV(vand_vv_d, 8)
GEN_VEXT_VV(vor_vv_b, 1)
GEN_VEXT_VV(vor_vv_h, 2)
GEN_VEXT_VV(vor_vv_w, 4)
GEN_VEXT_VV(vor_vv_d, 8)
GEN_VEXT_VV(vxor_vv_b, 1)
GEN_VEXT_VV(vxor_vv_h, 2)
GEN_VEXT_VV(vxor_vv_w, 4)
GEN_VEXT_VV(vxor_vv_d, 8)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1)
GEN_VEXT_VX(vand_vx_h, 2)
GEN_VEXT_VX(vand_vx_w, 4)
GEN_VEXT_VX(vand_vx_d, 8)
GEN_VEXT_VX(vor_vx_b, 1)
GEN_VEXT_VX(vor_vx_h, 2)
GEN_VEXT_VX(vor_vx_w, 4)
GEN_VEXT_VX(vor_vx_d, 8)
GEN_VEXT_VX(vxor_vx_b, 1)
GEN_VEXT_VX(vxor_vx_h, 2)
GEN_VEXT_VX(vxor_vx_w, 4)
GEN_VEXT_VX(vxor_vx_d, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M) (N << (M))
#define DO_SRL(N, M) (N >> (M))

/* generate the helpers for shift instructions with two vector operators */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)     \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                  \
                  void *vs2, CPURISCVState *env, uint32_t desc)   \
{                                                                 \
    uint32_t vm = vext_vm(desc);                                  \
    uint32_t vl = env->vl;                                        \
    uint32_t esz = sizeof(TS1);                                   \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);  \
    uint32_t vta = vext_vta(desc);                                \
    uint32_t vma = vext_vma(desc);                                \
    uint32_t i;                                                   \
                                                                  \
    VSTART_CHECK_EARLY_EXIT(env, vl);                             \
                                                                  \
    for (i = env->vstart; i < vl; i++) {                          \
        if (!vm && !vext_elem_mask(v0, i)) {                      \
            /* set masked-off elements to 1s */                   \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);   \
            continue;                                             \
        }                                                         \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                          \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                          \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                \
    }                                                             \
    env->vstart = 0;                                              \
                                                                  \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);      \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t,  H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t,  uint8_t,  H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t,  H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/*
 * generate the helpers for shift instructions with one vector and one scalar
 */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK)      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,           \
                  void *vs2, CPURISCVState *env,                 \
                  uint32_t desc)                                 \
{                                                                \
    uint32_t vm = vext_vm(desc);                                 \
    uint32_t vl = env->vl;                                       \
    uint32_t esz = sizeof(TD);                                   \
    uint32_t total_elems =                                       \
        vext_get_total_elems(env, desc, esz);                    \
    uint32_t vta = vext_vta(desc);                               \
    uint32_t vma = vext_vma(desc);                               \
    uint32_t i;                                                  \
                                                                 \
    VSTART_CHECK_EARLY_EXIT(env, vl);                            \
                                                                 \
    for (i = env->vstart; i < vl; i++) {                         \
        if (!vm && !vext_elem_mask(v0, i)) {                     \
            /* set masked-off elements to 1s */                  \
            vext_set_elems_1s(vd, vma, i * esz,                  \
                              (i + 1) * esz);                    \
            continue;                                            \
        }                                                        \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                         \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);                 \
    }                                                            \
    env->vstart = 0;                                             \
                                                                 \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);     \
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t,  int8_t,  H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t,  uint8_t,  H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t,  int8_t,  H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t,  int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)

/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env, vl);                         \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination register are always tail-agnostic
     * its tail elements are all 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                    \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    VSTART_CHECK_EARLY_EXIT(env, vl);                               \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            if (vma) {                                              \
                vext_set_elem_mask(vd, i, 1);                       \
            }                                                       \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                           DO_OP(s2, (ETYPE)(target_long)s1));      \
    }                                                               \
    env->vstart = 0;                                                \
    /*
     * mask destination register are always tail-agnostic
     * its tail elements are all 1s
     */                                                             \
    if (vta_all_1s) {                                               \
        for (; i < total_elems; i++) {                              \
            vext_set_elem_mask(vd, i, 1);                           \
        }                                                           \
    }                                                               \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)

/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1)
GEN_VEXT_VV(vminu_vv_h, 2)
GEN_VEXT_VV(vminu_vv_w, 4)
GEN_VEXT_VV(vminu_vv_d, 8)
GEN_VEXT_VV(vmin_vv_b, 1)
GEN_VEXT_VV(vmin_vv_h, 2)
GEN_VEXT_VV(vmin_vv_w, 4)
GEN_VEXT_VV(vmin_vv_d, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8)
GEN_VEXT_VV(vmax_vv_b, 1)
GEN_VEXT_VV(vmax_vv_h, 2)
GEN_VEXT_VV(vmax_vv_w, 4)
GEN_VEXT_VV(vmax_vv_d, 8)

RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b, 1)
GEN_VEXT_VX(vminu_vx_h, 2)
GEN_VEXT_VX(vminu_vx_w, 4)
GEN_VEXT_VX(vminu_vx_d, 8)
GEN_VEXT_VX(vmin_vx_b, 1)
GEN_VEXT_VX(vmin_vx_h, 2)
GEN_VEXT_VX(vmin_vx_w, 4)
GEN_VEXT_VX(vmin_vx_d, 8)
GEN_VEXT_VX(vmaxu_vx_b, 1)
GEN_VEXT_VX(vmaxu_vx_h, 2)
GEN_VEXT_VX(vmaxu_vx_w, 4)
GEN_VEXT_VX(vmaxu_vx_d, 8)
GEN_VEXT_VX(vmax_vx_b, 1)
GEN_VEXT_VX(vmax_vx_h, 2)
GEN_VEXT_VX(vmax_vx_w, 4)
GEN_VEXT_VX(vmax_vx_d, 8)

/* Vector Single-Width Integer Multiply Instructions */
#define DO_MUL(N, M) (N * M)
RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
GEN_VEXT_VV(vmul_vv_b, 1)
GEN_VEXT_VV(vmul_vv_h, 2)
GEN_VEXT_VV(vmul_vv_w, 4)
GEN_VEXT_VV(vmul_vv_d, 8)

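/* Return the high half of the widened signed product. */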
static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    return (int16_t)s2 * (int16_t)s1 >> 8;
}

static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    return (int32_t)s2 * (int32_t)s1 >> 16;
}

static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    return (int64_t)s2 * (int64_t)s1 >> 32;
}

static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t hi_64, lo_64;

    muls64(&lo_64, &hi_64, s1, s2);
    return hi_64;
}

static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    return (uint16_t)s2 * (uint16_t)s1 >> 8;
}

static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    return (uint32_t)s2 * (uint32_t)s1 >> 16;
}

static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    return (uint64_t)s2 * (uint64_t)s1 >> 32;
}

static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);
    return hi_64;
}

static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    return (int16_t)s2 * (uint16_t)s1 >> 8;
}

static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    return (int32_t)s2 * (uint32_t)s1 >> 16;
}

static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    return (int64_t)s2 * (uint64_t)s1 >> 32;
}
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
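/*
 * Signed (s2) by unsigned (s1) high-half multiply, built from an
 * unsigned multiply plus a correction term. When s2 < 0, its bit
 * pattern reinterpreted as unsigned is s2 + 2^64, so mulu64()
 * computes (s2 + 2^64) * s1 = s2 * s1 + 2^64 * s1; the unsigned
 * high half therefore exceeds the desired signed high half by
 * exactly s1, and subtracting s1 when s2 < 0 corrects it.
 */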
static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);
    hi_64 -= s2 < 0 ? s1 : 0;
    return hi_64;
}

RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
GEN_VEXT_VV(vmulh_vv_b, 1)
GEN_VEXT_VV(vmulh_vv_h, 2)
GEN_VEXT_VV(vmulh_vv_w, 4)
GEN_VEXT_VV(vmulh_vv_d, 8)
GEN_VEXT_VV(vmulhu_vv_b, 1)
GEN_VEXT_VV(vmulhu_vv_h, 2)
GEN_VEXT_VV(vmulhu_vv_w, 4)
GEN_VEXT_VV(vmulhu_vv_d, 8)
GEN_VEXT_VV(vmulhsu_vv_b, 1)
GEN_VEXT_VV(vmulhsu_vv_h, 2)
GEN_VEXT_VV(vmulhsu_vv_w, 4)
GEN_VEXT_VV(vmulhsu_vv_d, 8)

RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
GEN_VEXT_VX(vmul_vx_b, 1)
GEN_VEXT_VX(vmul_vx_h, 2)
GEN_VEXT_VX(vmul_vx_w, 4)
GEN_VEXT_VX(vmul_vx_d, 8)
GEN_VEXT_VX(vmulh_vx_b, 1)
GEN_VEXT_VX(vmulh_vx_h, 2)
GEN_VEXT_VX(vmulh_vx_w, 4)
GEN_VEXT_VX(vmulh_vx_d, 8)
GEN_VEXT_VX(vmulhu_vx_b, 1)
GEN_VEXT_VX(vmulhu_vx_h, 2)
GEN_VEXT_VX(vmulhu_vx_w, 4)
GEN_VEXT_VX(vmulhu_vx_d, 8)
GEN_VEXT_VX(vmulhsu_vx_b, 1)
GEN_VEXT_VX(vmulhsu_vx_h, 2)
GEN_VEXT_VX(vmulhsu_vx_w, 4)
GEN_VEXT_VX(vmulhsu_vx_d, 8)

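/* Vector Integer Divide Instructions */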
#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
#define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) : \
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
#define DO_REM(N, M)  (unlikely(M == 0) ? N : \
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)

RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
GEN_VEXT_VV(vdivu_vv_b, 1)
GEN_VEXT_VV(vdivu_vv_h, 2)
GEN_VEXT_VV(vdivu_vv_w, 4)
GEN_VEXT_VV(vdivu_vv_d, 8)
GEN_VEXT_VV(vdiv_vv_b, 1)
GEN_VEXT_VV(vdiv_vv_h, 2)
GEN_VEXT_VV(vdiv_vv_w, 4)
GEN_VEXT_VV(vdiv_vv_d, 8)
GEN_VEXT_VV(vremu_vv_b, 1)
GEN_VEXT_VV(vremu_vv_h, 2)
GEN_VEXT_VV(vremu_vv_w, 4)
GEN_VEXT_VV(vremu_vv_d, 8)
GEN_VEXT_VV(vrem_vv_b, 1)
GEN_VEXT_VV(vrem_vv_h, 2)
GEN_VEXT_VV(vrem_vv_w, 4)
GEN_VEXT_VV(vrem_vv_d, 8)

RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
GEN_VEXT_VX(vdivu_vx_b, 1)
GEN_VEXT_VX(vdivu_vx_h, 2)
GEN_VEXT_VX(vdivu_vx_w, 4)
GEN_VEXT_VX(vdivu_vx_d, 8)
GEN_VEXT_VX(vdiv_vx_b, 1)
GEN_VEXT_VX(vdiv_vx_h, 2)
GEN_VEXT_VX(vdiv_vx_w, 4)
GEN_VEXT_VX(vdiv_vx_d, 8)
GEN_VEXT_VX(vremu_vx_b, 1)
GEN_VEXT_VX(vremu_vx_h, 2)
GEN_VEXT_VX(vremu_vx_w, 4)
GEN_VEXT_VX(vremu_vx_d, 8)
GEN_VEXT_VX(vrem_vx_b, 1)
GEN_VEXT_VX(vrem_vx_h, 2)
GEN_VEXT_VX(vrem_vx_w, 4)
GEN_VEXT_VX(vrem_vx_d, 8)

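/* Vector Widening Integer Multiply Instructions */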
RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
GEN_VEXT_VV(vwmul_vv_b, 2)
GEN_VEXT_VV(vwmul_vv_h, 4)
GEN_VEXT_VV(vwmul_vv_w, 8)
GEN_VEXT_VV(vwmulu_vv_b, 2)
GEN_VEXT_VV(vwmulu_vv_h, 4)
GEN_VEXT_VV(vwmulu_vv_w, 8)
GEN_VEXT_VV(vwmulsu_vv_b, 2)
GEN_VEXT_VV(vwmulsu_vv_h, 4)
GEN_VEXT_VV(vwmulsu_vv_w, 8)

RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
GEN_VEXT_VX(vwmul_vx_b, 2)
GEN_VEXT_VX(vwmul_vx_h, 4)
GEN_VEXT_VX(vwmul_vx_w, 8)
GEN_VEXT_VX(vwmulu_vx_b, 2)
GEN_VEXT_VX(vwmulu_vx_h, 4)
GEN_VEXT_VX(vwmulu_vx_w, 8)
GEN_VEXT_VX(vwmulsu_vx_b, 2)
GEN_VEXT_VX(vwmulsu_vx_h, 4)
GEN_VEXT_VX(vwmulsu_vx_w, 8)

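/* Vector Single-Width Integer Multiply-Add Instructions */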
#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)   \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    TD d = *((TD *)vd + HD(i));                                \
    *((TD *)vd + HD(i)) = OP(s2, s1, d);                       \
}

#define DO_MACC(N, M, D) (M * N + D)
#define DO_NMSAC(N, M, D) (-(M * N) + D)
#define DO_MADD(N, M, D) (M * D + N)
#define DO_NMSUB(N, M, D) (-(M * D) + N)
RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
GEN_VEXT_VV(vmacc_vv_b, 1)
GEN_VEXT_VV(vmacc_vv_h, 2)
GEN_VEXT_VV(vmacc_vv_w, 4)
GEN_VEXT_VV(vmacc_vv_d, 8)
GEN_VEXT_VV(vnmsac_vv_b, 1)
GEN_VEXT_VV(vnmsac_vv_h, 2)
GEN_VEXT_VV(vnmsac_vv_w, 4)
GEN_VEXT_VV(vnmsac_vv_d, 8)
GEN_VEXT_VV(vmadd_vv_b, 1)
GEN_VEXT_VV(vmadd_vv_h, 2)
GEN_VEXT_VV(vmadd_vv_w, 4)
GEN_VEXT_VV(vmadd_vv_d, 8)
GEN_VEXT_VV(vnmsub_vv_b, 1)
GEN_VEXT_VV(vnmsub_vv_h, 2)
GEN_VEXT_VV(vnmsub_vv_w, 4)
GEN_VEXT_VV(vnmsub_vv_d, 8)

#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    TD d = *((TD *)vd + HD(i));                                     \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
}

RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
GEN_VEXT_VX(vmacc_vx_b, 1)
GEN_VEXT_VX(vmacc_vx_h, 2)
GEN_VEXT_VX(vmacc_vx_w, 4)
GEN_VEXT_VX(vmacc_vx_d, 8)
GEN_VEXT_VX(vnmsac_vx_b, 1)
GEN_VEXT_VX(vnmsac_vx_h, 2)
GEN_VEXT_VX(vnmsac_vx_w, 4)
GEN_VEXT_VX(vnmsac_vx_d, 8)
GEN_VEXT_VX(vmadd_vx_b, 1)
GEN_VEXT_VX(vmadd_vx_h, 2)
GEN_VEXT_VX(vmadd_vx_w, 4)
GEN_VEXT_VX(vmadd_vx_d, 8)
GEN_VEXT_VX(vnmsub_vx_b, 1)
GEN_VEXT_VX(vnmsub_vx_h, 2)
GEN_VEXT_VX(vnmsub_vx_w, 4)
GEN_VEXT_VX(vnmsub_vx_d, 8)

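/* Vector Widening Integer Multiply-Add Instructions */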
RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
GEN_VEXT_VV(vwmaccu_vv_b, 2)
GEN_VEXT_VV(vwmaccu_vv_h, 4)
GEN_VEXT_VV(vwmaccu_vv_w, 8)
GEN_VEXT_VV(vwmacc_vv_b, 2)
GEN_VEXT_VV(vwmacc_vv_h, 4)
GEN_VEXT_VV(vwmacc_vv_w, 8)
GEN_VEXT_VV(vwmaccsu_vv_b, 2)
GEN_VEXT_VV(vwmaccsu_vv_h, 4)
GEN_VEXT_VV(vwmaccsu_vv_w, 8)

RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
GEN_VEXT_VX(vwmaccu_vx_b, 2)
GEN_VEXT_VX(vwmaccu_vx_h, 4)
GEN_VEXT_VX(vwmaccu_vx_w, 8)
GEN_VEXT_VX(vwmacc_vx_b, 2)
GEN_VEXT_VX(vwmacc_vx_h, 4)
GEN_VEXT_VX(vwmacc_vx_w, 8)
GEN_VEXT_VX(vwmaccsu_vx_b, 2)
GEN_VEXT_VX(vwmaccsu_vx_h, 4)
GEN_VEXT_VX(vwmaccsu_vx_w, 8)
GEN_VEXT_VX(vwmaccus_vx_b, 2)
GEN_VEXT_VX(vwmaccus_vx_h, 4)
GEN_VEXT_VX(vwmaccus_vx_w, 8)

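/* Vector Integer Merge and Move Instructions */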
#define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
                  uint32_t desc)                                     \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
        *((ETYPE *)vd + H(i)) = s1;                                  \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1)
GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)

#define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
                  uint32_t desc)                                     \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1)
GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)

#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
                  CPURISCVState *env, uint32_t desc)                 \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
        *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1)
GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)

#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
                  void *vs2, CPURISCVState *env, uint32_t desc)      \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
        ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
                   (ETYPE)(target_long)s1);                          \
        *((ETYPE *)vd + H(i)) = d;                                   \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1)
GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)

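/*
 * Vector Fixed-Point Arithmetic Instructions
 *
 * These helpers take the rounding mode from env->vxrm
 * (0: round-to-nearest-up, 1: round-to-nearest-even,
 * 2: round-down/truncate, 3: round-to-odd) and report
 * saturation by setting the sticky vxsat flag.
 */

/* Vector Single-Width Saturating Add and Subtract */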
typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
                          CPURISCVState *env, int vxrm);

#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
static inline void                                                  \
do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
          CPURISCVState *env, int vxrm)                             \
{                                                                   \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
}

static inline void
vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
             CPURISCVState *env,
             uint32_t vl, uint32_t vm, int vxrm,
             opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
{
    for (uint32_t i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
            continue;
        }
        fn(vd, vs1, vs2, i, env, vxrm);
    }
    env->vstart = 0;
}

static inline void
vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
             CPURISCVState *env,
             uint32_t desc,
             opivv2_rm_fn *fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);

    VSTART_CHECK_EARLY_EXIT(env, vl);

    switch (env->vxrm) {
    case 0: /* rnu */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 0, fn, vma, esz);
        break;
    case 1: /* rne */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 1, fn, vma, esz);
        break;
    case 2: /* rdn */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 2, fn, vma, esz);
        break;
    default: /* rod */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 3, fn, vma, esz);
        break;
    }

    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

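/* generate helpers for fixed point instructions with OPIVV format */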
#define GEN_VEXT_VV_RM(NAME, ESZ)                               \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)            \
{                                                               \
    vext_vv_rm_2(vd, v0, vs1, vs2, env, desc,                   \
                 do_##NAME, ESZ);                               \
}

static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a,
                             uint8_t b)
{
    uint8_t res = a + b;
    if (res < a) {
        res = UINT8_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
                               uint16_t b)
{
    uint16_t res = a + b;
    if (res < a) {
        res = UINT16_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
                               uint32_t b)
{
    uint32_t res = a + b;
    if (res < a) {
        res = UINT32_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
                               uint64_t b)
{
    uint64_t res = a + b;
    if (res < a) {
        res = UINT64_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
GEN_VEXT_VV_RM(vsaddu_vv_d, 8)

typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
                          CPURISCVState *env, int vxrm);

#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
static inline void                                                  \
do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
          CPURISCVState *env, int vxrm)                             \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
}

static inline void
vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
             CPURISCVState *env,
             uint32_t vl, uint32_t vm, int vxrm,
             opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
{
    for (uint32_t i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
            continue;
        }
        fn(vd, s1, vs2, i, env, vxrm);
    }
    env->vstart = 0;
}

static inline void
vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
             CPURISCVState *env,
             uint32_t desc,
             opivx2_rm_fn *fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);

    VSTART_CHECK_EARLY_EXIT(env, vl);

    switch (env->vxrm) {
    case 0: /* rnu */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 0, fn, vma, esz);
        break;
    case 1: /* rne */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 1, fn, vma, esz);
        break;
    case 2: /* rdn */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 2, fn, vma, esz);
        break;
    default: /* rod */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 3, fn, vma, esz);
        break;
    }

    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

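/* generate helpers for fixed point instructions with OPIVX format */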
#define GEN_VEXT_VX_RM(NAME, ESZ)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    vext_vx_rm_2(vd, v0, s1, vs2, env, desc,              \
                 do_##NAME, ESZ);                         \
}

RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
GEN_VEXT_VX_RM(vsaddu_vx_d, 8)

static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
    int8_t res = a + b;
    if ((res ^ a) & (res ^ b) & INT8_MIN) {
        res = a > 0 ? INT8_MAX : INT8_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a,
                             int16_t b)
{
    int16_t res = a + b;
    if ((res ^ a) & (res ^ b) & INT16_MIN) {
        res = a > 0 ? INT16_MAX : INT16_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int32_t res = a + b;
    if ((res ^ a) & (res ^ b) & INT32_MIN) {
        res = a > 0 ? INT32_MAX : INT32_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = a + b;
    if ((res ^ a) & (res ^ b) & INT64_MIN) {
        res = a > 0 ? INT64_MAX : INT64_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
GEN_VEXT_VV_RM(vsadd_vv_b, 1)
GEN_VEXT_VV_RM(vsadd_vv_h, 2)
GEN_VEXT_VV_RM(vsadd_vv_w, 4)
GEN_VEXT_VV_RM(vsadd_vv_d, 8)

RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
GEN_VEXT_VX_RM(vsadd_vx_b, 1)
GEN_VEXT_VX_RM(vsadd_vx_h, 2)
GEN_VEXT_VX_RM(vsadd_vx_w, 4)
GEN_VEXT_VX_RM(vsadd_vx_d, 8)

static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a,
                             uint8_t b)
{
    uint8_t res = a - b;
    if (res > a) {
        res = 0;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
                               uint16_t b)
{
    uint16_t res = a - b;
    if (res > a) {
        res = 0;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
                               uint32_t b)
{
    uint32_t res = a - b;
    if (res > a) {
        res = 0;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
                               uint64_t b)
{
    uint64_t res = a - b;
    if (res > a) {
        res = 0;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
GEN_VEXT_VV_RM(vssubu_vv_b, 1)
GEN_VEXT_VV_RM(vssubu_vv_h, 2)
GEN_VEXT_VV_RM(vssubu_vv_w, 4)
GEN_VEXT_VV_RM(vssubu_vv_d, 8)

RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
GEN_VEXT_VX_RM(vssubu_vx_b, 1)
GEN_VEXT_VX_RM(vssubu_vx_h, 2)
GEN_VEXT_VX_RM(vssubu_vx_w, 4)
GEN_VEXT_VX_RM(vssubu_vx_d, 8)

static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
    int8_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT8_MIN) {
        res = a >= 0 ? INT8_MAX : INT8_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
                             int16_t b)
{
    int16_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT16_MIN) {
        res = a >= 0 ? INT16_MAX : INT16_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int32_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT32_MIN) {
        res = a >= 0 ? INT32_MAX : INT32_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT64_MIN) {
        res = a >= 0 ? INT64_MAX : INT64_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
GEN_VEXT_VV_RM(vssub_vv_b, 1)
GEN_VEXT_VV_RM(vssub_vv_h, 2)
GEN_VEXT_VV_RM(vssub_vv_w, 4)
GEN_VEXT_VV_RM(vssub_vv_d, 8)

RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
GEN_VEXT_VX_RM(vssub_vx_b, 1)
GEN_VEXT_VX_RM(vssub_vx_h, 2)
GEN_VEXT_VX_RM(vssub_vx_w, 4)
GEN_VEXT_VX_RM(vssub_vx_d, 8)

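/*
 * Vector Single-Width Averaging Add and Subtract.
 *
 * get_round() returns the 0/1 increment that implements the current
 * rounding mode when the low 'shift' bits of v are about to be
 * discarded by a right shift; callers pass shift values in [1, 63].
 */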
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t d, d1;
    uint64_t D1, D2;

    if (shift == 0 || shift > 64) {
        return 0;
    }

    d = extract64(v, shift, 1);      /* the bit that becomes the new LSB */
    d1 = extract64(v, shift - 1, 1); /* most significant discarded bit */
    D1 = extract64(v, 0, shift);     /* all discarded bits */
    if (vxrm == 0) {        /* round-to-nearest-up */
        return d1;
    } else if (vxrm == 1) { /* round-to-nearest-even */
        if (shift > 1) {
            D2 = extract64(v, 0, shift - 1);
            return d1 & ((D2 != 0) | d);
        } else {
            return d1 & d;
        }
    } else if (vxrm == 3) { /* round-to-odd */
        return !d & (D1 != 0);
    }
    return 0;               /* round-down (truncate) */
}

static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int64_t res = (int64_t)a + b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63. */
    return ((res >> 1) ^ over) + round;
}

RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
GEN_VEXT_VV_RM(vaadd_vv_b, 1)
GEN_VEXT_VV_RM(vaadd_vv_h, 2)
GEN_VEXT_VV_RM(vaadd_vv_w, 4)
GEN_VEXT_VV_RM(vaadd_vv_d, 8)

RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
GEN_VEXT_VX_RM(vaadd_vx_b, 1)
GEN_VEXT_VX_RM(vaadd_vx_h, 2)
GEN_VEXT_VX_RM(vaadd_vx_w, 4)
GEN_VEXT_VX_RM(vaadd_vx_d, 8)

static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
                               uint32_t a, uint32_t b)
{
    uint64_t res = (uint64_t)a + b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
                               uint64_t a, uint64_t b)
{
    uint64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    uint64_t over = (uint64_t)(res < a) << 63;

    return ((res >> 1) | over) + round;
}

RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
GEN_VEXT_VV_RM(vaaddu_vv_d, 8)

RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
GEN_VEXT_VX_RM(vaaddu_vx_d, 8)

static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int64_t res = (int64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = (int64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);
    int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63. */
    return ((res >> 1) ^ over) + round;
}

RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
GEN_VEXT_VV_RM(vasub_vv_b, 1)
GEN_VEXT_VV_RM(vasub_vv_h, 2)
GEN_VEXT_VV_RM(vasub_vv_w, 4)
GEN_VEXT_VV_RM(vasub_vv_d, 8)

RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
GEN_VEXT_VX_RM(vasub_vx_b, 1)
GEN_VEXT_VX_RM(vasub_vx_h, 2)
GEN_VEXT_VX_RM(vasub_vx_w, 4)
GEN_VEXT_VX_RM(vasub_vx_d, 8)

static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
                               uint32_t a, uint32_t b)
{
    int64_t res = (int64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
                               uint64_t a, uint64_t b)
{
    uint64_t res = (uint64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);
    uint64_t over = (uint64_t)(res > a) << 63;

    return ((res >> 1) | over) + round;
}

RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
GEN_VEXT_VV_RM(vasubu_vv_b, 1)
GEN_VEXT_VV_RM(vasubu_vv_h, 2)
GEN_VEXT_VV_RM(vasubu_vv_w, 4)
GEN_VEXT_VV_RM(vasubu_vv_d, 8)

RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
GEN_VEXT_VX_RM(vasubu_vx_b, 1)
GEN_VEXT_VX_RM(vasubu_vx_h, 2)
GEN_VEXT_VX_RM(vasubu_vx_w, 4)
GEN_VEXT_VX_RM(vasubu_vx_d, 8)

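/* Vector Single-Width Fractional Multiply with Rounding and Saturation */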
static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
    uint8_t round;
    int16_t res;

    res = (int16_t)a * (int16_t)b;
    round = get_round(vxrm, res, 7);
    res = (res >> 7) + round;

    if (res > INT8_MAX) {
        env->vxsat = 0x1;
        return INT8_MAX;
    } else if (res < INT8_MIN) {
        env->vxsat = 0x1;
        return INT8_MIN;
    } else {
        return res;
    }
}

static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
{
    uint8_t round;
    int32_t res;

    res = (int32_t)a * (int32_t)b;
    round = get_round(vxrm, res, 15);
    res = (res >> 15) + round;

    if (res > INT16_MAX) {
        env->vxsat = 0x1;
        return INT16_MAX;
    } else if (res < INT16_MIN) {
        env->vxsat = 0x1;
        return INT16_MIN;
    } else {
        return res;
    }
}

static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    uint8_t round;
    int64_t res;

    res = (int64_t)a * (int64_t)b;
    round = get_round(vxrm, res, 31);
    res = (res >> 31) + round;

    if (res > INT32_MAX) {
        env->vxsat = 0x1;
        return INT32_MAX;
    } else if (res < INT32_MIN) {
        env->vxsat = 0x1;
        return INT32_MIN;
    } else {
        return res;
    }
}

static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    uint8_t round;
    uint64_t hi_64, lo_64;
    int64_t res;

    if (a == INT64_MIN && b == INT64_MIN) {
        env->vxsat = 1;
        return INT64_MAX;
    }

    muls64(&lo_64, &hi_64, a, b);
    round = get_round(vxrm, lo_64, 63);
    /*
     * Cannot overflow, as there are always
     * 2 sign bits after multiply.
     */
    res = (hi_64 << 1) | (lo_64 >> 63);
    if (round) {
        if (res == INT64_MAX) {
            env->vxsat = 1;
        } else {
            res += 1;
        }
    }
    return res;
}

RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
GEN_VEXT_VV_RM(vsmul_vv_b, 1)
GEN_VEXT_VV_RM(vsmul_vv_h, 2)
GEN_VEXT_VV_RM(vsmul_vv_w, 4)
GEN_VEXT_VV_RM(vsmul_vv_d, 8)

RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
GEN_VEXT_VX_RM(vsmul_vx_b, 1)
GEN_VEXT_VX_RM(vsmul_vx_h, 2)
GEN_VEXT_VX_RM(vsmul_vx_w, 4)
GEN_VEXT_VX_RM(vsmul_vx_d, 8)

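/* Vector Single-Width Scaling Shift Instructions */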
static inline uint8_t
vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
{
    uint8_t round, shift = b & 0x7;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}

static inline uint16_t
vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
{
    uint8_t round, shift = b & 0xf;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}

static inline uint32_t
vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
{
    uint8_t round, shift = b & 0x1f;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}

static inline uint64_t
vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
{
    uint8_t round, shift = b & 0x3f;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}

RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
GEN_VEXT_VV_RM(vssrl_vv_b, 1)
GEN_VEXT_VV_RM(vssrl_vv_h, 2)
GEN_VEXT_VV_RM(vssrl_vv_w, 4)
GEN_VEXT_VV_RM(vssrl_vv_d, 8)

RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
GEN_VEXT_VX_RM(vssrl_vx_b, 1)
GEN_VEXT_VX_RM(vssrl_vx_h, 2)
GEN_VEXT_VX_RM(vssrl_vx_w, 4)
GEN_VEXT_VX_RM(vssrl_vx_d, 8)

static inline int8_t
vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
    uint8_t round, shift = b & 0x7;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}

static inline int16_t
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
{
    uint8_t round, shift = b & 0xf;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}

static inline int32_t
vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    uint8_t round, shift = b & 0x1f;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}

static inline int64_t
vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    uint8_t round, shift = b & 0x3f;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}

RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
GEN_VEXT_VV_RM(vssra_vv_b, 1)
GEN_VEXT_VV_RM(vssra_vv_h, 2)
GEN_VEXT_VV_RM(vssra_vv_w, 4)
GEN_VEXT_VV_RM(vssra_vv_d, 8)

RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
GEN_VEXT_VX_RM(vssra_vx_b, 1)
GEN_VEXT_VX_RM(vssra_vx_h, 2)
GEN_VEXT_VX_RM(vssra_vx_w, 4)
GEN_VEXT_VX_RM(vssra_vx_d, 8)

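/* Vector Narrowing Fixed-Point Clip Instructions */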
static inline int8_t
vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
{
    uint8_t round, shift = b & 0xf;
    int16_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > INT8_MAX) {
        env->vxsat = 0x1;
        return INT8_MAX;
    } else if (res < INT8_MIN) {
        env->vxsat = 0x1;
        return INT8_MIN;
    } else {
        return res;
    }
}

static inline int16_t
vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
{
    uint8_t round, shift = b & 0x1f;
    int32_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > INT16_MAX) {
        env->vxsat = 0x1;
        return INT16_MAX;
    } else if (res < INT16_MIN) {
        env->vxsat = 0x1;
        return INT16_MIN;
    } else {
        return res;
    }
}

static inline int32_t
vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
{
    uint8_t round, shift = b & 0x3f;
    int64_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > INT32_MAX) {
        env->vxsat = 0x1;
        return INT32_MAX;
    } else if (res < INT32_MIN) {
        env->vxsat = 0x1;
        return INT32_MIN;
    } else {
        return res;
    }
}

RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
GEN_VEXT_VV_RM(vnclip_wv_b, 1)
GEN_VEXT_VV_RM(vnclip_wv_h, 2)
GEN_VEXT_VV_RM(vnclip_wv_w, 4)

RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
GEN_VEXT_VX_RM(vnclip_wx_b, 1)
GEN_VEXT_VX_RM(vnclip_wx_h, 2)
GEN_VEXT_VX_RM(vnclip_wx_w, 4)

static inline uint8_t
vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
{
    uint8_t round, shift = b & 0xf;
    uint16_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT8_MAX) {
        env->vxsat = 0x1;
        return UINT8_MAX;
    } else {
        return res;
    }
}

static inline uint16_t
vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
{
    uint8_t round, shift = b & 0x1f;
    uint32_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT16_MAX) {
        env->vxsat = 0x1;
        return UINT16_MAX;
    } else {
        return res;
    }
}

static inline uint32_t
vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
{
    uint8_t round, shift = b & 0x3f;
    uint64_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT32_MAX) {
        env->vxsat = 0x1;
        return UINT32_MAX;
    } else {
        return res;
    }
}

RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
GEN_VEXT_VV_RM(vnclipu_wv_w, 4)

RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
GEN_VEXT_VX_RM(vnclipu_wx_w, 4)

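/*
 * Vector Floating-Point Arithmetic Instructions
 */

/* Vector Single-Width Floating-Point Add/Subtract Instructions */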
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
}

#define GEN_VEXT_VV_ENV(NAME, ESZ)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t total_elems =                                \
        vext_get_total_elems(env, desc, ESZ);             \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t vma = vext_vma(desc);                        \
    uint32_t i;                                           \
                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                     \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            /* set masked-off elements to 1s */           \
            vext_set_elems_1s(vd, vma, i * ESZ,           \
                              (i + 1) * ESZ);             \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, vs1, vs2, i, env);                  \
    }                                                     \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, vl * ESZ,                  \
                      total_elems * ESZ);                 \
}

RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
GEN_VEXT_VV_ENV(vfadd_vv_d, 8)

#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
}

#define GEN_VEXT_VF(NAME, ESZ)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t total_elems =                                \
        vext_get_total_elems(env, desc, ESZ);             \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t vma = vext_vma(desc);                        \
    uint32_t i;                                           \
                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                     \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            /* set masked-off elements to 1s */           \
            vext_set_elems_1s(vd, vma, i * ESZ,           \
                              (i + 1) * ESZ);             \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, s1, vs2, i, env);                   \
    }                                                     \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, vl * ESZ,                  \
                      total_elems * ESZ);                 \
}

RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
GEN_VEXT_VF(vfadd_vf_h, 2)
GEN_VEXT_VF(vfadd_vf_w, 4)
GEN_VEXT_VF(vfadd_vf_d, 8)

RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
GEN_VEXT_VF(vfsub_vf_h, 2)
GEN_VEXT_VF(vfsub_vf_w, 4)
GEN_VEXT_VF(vfsub_vf_d, 8)

static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
{
    return float16_sub(b, a, s);
}

static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
{
    return float32_sub(b, a, s);
}

static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
{
    return float64_sub(b, a, s);
}

RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
GEN_VEXT_VF(vfrsub_vf_h, 2)
GEN_VEXT_VF(vfrsub_vf_w, 4)
GEN_VEXT_VF(vfrsub_vf_d, 8)

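/* Vector Widening Floating-Point Add/Subtract Instructions */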
static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
{
    return float32_add(float16_to_float32(a, true, s),
                       float16_to_float32(b, true, s), s);
}

static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
{
    return float64_add(float32_to_float64(a, s),
                       float32_to_float64(b, s), s);
}

RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
GEN_VEXT_VF(vfwadd_vf_h, 4)
GEN_VEXT_VF(vfwadd_vf_w, 8)

static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
{
    return float32_sub(float16_to_float32(a, true, s),
                       float16_to_float32(b, true, s), s);
}

static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
{
    return float64_sub(float32_to_float64(a, s),
                       float32_to_float64(b, s), s);
}

RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
GEN_VEXT_VF(vfwsub_vf_h, 4)
GEN_VEXT_VF(vfwsub_vf_w, 8)

static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
{
    return float32_add(a, float16_to_float32(b, true, s), s);
}

static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
{
    return float64_add(a, float32_to_float64(b, s), s);
}

RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
GEN_VEXT_VF(vfwadd_wf_h, 4)
GEN_VEXT_VF(vfwadd_wf_w, 8)

static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
{
    return float32_sub(a, float16_to_float32(b, true, s), s);
}

static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
{
    return float64_sub(a, float32_to_float64(b, s), s);
}

RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
GEN_VEXT_VF(vfwsub_wf_h, 4)
GEN_VEXT_VF(vfwsub_wf_w, 8)

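/* Vector Single-Width Floating-Point Multiply/Divide Instructions */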
RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
GEN_VEXT_VF(vfmul_vf_h, 2)
GEN_VEXT_VF(vfmul_vf_w, 4)
GEN_VEXT_VF(vfmul_vf_d, 8)

RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
GEN_VEXT_VF(vfdiv_vf_h, 2)
GEN_VEXT_VF(vfdiv_vf_w, 4)
GEN_VEXT_VF(vfdiv_vf_d, 8)

static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
{
    return float16_div(b, a, s);
}

static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
{
    return float32_div(b, a, s);
}

static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
{
    return float64_div(b, a, s);
}

RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
GEN_VEXT_VF(vfrdiv_vf_h, 2)
GEN_VEXT_VF(vfrdiv_vf_w, 4)
GEN_VEXT_VF(vfrdiv_vf_d, 8)

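/* Vector Widening Floating-Point Multiply */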
static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
{
    return float32_mul(float16_to_float32(a, true, s),
                       float16_to_float32(b, true, s), s);
}

static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
{
    return float64_mul(float32_to_float64(a, s),
                       float32_to_float64(b, s), s);
}

RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
GEN_VEXT_VF(vfwmul_vf_h, 4)
GEN_VEXT_VF(vfwmul_vf_w, 8)

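/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */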
#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
                      CPURISCVState *env)                          \
{                                                                  \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
    TD d = *((TD *)vd + HD(i));                                    \
    *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);          \
}

static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, 0, s);
}

static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, 0, s);
}

static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, 0, s);
}

RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)

#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)            \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,     \
                      CPURISCVState *env)                          \
{                                                                  \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
    TD d = *((TD *)vd + HD(i));                                    \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status); \
}

RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
GEN_VEXT_VF(vfmacc_vf_h, 2)
GEN_VEXT_VF(vfmacc_vf_w, 4)
GEN_VEXT_VF(vfmacc_vf_d, 8)

static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, float_muladd_negate_c |
                          float_muladd_negate_product, s);
}

static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, float_muladd_negate_c |
                          float_muladd_negate_product, s);
}

static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, float_muladd_negate_c |
                          float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
GEN_VEXT_VF(vfnmacc_vf_h, 2)
GEN_VEXT_VF(vfnmacc_vf_w, 4)
GEN_VEXT_VF(vfnmacc_vf_d, 8)

static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, float_muladd_negate_c, s);
}

static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, float_muladd_negate_c, s);
}

static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, float_muladd_negate_c, s);
}

RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
GEN_VEXT_VF(vfmsac_vf_h, 2)
GEN_VEXT_VF(vfmsac_vf_w, 4)
GEN_VEXT_VF(vfmsac_vf_d, 8)

static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, float_muladd_negate_product, s);
}

static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, float_muladd_negate_product, s);
}

static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
GEN_VEXT_VF(vfnmsac_vf_h, 2)
GEN_VEXT_VF(vfnmsac_vf_w, 4)
GEN_VEXT_VF(vfnmsac_vf_d, 8)

static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a, 0, s);
}

static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a, 0, s);
}

static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a, 0, s);
}

RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
GEN_VEXT_VF(vfmadd_vf_h, 2)
GEN_VEXT_VF(vfmadd_vf_w, 4)
GEN_VEXT_VF(vfmadd_vf_d, 8)

static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a, float_muladd_negate_c |
                          float_muladd_negate_product, s);
}

static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a, float_muladd_negate_c |
                          float_muladd_negate_product, s);
3565}
3566
3567static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3568{
3569 return float64_muladd(d, b, a, float_muladd_negate_c |
3570 float_muladd_negate_product, s);
3571}
3572
3573RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3574RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3575RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3576GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3577GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3578GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
3579RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3580RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3581RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3582GEN_VEXT_VF(vfnmadd_vf_h, 2)
3583GEN_VEXT_VF(vfnmadd_vf_w, 4)
3584GEN_VEXT_VF(vfnmadd_vf_d, 8)
3585
3586static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3587{
3588 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3589}
3590
3591static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3592{
3593 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3594}
3595
3596static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3597{
3598 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3599}
3600
3601RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3602RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3603RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3604GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3605GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3606GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
3607RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3608RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3609RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3610GEN_VEXT_VF(vfmsub_vf_h, 2)
3611GEN_VEXT_VF(vfmsub_vf_w, 4)
3612GEN_VEXT_VF(vfmsub_vf_d, 8)
3613
3614static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3615{
3616 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3617}
3618
3619static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3620{
3621 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3622}
3623
3624static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3625{
3626 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3627}
3628
3629RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3630RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3631RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3632GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3633GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3634GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
3635RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3636RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3637RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3638GEN_VEXT_VF(vfnmsub_vf_h, 2)
3639GEN_VEXT_VF(vfnmsub_vf_w, 4)
3640GEN_VEXT_VF(vfnmsub_vf_d, 8)
3641
3642
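/* Vector Widening Floating-Point Fused Multiply-Add Instructions */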
3643static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3644{
3645 return float32_muladd(float16_to_float32(a, true, s),
3646 float16_to_float32(b, true, s), d, 0, s);
3647}
3648
3649static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3650{
3651 return float64_muladd(float32_to_float64(a, s),
3652 float32_to_float64(b, s), d, 0, s);
3653}
3654
3655RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3656RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3657GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3658GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
3659RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3660RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3661GEN_VEXT_VF(vfwmacc_vf_h, 4)
3662GEN_VEXT_VF(vfwmacc_vf_w, 8)
3663
3664static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3665{
3666 return float32_muladd(bfloat16_to_float32(a, s),
3667 bfloat16_to_float32(b, s), d, 0, s);
3668}
3669
3670RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
3671GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
3672RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
3673GEN_VEXT_VF(vfwmaccbf16_vf, 4)
3674
3675static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3676{
3677 return float32_muladd(float16_to_float32(a, true, s),
3678 float16_to_float32(b, true, s), d,
3679 float_muladd_negate_c | float_muladd_negate_product,
3680 s);
3681}
3682
3683static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3684{
3685 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
3686 d, float_muladd_negate_c |
3687 float_muladd_negate_product, s);
3688}
3689
3690RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3691RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3692GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3693GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
3694RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3695RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3696GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3697GEN_VEXT_VF(vfwnmacc_vf_w, 8)
3698
3699static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3700{
3701 return float32_muladd(float16_to_float32(a, true, s),
3702 float16_to_float32(b, true, s), d,
3703 float_muladd_negate_c, s);
3704}
3705
3706static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3707{
3708 return float64_muladd(float32_to_float64(a, s),
3709 float32_to_float64(b, s), d,
3710 float_muladd_negate_c, s);
3711}
3712
3713RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3714RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3715GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3716GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
3717RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3718RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3719GEN_VEXT_VF(vfwmsac_vf_h, 4)
3720GEN_VEXT_VF(vfwmsac_vf_w, 8)
3721
3722static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3723{
3724 return float32_muladd(float16_to_float32(a, true, s),
3725 float16_to_float32(b, true, s), d,
3726 float_muladd_negate_product, s);
3727}
3728
3729static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3730{
3731 return float64_muladd(float32_to_float64(a, s),
3732 float32_to_float64(b, s), d,
3733 float_muladd_negate_product, s);
3734}
3735
3736RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3737RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3738GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
3739GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
3740RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3741RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3742GEN_VEXT_VF(vfwnmsac_vf_h, 4)
3743GEN_VEXT_VF(vfwnmsac_vf_w, 8)
3744
3745
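/* Vector Floating-Point Square-Root Instruction */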
3746#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
3747static void do_##NAME(void *vd, void *vs2, int i, \
3748 CPURISCVState *env) \
3749{ \
3750 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3751 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3752}
3753
3754#define GEN_VEXT_V_ENV(NAME, ESZ) \
3755void HELPER(NAME)(void *vd, void *v0, void *vs2, \
3756 CPURISCVState *env, uint32_t desc) \
3757{ \
3758 uint32_t vm = vext_vm(desc); \
3759 uint32_t vl = env->vl; \
3760 uint32_t total_elems = \
3761 vext_get_total_elems(env, desc, ESZ); \
3762 uint32_t vta = vext_vta(desc); \
3763 uint32_t vma = vext_vma(desc); \
3764 uint32_t i; \
3765 \
3766 VSTART_CHECK_EARLY_EXIT(env, vl); \
3767 \
3768 if (vl == 0) { \
3769 return; \
3770 } \
3771 for (i = env->vstart; i < vl; i++) { \
3772 if (!vm && !vext_elem_mask(v0, i)) { \
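            /* set masked-off elements to 1s */               \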
3773 \
3774 vext_set_elems_1s(vd, vma, i * ESZ, \
3775 (i + 1) * ESZ); \
3776 continue; \
3777 } \
3778 do_##NAME(vd, vs2, i, env); \
3779 } \
3780 env->vstart = 0; \
3781 vext_set_elems_1s(vd, vta, vl * ESZ, \
3782 total_elems * ESZ); \
3783}
3784
3785RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3786RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3787RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3788GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
3789GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
3790GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
3791
3792
3793
3794
3795
3796
3797
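/*
 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
 *
 * Adapted from riscv-v-spec recip.c:
 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
 *
 * The 128-entry table below is indexed by the low bit of the exponent
 * and the top six fraction bits, and supplies seven result fraction bits.
 */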
3798static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3799{
3800 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3801 uint64_t exp = extract64(f, frac_size, exp_size);
3802 uint64_t frac = extract64(f, 0, frac_size);
3803
3804 const uint8_t lookup_table[] = {
3805 52, 51, 50, 48, 47, 46, 44, 43,
3806 42, 41, 40, 39, 38, 36, 35, 34,
3807 33, 32, 31, 30, 30, 29, 28, 27,
3808 26, 25, 24, 23, 23, 22, 21, 20,
3809 19, 19, 18, 17, 16, 16, 15, 14,
3810 14, 13, 12, 12, 11, 10, 10, 9,
3811 9, 8, 7, 7, 6, 6, 5, 4,
3812 4, 3, 3, 2, 2, 1, 1, 0,
3813 127, 125, 123, 121, 119, 118, 116, 114,
3814 113, 111, 109, 108, 106, 105, 103, 102,
3815 100, 99, 97, 96, 95, 93, 92, 91,
3816 90, 88, 87, 86, 85, 84, 83, 82,
3817 80, 79, 78, 77, 76, 75, 74, 73,
3818 72, 71, 70, 70, 69, 68, 67, 66,
3819 65, 64, 63, 63, 62, 61, 60, 59,
3820 59, 58, 57, 56, 56, 55, 54, 53
3821 };
3822 const int precision = 7;
3823
3824 if (exp == 0 && frac != 0) {
3825
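        /* normalize the subnormal */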
3826 while (extract64(frac, frac_size - 1, 1) == 0) {
3827 exp--;
3828 frac <<= 1;
3829 }
3830
3831 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3832 }
3833
3834 int idx = ((exp & 1) << (precision - 1)) |
3835 (frac >> (frac_size - precision + 1));
3836 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3837 (frac_size - precision);
3838 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3839
3840 uint64_t val = 0;
3841 val = deposit64(val, 0, frac_size, out_frac);
3842 val = deposit64(val, frac_size, exp_size, out_exp);
3843 val = deposit64(val, frac_size + exp_size, 1, sign);
3844 return val;
3845}
3846
3847static float16 frsqrt7_h(float16 f, float_status *s)
3848{
3849 int exp_size = 5, frac_size = 10;
3850 bool sign = float16_is_neg(f);
3851
3852
3853
3854
3855
3856
3857
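    /*
     * frsqrt7(sNaN) = canonical NaN
     * frsqrt7(-inf) = canonical NaN
     * frsqrt7(-normal) = canonical NaN
     * frsqrt7(-subnormal) = canonical NaN
     */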
3858 if (float16_is_signaling_nan(f, s) ||
3859 (float16_is_infinity(f) && sign) ||
3860 (float16_is_normal(f) && sign) ||
3861 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3862 s->float_exception_flags |= float_flag_invalid;
3863 return float16_default_nan(s);
3864 }
3865
3866
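    /* frsqrt7(qNaN) = canonical NaN */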
3867 if (float16_is_quiet_nan(f, s)) {
3868 return float16_default_nan(s);
3869 }
3870
3871
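    /* frsqrt7(+-0) = +-inf */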
3872 if (float16_is_zero(f)) {
3873 s->float_exception_flags |= float_flag_divbyzero;
3874 return float16_set_sign(float16_infinity, sign);
3875 }
3876
3877
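    /* frsqrt7(+inf) = +0 */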
3878 if (float16_is_infinity(f) && !sign) {
3879 return float16_set_sign(float16_zero, sign);
3880 }
3881
3882
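    /* +normal and +subnormal cases go through the table lookup */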
3883 uint64_t val = frsqrt7(f, exp_size, frac_size);
3884 return make_float16(val);
3885}
3886
3887static float32 frsqrt7_s(float32 f, float_status *s)
3888{
3889 int exp_size = 8, frac_size = 23;
3890 bool sign = float32_is_neg(f);
3891
3892
3893
3894
3895
3896
3897
3898 if (float32_is_signaling_nan(f, s) ||
3899 (float32_is_infinity(f) && sign) ||
3900 (float32_is_normal(f) && sign) ||
3901 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3902 s->float_exception_flags |= float_flag_invalid;
3903 return float32_default_nan(s);
3904 }
3905
3906
3907 if (float32_is_quiet_nan(f, s)) {
3908 return float32_default_nan(s);
3909 }
3910
3911
3912 if (float32_is_zero(f)) {
3913 s->float_exception_flags |= float_flag_divbyzero;
3914 return float32_set_sign(float32_infinity, sign);
3915 }
3916
3917
3918 if (float32_is_infinity(f) && !sign) {
3919 return float32_set_sign(float32_zero, sign);
3920 }
3921
3922
3923 uint64_t val = frsqrt7(f, exp_size, frac_size);
3924 return make_float32(val);
3925}
3926
3927static float64 frsqrt7_d(float64 f, float_status *s)
3928{
3929 int exp_size = 11, frac_size = 52;
3930 bool sign = float64_is_neg(f);
3931
3932
3933
3934
3935
3936
3937
3938 if (float64_is_signaling_nan(f, s) ||
3939 (float64_is_infinity(f) && sign) ||
3940 (float64_is_normal(f) && sign) ||
3941 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3942 s->float_exception_flags |= float_flag_invalid;
3943 return float64_default_nan(s);
3944 }
3945
3946
3947 if (float64_is_quiet_nan(f, s)) {
3948 return float64_default_nan(s);
3949 }
3950
3951
3952 if (float64_is_zero(f)) {
3953 s->float_exception_flags |= float_flag_divbyzero;
3954 return float64_set_sign(float64_infinity, sign);
3955 }
3956
3957
3958 if (float64_is_infinity(f) && !sign) {
3959 return float64_set_sign(float64_zero, sign);
3960 }
3961
3962
3963 uint64_t val = frsqrt7(f, exp_size, frac_size);
3964 return make_float64(val);
3965}
3966
3967RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3968RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3969RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3970GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
3971GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
3972GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
3973
3974
3975
3976
3977
3978
3979
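/*
 * Vector Floating-Point Reciprocal Estimate Instruction
 *
 * Adapted from riscv-v-spec recip.c:
 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
 */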
3980static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3981 float_status *s)
3982{
3983 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3984 uint64_t exp = extract64(f, frac_size, exp_size);
3985 uint64_t frac = extract64(f, 0, frac_size);
3986
3987 const uint8_t lookup_table[] = {
3988 127, 125, 123, 121, 119, 117, 116, 114,
3989 112, 110, 109, 107, 105, 104, 102, 100,
3990 99, 97, 96, 94, 93, 91, 90, 88,
3991 87, 85, 84, 83, 81, 80, 79, 77,
3992 76, 75, 74, 72, 71, 70, 69, 68,
3993 66, 65, 64, 63, 62, 61, 60, 59,
3994 58, 57, 56, 55, 54, 53, 52, 51,
3995 50, 49, 48, 47, 46, 45, 44, 43,
3996 42, 41, 40, 40, 39, 38, 37, 36,
3997 35, 35, 34, 33, 32, 31, 31, 30,
3998 29, 28, 28, 27, 26, 25, 25, 24,
3999 23, 23, 22, 21, 21, 20, 19, 19,
4000 18, 17, 17, 16, 15, 15, 14, 14,
4001 13, 12, 12, 11, 11, 10, 9, 9,
4002 8, 8, 7, 7, 6, 5, 5, 4,
4003 4, 3, 3, 2, 2, 1, 1, 0
4004 };
4005 const int precision = 7;
4006
4007 if (exp == 0 && frac != 0) {
4008
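        /* normalize the subnormal */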
4009 while (extract64(frac, frac_size - 1, 1) == 0) {
4010 exp--;
4011 frac <<= 1;
4012 }
4013
4014 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
4015
4016 if (exp != 0 && exp != UINT64_MAX) {
4017
4018
4019
4020
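            /*
             * Overflow to inf or to the max-magnitude finite value of the
             * same sign, depending on the sign and the rounding mode.
             */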
4021 s->float_exception_flags |= (float_flag_inexact |
4022 float_flag_overflow);
4023
4024 if ((s->float_rounding_mode == float_round_to_zero) ||
4025 ((s->float_rounding_mode == float_round_down) && !sign) ||
4026 ((s->float_rounding_mode == float_round_up) && sign)) {
4027
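                /* Return the greatest-magnitude finite value */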
4028 return (sign << (exp_size + frac_size)) |
4029 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
4030 } else {
4031
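                /* Return +-inf */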
4032 return (sign << (exp_size + frac_size)) |
4033 MAKE_64BIT_MASK(frac_size, exp_size);
4034 }
4035 }
4036 }
4037
4038 int idx = frac >> (frac_size - precision);
4039 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
4040 (frac_size - precision);
4041 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
4042
4043 if (out_exp == 0 || out_exp == UINT64_MAX) {
4044
4045
4046
4047
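        /*
         * The result is subnormal; shift the fraction into place without
         * raising the underflow exception, since no further precision is
         * lost.
         */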
4048 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
4049 if (out_exp == UINT64_MAX) {
4050 out_frac >>= 1;
4051 out_exp = 0;
4052 }
4053 }
4054
4055 uint64_t val = 0;
4056 val = deposit64(val, 0, frac_size, out_frac);
4057 val = deposit64(val, frac_size, exp_size, out_exp);
4058 val = deposit64(val, frac_size + exp_size, 1, sign);
4059 return val;
4060}
4061
4062static float16 frec7_h(float16 f, float_status *s)
4063{
4064 int exp_size = 5, frac_size = 10;
4065 bool sign = float16_is_neg(f);
4066
4067
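    /* frec7(+-inf) = +-0 */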
4068 if (float16_is_infinity(f)) {
4069 return float16_set_sign(float16_zero, sign);
4070 }
4071
4072
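    /* frec7(+-0) = +-inf */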
4073 if (float16_is_zero(f)) {
4074 s->float_exception_flags |= float_flag_divbyzero;
4075 return float16_set_sign(float16_infinity, sign);
4076 }
4077
4078
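    /* frec7(sNaN) = canonical NaN */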
4079 if (float16_is_signaling_nan(f, s)) {
4080 s->float_exception_flags |= float_flag_invalid;
4081 return float16_default_nan(s);
4082 }
4083
4084
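    /* frec7(qNaN) = canonical NaN */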
4085 if (float16_is_quiet_nan(f, s)) {
4086 return float16_default_nan(s);
4087 }
4088
4089
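    /* +-normal and +-subnormal cases go through the table lookup */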
4090 uint64_t val = frec7(f, exp_size, frac_size, s);
4091 return make_float16(val);
4092}
4093
4094static float32 frec7_s(float32 f, float_status *s)
4095{
4096 int exp_size = 8, frac_size = 23;
4097 bool sign = float32_is_neg(f);
4098
4099
4100 if (float32_is_infinity(f)) {
4101 return float32_set_sign(float32_zero, sign);
4102 }
4103
4104
4105 if (float32_is_zero(f)) {
4106 s->float_exception_flags |= float_flag_divbyzero;
4107 return float32_set_sign(float32_infinity, sign);
4108 }
4109
4110
4111 if (float32_is_signaling_nan(f, s)) {
4112 s->float_exception_flags |= float_flag_invalid;
4113 return float32_default_nan(s);
4114 }
4115
4116
4117 if (float32_is_quiet_nan(f, s)) {
4118 return float32_default_nan(s);
4119 }
4120
4121
4122 uint64_t val = frec7(f, exp_size, frac_size, s);
4123 return make_float32(val);
4124}
4125
4126static float64 frec7_d(float64 f, float_status *s)
4127{
4128 int exp_size = 11, frac_size = 52;
4129 bool sign = float64_is_neg(f);
4130
4131
4132 if (float64_is_infinity(f)) {
4133 return float64_set_sign(float64_zero, sign);
4134 }
4135
4136
4137 if (float64_is_zero(f)) {
4138 s->float_exception_flags |= float_flag_divbyzero;
4139 return float64_set_sign(float64_infinity, sign);
4140 }
4141
4142
4143 if (float64_is_signaling_nan(f, s)) {
4144 s->float_exception_flags |= float_flag_invalid;
4145 return float64_default_nan(s);
4146 }
4147
4148
4149 if (float64_is_quiet_nan(f, s)) {
4150 return float64_default_nan(s);
4151 }
4152
4153
4154 uint64_t val = frec7(f, exp_size, frac_size, s);
4155 return make_float64(val);
4156}
4157
4158RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
4159RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
4160RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
4161GEN_VEXT_V_ENV(vfrec7_v_h, 2)
4162GEN_VEXT_V_ENV(vfrec7_v_w, 4)
4163GEN_VEXT_V_ENV(vfrec7_v_d, 8)
4164
4165
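/* Vector Floating-Point MIN/MAX Instructions */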
4166RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
4167RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
4168RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
4169GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
4170GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
4171GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
4172RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
4173RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
4174RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
4175GEN_VEXT_VF(vfmin_vf_h, 2)
4176GEN_VEXT_VF(vfmin_vf_w, 4)
4177GEN_VEXT_VF(vfmin_vf_d, 8)
4178
4179RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
4180RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
4181RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
4182GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
4183GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
4184GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
4185RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
4186RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
4187RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
4188GEN_VEXT_VF(vfmax_vf_h, 2)
4189GEN_VEXT_VF(vfmax_vf_w, 4)
4190GEN_VEXT_VF(vfmax_vf_d, 8)
4191
4192
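/* Vector Floating-Point Sign-Injection Instructions */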
4193static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
4194{
4195 return deposit64(b, 0, 15, a);
4196}
4197
4198static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
4199{
4200 return deposit64(b, 0, 31, a);
4201}
4202
4203static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
4204{
4205 return deposit64(b, 0, 63, a);
4206}
4207
4208RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
4209RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
4210RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
4211GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
4212GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
4213GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
4214RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
4215RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
4216RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
4217GEN_VEXT_VF(vfsgnj_vf_h, 2)
4218GEN_VEXT_VF(vfsgnj_vf_w, 4)
4219GEN_VEXT_VF(vfsgnj_vf_d, 8)
4220
4221static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
4222{
4223 return deposit64(~b, 0, 15, a);
4224}
4225
4226static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
4227{
4228 return deposit64(~b, 0, 31, a);
4229}
4230
4231static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
4232{
4233 return deposit64(~b, 0, 63, a);
4234}
4235
4236RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
4237RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
4238RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
4239GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
4240GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
4241GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
4242RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
4243RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
4244RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
4245GEN_VEXT_VF(vfsgnjn_vf_h, 2)
4246GEN_VEXT_VF(vfsgnjn_vf_w, 4)
4247GEN_VEXT_VF(vfsgnjn_vf_d, 8)
4248
4249static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
4250{
4251 return deposit64(b ^ a, 0, 15, a);
4252}
4253
4254static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4255{
4256 return deposit64(b ^ a, 0, 31, a);
4257}
4258
4259static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4260{
4261 return deposit64(b ^ a, 0, 63, a);
4262}
4263
4264RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4265RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4266RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
4267GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
4268GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
4269GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
4270RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4271RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4272RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
4273GEN_VEXT_VF(vfsgnjx_vf_h, 2)
4274GEN_VEXT_VF(vfsgnjx_vf_w, 4)
4275GEN_VEXT_VF(vfsgnjx_vf_d, 8)
4276
4277
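/* Vector Floating-Point Compare Instructions */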
4278#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
4279void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4280 CPURISCVState *env, uint32_t desc) \
4281{ \
4282 uint32_t vm = vext_vm(desc); \
4283 uint32_t vl = env->vl; \
4284 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
4285 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4286 uint32_t vma = vext_vma(desc); \
4287 uint32_t i; \
4288 \
4289 VSTART_CHECK_EARLY_EXIT(env, vl); \
4290 \
4291 for (i = env->vstart; i < vl; i++) { \
4292 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
4293 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4294 if (!vm && !vext_elem_mask(v0, i)) { \
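            /* set masked-off elements to 1s */               \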
4295 \
4296 if (vma) { \
4297 vext_set_elem_mask(vd, i, 1); \
4298 } \
4299 continue; \
4300 } \
4301 vext_set_elem_mask(vd, i, \
4302 DO_OP(s2, s1, &env->fp_status)); \
4303 } \
    env->vstart = 0;                                          \
    /*
     * The mask destination register is always tail-agnostic;
     * set the tail elements to 1s.
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}
4315
4316GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4317GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4318GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4319
4320#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4321void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4322 CPURISCVState *env, uint32_t desc) \
4323{ \
4324 uint32_t vm = vext_vm(desc); \
4325 uint32_t vl = env->vl; \
4326 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
4327 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4328 uint32_t vma = vext_vma(desc); \
4329 uint32_t i; \
4330 \
4331 VSTART_CHECK_EARLY_EXIT(env, vl); \
4332 \
4333 for (i = env->vstart; i < vl; i++) { \
4334 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4335 if (!vm && !vext_elem_mask(v0, i)) { \
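            /* set masked-off elements to 1s */               \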
4336 \
4337 if (vma) { \
4338 vext_set_elem_mask(vd, i, 1); \
4339 } \
4340 continue; \
4341 } \
4342 vext_set_elem_mask(vd, i, \
4343 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4344 } \
    env->vstart = 0;                                          \
    /*
     * The mask destination register is always tail-agnostic;
     * set the tail elements to 1s.
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}
4356
4357GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4358GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4359GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4360
4361static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4362{
4363 FloatRelation compare = float16_compare_quiet(a, b, s);
4364 return compare != float_relation_equal;
4365}
4366
4367static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4368{
4369 FloatRelation compare = float32_compare_quiet(a, b, s);
4370 return compare != float_relation_equal;
4371}
4372
4373static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4374{
4375 FloatRelation compare = float64_compare_quiet(a, b, s);
4376 return compare != float_relation_equal;
4377}
4378
4379GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4380GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4381GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4382GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4383GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4384GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4385
4386GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4387GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4388GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4389GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4390GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4391GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4392
4393GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4394GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4395GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4396GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4397GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4398GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4399
4400static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4401{
4402 FloatRelation compare = float16_compare(a, b, s);
4403 return compare == float_relation_greater;
4404}
4405
4406static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4407{
4408 FloatRelation compare = float32_compare(a, b, s);
4409 return compare == float_relation_greater;
4410}
4411
4412static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4413{
4414 FloatRelation compare = float64_compare(a, b, s);
4415 return compare == float_relation_greater;
4416}
4417
4418GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4419GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4420GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4421
4422static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4423{
4424 FloatRelation compare = float16_compare(a, b, s);
4425 return compare == float_relation_greater ||
4426 compare == float_relation_equal;
4427}
4428
4429static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4430{
4431 FloatRelation compare = float32_compare(a, b, s);
4432 return compare == float_relation_greater ||
4433 compare == float_relation_equal;
4434}
4435
4436static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4437{
4438 FloatRelation compare = float64_compare(a, b, s);
4439 return compare == float_relation_greater ||
4440 compare == float_relation_equal;
4441}
4442
4443GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4444GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4445GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4446
4447
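/* Vector Floating-Point Classify Instruction */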
4448target_ulong fclass_h(uint64_t frs1)
4449{
4450 float16 f = frs1;
4451 bool sign = float16_is_neg(f);
4452
4453 if (float16_is_infinity(f)) {
4454 return sign ? 1 << 0 : 1 << 7;
4455 } else if (float16_is_zero(f)) {
4456 return sign ? 1 << 3 : 1 << 4;
4457 } else if (float16_is_zero_or_denormal(f)) {
4458 return sign ? 1 << 2 : 1 << 5;
4459 } else if (float16_is_any_nan(f)) {
4460 float_status s = { };
4461 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4462 } else {
4463 return sign ? 1 << 1 : 1 << 6;
4464 }
4465}
4466
4467target_ulong fclass_s(uint64_t frs1)
4468{
4469 float32 f = frs1;
4470 bool sign = float32_is_neg(f);
4471
4472 if (float32_is_infinity(f)) {
4473 return sign ? 1 << 0 : 1 << 7;
4474 } else if (float32_is_zero(f)) {
4475 return sign ? 1 << 3 : 1 << 4;
4476 } else if (float32_is_zero_or_denormal(f)) {
4477 return sign ? 1 << 2 : 1 << 5;
4478 } else if (float32_is_any_nan(f)) {
4479 float_status s = { };
4480 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4481 } else {
4482 return sign ? 1 << 1 : 1 << 6;
4483 }
4484}
4485
4486target_ulong fclass_d(uint64_t frs1)
4487{
4488 float64 f = frs1;
4489 bool sign = float64_is_neg(f);
4490
4491 if (float64_is_infinity(f)) {
4492 return sign ? 1 << 0 : 1 << 7;
4493 } else if (float64_is_zero(f)) {
4494 return sign ? 1 << 3 : 1 << 4;
4495 } else if (float64_is_zero_or_denormal(f)) {
4496 return sign ? 1 << 2 : 1 << 5;
4497 } else if (float64_is_any_nan(f)) {
4498 float_status s = { };
4499 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4500 } else {
4501 return sign ? 1 << 1 : 1 << 6;
4502 }
4503}
4504
4505RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4506RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4507RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4508GEN_VEXT_V(vfclass_v_h, 2)
4509GEN_VEXT_V(vfclass_v_w, 4)
4510GEN_VEXT_V(vfclass_v_d, 8)
4511
4512
4513
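/* Vector Floating-Point Merge Instruction */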
4514#define GEN_VFMERGE_VF(NAME, ETYPE, H) \
4515void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4516 CPURISCVState *env, uint32_t desc) \
4517{ \
4518 uint32_t vm = vext_vm(desc); \
4519 uint32_t vl = env->vl; \
4520 uint32_t esz = sizeof(ETYPE); \
4521 uint32_t total_elems = \
4522 vext_get_total_elems(env, desc, esz); \
4523 uint32_t vta = vext_vta(desc); \
4524 uint32_t i; \
4525 \
4526 VSTART_CHECK_EARLY_EXIT(env, vl); \
4527 \
4528 for (i = env->vstart; i < vl; i++) { \
4529 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4530 *((ETYPE *)vd + H(i)) = \
4531 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
4532 } \
4533 env->vstart = 0; \
4534 \
4535 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4536}
4537
4538GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4539GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4540GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4541
4542
4543
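/* Single-Width Floating-Point/Integer Type-Convert Instructions */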
4544RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4545RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4546RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4547GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
4548GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
4549GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
4550
4551
4552RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4553RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4554RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4555GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
4556GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
4557GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
4558
4559
4560RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4561RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4562RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4563GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
4564GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
4565GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
4566
4567
4568RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4569RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4570RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4571GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
4572GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
4573GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4574
4575
4576
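/* Widening Floating-Point/Integer Type-Convert Instructions */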
4577#define WOP_UU_B uint16_t, uint8_t, uint8_t
4578#define WOP_UU_H uint32_t, uint16_t, uint16_t
4579#define WOP_UU_W uint64_t, uint32_t, uint32_t
4580
4581
4582
4583RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4584RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4585GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
4586GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
4587
4588
4589RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4590RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4591GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
4592GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
4593
4594
4595
4596
4597RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4598RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4599RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4600GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
4601GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
4602GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
4603
4604
4605RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4606RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4607RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4608GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
4609GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
4610GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4611
4612
4613
4614
4615static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4616{
4617 return float16_to_float32(a, true, s);
4618}
4619
4620RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4621RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4622GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
4623GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
4624
4625RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
4626GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)
4627
4628
4629
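/* Narrowing Floating-Point/Integer Type-Convert Instructions */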
4630#define NOP_UU_B uint8_t, uint16_t, uint32_t
4631#define NOP_UU_H uint16_t, uint32_t, uint32_t
4632#define NOP_UU_W uint32_t, uint64_t, uint64_t
4633
4634RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4635RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4636RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
4637GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
4638GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
4639GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
4640
4641
4642RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4643RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4644RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
4645GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
4646GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
4647GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
4648
4649
4650
4651
4652RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4653RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
4654GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
4655GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
4656
4657
4658RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4659RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
4660GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
4661GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
4662
4663
4664static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4665{
4666 return float32_to_float16(a, true, s);
4667}
4668
4669RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4670RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4671GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
4672GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
4673
4674RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
4675GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
4676
4677
4678
4679
4680
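/*
 * Vector Reduction Operations
 */
/* Vector Single-Width Integer Reduction Instructions */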
4681#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
4682void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4683 void *vs2, CPURISCVState *env, \
4684 uint32_t desc) \
4685{ \
4686 uint32_t vm = vext_vm(desc); \
4687 uint32_t vl = env->vl; \
4688 uint32_t esz = sizeof(TD); \
4689 uint32_t vlenb = simd_maxsz(desc); \
4690 uint32_t vta = vext_vta(desc); \
4691 uint32_t i; \
4692 TD s1 = *((TD *)vs1 + HD(0)); \
4693 \
4694 VSTART_CHECK_EARLY_EXIT(env, vl); \
4695 \
4696 for (i = env->vstart; i < vl; i++) { \
4697 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4698 if (!vm && !vext_elem_mask(v0, i)) { \
4699 continue; \
4700 } \
4701 s1 = OP(s1, (TD)s2); \
4702 } \
4703 if (vl > 0) { \
4704 *((TD *)vd + HD(0)) = s1; \
4705 } \
4706 env->vstart = 0; \
4707 \
4708 vext_set_elems_1s(vd, vta, esz, vlenb); \
4709}
4710
4711
4712GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4713GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4714GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4715GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4716
4717
4718GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4719GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4720GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4721GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4722
4723
4724GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4725GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4726GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4727GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4728
4729
4730GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4731GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4732GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4733GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4734
4735
4736GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4737GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4738GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4739GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4740
4741
4742GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4743GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4744GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4745GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4746
4747
4748GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4749GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4750GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4751GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4752
4753
4754GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4755GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4756GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4757GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4758
4759
4760
4761GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4762GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4763GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4764
4765
4766GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4767GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4768GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4769
4770
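/* Vector Single-Width Floating-Point Reduction Instructions */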
4771#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
4772void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4773 void *vs2, CPURISCVState *env, \
4774 uint32_t desc) \
4775{ \
4776 uint32_t vm = vext_vm(desc); \
4777 uint32_t vl = env->vl; \
4778 uint32_t esz = sizeof(TD); \
4779 uint32_t vlenb = simd_maxsz(desc); \
4780 uint32_t vta = vext_vta(desc); \
4781 uint32_t i; \
4782 TD s1 = *((TD *)vs1 + HD(0)); \
4783 \
4784 VSTART_CHECK_EARLY_EXIT(env, vl); \
4785 \
4786 for (i = env->vstart; i < vl; i++) { \
4787 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4788 if (!vm && !vext_elem_mask(v0, i)) { \
4789 continue; \
4790 } \
4791 s1 = OP(s1, (TD)s2, &env->fp_status); \
4792 } \
4793 if (vl > 0) { \
4794 *((TD *)vd + HD(0)) = s1; \
4795 } \
4796 env->vstart = 0; \
4797 \
4798 vext_set_elems_1s(vd, vta, esz, vlenb); \
4799}
4800
4801
4802GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4803GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4804GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4805
4806
4807GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4808GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4809GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4810
4811
4812GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2,
4813 float16_maximum_number)
4814GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4,
4815 float32_maximum_number)
4816GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8,
4817 float64_maximum_number)
4818
4819
4820GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2,
4821 float16_minimum_number)
4822GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4,
4823 float32_minimum_number)
4824GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8,
4825 float64_minimum_number)
4826
4827
4828static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
4829{
4830 return float32_add(a, float16_to_float32(b, true, s), s);
4831}
4832
4833static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
4834{
4835 return float64_add(a, float32_to_float64(b, s), s);
4836}
4837
4838
4839
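/* Vector Widening Floating-Point Reduction Instructions */
/* Unordered/ordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */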
4840GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4841GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4842GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4843GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4844
4845
4846
4847
4848
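/*
 * Vector Mask Operations
 */
/* Vector Mask-Register Logical Instructions */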
4849#define GEN_VEXT_MASK_VV(NAME, OP) \
4850void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4851 void *vs2, CPURISCVState *env, \
4852 uint32_t desc) \
4853{ \
4854 uint32_t vl = env->vl; \
4855 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\
4856 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4857 uint32_t i; \
4858 int a, b; \
4859 \
4860 VSTART_CHECK_EARLY_EXIT(env, vl); \
4861 \
4862 for (i = env->vstart; i < vl; i++) { \
4863 a = vext_elem_mask(vs1, i); \
4864 b = vext_elem_mask(vs2, i); \
4865 vext_set_elem_mask(vd, i, OP(b, a)); \
4866 } \
    env->vstart = 0;                                  \
    /*
     * The mask destination register is always tail-agnostic;
     * set the tail elements to 1s.
     */                                               \
    if (vta_all_1s) {                                 \
        for (; i < total_elems; i++) {                \
            vext_set_elem_mask(vd, i, 1);             \
        }                                             \
    }                                                 \
}
4878
4879#define DO_NAND(N, M) (!(N & M))
4880#define DO_ANDNOT(N, M) (N & !M)
4881#define DO_NOR(N, M) (!(N | M))
4882#define DO_ORNOT(N, M) (N | !M)
4883#define DO_XNOR(N, M) (!(N ^ M))
4884
4885GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4886GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4887GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
4888GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4889GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4890GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4891GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
4892GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4893
4894
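/* Vector count population in mask vcpop */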
4895target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4896 uint32_t desc)
4897{
4898 target_ulong cnt = 0;
4899 uint32_t vm = vext_vm(desc);
4900 uint32_t vl = env->vl;
4901 int i;
4902
4903 for (i = env->vstart; i < vl; i++) {
4904 if (vm || vext_elem_mask(v0, i)) {
4905 if (vext_elem_mask(vs2, i)) {
4906 cnt++;
4907 }
4908 }
4909 }
4910 env->vstart = 0;
4911 return cnt;
4912}
4913
4914
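/* vfirst find-first-set mask bit */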
4915target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4916 uint32_t desc)
4917{
4918 uint32_t vm = vext_vm(desc);
4919 uint32_t vl = env->vl;
4920 int i;
4921
4922 for (i = env->vstart; i < vl; i++) {
4923 if (vm || vext_elem_mask(v0, i)) {
4924 if (vext_elem_mask(vs2, i)) {
4925 return i;
4926 }
4927 }
4928 }
4929 env->vstart = 0;
4930 return -1LL;
4931}
4932
4933enum set_mask_type {
4934 ONLY_FIRST = 1,
4935 INCLUDE_FIRST,
4936 BEFORE_FIRST,
4937};
4938
4939static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4940 uint32_t desc, enum set_mask_type type)
4941{
4942 uint32_t vm = vext_vm(desc);
4943 uint32_t vl = env->vl;
4944 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
4945 uint32_t vta_all_1s = vext_vta_all_1s(desc);
4946 uint32_t vma = vext_vma(desc);
4947 int i;
4948 bool first_mask_bit = false;
4949
4950 VSTART_CHECK_EARLY_EXIT(env, vl);
4951
4952 for (i = env->vstart; i < vl; i++) {
4953 if (!vm && !vext_elem_mask(v0, i)) {
4954
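            /* set masked-off elements to 1s */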
4955 if (vma) {
4956 vext_set_elem_mask(vd, i, 1);
4957 }
4958 continue;
4959 }
4960
4961 if (first_mask_bit) {
4962 vext_set_elem_mask(vd, i, 0);
4963 continue;
4964 }
4965 if (vext_elem_mask(vs2, i)) {
4966 first_mask_bit = true;
4967 if (type == BEFORE_FIRST) {
4968 vext_set_elem_mask(vd, i, 0);
4969 } else {
4970 vext_set_elem_mask(vd, i, 1);
4971 }
4972 } else {
4973 if (type == ONLY_FIRST) {
4974 vext_set_elem_mask(vd, i, 0);
4975 } else {
4976 vext_set_elem_mask(vd, i, 1);
4977 }
4978 }
4979 }
4980 env->vstart = 0;
4981
4982
4983
4984
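    /*
     * The mask destination register is always tail-agnostic;
     * set the tail elements to 1s.
     */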
4985 if (vta_all_1s) {
4986 for (; i < total_elems; i++) {
4987 vext_set_elem_mask(vd, i, 1);
4988 }
4989 }
4990}
4991
4992void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4993 uint32_t desc)
4994{
4995 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4996}
4997
4998void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4999 uint32_t desc)
5000{
5001 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
5002}
5003
5004void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
5005 uint32_t desc)
5006{
5007 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
5008}
5009
5010
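/* Vector Iota Instruction */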
5011#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
5012void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
5013 uint32_t desc) \
5014{ \
5015 uint32_t vm = vext_vm(desc); \
5016 uint32_t vl = env->vl; \
5017 uint32_t esz = sizeof(ETYPE); \
5018 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5019 uint32_t vta = vext_vta(desc); \
5020 uint32_t vma = vext_vma(desc); \
5021 uint32_t sum = 0; \
5022 int i; \
5023 \
5024 VSTART_CHECK_EARLY_EXIT(env, vl); \
5025 \
5026 for (i = env->vstart; i < vl; i++) { \
5027 if (!vm && !vext_elem_mask(v0, i)) { \
5028 \
5029 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5030 continue; \
5031 } \
5032 *((ETYPE *)vd + H(i)) = sum; \
5033 if (vext_elem_mask(vs2, i)) { \
5034 sum++; \
5035 } \
5036 } \
5037 env->vstart = 0; \
5038 \
5039 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5040}
5041
5042GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
5043GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
5044GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
5045GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
5046
5047
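/* Vector Element Index Instruction */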
5048#define GEN_VEXT_VID_V(NAME, ETYPE, H) \
5049void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
5050{ \
5051 uint32_t vm = vext_vm(desc); \
5052 uint32_t vl = env->vl; \
5053 uint32_t esz = sizeof(ETYPE); \
5054 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5055 uint32_t vta = vext_vta(desc); \
5056 uint32_t vma = vext_vma(desc); \
5057 int i; \
5058 \
5059 VSTART_CHECK_EARLY_EXIT(env, vl); \
5060 \
5061 for (i = env->vstart; i < vl; i++) { \
5062 if (!vm && !vext_elem_mask(v0, i)) { \
5063 \
5064 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5065 continue; \
5066 } \
5067 *((ETYPE *)vd + H(i)) = i; \
5068 } \
5069 env->vstart = 0; \
5070 \
5071 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5072}
5073
5074GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
5075GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
5076GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
5077GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
5078
5079
5080
5081
5082
5083
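/*
 * Vector Permutation Instructions
 */

/* Vector Slide Instructions */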
5084#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
5085void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5086 CPURISCVState *env, uint32_t desc) \
5087{ \
5088 uint32_t vm = vext_vm(desc); \
5089 uint32_t vl = env->vl; \
5090 uint32_t esz = sizeof(ETYPE); \
5091 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5092 uint32_t vta = vext_vta(desc); \
5093 uint32_t vma = vext_vma(desc); \
5094 target_ulong offset = s1, i_min, i; \
5095 \
5096 VSTART_CHECK_EARLY_EXIT(env, vl); \
5097 \
5098 i_min = MAX(env->vstart, offset); \
5099 for (i = i_min; i < vl; i++) { \
5100 if (!vm && !vext_elem_mask(v0, i)) { \
5101 \
5102 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5103 continue; \
5104 } \
5105 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
5106 } \
5107 env->vstart = 0; \
5108 \
5109 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5110}
5111
5112
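/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */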
5113GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
5114GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
5115GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
5116GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
5117
5118#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
5119void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5120 CPURISCVState *env, uint32_t desc) \
5121{ \
5122 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
5123 uint32_t vm = vext_vm(desc); \
5124 uint32_t vl = env->vl; \
5125 uint32_t esz = sizeof(ETYPE); \
5126 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5127 uint32_t vta = vext_vta(desc); \
5128 uint32_t vma = vext_vma(desc); \
5129 target_ulong i_max, i_min, i; \
5130 \
5131 VSTART_CHECK_EARLY_EXIT(env, vl); \
5132 \
5133 i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl); \
5134 i_max = MAX(i_min, env->vstart); \
5135 for (i = env->vstart; i < i_max; ++i) { \
5136 if (!vm && !vext_elem_mask(v0, i)) { \
5137 \
5138 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5139 continue; \
5140 } \
5141 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
5142 } \
5143 \
5144 for (i = i_max; i < vl; ++i) { \
5145 if (!vm && !vext_elem_mask(v0, i)) { \
5146 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5147 continue; \
5148 } \
5149 *((ETYPE *)vd + H(i)) = 0; \
5150 } \
5151 \
5152 env->vstart = 0; \
5153 \
5154 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5155}
5156
5157
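/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */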
5158GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
5159GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
5160GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
5161GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
5162
5163#define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \
5164static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
5165 void *vs2, CPURISCVState *env, \
5166 uint32_t desc) \
5167{ \
5168 typedef uint##BITWIDTH##_t ETYPE; \
5169 uint32_t vm = vext_vm(desc); \
5170 uint32_t vl = env->vl; \
5171 uint32_t esz = sizeof(ETYPE); \
5172 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5173 uint32_t vta = vext_vta(desc); \
5174 uint32_t vma = vext_vma(desc); \
5175 uint32_t i; \
5176 \
5177 VSTART_CHECK_EARLY_EXIT(env, vl); \
5178 \
5179 for (i = env->vstart; i < vl; i++) { \
5180 if (!vm && !vext_elem_mask(v0, i)) { \
5181 \
5182 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5183 continue; \
5184 } \
5185 if (i == 0) { \
5186 *((ETYPE *)vd + H(i)) = s1; \
5187 } else { \
5188 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
5189 } \
5190 } \
5191 env->vstart = 0; \
5192 \
5193 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5194}
5195
5196GEN_VEXT_VSLIE1UP(8, H1)
5197GEN_VEXT_VSLIE1UP(16, H2)
5198GEN_VEXT_VSLIE1UP(32, H4)
5199GEN_VEXT_VSLIE1UP(64, H8)
5200
5201#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
5202void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5203 CPURISCVState *env, uint32_t desc) \
5204{ \
5205 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5206}
5207
5208
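/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */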
5209GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
5210GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
5211GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
5212GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
5213
5214#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
5215static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
5216 void *vs2, CPURISCVState *env, \
5217 uint32_t desc) \
5218{ \
5219 typedef uint##BITWIDTH##_t ETYPE; \
5220 uint32_t vm = vext_vm(desc); \
5221 uint32_t vl = env->vl; \
5222 uint32_t esz = sizeof(ETYPE); \
5223 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
5224 uint32_t vta = vext_vta(desc); \
5225 uint32_t vma = vext_vma(desc); \
5226 uint32_t i; \
5227 \
5228 VSTART_CHECK_EARLY_EXIT(env, vl); \
5229 \
5230 for (i = env->vstart; i < vl; i++) { \
5231 if (!vm && !vext_elem_mask(v0, i)) { \
5232 \
5233 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
5234 continue; \
5235 } \
5236 if (i == vl - 1) { \
5237 *((ETYPE *)vd + H(i)) = s1; \
5238 } else { \
5239 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
5240 } \
5241 } \
5242 env->vstart = 0; \
5243 \
5244 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
5245}
5246
5247GEN_VEXT_VSLIDE1DOWN(8, H1)
5248GEN_VEXT_VSLIDE1DOWN(16, H2)
5249GEN_VEXT_VSLIDE1DOWN(32, H4)
5250GEN_VEXT_VSLIDE1DOWN(64, H8)
5251
5252#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
5253void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5254 CPURISCVState *env, uint32_t desc) \
5255{ \
5256 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5257}
5258
5259
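/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */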
5260GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
5261GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
5262GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
5263GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
5264
5265
5266#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
5267void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5268 CPURISCVState *env, uint32_t desc) \
5269{ \
5270 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5271}
5272
5273
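/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */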
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)                          \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

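/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i + 1], vd[vl - 1] = f[rs1] */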
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
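/* Vector Register Gather Instruction */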
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}
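/* vgather.vv vd, vs2, vs1, vm # vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]] */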
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}
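/* vgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */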
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

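/* Vector Compress Instruction */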
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, num * esz, total_elems * esz);             \
}
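/*
 * vcompress.vm vd, vs2, vs1
 * Compress into vd the elements of vs2 whose vs1 mask bit is set.
 * E.g. with vl = 8 and vs1 = 0b10110010, elements vs2[1], vs2[4],
 * vs2[5] and vs2[7] are packed into vd[0..3]; the rest is tail.
 */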
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

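/* Vector Whole Register Move */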
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* vmv<nr>r.v vd, vs2: whole-register copy, restartable at vstart */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    if (startb >= maxsz) {
        env->vstart = 0;
        return;
    }

    if (HOST_BIG_ENDIAN && i % 8 != 0) {
        /*
         * H1() reverses byte addressing within each 64-bit unit on
         * big-endian hosts, so the unaligned head bytes i..j-1 form
         * one host-contiguous chunk starting at H1(j - 1).
         */
        uint32_t j = ROUND_UP(i, 8);
        memcpy((uint8_t *)vd + H1(j - 1),
               (uint8_t *)vs2 + H1(j - 1),
               j - i);
        i = j;
    }

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - i);

    env->vstart = 0;
}
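/* Vector Integer Extension */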
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                          \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));                \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
