qemu/target/riscv/vector_helper.c
   1/*
   2 * RISC-V Vector Extension Helpers for QEMU.
   3 *
   4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms and conditions of the GNU General Public License,
   8 * version 2 or later, as published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope it will be useful, but WITHOUT
  11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13 * more details.
  14 *
  15 * You should have received a copy of the GNU General Public License along with
  16 * this program.  If not, see <http://www.gnu.org/licenses/>.
  17 */
  18
  19#include "qemu/osdep.h"
  20#include "cpu.h"
  21#include "exec/memop.h"
  22#include "exec/exec-all.h"
  23#include "exec/helper-proto.h"
  24#include "fpu/softfloat.h"
  25#include "tcg/tcg-gvec-desc.h"
  26#include "internals.h"
  27#include <math.h>
  28
  29target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
  30                            target_ulong s2)
  31{
  32    int vlmax, vl;
  33    RISCVCPU *cpu = env_archcpu(env);
  34    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
  35    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
  36    bool vill = FIELD_EX64(s2, VTYPE, VILL);
  37    target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);
  38
  39    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
  40        /* only set vill bit. */
  41        env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
  42        env->vl = 0;
  43        env->vstart = 0;
  44        return 0;
  45    }
  46
  47    vlmax = vext_get_vlmax(cpu, s2);
  48    if (s1 <= vlmax) {
  49        vl = s1;
  50    } else {
  51        vl = vlmax;
  52    }
  53    env->vl = vl;
  54    env->vtype = s2;
  55    env->vstart = 0;
  56    return vl;
  57}
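
/*
 * Worked example (values assumed, for illustration only): if
 * vext_get_vlmax() yields 16 for the requested vtype, an AVL of s1 = 20
 * is clamped to vl = 16 while s1 = 5 gives vl = 5, i.e. vl = MIN(s1, vlmax).
 * An unsupported vtype (sew > ELEN, ediv != 0, reserved bits set, or vill
 * already set) instead leaves only vtype.vill set, with vl = 0.
 */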
  58
  59/*
  60 * Note that vector data is stored in host-endian 64-bit chunks,
   61 * so addressing units smaller than that need a host-endian fixup.
  62 */
  63#ifdef HOST_WORDS_BIGENDIAN
  64#define H1(x)   ((x) ^ 7)
  65#define H1_2(x) ((x) ^ 6)
  66#define H1_4(x) ((x) ^ 4)
  67#define H2(x)   ((x) ^ 3)
  68#define H4(x)   ((x) ^ 1)
  69#define H8(x)   ((x))
  70#else
  71#define H1(x)   (x)
  72#define H1_2(x) (x)
  73#define H1_4(x) (x)
  74#define H2(x)   (x)
  75#define H4(x)   (x)
  76#define H8(x)   (x)
  77#endif
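
/*
 * Worked example of the fixup (illustrative only): byte element 0 of a
 * vector register is the least-significant byte of the first 64-bit
 * chunk, so on a big-endian host it lives at host byte offset
 * H1(0) = 0 ^ 7 = 7, and element 8, the first byte of the next chunk,
 * maps to H1(8) = 8 ^ 7 = 15.  For 16-bit elements, H2(0) = 0 ^ 3 = 3
 * selects the low half-word of the chunk.  On little-endian hosts the
 * identity mapping is already correct.
 */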
  78
  79static inline uint32_t vext_nf(uint32_t desc)
  80{
  81    return FIELD_EX32(simd_data(desc), VDATA, NF);
  82}
  83
  84static inline uint32_t vext_mlen(uint32_t desc)
  85{
  86    return FIELD_EX32(simd_data(desc), VDATA, MLEN);
  87}
  88
  89static inline uint32_t vext_vm(uint32_t desc)
  90{
  91    return FIELD_EX32(simd_data(desc), VDATA, VM);
  92}
  93
  94static inline uint32_t vext_lmul(uint32_t desc)
  95{
  96    return FIELD_EX32(simd_data(desc), VDATA, LMUL);
  97}
  98
  99static uint32_t vext_wd(uint32_t desc)
 100{
 101    return (simd_data(desc) >> 11) & 0x1;
 102}
 103
 104/*
 105 * Get vector group length in bytes. Its range is [64, 2048].
 106 *
  107 * As simd_desc supports at most 256, the max vlen is 512 bits.
 108 * So vlen in bytes is encoded as maxsz.
 109 */
 110static inline uint32_t vext_maxsz(uint32_t desc)
 111{
 112    return simd_maxsz(desc) << vext_lmul(desc);
 113}
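
/*
 * Worked example (values assumed): with vlen = 128 bits, simd_maxsz(desc)
 * is 16 bytes; with LMUL = 4 the lmul field is 2, so vext_maxsz() returns
 * 16 << 2 = 64 bytes for the whole register group.  Callers then derive
 * vlmax = vext_maxsz(desc) / esz, e.g. 64 / 4 = 16 elements for SEW = 32.
 */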
 114
 115/*
  116 * This function checks watchpoints before the real load operation.
  117 *
  118 * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
  119 * In user mode, there is no watchpoint support now.
  120 *
  121 * It will trigger an exception if there is no mapping in the TLB
  122 * and the page table walk can't fill the TLB entry. Then the guest
  123 * software can return here after processing the exception, or never return.
 124 */
 125static void probe_pages(CPURISCVState *env, target_ulong addr,
 126                        target_ulong len, uintptr_t ra,
 127                        MMUAccessType access_type)
 128{
 129    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
 130    target_ulong curlen = MIN(pagelen, len);
 131
 132    probe_access(env, addr, curlen, access_type,
 133                 cpu_mmu_index(env, false), ra);
 134    if (len > curlen) {
 135        addr += curlen;
 136        curlen = len - curlen;
 137        probe_access(env, addr, curlen, access_type,
 138                     cpu_mmu_index(env, false), ra);
 139    }
 140}
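
/*
 * Example of the page split above (addresses assumed for illustration):
 * with 4 KiB pages, probing 16 bytes at addr = 0x2ff8 gives pagelen = 8,
 * so the first probe_access() covers 0x2ff8..0x2fff and the second covers
 * 0x3000..0x3007.  An access that fits within one page needs only the
 * first call.
 */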
 141
 142#ifdef HOST_WORDS_BIGENDIAN
 143static void vext_clear(void *tail, uint32_t cnt, uint32_t tot)
 144{
 145    /*
  146     * Split the remaining range into two parts.
  147     * The first part is in the last uint64_t unit.
  148     * The second part starts from the next uint64_t unit.
 149     */
 150    int part1 = 0, part2 = tot - cnt;
 151    if (cnt % 8) {
 152        part1 = 8 - (cnt % 8);
 153        part2 = tot - cnt - part1;
 154        memset(QEMU_ALIGN_PTR_DOWN(tail, 8), 0, part1);
 155        memset(QEMU_ALIGN_PTR_UP(tail, 8), 0, part2);
 156    } else {
 157        memset(tail, 0, part2);
 158    }
 159}
 160#else
 161static void vext_clear(void *tail, uint32_t cnt, uint32_t tot)
 162{
 163    memset(tail, 0, tot - cnt);
 164}
 165#endif
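
/*
 * Tail-clearing example (illustrative): for a 16-byte register group
 * (tot = 16) holding 32-bit elements with vl = 3, the callers below pass
 * cnt = 12, so the single tail element (4 bytes) is zeroed.  On a
 * big-endian host the leftover bytes inside the current 64-bit chunk and
 * any following chunks are not contiguous in host memory, which is why
 * the range is split into two memset() calls there.
 */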
 166
 167static void clearb(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
 168{
 169    int8_t *cur = ((int8_t *)vd + H1(idx));
 170    vext_clear(cur, cnt, tot);
 171}
 172
 173static void clearh(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
 174{
 175    int16_t *cur = ((int16_t *)vd + H2(idx));
 176    vext_clear(cur, cnt, tot);
 177}
 178
 179static void clearl(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
 180{
 181    int32_t *cur = ((int32_t *)vd + H4(idx));
 182    vext_clear(cur, cnt, tot);
 183}
 184
 185static void clearq(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
 186{
 187    int64_t *cur = (int64_t *)vd + idx;
 188    vext_clear(cur, cnt, tot);
 189}
 190
 191static inline void vext_set_elem_mask(void *v0, int mlen, int index,
 192        uint8_t value)
 193{
 194    int idx = (index * mlen) / 64;
 195    int pos = (index * mlen) % 64;
 196    uint64_t old = ((uint64_t *)v0)[idx];
 197    ((uint64_t *)v0)[idx] = deposit64(old, pos, mlen, value);
 198}
 199
 200static inline int vext_elem_mask(void *v0, int mlen, int index)
 201{
 202    int idx = (index * mlen) / 64;
 203    int pos = (index * mlen) % 64;
 204    return (((uint64_t *)v0)[idx] >> pos) & 1;
 205}
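
/*
 * Mask layout example (illustrative): with mlen = 8 (i.e. SEW/LMUL = 8),
 * the mask bit for element i sits at bit (i * 8) % 64 of 64-bit word
 * (i * 8) / 64 in v0, so element 9 is tested via bit 8 of word 1.  Only
 * the least-significant bit of each mlen-bit field is read back;
 * vext_set_elem_mask() fills the whole field using deposit64().
 */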
 206
  207/* element operations for load and store */
 208typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
 209                               uint32_t idx, void *vd, uintptr_t retaddr);
 210typedef void clear_fn(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot);
 211
 212#define GEN_VEXT_LD_ELEM(NAME, MTYPE, ETYPE, H, LDSUF)     \
 213static void NAME(CPURISCVState *env, abi_ptr addr,         \
 214                 uint32_t idx, void *vd, uintptr_t retaddr)\
 215{                                                          \
 216    MTYPE data;                                            \
 217    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
 218    data = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
 219    *cur = data;                                           \
 220}                                                          \
 221
 222GEN_VEXT_LD_ELEM(ldb_b, int8_t,  int8_t,  H1, ldsb)
 223GEN_VEXT_LD_ELEM(ldb_h, int8_t,  int16_t, H2, ldsb)
 224GEN_VEXT_LD_ELEM(ldb_w, int8_t,  int32_t, H4, ldsb)
 225GEN_VEXT_LD_ELEM(ldb_d, int8_t,  int64_t, H8, ldsb)
 226GEN_VEXT_LD_ELEM(ldh_h, int16_t, int16_t, H2, ldsw)
 227GEN_VEXT_LD_ELEM(ldh_w, int16_t, int32_t, H4, ldsw)
 228GEN_VEXT_LD_ELEM(ldh_d, int16_t, int64_t, H8, ldsw)
 229GEN_VEXT_LD_ELEM(ldw_w, int32_t, int32_t, H4, ldl)
 230GEN_VEXT_LD_ELEM(ldw_d, int32_t, int64_t, H8, ldl)
 231GEN_VEXT_LD_ELEM(lde_b, int8_t,  int8_t,  H1, ldsb)
 232GEN_VEXT_LD_ELEM(lde_h, int16_t, int16_t, H2, ldsw)
 233GEN_VEXT_LD_ELEM(lde_w, int32_t, int32_t, H4, ldl)
 234GEN_VEXT_LD_ELEM(lde_d, int64_t, int64_t, H8, ldq)
 235GEN_VEXT_LD_ELEM(ldbu_b, uint8_t,  uint8_t,  H1, ldub)
 236GEN_VEXT_LD_ELEM(ldbu_h, uint8_t,  uint16_t, H2, ldub)
 237GEN_VEXT_LD_ELEM(ldbu_w, uint8_t,  uint32_t, H4, ldub)
 238GEN_VEXT_LD_ELEM(ldbu_d, uint8_t,  uint64_t, H8, ldub)
 239GEN_VEXT_LD_ELEM(ldhu_h, uint16_t, uint16_t, H2, lduw)
 240GEN_VEXT_LD_ELEM(ldhu_w, uint16_t, uint32_t, H4, lduw)
 241GEN_VEXT_LD_ELEM(ldhu_d, uint16_t, uint64_t, H8, lduw)
 242GEN_VEXT_LD_ELEM(ldwu_w, uint32_t, uint32_t, H4, ldl)
 243GEN_VEXT_LD_ELEM(ldwu_d, uint32_t, uint64_t, H8, ldl)
 244
 245#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
 246static void NAME(CPURISCVState *env, abi_ptr addr,         \
 247                 uint32_t idx, void *vd, uintptr_t retaddr)\
 248{                                                          \
 249    ETYPE data = *((ETYPE *)vd + H(idx));                  \
 250    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
 251}
 252
 253GEN_VEXT_ST_ELEM(stb_b, int8_t,  H1, stb)
 254GEN_VEXT_ST_ELEM(stb_h, int16_t, H2, stb)
 255GEN_VEXT_ST_ELEM(stb_w, int32_t, H4, stb)
 256GEN_VEXT_ST_ELEM(stb_d, int64_t, H8, stb)
 257GEN_VEXT_ST_ELEM(sth_h, int16_t, H2, stw)
 258GEN_VEXT_ST_ELEM(sth_w, int32_t, H4, stw)
 259GEN_VEXT_ST_ELEM(sth_d, int64_t, H8, stw)
 260GEN_VEXT_ST_ELEM(stw_w, int32_t, H4, stl)
 261GEN_VEXT_ST_ELEM(stw_d, int64_t, H8, stl)
 262GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
 263GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
 264GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
 265GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
 266
 267/*
  268 *** stride: access vector elements from strided memory
 269 */
 270static void
 271vext_ldst_stride(void *vd, void *v0, target_ulong base,
 272                 target_ulong stride, CPURISCVState *env,
 273                 uint32_t desc, uint32_t vm,
 274                 vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem,
 275                 uint32_t esz, uint32_t msz, uintptr_t ra,
 276                 MMUAccessType access_type)
 277{
 278    uint32_t i, k;
 279    uint32_t nf = vext_nf(desc);
 280    uint32_t mlen = vext_mlen(desc);
 281    uint32_t vlmax = vext_maxsz(desc) / esz;
 282
  283    /* probe every access */
 284    for (i = 0; i < env->vl; i++) {
 285        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 286            continue;
 287        }
 288        probe_pages(env, base + stride * i, nf * msz, ra, access_type);
 289    }
 290    /* do real access */
 291    for (i = 0; i < env->vl; i++) {
 292        k = 0;
 293        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 294            continue;
 295        }
 296        while (k < nf) {
 297            target_ulong addr = base + stride * i + k * msz;
 298            ldst_elem(env, addr, i + k * vlmax, vd, ra);
 299            k++;
 300        }
 301    }
 302    /* clear tail elements */
 303    if (clear_elem) {
 304        for (k = 0; k < nf; k++) {
 305            clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
 306        }
 307    }
 308}
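
/*
 * Strided-access example (illustrative, an nf = 2 segment load assumed):
 * element i of field k is read from base + stride * i + k * msz and is
 * written to destination slot i + k * vlmax, i.e. field 0 fills the first
 * register group and field 1 the next one.  Inactive elements (mask bit
 * clear) are neither probed nor accessed.
 */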
 309
 310#define GEN_VEXT_LD_STRIDE(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN)       \
 311void HELPER(NAME)(void *vd, void * v0, target_ulong base,               \
 312                  target_ulong stride, CPURISCVState *env,              \
 313                  uint32_t desc)                                        \
 314{                                                                       \
 315    uint32_t vm = vext_vm(desc);                                        \
 316    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
 317                     CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),            \
 318                     GETPC(), MMU_DATA_LOAD);                           \
 319}
 320
 321GEN_VEXT_LD_STRIDE(vlsb_v_b,  int8_t,   int8_t,   ldb_b,  clearb)
 322GEN_VEXT_LD_STRIDE(vlsb_v_h,  int8_t,   int16_t,  ldb_h,  clearh)
 323GEN_VEXT_LD_STRIDE(vlsb_v_w,  int8_t,   int32_t,  ldb_w,  clearl)
 324GEN_VEXT_LD_STRIDE(vlsb_v_d,  int8_t,   int64_t,  ldb_d,  clearq)
 325GEN_VEXT_LD_STRIDE(vlsh_v_h,  int16_t,  int16_t,  ldh_h,  clearh)
 326GEN_VEXT_LD_STRIDE(vlsh_v_w,  int16_t,  int32_t,  ldh_w,  clearl)
 327GEN_VEXT_LD_STRIDE(vlsh_v_d,  int16_t,  int64_t,  ldh_d,  clearq)
 328GEN_VEXT_LD_STRIDE(vlsw_v_w,  int32_t,  int32_t,  ldw_w,  clearl)
 329GEN_VEXT_LD_STRIDE(vlsw_v_d,  int32_t,  int64_t,  ldw_d,  clearq)
 330GEN_VEXT_LD_STRIDE(vlse_v_b,  int8_t,   int8_t,   lde_b,  clearb)
 331GEN_VEXT_LD_STRIDE(vlse_v_h,  int16_t,  int16_t,  lde_h,  clearh)
 332GEN_VEXT_LD_STRIDE(vlse_v_w,  int32_t,  int32_t,  lde_w,  clearl)
 333GEN_VEXT_LD_STRIDE(vlse_v_d,  int64_t,  int64_t,  lde_d,  clearq)
 334GEN_VEXT_LD_STRIDE(vlsbu_v_b, uint8_t,  uint8_t,  ldbu_b, clearb)
 335GEN_VEXT_LD_STRIDE(vlsbu_v_h, uint8_t,  uint16_t, ldbu_h, clearh)
 336GEN_VEXT_LD_STRIDE(vlsbu_v_w, uint8_t,  uint32_t, ldbu_w, clearl)
 337GEN_VEXT_LD_STRIDE(vlsbu_v_d, uint8_t,  uint64_t, ldbu_d, clearq)
 338GEN_VEXT_LD_STRIDE(vlshu_v_h, uint16_t, uint16_t, ldhu_h, clearh)
 339GEN_VEXT_LD_STRIDE(vlshu_v_w, uint16_t, uint32_t, ldhu_w, clearl)
 340GEN_VEXT_LD_STRIDE(vlshu_v_d, uint16_t, uint64_t, ldhu_d, clearq)
 341GEN_VEXT_LD_STRIDE(vlswu_v_w, uint32_t, uint32_t, ldwu_w, clearl)
 342GEN_VEXT_LD_STRIDE(vlswu_v_d, uint32_t, uint64_t, ldwu_d, clearq)
 343
 344#define GEN_VEXT_ST_STRIDE(NAME, MTYPE, ETYPE, STORE_FN)                \
 345void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
 346                  target_ulong stride, CPURISCVState *env,              \
 347                  uint32_t desc)                                        \
 348{                                                                       \
 349    uint32_t vm = vext_vm(desc);                                        \
 350    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
 351                     NULL, sizeof(ETYPE), sizeof(MTYPE),                \
 352                     GETPC(), MMU_DATA_STORE);                          \
 353}
 354
 355GEN_VEXT_ST_STRIDE(vssb_v_b, int8_t,  int8_t,  stb_b)
 356GEN_VEXT_ST_STRIDE(vssb_v_h, int8_t,  int16_t, stb_h)
 357GEN_VEXT_ST_STRIDE(vssb_v_w, int8_t,  int32_t, stb_w)
 358GEN_VEXT_ST_STRIDE(vssb_v_d, int8_t,  int64_t, stb_d)
 359GEN_VEXT_ST_STRIDE(vssh_v_h, int16_t, int16_t, sth_h)
 360GEN_VEXT_ST_STRIDE(vssh_v_w, int16_t, int32_t, sth_w)
 361GEN_VEXT_ST_STRIDE(vssh_v_d, int16_t, int64_t, sth_d)
 362GEN_VEXT_ST_STRIDE(vssw_v_w, int32_t, int32_t, stw_w)
 363GEN_VEXT_ST_STRIDE(vssw_v_d, int32_t, int64_t, stw_d)
 364GEN_VEXT_ST_STRIDE(vsse_v_b, int8_t,  int8_t,  ste_b)
 365GEN_VEXT_ST_STRIDE(vsse_v_h, int16_t, int16_t, ste_h)
 366GEN_VEXT_ST_STRIDE(vsse_v_w, int32_t, int32_t, ste_w)
 367GEN_VEXT_ST_STRIDE(vsse_v_d, int64_t, int64_t, ste_d)
 368
 369/*
 370 *** unit-stride: access elements stored contiguously in memory
 371 */
 372
  373/* unmasked unit-stride load and store operations */
 374static void
 375vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
 376             vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem,
 377             uint32_t esz, uint32_t msz, uintptr_t ra,
 378             MMUAccessType access_type)
 379{
 380    uint32_t i, k;
 381    uint32_t nf = vext_nf(desc);
 382    uint32_t vlmax = vext_maxsz(desc) / esz;
 383
 384    /* probe every access */
 385    probe_pages(env, base, env->vl * nf * msz, ra, access_type);
 386    /* load bytes from guest memory */
 387    for (i = 0; i < env->vl; i++) {
 388        k = 0;
 389        while (k < nf) {
 390            target_ulong addr = base + (i * nf + k) * msz;
 391            ldst_elem(env, addr, i + k * vlmax, vd, ra);
 392            k++;
 393        }
 394    }
 395    /* clear tail elements */
 396    if (clear_elem) {
 397        for (k = 0; k < nf; k++) {
 398            clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
 399        }
 400    }
 401}
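
/*
 * Unit-stride example (illustrative): for an nf = 2 segment load, element
 * i of field k sits at base + (i * nf + k) * msz, so memory holds the
 * fields interleaved ({e0.f0, e0.f1, e1.f0, e1.f1, ...}) while the
 * destination register groups are filled per field via index i + k * vlmax.
 */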
 402
 403/*
  404 * A masked unit-stride load or store operation is a special case of a strided
  405 * operation with stride = NF * sizeof(MTYPE).
 406 */
 407
 408#define GEN_VEXT_LD_US(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN)           \
 409void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
 410                         CPURISCVState *env, uint32_t desc)             \
 411{                                                                       \
 412    uint32_t stride = vext_nf(desc) * sizeof(MTYPE);                    \
 413    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
 414                     CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),            \
 415                     GETPC(), MMU_DATA_LOAD);                           \
 416}                                                                       \
 417                                                                        \
 418void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
 419                  CPURISCVState *env, uint32_t desc)                    \
 420{                                                                       \
 421    vext_ldst_us(vd, base, env, desc, LOAD_FN, CLEAR_FN,                \
 422                 sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_LOAD); \
 423}
 424
 425GEN_VEXT_LD_US(vlb_v_b,  int8_t,   int8_t,   ldb_b,  clearb)
 426GEN_VEXT_LD_US(vlb_v_h,  int8_t,   int16_t,  ldb_h,  clearh)
 427GEN_VEXT_LD_US(vlb_v_w,  int8_t,   int32_t,  ldb_w,  clearl)
 428GEN_VEXT_LD_US(vlb_v_d,  int8_t,   int64_t,  ldb_d,  clearq)
 429GEN_VEXT_LD_US(vlh_v_h,  int16_t,  int16_t,  ldh_h,  clearh)
 430GEN_VEXT_LD_US(vlh_v_w,  int16_t,  int32_t,  ldh_w,  clearl)
 431GEN_VEXT_LD_US(vlh_v_d,  int16_t,  int64_t,  ldh_d,  clearq)
 432GEN_VEXT_LD_US(vlw_v_w,  int32_t,  int32_t,  ldw_w,  clearl)
 433GEN_VEXT_LD_US(vlw_v_d,  int32_t,  int64_t,  ldw_d,  clearq)
 434GEN_VEXT_LD_US(vle_v_b,  int8_t,   int8_t,   lde_b,  clearb)
 435GEN_VEXT_LD_US(vle_v_h,  int16_t,  int16_t,  lde_h,  clearh)
 436GEN_VEXT_LD_US(vle_v_w,  int32_t,  int32_t,  lde_w,  clearl)
 437GEN_VEXT_LD_US(vle_v_d,  int64_t,  int64_t,  lde_d,  clearq)
 438GEN_VEXT_LD_US(vlbu_v_b, uint8_t,  uint8_t,  ldbu_b, clearb)
 439GEN_VEXT_LD_US(vlbu_v_h, uint8_t,  uint16_t, ldbu_h, clearh)
 440GEN_VEXT_LD_US(vlbu_v_w, uint8_t,  uint32_t, ldbu_w, clearl)
 441GEN_VEXT_LD_US(vlbu_v_d, uint8_t,  uint64_t, ldbu_d, clearq)
 442GEN_VEXT_LD_US(vlhu_v_h, uint16_t, uint16_t, ldhu_h, clearh)
 443GEN_VEXT_LD_US(vlhu_v_w, uint16_t, uint32_t, ldhu_w, clearl)
 444GEN_VEXT_LD_US(vlhu_v_d, uint16_t, uint64_t, ldhu_d, clearq)
 445GEN_VEXT_LD_US(vlwu_v_w, uint32_t, uint32_t, ldwu_w, clearl)
 446GEN_VEXT_LD_US(vlwu_v_d, uint32_t, uint64_t, ldwu_d, clearq)
 447
 448#define GEN_VEXT_ST_US(NAME, MTYPE, ETYPE, STORE_FN)                    \
 449void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
 450                         CPURISCVState *env, uint32_t desc)             \
 451{                                                                       \
 452    uint32_t stride = vext_nf(desc) * sizeof(MTYPE);                    \
 453    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
 454                     NULL, sizeof(ETYPE), sizeof(MTYPE),                \
 455                     GETPC(), MMU_DATA_STORE);                          \
 456}                                                                       \
 457                                                                        \
 458void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
 459                  CPURISCVState *env, uint32_t desc)                    \
 460{                                                                       \
 461    vext_ldst_us(vd, base, env, desc, STORE_FN, NULL,                   \
 462                 sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_STORE);\
 463}
 464
 465GEN_VEXT_ST_US(vsb_v_b, int8_t,  int8_t , stb_b)
 466GEN_VEXT_ST_US(vsb_v_h, int8_t,  int16_t, stb_h)
 467GEN_VEXT_ST_US(vsb_v_w, int8_t,  int32_t, stb_w)
 468GEN_VEXT_ST_US(vsb_v_d, int8_t,  int64_t, stb_d)
 469GEN_VEXT_ST_US(vsh_v_h, int16_t, int16_t, sth_h)
 470GEN_VEXT_ST_US(vsh_v_w, int16_t, int32_t, sth_w)
 471GEN_VEXT_ST_US(vsh_v_d, int16_t, int64_t, sth_d)
 472GEN_VEXT_ST_US(vsw_v_w, int32_t, int32_t, stw_w)
 473GEN_VEXT_ST_US(vsw_v_d, int32_t, int64_t, stw_d)
 474GEN_VEXT_ST_US(vse_v_b, int8_t,  int8_t , ste_b)
 475GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h)
 476GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w)
 477GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d)
 478
 479/*
  480 *** index: access vector elements from indexed memory
 481 */
 482typedef target_ulong vext_get_index_addr(target_ulong base,
 483        uint32_t idx, void *vs2);
 484
 485#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
 486static target_ulong NAME(target_ulong base,            \
 487                         uint32_t idx, void *vs2)      \
 488{                                                      \
 489    return (base + *((ETYPE *)vs2 + H(idx)));          \
 490}
 491
 492GEN_VEXT_GET_INDEX_ADDR(idx_b, int8_t,  H1)
 493GEN_VEXT_GET_INDEX_ADDR(idx_h, int16_t, H2)
 494GEN_VEXT_GET_INDEX_ADDR(idx_w, int32_t, H4)
 495GEN_VEXT_GET_INDEX_ADDR(idx_d, int64_t, H8)
 496
 497static inline void
 498vext_ldst_index(void *vd, void *v0, target_ulong base,
 499                void *vs2, CPURISCVState *env, uint32_t desc,
 500                vext_get_index_addr get_index_addr,
 501                vext_ldst_elem_fn *ldst_elem,
 502                clear_fn *clear_elem,
 503                uint32_t esz, uint32_t msz, uintptr_t ra,
 504                MMUAccessType access_type)
 505{
 506    uint32_t i, k;
 507    uint32_t nf = vext_nf(desc);
 508    uint32_t vm = vext_vm(desc);
 509    uint32_t mlen = vext_mlen(desc);
 510    uint32_t vlmax = vext_maxsz(desc) / esz;
 511
  512    /* probe every access */
 513    for (i = 0; i < env->vl; i++) {
 514        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 515            continue;
 516        }
 517        probe_pages(env, get_index_addr(base, i, vs2), nf * msz, ra,
 518                    access_type);
 519    }
 520    /* load bytes from guest memory */
 521    for (i = 0; i < env->vl; i++) {
 522        k = 0;
 523        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 524            continue;
 525        }
 526        while (k < nf) {
 527            abi_ptr addr = get_index_addr(base, i, vs2) + k * msz;
 528            ldst_elem(env, addr, i + k * vlmax, vd, ra);
 529            k++;
 530        }
 531    }
 532    /* clear tail elements */
 533    if (clear_elem) {
 534        for (k = 0; k < nf; k++) {
 535            clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
 536        }
 537    }
 538}
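
/*
 * Indexed (gather/scatter) example (illustrative): the address of element
 * i is base + vs2[i], with vs2[i] read at the index element width via
 * idx_b ... idx_d above, plus k * msz for segment field k.  E.g. with
 * idx_w and vs2 = {0, 64, 8, ...} (values assumed), element 2 is accessed
 * at base + 8.  Each active element is probed separately because the
 * accesses need not be contiguous.
 */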
 539
 540#define GEN_VEXT_LD_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, LOAD_FN, CLEAR_FN) \
 541void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
 542                  void *vs2, CPURISCVState *env, uint32_t desc)            \
 543{                                                                          \
 544    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
 545                    LOAD_FN, CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),       \
 546                    GETPC(), MMU_DATA_LOAD);                               \
 547}
 548
 549GEN_VEXT_LD_INDEX(vlxb_v_b,  int8_t,   int8_t,   idx_b, ldb_b,  clearb)
 550GEN_VEXT_LD_INDEX(vlxb_v_h,  int8_t,   int16_t,  idx_h, ldb_h,  clearh)
 551GEN_VEXT_LD_INDEX(vlxb_v_w,  int8_t,   int32_t,  idx_w, ldb_w,  clearl)
 552GEN_VEXT_LD_INDEX(vlxb_v_d,  int8_t,   int64_t,  idx_d, ldb_d,  clearq)
 553GEN_VEXT_LD_INDEX(vlxh_v_h,  int16_t,  int16_t,  idx_h, ldh_h,  clearh)
 554GEN_VEXT_LD_INDEX(vlxh_v_w,  int16_t,  int32_t,  idx_w, ldh_w,  clearl)
 555GEN_VEXT_LD_INDEX(vlxh_v_d,  int16_t,  int64_t,  idx_d, ldh_d,  clearq)
 556GEN_VEXT_LD_INDEX(vlxw_v_w,  int32_t,  int32_t,  idx_w, ldw_w,  clearl)
 557GEN_VEXT_LD_INDEX(vlxw_v_d,  int32_t,  int64_t,  idx_d, ldw_d,  clearq)
 558GEN_VEXT_LD_INDEX(vlxe_v_b,  int8_t,   int8_t,   idx_b, lde_b,  clearb)
 559GEN_VEXT_LD_INDEX(vlxe_v_h,  int16_t,  int16_t,  idx_h, lde_h,  clearh)
 560GEN_VEXT_LD_INDEX(vlxe_v_w,  int32_t,  int32_t,  idx_w, lde_w,  clearl)
 561GEN_VEXT_LD_INDEX(vlxe_v_d,  int64_t,  int64_t,  idx_d, lde_d,  clearq)
 562GEN_VEXT_LD_INDEX(vlxbu_v_b, uint8_t,  uint8_t,  idx_b, ldbu_b, clearb)
 563GEN_VEXT_LD_INDEX(vlxbu_v_h, uint8_t,  uint16_t, idx_h, ldbu_h, clearh)
 564GEN_VEXT_LD_INDEX(vlxbu_v_w, uint8_t,  uint32_t, idx_w, ldbu_w, clearl)
 565GEN_VEXT_LD_INDEX(vlxbu_v_d, uint8_t,  uint64_t, idx_d, ldbu_d, clearq)
 566GEN_VEXT_LD_INDEX(vlxhu_v_h, uint16_t, uint16_t, idx_h, ldhu_h, clearh)
 567GEN_VEXT_LD_INDEX(vlxhu_v_w, uint16_t, uint32_t, idx_w, ldhu_w, clearl)
 568GEN_VEXT_LD_INDEX(vlxhu_v_d, uint16_t, uint64_t, idx_d, ldhu_d, clearq)
 569GEN_VEXT_LD_INDEX(vlxwu_v_w, uint32_t, uint32_t, idx_w, ldwu_w, clearl)
 570GEN_VEXT_LD_INDEX(vlxwu_v_d, uint32_t, uint64_t, idx_d, ldwu_d, clearq)
 571
 572#define GEN_VEXT_ST_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, STORE_FN)\
 573void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
 574                  void *vs2, CPURISCVState *env, uint32_t desc)  \
 575{                                                                \
 576    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
 577                    STORE_FN, NULL, sizeof(ETYPE), sizeof(MTYPE),\
 578                    GETPC(), MMU_DATA_STORE);                    \
 579}
 580
 581GEN_VEXT_ST_INDEX(vsxb_v_b, int8_t,  int8_t,  idx_b, stb_b)
 582GEN_VEXT_ST_INDEX(vsxb_v_h, int8_t,  int16_t, idx_h, stb_h)
 583GEN_VEXT_ST_INDEX(vsxb_v_w, int8_t,  int32_t, idx_w, stb_w)
 584GEN_VEXT_ST_INDEX(vsxb_v_d, int8_t,  int64_t, idx_d, stb_d)
 585GEN_VEXT_ST_INDEX(vsxh_v_h, int16_t, int16_t, idx_h, sth_h)
 586GEN_VEXT_ST_INDEX(vsxh_v_w, int16_t, int32_t, idx_w, sth_w)
 587GEN_VEXT_ST_INDEX(vsxh_v_d, int16_t, int64_t, idx_d, sth_d)
 588GEN_VEXT_ST_INDEX(vsxw_v_w, int32_t, int32_t, idx_w, stw_w)
 589GEN_VEXT_ST_INDEX(vsxw_v_d, int32_t, int64_t, idx_d, stw_d)
 590GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t,  int8_t,  idx_b, ste_b)
 591GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h)
 592GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w)
 593GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d)
 594
 595/*
  596 *** unit-stride fault-only-first load instructions
 597 */
 598static inline void
 599vext_ldff(void *vd, void *v0, target_ulong base,
 600          CPURISCVState *env, uint32_t desc,
 601          vext_ldst_elem_fn *ldst_elem,
 602          clear_fn *clear_elem,
 603          uint32_t esz, uint32_t msz, uintptr_t ra)
 604{
 605    void *host;
 606    uint32_t i, k, vl = 0;
 607    uint32_t mlen = vext_mlen(desc);
 608    uint32_t nf = vext_nf(desc);
 609    uint32_t vm = vext_vm(desc);
 610    uint32_t vlmax = vext_maxsz(desc) / esz;
 611    target_ulong addr, offset, remain;
 612
  613    /* probe every access */
 614    for (i = 0; i < env->vl; i++) {
 615        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 616            continue;
 617        }
 618        addr = base + nf * i * msz;
 619        if (i == 0) {
 620            probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
 621        } else {
 622            /* if it triggers an exception, no need to check watchpoint */
 623            remain = nf * msz;
 624            while (remain > 0) {
 625                offset = -(addr | TARGET_PAGE_MASK);
 626                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
 627                                         cpu_mmu_index(env, false));
 628                if (host) {
 629#ifdef CONFIG_USER_ONLY
 630                    if (page_check_range(addr, nf * msz, PAGE_READ) < 0) {
 631                        vl = i;
 632                        goto ProbeSuccess;
 633                    }
 634#else
 635                    probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
 636#endif
 637                } else {
 638                    vl = i;
 639                    goto ProbeSuccess;
 640                }
  641                if (remain <= offset) {
 642                    break;
 643                }
 644                remain -= offset;
 645                addr += offset;
 646            }
 647        }
 648    }
 649ProbeSuccess:
 650    /* load bytes from guest memory */
 651    if (vl != 0) {
 652        env->vl = vl;
 653    }
 654    for (i = 0; i < env->vl; i++) {
 655        k = 0;
 656        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 657            continue;
 658        }
 659        while (k < nf) {
 660            target_ulong addr = base + (i * nf + k) * msz;
 661            ldst_elem(env, addr, i + k * vlmax, vd, ra);
 662            k++;
 663        }
 664    }
 665    /* clear tail elements */
 666    if (vl != 0) {
 667        return;
 668    }
 669    for (k = 0; k < nf; k++) {
 670        clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
 671    }
 672}
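
/*
 * Fault-only-first example (illustrative): if element 0 faults, the trap
 * is raised as usual; if a later element (say i = 5) hits an unmapped
 * page during the probe loop above, no trap is taken and vl is reduced to
 * 5 before the actual loads, so only elements 0..4 are loaded.
 */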
 673
 674#define GEN_VEXT_LDFF(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN)     \
 675void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
 676                  CPURISCVState *env, uint32_t desc)             \
 677{                                                                \
 678    vext_ldff(vd, v0, base, env, desc, LOAD_FN, CLEAR_FN,        \
 679              sizeof(ETYPE), sizeof(MTYPE), GETPC());            \
 680}
 681
 682GEN_VEXT_LDFF(vlbff_v_b,  int8_t,   int8_t,   ldb_b,  clearb)
 683GEN_VEXT_LDFF(vlbff_v_h,  int8_t,   int16_t,  ldb_h,  clearh)
 684GEN_VEXT_LDFF(vlbff_v_w,  int8_t,   int32_t,  ldb_w,  clearl)
 685GEN_VEXT_LDFF(vlbff_v_d,  int8_t,   int64_t,  ldb_d,  clearq)
 686GEN_VEXT_LDFF(vlhff_v_h,  int16_t,  int16_t,  ldh_h,  clearh)
 687GEN_VEXT_LDFF(vlhff_v_w,  int16_t,  int32_t,  ldh_w,  clearl)
 688GEN_VEXT_LDFF(vlhff_v_d,  int16_t,  int64_t,  ldh_d,  clearq)
 689GEN_VEXT_LDFF(vlwff_v_w,  int32_t,  int32_t,  ldw_w,  clearl)
 690GEN_VEXT_LDFF(vlwff_v_d,  int32_t,  int64_t,  ldw_d,  clearq)
 691GEN_VEXT_LDFF(vleff_v_b,  int8_t,   int8_t,   lde_b,  clearb)
 692GEN_VEXT_LDFF(vleff_v_h,  int16_t,  int16_t,  lde_h,  clearh)
 693GEN_VEXT_LDFF(vleff_v_w,  int32_t,  int32_t,  lde_w,  clearl)
 694GEN_VEXT_LDFF(vleff_v_d,  int64_t,  int64_t,  lde_d,  clearq)
 695GEN_VEXT_LDFF(vlbuff_v_b, uint8_t,  uint8_t,  ldbu_b, clearb)
 696GEN_VEXT_LDFF(vlbuff_v_h, uint8_t,  uint16_t, ldbu_h, clearh)
 697GEN_VEXT_LDFF(vlbuff_v_w, uint8_t,  uint32_t, ldbu_w, clearl)
 698GEN_VEXT_LDFF(vlbuff_v_d, uint8_t,  uint64_t, ldbu_d, clearq)
 699GEN_VEXT_LDFF(vlhuff_v_h, uint16_t, uint16_t, ldhu_h, clearh)
 700GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w, clearl)
 701GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d, clearq)
 702GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w, clearl)
 703GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d, clearq)
 704
 705/*
 706 *** Vector AMO Operations (Zvamo)
 707 */
 708typedef void vext_amo_noatomic_fn(void *vs3, target_ulong addr,
 709                                  uint32_t wd, uint32_t idx, CPURISCVState *env,
 710                                  uintptr_t retaddr);
 711
  712/* no atomic operation for vector atomic instructions */
 713#define DO_SWAP(N, M) (M)
 714#define DO_AND(N, M)  (N & M)
 715#define DO_XOR(N, M)  (N ^ M)
 716#define DO_OR(N, M)   (N | M)
 717#define DO_ADD(N, M)  (N + M)
 718
 719#define GEN_VEXT_AMO_NOATOMIC_OP(NAME, ESZ, MSZ, H, DO_OP, SUF) \
 720static void                                                     \
 721vext_##NAME##_noatomic_op(void *vs3, target_ulong addr,         \
 722                          uint32_t wd, uint32_t idx,            \
 723                          CPURISCVState *env, uintptr_t retaddr)\
 724{                                                               \
 725    typedef int##ESZ##_t ETYPE;                                 \
 726    typedef int##MSZ##_t MTYPE;                                 \
 727    typedef uint##MSZ##_t UMTYPE __attribute__((unused));       \
 728    ETYPE *pe3 = (ETYPE *)vs3 + H(idx);                         \
 729    MTYPE  a = cpu_ld##SUF##_data(env, addr), b = *pe3;         \
 730                                                                \
 731    cpu_st##SUF##_data(env, addr, DO_OP(a, b));                 \
 732    if (wd) {                                                   \
 733        *pe3 = a;                                               \
 734    }                                                           \
 735}
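
/*
 * Sketch of what one generated op does, using vamoaddw_v_w as an
 * illustrative example; the sequence is a plain read-modify-write rather
 * than a host atomic operation:
 *
 *     a = cpu_ldl_data(env, addr);            read the old memory value
 *     cpu_stl_data(env, addr, a + vs3[i]);    store the DO_ADD result
 *     if (wd) {
 *         vs3[i] = a;                         write the old value back
 *     }
 */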
 736
 737/* Signed min/max */
 738#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
 739#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))
 740
 741/* Unsigned min/max */
 742#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
 743#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
 744
 745GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_w, 32, 32, H4, DO_SWAP, l)
 746GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_w,  32, 32, H4, DO_ADD,  l)
 747GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_w,  32, 32, H4, DO_XOR,  l)
 748GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_w,  32, 32, H4, DO_AND,  l)
 749GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_w,   32, 32, H4, DO_OR,   l)
 750GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_w,  32, 32, H4, DO_MIN,  l)
 751GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_w,  32, 32, H4, DO_MAX,  l)
 752GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_w, 32, 32, H4, DO_MINU, l)
 753GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_w, 32, 32, H4, DO_MAXU, l)
 754GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_d, 64, 32, H8, DO_SWAP, l)
 755GEN_VEXT_AMO_NOATOMIC_OP(vamoswapd_v_d, 64, 64, H8, DO_SWAP, q)
 756GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_d,  64, 32, H8, DO_ADD,  l)
 757GEN_VEXT_AMO_NOATOMIC_OP(vamoaddd_v_d,  64, 64, H8, DO_ADD,  q)
 758GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_d,  64, 32, H8, DO_XOR,  l)
 759GEN_VEXT_AMO_NOATOMIC_OP(vamoxord_v_d,  64, 64, H8, DO_XOR,  q)
 760GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_d,  64, 32, H8, DO_AND,  l)
 761GEN_VEXT_AMO_NOATOMIC_OP(vamoandd_v_d,  64, 64, H8, DO_AND,  q)
 762GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_d,   64, 32, H8, DO_OR,   l)
 763GEN_VEXT_AMO_NOATOMIC_OP(vamoord_v_d,   64, 64, H8, DO_OR,   q)
 764GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_d,  64, 32, H8, DO_MIN,  l)
 765GEN_VEXT_AMO_NOATOMIC_OP(vamomind_v_d,  64, 64, H8, DO_MIN,  q)
 766GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_d,  64, 32, H8, DO_MAX,  l)
 767GEN_VEXT_AMO_NOATOMIC_OP(vamomaxd_v_d,  64, 64, H8, DO_MAX,  q)
 768GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_d, 64, 32, H8, DO_MINU, l)
 769GEN_VEXT_AMO_NOATOMIC_OP(vamominud_v_d, 64, 64, H8, DO_MINU, q)
 770GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_d, 64, 32, H8, DO_MAXU, l)
 771GEN_VEXT_AMO_NOATOMIC_OP(vamomaxud_v_d, 64, 64, H8, DO_MAXU, q)
 772
 773static inline void
 774vext_amo_noatomic(void *vs3, void *v0, target_ulong base,
 775                  void *vs2, CPURISCVState *env, uint32_t desc,
 776                  vext_get_index_addr get_index_addr,
 777                  vext_amo_noatomic_fn *noatomic_op,
 778                  clear_fn *clear_elem,
 779                  uint32_t esz, uint32_t msz, uintptr_t ra)
 780{
 781    uint32_t i;
 782    target_long addr;
 783    uint32_t wd = vext_wd(desc);
 784    uint32_t vm = vext_vm(desc);
 785    uint32_t mlen = vext_mlen(desc);
 786    uint32_t vlmax = vext_maxsz(desc) / esz;
 787
 788    for (i = 0; i < env->vl; i++) {
 789        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 790            continue;
 791        }
 792        probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_LOAD);
 793        probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_STORE);
 794    }
 795    for (i = 0; i < env->vl; i++) {
 796        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 797            continue;
 798        }
 799        addr = get_index_addr(base, i, vs2);
 800        noatomic_op(vs3, addr, wd, i, env, ra);
 801    }
 802    clear_elem(vs3, env->vl, env->vl * esz, vlmax * esz);
 803}
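
/*
 * Note on the double probe above: every active element's address is
 * probed for both MMU_DATA_LOAD and MMU_DATA_STORE before any memory is
 * modified, so a faulting vector AMO raises its exception before guest
 * memory or the destination register group has been partially updated by
 * this helper.
 */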
 804
 805#define GEN_VEXT_AMO(NAME, MTYPE, ETYPE, INDEX_FN, CLEAR_FN)    \
 806void HELPER(NAME)(void *vs3, void *v0, target_ulong base,       \
 807                  void *vs2, CPURISCVState *env, uint32_t desc) \
 808{                                                               \
 809    vext_amo_noatomic(vs3, v0, base, vs2, env, desc,            \
 810                      INDEX_FN, vext_##NAME##_noatomic_op,      \
 811                      CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),   \
 812                      GETPC());                                 \
 813}
 814
 815GEN_VEXT_AMO(vamoswapw_v_d, int32_t,  int64_t,  idx_d, clearq)
 816GEN_VEXT_AMO(vamoswapd_v_d, int64_t,  int64_t,  idx_d, clearq)
 817GEN_VEXT_AMO(vamoaddw_v_d,  int32_t,  int64_t,  idx_d, clearq)
 818GEN_VEXT_AMO(vamoaddd_v_d,  int64_t,  int64_t,  idx_d, clearq)
 819GEN_VEXT_AMO(vamoxorw_v_d,  int32_t,  int64_t,  idx_d, clearq)
 820GEN_VEXT_AMO(vamoxord_v_d,  int64_t,  int64_t,  idx_d, clearq)
 821GEN_VEXT_AMO(vamoandw_v_d,  int32_t,  int64_t,  idx_d, clearq)
 822GEN_VEXT_AMO(vamoandd_v_d,  int64_t,  int64_t,  idx_d, clearq)
 823GEN_VEXT_AMO(vamoorw_v_d,   int32_t,  int64_t,  idx_d, clearq)
 824GEN_VEXT_AMO(vamoord_v_d,   int64_t,  int64_t,  idx_d, clearq)
 825GEN_VEXT_AMO(vamominw_v_d,  int32_t,  int64_t,  idx_d, clearq)
 826GEN_VEXT_AMO(vamomind_v_d,  int64_t,  int64_t,  idx_d, clearq)
 827GEN_VEXT_AMO(vamomaxw_v_d,  int32_t,  int64_t,  idx_d, clearq)
 828GEN_VEXT_AMO(vamomaxd_v_d,  int64_t,  int64_t,  idx_d, clearq)
 829GEN_VEXT_AMO(vamominuw_v_d, uint32_t, uint64_t, idx_d, clearq)
 830GEN_VEXT_AMO(vamominud_v_d, uint64_t, uint64_t, idx_d, clearq)
 831GEN_VEXT_AMO(vamomaxuw_v_d, uint32_t, uint64_t, idx_d, clearq)
 832GEN_VEXT_AMO(vamomaxud_v_d, uint64_t, uint64_t, idx_d, clearq)
 833GEN_VEXT_AMO(vamoswapw_v_w, int32_t,  int32_t,  idx_w, clearl)
 834GEN_VEXT_AMO(vamoaddw_v_w,  int32_t,  int32_t,  idx_w, clearl)
 835GEN_VEXT_AMO(vamoxorw_v_w,  int32_t,  int32_t,  idx_w, clearl)
 836GEN_VEXT_AMO(vamoandw_v_w,  int32_t,  int32_t,  idx_w, clearl)
 837GEN_VEXT_AMO(vamoorw_v_w,   int32_t,  int32_t,  idx_w, clearl)
 838GEN_VEXT_AMO(vamominw_v_w,  int32_t,  int32_t,  idx_w, clearl)
 839GEN_VEXT_AMO(vamomaxw_v_w,  int32_t,  int32_t,  idx_w, clearl)
 840GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl)
 841GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl)
 842
 843/*
 844 *** Vector Integer Arithmetic Instructions
 845 */
 846
 847/* expand macro args before macro */
 848#define RVVCALL(macro, ...)  macro(__VA_ARGS__)
 849
 850/* (TD, T1, T2, TX1, TX2) */
 851#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
 852#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
 853#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
 854#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
 855#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
 856#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
 857#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
 858#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
 859#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
 860#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
 861#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
 862#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
 863#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
 864#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
 865#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
 866#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
 867#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
 868#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
 869#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
 870#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
 871#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
 872#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
 873#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
 874#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
 875#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
 876#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
 877#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
 878#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
 879#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
 880#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
 881
 882/* operation of two vector elements */
 883typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
 884
 885#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
 886static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
 887{                                                               \
 888    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
 889    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
 890    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
 891}
 892#define DO_SUB(N, M) (N - M)
 893#define DO_RSUB(N, M) (M - N)
 894
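/*
 * Expansion example (illustrative): RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B,
 * H1, H1, H1, DO_ADD) below generates
 *
 *     static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
 *     {
 *         int8_t s1 = *((int8_t *)vs1 + H1(i));
 *         int8_t s2 = *((int8_t *)vs2 + H1(i));
 *         *((int8_t *)vd + H1(i)) = s2 + s1;
 *     }
 *
 * i.e. one scalar operation on element i; the loop, masking and tail
 * clearing live in do_vext_vv() further down.
 */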
 895RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
 896RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
 897RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
 898RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
 899RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
 900RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
 901RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
 902RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
 903
 904static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
 905                       CPURISCVState *env, uint32_t desc,
 906                       uint32_t esz, uint32_t dsz,
 907                       opivv2_fn *fn, clear_fn *clearfn)
 908{
 909    uint32_t vlmax = vext_maxsz(desc) / esz;
 910    uint32_t mlen = vext_mlen(desc);
 911    uint32_t vm = vext_vm(desc);
 912    uint32_t vl = env->vl;
 913    uint32_t i;
 914
 915    for (i = 0; i < vl; i++) {
 916        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 917            continue;
 918        }
 919        fn(vd, vs1, vs2, i);
 920    }
 921    clearfn(vd, vl, vl * dsz,  vlmax * dsz);
 922}
 923
 924/* generate the helpers for OPIVV */
 925#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN)             \
 926void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
 927                  void *vs2, CPURISCVState *env,          \
 928                  uint32_t desc)                          \
 929{                                                         \
 930    do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
 931               do_##NAME, CLEAR_FN);                      \
 932}
 933
 934GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb)
 935GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh)
 936GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl)
 937GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq)
 938GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb)
 939GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh)
 940GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl)
 941GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq)
 942
 943typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
 944
 945/*
  946 * (T1)s1 gives the real operand type.
  947 * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
 948 */
 949#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
 950static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
 951{                                                                   \
 952    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
 953    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
 954}
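
/*
 * Cast-chain example (illustrative): for the widening op vwadd_vx_b,
 * WOP_SSS_B supplies T1 = int8_t and TX1 = int16_t, so (TX1)(T1)s1 first
 * truncates the scalar register value to int8_t and then sign-extends it
 * to the 16-bit operand width, e.g. s1 = 0x1f0 becomes 0xf0, i.e. -16.
 */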
 955
 956RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
 957RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
 958RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
 959RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
 960RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
 961RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
 962RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
 963RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
 964RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
 965RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
 966RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
 967RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
 968
 969static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
 970                       CPURISCVState *env, uint32_t desc,
 971                       uint32_t esz, uint32_t dsz,
 972                       opivx2_fn fn, clear_fn *clearfn)
 973{
 974    uint32_t vlmax = vext_maxsz(desc) / esz;
 975    uint32_t mlen = vext_mlen(desc);
 976    uint32_t vm = vext_vm(desc);
 977    uint32_t vl = env->vl;
 978    uint32_t i;
 979
 980    for (i = 0; i < vl; i++) {
 981        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 982            continue;
 983        }
 984        fn(vd, s1, vs2, i);
 985    }
 986    clearfn(vd, vl, vl * dsz,  vlmax * dsz);
 987}
 988
 989/* generate the helpers for OPIVX */
 990#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN)             \
 991void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
 992                  void *vs2, CPURISCVState *env,          \
 993                  uint32_t desc)                          \
 994{                                                         \
 995    do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,      \
 996               do_##NAME, CLEAR_FN);                      \
 997}
 998
 999GEN_VEXT_VX(vadd_vx_b, 1, 1, clearb)
1000GEN_VEXT_VX(vadd_vx_h, 2, 2, clearh)
1001GEN_VEXT_VX(vadd_vx_w, 4, 4, clearl)
1002GEN_VEXT_VX(vadd_vx_d, 8, 8, clearq)
1003GEN_VEXT_VX(vsub_vx_b, 1, 1, clearb)
1004GEN_VEXT_VX(vsub_vx_h, 2, 2, clearh)
1005GEN_VEXT_VX(vsub_vx_w, 4, 4, clearl)
1006GEN_VEXT_VX(vsub_vx_d, 8, 8, clearq)
1007GEN_VEXT_VX(vrsub_vx_b, 1, 1, clearb)
1008GEN_VEXT_VX(vrsub_vx_h, 2, 2, clearh)
1009GEN_VEXT_VX(vrsub_vx_w, 4, 4, clearl)
1010GEN_VEXT_VX(vrsub_vx_d, 8, 8, clearq)
1011
1012void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
1013{
1014    intptr_t oprsz = simd_oprsz(desc);
1015    intptr_t i;
1016
1017    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1018        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
1019    }
1020}
1021
1022void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
1023{
1024    intptr_t oprsz = simd_oprsz(desc);
1025    intptr_t i;
1026
1027    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1028        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
1029    }
1030}
1031
1032void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
1033{
1034    intptr_t oprsz = simd_oprsz(desc);
1035    intptr_t i;
1036
1037    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1038        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
1039    }
1040}
1041
1042void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
1043{
1044    intptr_t oprsz = simd_oprsz(desc);
1045    intptr_t i;
1046
1047    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1048        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
1049    }
1050}
1051
1052/* Vector Widening Integer Add/Subtract */
1053#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
1054#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
1055#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
1056#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
1057#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
1058#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
1059#define WOP_WUUU_B  uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
1060#define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
1061#define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
1062#define WOP_WSSS_B  int16_t, int8_t, int16_t, int16_t, int16_t
1063#define WOP_WSSS_H  int32_t, int16_t, int32_t, int32_t, int32_t
1064#define WOP_WSSS_W  int64_t, int32_t, int64_t, int64_t, int64_t
1065RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
1066RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
1067RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
1068RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
1069RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
1070RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
1071RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
1072RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
1073RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
1074RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
1075RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
1076RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
1077RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
1078RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
1079RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
1080RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
1081RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
1082RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
1083RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
1084RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
1085RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
1086RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
1087RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
1088RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
1089GEN_VEXT_VV(vwaddu_vv_b, 1, 2, clearh)
1090GEN_VEXT_VV(vwaddu_vv_h, 2, 4, clearl)
1091GEN_VEXT_VV(vwaddu_vv_w, 4, 8, clearq)
1092GEN_VEXT_VV(vwsubu_vv_b, 1, 2, clearh)
1093GEN_VEXT_VV(vwsubu_vv_h, 2, 4, clearl)
1094GEN_VEXT_VV(vwsubu_vv_w, 4, 8, clearq)
1095GEN_VEXT_VV(vwadd_vv_b, 1, 2, clearh)
1096GEN_VEXT_VV(vwadd_vv_h, 2, 4, clearl)
1097GEN_VEXT_VV(vwadd_vv_w, 4, 8, clearq)
1098GEN_VEXT_VV(vwsub_vv_b, 1, 2, clearh)
1099GEN_VEXT_VV(vwsub_vv_h, 2, 4, clearl)
1100GEN_VEXT_VV(vwsub_vv_w, 4, 8, clearq)
1101GEN_VEXT_VV(vwaddu_wv_b, 1, 2, clearh)
1102GEN_VEXT_VV(vwaddu_wv_h, 2, 4, clearl)
1103GEN_VEXT_VV(vwaddu_wv_w, 4, 8, clearq)
1104GEN_VEXT_VV(vwsubu_wv_b, 1, 2, clearh)
1105GEN_VEXT_VV(vwsubu_wv_h, 2, 4, clearl)
1106GEN_VEXT_VV(vwsubu_wv_w, 4, 8, clearq)
1107GEN_VEXT_VV(vwadd_wv_b, 1, 2, clearh)
1108GEN_VEXT_VV(vwadd_wv_h, 2, 4, clearl)
1109GEN_VEXT_VV(vwadd_wv_w, 4, 8, clearq)
1110GEN_VEXT_VV(vwsub_wv_b, 1, 2, clearh)
1111GEN_VEXT_VV(vwsub_wv_h, 2, 4, clearl)
1112GEN_VEXT_VV(vwsub_wv_w, 4, 8, clearq)
1113
1114RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
1115RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
1116RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
1117RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
1118RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
1119RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
1120RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
1121RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
1122RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
1123RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
1124RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
1125RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
1126RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
1127RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
1128RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
1129RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
1130RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
1131RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
1132RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
1133RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
1134RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
1135RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
1136RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
1137RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
1138GEN_VEXT_VX(vwaddu_vx_b, 1, 2, clearh)
1139GEN_VEXT_VX(vwaddu_vx_h, 2, 4, clearl)
1140GEN_VEXT_VX(vwaddu_vx_w, 4, 8, clearq)
1141GEN_VEXT_VX(vwsubu_vx_b, 1, 2, clearh)
1142GEN_VEXT_VX(vwsubu_vx_h, 2, 4, clearl)
1143GEN_VEXT_VX(vwsubu_vx_w, 4, 8, clearq)
1144GEN_VEXT_VX(vwadd_vx_b, 1, 2, clearh)
1145GEN_VEXT_VX(vwadd_vx_h, 2, 4, clearl)
1146GEN_VEXT_VX(vwadd_vx_w, 4, 8, clearq)
1147GEN_VEXT_VX(vwsub_vx_b, 1, 2, clearh)
1148GEN_VEXT_VX(vwsub_vx_h, 2, 4, clearl)
1149GEN_VEXT_VX(vwsub_vx_w, 4, 8, clearq)
1150GEN_VEXT_VX(vwaddu_wx_b, 1, 2, clearh)
1151GEN_VEXT_VX(vwaddu_wx_h, 2, 4, clearl)
1152GEN_VEXT_VX(vwaddu_wx_w, 4, 8, clearq)
1153GEN_VEXT_VX(vwsubu_wx_b, 1, 2, clearh)
1154GEN_VEXT_VX(vwsubu_wx_h, 2, 4, clearl)
1155GEN_VEXT_VX(vwsubu_wx_w, 4, 8, clearq)
1156GEN_VEXT_VX(vwadd_wx_b, 1, 2, clearh)
1157GEN_VEXT_VX(vwadd_wx_h, 2, 4, clearl)
1158GEN_VEXT_VX(vwadd_wx_w, 4, 8, clearq)
1159GEN_VEXT_VX(vwsub_wx_b, 1, 2, clearh)
1160GEN_VEXT_VX(vwsub_wx_h, 2, 4, clearl)
1161GEN_VEXT_VX(vwsub_wx_w, 4, 8, clearq)
1162
1163/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
1164#define DO_VADC(N, M, C) (N + M + C)
1165#define DO_VSBC(N, M, C) (N - M - C)
1166
1167#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP, CLEAR_FN)    \
1168void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1169                  CPURISCVState *env, uint32_t desc)          \
1170{                                                             \
1171    uint32_t mlen = vext_mlen(desc);                          \
1172    uint32_t vl = env->vl;                                    \
1173    uint32_t esz = sizeof(ETYPE);                             \
1174    uint32_t vlmax = vext_maxsz(desc) / esz;                  \
1175    uint32_t i;                                               \
1176                                                              \
1177    for (i = 0; i < vl; i++) {                                \
1178        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1179        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1180        uint8_t carry = vext_elem_mask(v0, mlen, i);          \
1181                                                              \
1182        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
1183    }                                                         \
1184    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                  \
1185}
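
/*
 * Example (illustrative): for vadc_vvm_b each destination byte is
 * vd[i] = vs2[i] + vs1[i] + v0.mask[i]; the mask register supplies the
 * carry-in here rather than predicating the operation, so every element
 * up to vl is written.
 */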
1186
1187GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC, clearb)
1188GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC, clearh)
1189GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC, clearl)
1190GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC, clearq)
1191
1192GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC, clearb)
1193GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC, clearh)
1194GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC, clearl)
1195GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC, clearq)
1196
1197#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP, CLEAR_FN)               \
1198void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
1199                  CPURISCVState *env, uint32_t desc)                     \
1200{                                                                        \
1201    uint32_t mlen = vext_mlen(desc);                                     \
1202    uint32_t vl = env->vl;                                               \
1203    uint32_t esz = sizeof(ETYPE);                                        \
1204    uint32_t vlmax = vext_maxsz(desc) / esz;                             \
1205    uint32_t i;                                                          \
1206                                                                         \
1207    for (i = 0; i < vl; i++) {                                           \
1208        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
1209        uint8_t carry = vext_elem_mask(v0, mlen, i);                     \
1210                                                                         \
1211        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1212    }                                                                    \
1213    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                             \
1214}
1215
1216GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC, clearb)
1217GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC, clearh)
1218GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC, clearl)
1219GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC, clearq)
1220
1221GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC, clearb)
1222GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC, clearh)
1223GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC, clearl)
1224GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC, clearq)
1225
1226#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
1227                          (__typeof(N))(N + M) < N)
1228#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
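
/*
 * DO_MADC/DO_MSBC compute the carry-out / borrow-out of vadc/vsbc using
 * only element-width arithmetic: with carry-in, N + M + 1 overflows iff
 * the truncated sum is <= N, and without carry-in N + M overflows iff the
 * truncated sum is < N; likewise N - M - C borrows iff N <= M (N < M when
 * C = 0).  E.g. for uint8_t, N = 200, M = 100, C = 0:
 * (uint8_t)(200 + 100) = 44 < 200, so the carry-out is 1.
 */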
1229
1230#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
1231void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1232                  CPURISCVState *env, uint32_t desc)          \
1233{                                                             \
1234    uint32_t mlen = vext_mlen(desc);                          \
1235    uint32_t vl = env->vl;                                    \
1236    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);        \
1237    uint32_t i;                                               \
1238                                                              \
1239    for (i = 0; i < vl; i++) {                                \
1240        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1241        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1242        uint8_t carry = vext_elem_mask(v0, mlen, i);          \
1243                                                              \
1244        vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1, carry));\
1245    }                                                         \
1246    for (; i < vlmax; i++) {                                  \
1247        vext_set_elem_mask(vd, mlen, i, 0);                   \
1248    }                                                         \
1249}
1250
1251GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
1252GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1253GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1254GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1255
1256GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
1257GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1258GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1259GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1260
1261#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
1262void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
1263                  void *vs2, CPURISCVState *env, uint32_t desc) \
1264{                                                               \
1265    uint32_t mlen = vext_mlen(desc);                            \
1266    uint32_t vl = env->vl;                                      \
1267    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);          \
1268    uint32_t i;                                                 \
1269                                                                \
1270    for (i = 0; i < vl; i++) {                                  \
1271        ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
1272        uint8_t carry = vext_elem_mask(v0, mlen, i);            \
1273                                                                \
1274        vext_set_elem_mask(vd, mlen, i,                         \
1275                DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
1276    }                                                           \
1277    for (; i < vlmax; i++) {                                    \
1278        vext_set_elem_mask(vd, mlen, i, 0);                     \
1279    }                                                           \
1280}
1281
1282GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
1283GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1284GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1285GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1286
1287GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
1288GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1289GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1290GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1291
1292/* Vector Bitwise Logical Instructions */
1293RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1294RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1295RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1296RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1297RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1298RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1299RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1300RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1301RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1302RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1303RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1304RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1305GEN_VEXT_VV(vand_vv_b, 1, 1, clearb)
1306GEN_VEXT_VV(vand_vv_h, 2, 2, clearh)
1307GEN_VEXT_VV(vand_vv_w, 4, 4, clearl)
1308GEN_VEXT_VV(vand_vv_d, 8, 8, clearq)
1309GEN_VEXT_VV(vor_vv_b, 1, 1, clearb)
1310GEN_VEXT_VV(vor_vv_h, 2, 2, clearh)
1311GEN_VEXT_VV(vor_vv_w, 4, 4, clearl)
1312GEN_VEXT_VV(vor_vv_d, 8, 8, clearq)
1313GEN_VEXT_VV(vxor_vv_b, 1, 1, clearb)
1314GEN_VEXT_VV(vxor_vv_h, 2, 2, clearh)
1315GEN_VEXT_VV(vxor_vv_w, 4, 4, clearl)
1316GEN_VEXT_VV(vxor_vv_d, 8, 8, clearq)
1317
1318RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1319RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1320RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1321RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1322RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1323RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1324RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1325RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1326RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1327RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1328RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1329RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1330GEN_VEXT_VX(vand_vx_b, 1, 1, clearb)
1331GEN_VEXT_VX(vand_vx_h, 2, 2, clearh)
1332GEN_VEXT_VX(vand_vx_w, 4, 4, clearl)
1333GEN_VEXT_VX(vand_vx_d, 8, 8, clearq)
1334GEN_VEXT_VX(vor_vx_b, 1, 1, clearb)
1335GEN_VEXT_VX(vor_vx_h, 2, 2, clearh)
1336GEN_VEXT_VX(vor_vx_w, 4, 4, clearl)
1337GEN_VEXT_VX(vor_vx_d, 8, 8, clearq)
1338GEN_VEXT_VX(vxor_vx_b, 1, 1, clearb)
1339GEN_VEXT_VX(vxor_vx_h, 2, 2, clearh)
1340GEN_VEXT_VX(vxor_vx_w, 4, 4, clearl)
1341GEN_VEXT_VX(vxor_vx_d, 8, 8, clearq)
1342
1343/* Vector Single-Width Bit Shift Instructions */
1344#define DO_SLL(N, M)  (N << (M))
1345#define DO_SRL(N, M)  (N >> (M))
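
/*
 * DO_SRL serves for both logical and arithmetic right shifts: whether
 * N >> M sign-extends depends on the type it is instantiated with, so the
 * vsra/vnsra variants below simply pass a signed type for the shifted
 * operand (relying, as QEMU does elsewhere, on the compiler implementing
 * signed right shift arithmetically).
 */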
1346
1347/* generate the helpers for shift instructions with two vector operands */
1348#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK, CLEAR_FN)   \
1349void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
1350                  void *vs2, CPURISCVState *env, uint32_t desc)           \
1351{                                                                         \
1352    uint32_t mlen = vext_mlen(desc);                                      \
1353    uint32_t vm = vext_vm(desc);                                          \
1354    uint32_t vl = env->vl;                                                \
1355    uint32_t esz = sizeof(TS1);                                           \
1356    uint32_t vlmax = vext_maxsz(desc) / esz;                              \
1357    uint32_t i;                                                           \
1358                                                                          \
1359    for (i = 0; i < vl; i++) {                                            \
1360        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
1361            continue;                                                     \
1362        }                                                                 \
1363        TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
1364        TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
1365        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
1366    }                                                                     \
1367    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                              \
1368}
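
/*
 * In GEN_VEXT_SHIFT_VV, TS1 is both the destination and the shift-amount
 * element type, TS2 is the type of the value being shifted, and MASK
 * truncates the shift amount to the width of the shifted operand.  The
 * narrowing shifts further down instantiate this with a double-width TS2
 * and a MASK sized for that wider source.
 */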
1369
1370GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7, clearb)
1371GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf, clearh)
1372GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f, clearl)
1373GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f, clearq)
1374
1375GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb)
1376GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh)
1377GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl)
1378GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq)
1379
1380GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7, clearb)
1381GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh)
1382GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl)
1383GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq)
1384
1385/* generate the helpers for shift instructions with one vector and one scalar */
1386#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK, CLEAR_FN) \
1387void HELPER(NAME)(void *vd, void *v0, target_ulong s1,                \
1388        void *vs2, CPURISCVState *env, uint32_t desc)                 \
1389{                                                                     \
1390    uint32_t mlen = vext_mlen(desc);                                  \
1391    uint32_t vm = vext_vm(desc);                                      \
1392    uint32_t vl = env->vl;                                            \
1393    uint32_t esz = sizeof(TD);                                        \
1394    uint32_t vlmax = vext_maxsz(desc) / esz;                          \
1395    uint32_t i;                                                       \
1396                                                                      \
1397    for (i = 0; i < vl; i++) {                                        \
1398        if (!vm && !vext_elem_mask(v0, mlen, i)) {                    \
1399            continue;                                                 \
1400        }                                                             \
1401        TS2 s2 = *((TS2 *)vs2 + HS2(i));                              \
1402        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);                      \
1403    }                                                                 \
1404    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                          \
1405}
1406
1407GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7, clearb)
1408GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf, clearh)
1409GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f, clearl)
1410GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f, clearq)
1411
1412GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb)
1413GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh)
1414GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl)
1415GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq)
1416
1417GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb)
1418GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh)
1419GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl)
1420GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq)
1421
1422/* Vector Narrowing Integer Right Shift Instructions */
1423GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf, clearb)
1424GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh)
1425GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl)
1426GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf, clearb)
1427GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh)
1428GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl)
1429GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb)
1430GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh)
1431GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl)
1432GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb)
1433GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh)
1434GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl)
1435
1436/* Vector Integer Comparison Instructions */
1437#define DO_MSEQ(N, M) (N == M)
1438#define DO_MSNE(N, M) (N != M)
1439#define DO_MSLT(N, M) (N < M)
1440#define DO_MSLE(N, M) (N <= M)
1441#define DO_MSGT(N, M) (N > M)
1442
1443#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
1444void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1445                  CPURISCVState *env, uint32_t desc)          \
1446{                                                             \
1447    uint32_t mlen = vext_mlen(desc);                          \
1448    uint32_t vm = vext_vm(desc);                              \
1449    uint32_t vl = env->vl;                                    \
1450    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);        \
1451    uint32_t i;                                               \
1452                                                              \
1453    for (i = 0; i < vl; i++) {                                \
1454        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1455        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1456        if (!vm && !vext_elem_mask(v0, mlen, i)) {            \
1457            continue;                                         \
1458        }                                                     \
1459        vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1));       \
1460    }                                                         \
1461    for (; i < vlmax; i++) {                                  \
1462        vext_set_elem_mask(vd, mlen, i, 0);                   \
1463    }                                                         \
1464}
1465
1466GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
1467GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1468GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1469GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1470
1471GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
1472GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1473GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1474GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1475
1476GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
1477GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1478GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1479GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1480
1481GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
1482GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1483GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1484GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1485
1486GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
1487GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1488GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1489GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1490
1491GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
1492GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1493GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1494GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1495
1496#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
1497void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
1498                  CPURISCVState *env, uint32_t desc)                \
1499{                                                                   \
1500    uint32_t mlen = vext_mlen(desc);                                \
1501    uint32_t vm = vext_vm(desc);                                    \
1502    uint32_t vl = env->vl;                                          \
1503    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);              \
1504    uint32_t i;                                                     \
1505                                                                    \
1506    for (i = 0; i < vl; i++) {                                      \
1507        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
1508        if (!vm && !vext_elem_mask(v0, mlen, i)) {                  \
1509            continue;                                               \
1510        }                                                           \
1511        vext_set_elem_mask(vd, mlen, i,                             \
1512                DO_OP(s2, (ETYPE)(target_long)s1));                 \
1513    }                                                               \
1514    for (; i < vlmax; i++) {                                        \
1515        vext_set_elem_mask(vd, mlen, i, 0);                         \
1516    }                                                               \
1517}
1518
1519GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
1520GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1521GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1522GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1523
1524GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
1525GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1526GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1527GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1528
1529GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
1530GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1531GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1532GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1533
1534GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
1535GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1536GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1537GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1538
1539GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
1540GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1541GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1542GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1543
1544GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
1545GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1546GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1547GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1548
1549GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
1550GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1551GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1552GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1553
1554GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
1555GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1556GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1557GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1558
1559/* Vector Integer Min/Max Instructions */
1560RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1561RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1562RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1563RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1564RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1565RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1566RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1567RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1568RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1569RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1570RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1571RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1572RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1573RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1574RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1575RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1576GEN_VEXT_VV(vminu_vv_b, 1, 1, clearb)
1577GEN_VEXT_VV(vminu_vv_h, 2, 2, clearh)
1578GEN_VEXT_VV(vminu_vv_w, 4, 4, clearl)
1579GEN_VEXT_VV(vminu_vv_d, 8, 8, clearq)
1580GEN_VEXT_VV(vmin_vv_b, 1, 1, clearb)
1581GEN_VEXT_VV(vmin_vv_h, 2, 2, clearh)
1582GEN_VEXT_VV(vmin_vv_w, 4, 4, clearl)
1583GEN_VEXT_VV(vmin_vv_d, 8, 8, clearq)
1584GEN_VEXT_VV(vmaxu_vv_b, 1, 1, clearb)
1585GEN_VEXT_VV(vmaxu_vv_h, 2, 2, clearh)
1586GEN_VEXT_VV(vmaxu_vv_w, 4, 4, clearl)
1587GEN_VEXT_VV(vmaxu_vv_d, 8, 8, clearq)
1588GEN_VEXT_VV(vmax_vv_b, 1, 1, clearb)
1589GEN_VEXT_VV(vmax_vv_h, 2, 2, clearh)
1590GEN_VEXT_VV(vmax_vv_w, 4, 4, clearl)
1591GEN_VEXT_VV(vmax_vv_d, 8, 8, clearq)
1592
1593RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1594RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1595RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1596RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1597RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1598RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1599RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1600RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1601RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1602RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1603RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1604RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1605RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1606RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1607RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1608RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1609GEN_VEXT_VX(vminu_vx_b, 1, 1, clearb)
1610GEN_VEXT_VX(vminu_vx_h, 2, 2, clearh)
1611GEN_VEXT_VX(vminu_vx_w, 4, 4, clearl)
1612GEN_VEXT_VX(vminu_vx_d, 8, 8, clearq)
1613GEN_VEXT_VX(vmin_vx_b, 1, 1, clearb)
1614GEN_VEXT_VX(vmin_vx_h, 2, 2, clearh)
1615GEN_VEXT_VX(vmin_vx_w, 4, 4, clearl)
1616GEN_VEXT_VX(vmin_vx_d, 8, 8, clearq)
1617GEN_VEXT_VX(vmaxu_vx_b, 1, 1, clearb)
1618GEN_VEXT_VX(vmaxu_vx_h, 2, 2, clearh)
1619GEN_VEXT_VX(vmaxu_vx_w, 4, 4, clearl)
1620GEN_VEXT_VX(vmaxu_vx_d, 8, 8, clearq)
1621GEN_VEXT_VX(vmax_vx_b, 1, 1, clearb)
1622GEN_VEXT_VX(vmax_vx_h, 2, 2, clearh)
1623GEN_VEXT_VX(vmax_vx_w, 4, 4, clearl)
1624GEN_VEXT_VX(vmax_vx_d, 8, 8, clearq)
1625
1626/* Vector Single-Width Integer Multiply Instructions */
1627#define DO_MUL(N, M) (N * M)
1628RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1629RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1630RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1631RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1632GEN_VEXT_VV(vmul_vv_b, 1, 1, clearb)
1633GEN_VEXT_VV(vmul_vv_h, 2, 2, clearh)
1634GEN_VEXT_VV(vmul_vv_w, 4, 4, clearl)
1635GEN_VEXT_VV(vmul_vv_d, 8, 8, clearq)
1636
1637static int8_t do_mulh_b(int8_t s2, int8_t s1)
1638{
1639    return (int16_t)s2 * (int16_t)s1 >> 8;
1640}
1641
1642static int16_t do_mulh_h(int16_t s2, int16_t s1)
1643{
1644    return (int32_t)s2 * (int32_t)s1 >> 16;
1645}
1646
1647static int32_t do_mulh_w(int32_t s2, int32_t s1)
1648{
1649    return (int64_t)s2 * (int64_t)s1 >> 32;
1650}
1651
1652static int64_t do_mulh_d(int64_t s2, int64_t s1)
1653{
1654    uint64_t hi_64, lo_64;
1655
1656    muls64(&lo_64, &hi_64, s1, s2);
1657    return hi_64;
1658}
1659
1660static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1661{
1662    return (uint16_t)s2 * (uint16_t)s1 >> 8;
1663}
1664
1665static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1666{
1667    return (uint32_t)s2 * (uint32_t)s1 >> 16;
1668}
1669
1670static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1671{
1672    return (uint64_t)s2 * (uint64_t)s1 >> 32;
1673}
1674
1675static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1676{
1677    uint64_t hi_64, lo_64;
1678
1679    mulu64(&lo_64, &hi_64, s2, s1);
1680    return hi_64;
1681}
1682
1683static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1684{
1685    return (int16_t)s2 * (uint16_t)s1 >> 8;
1686}
1687
1688static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1689{
1690    return (int32_t)s2 * (uint32_t)s1 >> 16;
1691}
1692
1693static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1694{
1695    return (int64_t)s2 * (uint64_t)s1 >> 32;
1696}
1697
1698/*
1699 * Signed-by-unsigned high multiply via mulu64():
1700 *
1701 * Let  A = signed operand (s2),
1702 *      B = unsigned operand (s1),
1703 *      P = mulu64(A, B), the 128-bit product of the raw bit patterns,
1704 *      SP = A * B, the mathematically signed product.
1705 *
1706 * IF A >= 0
1707 *      SP = P
1708 * ELSE (A < 0, so its bit pattern is A + 2 ** 64)
1709 *      P  = (A + 2 ** 64) * B
1710 *         = A * B + 2 ** 64 * B
1711 *      SP = P - 2 ** 64 * B
1712 *
1713 * The low 64 bits of SP and P agree; for the high 64 bits this gives
1714 *      HI_P -= (A < 0 ? B : 0)
1715 */
1716
1717static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1718{
1719    uint64_t hi_64, lo_64;
1720
1721    mulu64(&lo_64, &hi_64, s2, s1);
1722
1723    hi_64 -= s2 < 0 ? s1 : 0;
1724    return hi_64;
1725}
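
/*
 * A scaled-down sanity check of the identity above, using 4-bit "words":
 * A = -2 (bit pattern 14), B = 3, so P = 14 * 3 = 42 = 2 * 16 + 10, i.e.
 * HI_P = 2.  The signed product is -6 = -1 * 16 + 10, and indeed
 * HI_P - B = 2 - 3 = -1.
 */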
1726
1727RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1728RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1729RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1730RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1731RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1732RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1733RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1734RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1735RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1736RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1737RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1738RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1739GEN_VEXT_VV(vmulh_vv_b, 1, 1, clearb)
1740GEN_VEXT_VV(vmulh_vv_h, 2, 2, clearh)
1741GEN_VEXT_VV(vmulh_vv_w, 4, 4, clearl)
1742GEN_VEXT_VV(vmulh_vv_d, 8, 8, clearq)
1743GEN_VEXT_VV(vmulhu_vv_b, 1, 1, clearb)
1744GEN_VEXT_VV(vmulhu_vv_h, 2, 2, clearh)
1745GEN_VEXT_VV(vmulhu_vv_w, 4, 4, clearl)
1746GEN_VEXT_VV(vmulhu_vv_d, 8, 8, clearq)
1747GEN_VEXT_VV(vmulhsu_vv_b, 1, 1, clearb)
1748GEN_VEXT_VV(vmulhsu_vv_h, 2, 2, clearh)
1749GEN_VEXT_VV(vmulhsu_vv_w, 4, 4, clearl)
1750GEN_VEXT_VV(vmulhsu_vv_d, 8, 8, clearq)
1751
1752RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1753RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1754RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1755RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1756RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1757RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1758RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1759RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1760RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1761RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1762RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1763RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1764RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1765RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1766RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1767RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1768GEN_VEXT_VX(vmul_vx_b, 1, 1, clearb)
1769GEN_VEXT_VX(vmul_vx_h, 2, 2, clearh)
1770GEN_VEXT_VX(vmul_vx_w, 4, 4, clearl)
1771GEN_VEXT_VX(vmul_vx_d, 8, 8, clearq)
1772GEN_VEXT_VX(vmulh_vx_b, 1, 1, clearb)
1773GEN_VEXT_VX(vmulh_vx_h, 2, 2, clearh)
1774GEN_VEXT_VX(vmulh_vx_w, 4, 4, clearl)
1775GEN_VEXT_VX(vmulh_vx_d, 8, 8, clearq)
1776GEN_VEXT_VX(vmulhu_vx_b, 1, 1, clearb)
1777GEN_VEXT_VX(vmulhu_vx_h, 2, 2, clearh)
1778GEN_VEXT_VX(vmulhu_vx_w, 4, 4, clearl)
1779GEN_VEXT_VX(vmulhu_vx_d, 8, 8, clearq)
1780GEN_VEXT_VX(vmulhsu_vx_b, 1, 1, clearb)
1781GEN_VEXT_VX(vmulhsu_vx_h, 2, 2, clearh)
1782GEN_VEXT_VX(vmulhsu_vx_w, 4, 4, clearl)
1783GEN_VEXT_VX(vmulhsu_vx_d, 8, 8, clearq)
1784
1785/* Vector Integer Divide Instructions */
1786#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1787#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1788#define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) :\
1789        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1790#define DO_REM(N, M)  (unlikely(M == 0) ? N :\
1791        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
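
/*
 * The (N == -N) test identifies the most negative value of the element
 * type (it also matches N == 0, which is harmless here), so the second
 * "unlikely" clause catches the "most negative / -1" overflow case.  The
 * chosen results (quotient of all ones and remainder N on division by
 * zero; quotient N and remainder 0 on signed overflow) follow the same
 * convention as the scalar M extension.
 */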
1792
1793RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1794RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1795RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1796RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1797RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1798RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1799RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1800RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1801RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1802RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1803RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1804RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1805RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1806RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1807RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1808RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1809GEN_VEXT_VV(vdivu_vv_b, 1, 1, clearb)
1810GEN_VEXT_VV(vdivu_vv_h, 2, 2, clearh)
1811GEN_VEXT_VV(vdivu_vv_w, 4, 4, clearl)
1812GEN_VEXT_VV(vdivu_vv_d, 8, 8, clearq)
1813GEN_VEXT_VV(vdiv_vv_b, 1, 1, clearb)
1814GEN_VEXT_VV(vdiv_vv_h, 2, 2, clearh)
1815GEN_VEXT_VV(vdiv_vv_w, 4, 4, clearl)
1816GEN_VEXT_VV(vdiv_vv_d, 8, 8, clearq)
1817GEN_VEXT_VV(vremu_vv_b, 1, 1, clearb)
1818GEN_VEXT_VV(vremu_vv_h, 2, 2, clearh)
1819GEN_VEXT_VV(vremu_vv_w, 4, 4, clearl)
1820GEN_VEXT_VV(vremu_vv_d, 8, 8, clearq)
1821GEN_VEXT_VV(vrem_vv_b, 1, 1, clearb)
1822GEN_VEXT_VV(vrem_vv_h, 2, 2, clearh)
1823GEN_VEXT_VV(vrem_vv_w, 4, 4, clearl)
1824GEN_VEXT_VV(vrem_vv_d, 8, 8, clearq)
1825
1826RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1827RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1828RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1829RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1830RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1831RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1832RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1833RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1834RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1835RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1836RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1837RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1838RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1839RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1840RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1841RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1842GEN_VEXT_VX(vdivu_vx_b, 1, 1, clearb)
1843GEN_VEXT_VX(vdivu_vx_h, 2, 2, clearh)
1844GEN_VEXT_VX(vdivu_vx_w, 4, 4, clearl)
1845GEN_VEXT_VX(vdivu_vx_d, 8, 8, clearq)
1846GEN_VEXT_VX(vdiv_vx_b, 1, 1, clearb)
1847GEN_VEXT_VX(vdiv_vx_h, 2, 2, clearh)
1848GEN_VEXT_VX(vdiv_vx_w, 4, 4, clearl)
1849GEN_VEXT_VX(vdiv_vx_d, 8, 8, clearq)
1850GEN_VEXT_VX(vremu_vx_b, 1, 1, clearb)
1851GEN_VEXT_VX(vremu_vx_h, 2, 2, clearh)
1852GEN_VEXT_VX(vremu_vx_w, 4, 4, clearl)
1853GEN_VEXT_VX(vremu_vx_d, 8, 8, clearq)
1854GEN_VEXT_VX(vrem_vx_b, 1, 1, clearb)
1855GEN_VEXT_VX(vrem_vx_h, 2, 2, clearh)
1856GEN_VEXT_VX(vrem_vx_w, 4, 4, clearl)
1857GEN_VEXT_VX(vrem_vx_d, 8, 8, clearq)
1858
1859/* Vector Widening Integer Multiply Instructions */
1860RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1861RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1862RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1863RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1864RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1865RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1866RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1867RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1868RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1869GEN_VEXT_VV(vwmul_vv_b, 1, 2, clearh)
1870GEN_VEXT_VV(vwmul_vv_h, 2, 4, clearl)
1871GEN_VEXT_VV(vwmul_vv_w, 4, 8, clearq)
1872GEN_VEXT_VV(vwmulu_vv_b, 1, 2, clearh)
1873GEN_VEXT_VV(vwmulu_vv_h, 2, 4, clearl)
1874GEN_VEXT_VV(vwmulu_vv_w, 4, 8, clearq)
1875GEN_VEXT_VV(vwmulsu_vv_b, 1, 2, clearh)
1876GEN_VEXT_VV(vwmulsu_vv_h, 2, 4, clearl)
1877GEN_VEXT_VV(vwmulsu_vv_w, 4, 8, clearq)
1878
1879RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1880RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1881RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1882RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1883RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1884RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1885RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1886RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1887RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1888GEN_VEXT_VX(vwmul_vx_b, 1, 2, clearh)
1889GEN_VEXT_VX(vwmul_vx_h, 2, 4, clearl)
1890GEN_VEXT_VX(vwmul_vx_w, 4, 8, clearq)
1891GEN_VEXT_VX(vwmulu_vx_b, 1, 2, clearh)
1892GEN_VEXT_VX(vwmulu_vx_h, 2, 4, clearl)
1893GEN_VEXT_VX(vwmulu_vx_w, 4, 8, clearq)
1894GEN_VEXT_VX(vwmulsu_vx_b, 1, 2, clearh)
1895GEN_VEXT_VX(vwmulsu_vx_h, 2, 4, clearl)
1896GEN_VEXT_VX(vwmulsu_vx_w, 4, 8, clearq)
1897
1898/* Vector Single-Width Integer Multiply-Add Instructions */
1899#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
1900static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
1901{                                                                  \
1902    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
1903    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
1904    TD d = *((TD *)vd + HD(i));                                    \
1905    *((TD *)vd + HD(i)) = OP(s2, s1, d);                           \
1906}
1907
1908#define DO_MACC(N, M, D) (M * N + D)
1909#define DO_NMSAC(N, M, D) (-(M * N) + D)
1910#define DO_MADD(N, M, D) (M * D + N)
1911#define DO_NMSUB(N, M, D) (-(M * D) + N)
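
/*
 * The three-operand callbacks are invoked as OP(s2, s1, d), so in the
 * macros above N = vs2, M = vs1 (or the scalar rs1) and D = vd.  Thus
 * DO_MACC/DO_NMSAC accumulate into vd (vd = +/-(vs1 * vs2) + vd), while
 * DO_MADD/DO_NMSUB overwrite the multiplicand (vd = +/-(vs1 * vd) + vs2).
 */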
1912RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1913RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1914RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1915RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1916RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1917RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1918RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1919RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1920RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1921RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1922RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1923RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1924RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1925RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1926RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1927RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1928GEN_VEXT_VV(vmacc_vv_b, 1, 1, clearb)
1929GEN_VEXT_VV(vmacc_vv_h, 2, 2, clearh)
1930GEN_VEXT_VV(vmacc_vv_w, 4, 4, clearl)
1931GEN_VEXT_VV(vmacc_vv_d, 8, 8, clearq)
1932GEN_VEXT_VV(vnmsac_vv_b, 1, 1, clearb)
1933GEN_VEXT_VV(vnmsac_vv_h, 2, 2, clearh)
1934GEN_VEXT_VV(vnmsac_vv_w, 4, 4, clearl)
1935GEN_VEXT_VV(vnmsac_vv_d, 8, 8, clearq)
1936GEN_VEXT_VV(vmadd_vv_b, 1, 1, clearb)
1937GEN_VEXT_VV(vmadd_vv_h, 2, 2, clearh)
1938GEN_VEXT_VV(vmadd_vv_w, 4, 4, clearl)
1939GEN_VEXT_VV(vmadd_vv_d, 8, 8, clearq)
1940GEN_VEXT_VV(vnmsub_vv_b, 1, 1, clearb)
1941GEN_VEXT_VV(vnmsub_vv_h, 2, 2, clearh)
1942GEN_VEXT_VV(vnmsub_vv_w, 4, 4, clearl)
1943GEN_VEXT_VV(vnmsub_vv_d, 8, 8, clearq)
1944
1945#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
1946static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
1947{                                                                   \
1948    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1949    TD d = *((TD *)vd + HD(i));                                     \
1950    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
1951}
1952
1953RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1954RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1955RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1956RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1957RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1958RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1959RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1960RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1961RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1962RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1963RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1964RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1965RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1966RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1967RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1968RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1969GEN_VEXT_VX(vmacc_vx_b, 1, 1, clearb)
1970GEN_VEXT_VX(vmacc_vx_h, 2, 2, clearh)
1971GEN_VEXT_VX(vmacc_vx_w, 4, 4, clearl)
1972GEN_VEXT_VX(vmacc_vx_d, 8, 8, clearq)
1973GEN_VEXT_VX(vnmsac_vx_b, 1, 1, clearb)
1974GEN_VEXT_VX(vnmsac_vx_h, 2, 2, clearh)
1975GEN_VEXT_VX(vnmsac_vx_w, 4, 4, clearl)
1976GEN_VEXT_VX(vnmsac_vx_d, 8, 8, clearq)
1977GEN_VEXT_VX(vmadd_vx_b, 1, 1, clearb)
1978GEN_VEXT_VX(vmadd_vx_h, 2, 2, clearh)
1979GEN_VEXT_VX(vmadd_vx_w, 4, 4, clearl)
1980GEN_VEXT_VX(vmadd_vx_d, 8, 8, clearq)
1981GEN_VEXT_VX(vnmsub_vx_b, 1, 1, clearb)
1982GEN_VEXT_VX(vnmsub_vx_h, 2, 2, clearh)
1983GEN_VEXT_VX(vnmsub_vx_w, 4, 4, clearl)
1984GEN_VEXT_VX(vnmsub_vx_d, 8, 8, clearq)
1985
1986/* Vector Widening Integer Multiply-Add Instructions */
1987RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1988RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1989RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1990RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1991RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1992RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1993RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1994RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1995RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1996GEN_VEXT_VV(vwmaccu_vv_b, 1, 2, clearh)
1997GEN_VEXT_VV(vwmaccu_vv_h, 2, 4, clearl)
1998GEN_VEXT_VV(vwmaccu_vv_w, 4, 8, clearq)
1999GEN_VEXT_VV(vwmacc_vv_b, 1, 2, clearh)
2000GEN_VEXT_VV(vwmacc_vv_h, 2, 4, clearl)
2001GEN_VEXT_VV(vwmacc_vv_w, 4, 8, clearq)
2002GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2, clearh)
2003GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4, clearl)
2004GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8, clearq)
2005
2006RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
2007RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
2008RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
2009RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
2010RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
2011RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
2012RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
2013RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
2014RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
2015RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
2016RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
2017RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
2018GEN_VEXT_VX(vwmaccu_vx_b, 1, 2, clearh)
2019GEN_VEXT_VX(vwmaccu_vx_h, 2, 4, clearl)
2020GEN_VEXT_VX(vwmaccu_vx_w, 4, 8, clearq)
2021GEN_VEXT_VX(vwmacc_vx_b, 1, 2, clearh)
2022GEN_VEXT_VX(vwmacc_vx_h, 2, 4, clearl)
2023GEN_VEXT_VX(vwmacc_vx_w, 4, 8, clearq)
2024GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2, clearh)
2025GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4, clearl)
2026GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8, clearq)
2027GEN_VEXT_VX(vwmaccus_vx_b, 1, 2, clearh)
2028GEN_VEXT_VX(vwmaccus_vx_h, 2, 4, clearl)
2029GEN_VEXT_VX(vwmaccus_vx_w, 4, 8, clearq)
2030
2031/* Vector Integer Merge and Move Instructions */
2032#define GEN_VEXT_VMV_VV(NAME, ETYPE, H, CLEAR_FN)                    \
2033void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
2034                  uint32_t desc)                                     \
2035{                                                                    \
2036    uint32_t vl = env->vl;                                           \
2037    uint32_t esz = sizeof(ETYPE);                                    \
2038    uint32_t vlmax = vext_maxsz(desc) / esz;                         \
2039    uint32_t i;                                                      \
2040                                                                     \
2041    for (i = 0; i < vl; i++) {                                       \
2042        ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
2043        *((ETYPE *)vd + H(i)) = s1;                                  \
2044    }                                                                \
2045    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                         \
2046}
2047
2048GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1, clearb)
2049GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2, clearh)
2050GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4, clearl)
2051GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8, clearq)
2052
2053#define GEN_VEXT_VMV_VX(NAME, ETYPE, H, CLEAR_FN)                    \
2054void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
2055                  uint32_t desc)                                     \
2056{                                                                    \
2057    uint32_t vl = env->vl;                                           \
2058    uint32_t esz = sizeof(ETYPE);                                    \
2059    uint32_t vlmax = vext_maxsz(desc) / esz;                         \
2060    uint32_t i;                                                      \
2061                                                                     \
2062    for (i = 0; i < vl; i++) {                                       \
2063        *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
2064    }                                                                \
2065    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                         \
2066}
2067
2068GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1, clearb)
2069GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2, clearh)
2070GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4, clearl)
2071GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8, clearq)
2072
2073#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H, CLEAR_FN)                 \
2074void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
2075                  CPURISCVState *env, uint32_t desc)                 \
2076{                                                                    \
2077    uint32_t mlen = vext_mlen(desc);                                 \
2078    uint32_t vl = env->vl;                                           \
2079    uint32_t esz = sizeof(ETYPE);                                    \
2080    uint32_t vlmax = vext_maxsz(desc) / esz;                         \
2081    uint32_t i;                                                      \
2082                                                                     \
2083    for (i = 0; i < vl; i++) {                                       \
2084        ETYPE *vt = (!vext_elem_mask(v0, mlen, i) ? vs2 : vs1);      \
2085        *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
2086    }                                                                \
2087    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                         \
2088}
2089
2090GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1, clearb)
2091GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2, clearh)
2092GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4, clearl)
2093GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8, clearq)
2094
2095#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H, CLEAR_FN)                 \
2096void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
2097                  void *vs2, CPURISCVState *env, uint32_t desc)      \
2098{                                                                    \
2099    uint32_t mlen = vext_mlen(desc);                                 \
2100    uint32_t vl = env->vl;                                           \
2101    uint32_t esz = sizeof(ETYPE);                                    \
2102    uint32_t vlmax = vext_maxsz(desc) / esz;                         \
2103    uint32_t i;                                                      \
2104                                                                     \
2105    for (i = 0; i < vl; i++) {                                       \
2106        ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
2107        ETYPE d = (!vext_elem_mask(v0, mlen, i) ? s2 :               \
2108                   (ETYPE)(target_long)s1);                          \
2109        *((ETYPE *)vd + H(i)) = d;                                   \
2110    }                                                                \
2111    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                         \
2112}
2113
2114GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1, clearb)
2115GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2, clearh)
2116GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4, clearl)
2117GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8, clearq)
2118
2119/*
2120 *** Vector Fixed-Point Arithmetic Instructions
2121 */
2122
2123/* Vector Single-Width Saturating Add and Subtract */
2124
2125/*
2126 * Fixed-point instructions need rounding-mode and saturation handling,
2127 * so define the common macros for fixed point here.
2128 */
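
/*
 * env->vxrm selects the fixed-point rounding mode (0 = rnu, 1 = rne,
 * 2 = rdn, 3 = rod) and env->vxsat is the sticky saturation flag set by
 * the helpers below.  The *_rm_2 dispatchers switch on env->vxrm once,
 * presumably so that each per-element function sees the rounding mode as
 * a constant.
 */
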
2129typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
2130                          CPURISCVState *env, int vxrm);
2131
2132#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
2133static inline void                                                  \
2134do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
2135          CPURISCVState *env, int vxrm)                             \
2136{                                                                   \
2137    TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
2138    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
2139    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
2140}
2141
2142static inline void
2143vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2144             CPURISCVState *env,
2145             uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm,
2146             opivv2_rm_fn *fn)
2147{
2148    for (uint32_t i = 0; i < vl; i++) {
2149        if (!vm && !vext_elem_mask(v0, mlen, i)) {
2150            continue;
2151        }
2152        fn(vd, vs1, vs2, i, env, vxrm);
2153    }
2154}
2155
2156static inline void
2157vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
2158             CPURISCVState *env,
2159             uint32_t desc, uint32_t esz, uint32_t dsz,
2160             opivv2_rm_fn *fn, clear_fn *clearfn)
2161{
2162    uint32_t vlmax = vext_maxsz(desc) / esz;
2163    uint32_t mlen = vext_mlen(desc);
2164    uint32_t vm = vext_vm(desc);
2165    uint32_t vl = env->vl;
2166
2167    switch (env->vxrm) {
2168    case 0: /* rnu */
2169        vext_vv_rm_1(vd, v0, vs1, vs2,
2170                     env, vl, vm, mlen, 0, fn);
2171        break;
2172    case 1: /* rne */
2173        vext_vv_rm_1(vd, v0, vs1, vs2,
2174                     env, vl, vm, mlen, 1, fn);
2175        break;
2176    case 2: /* rdn */
2177        vext_vv_rm_1(vd, v0, vs1, vs2,
2178                     env, vl, vm, mlen, 2, fn);
2179        break;
2180    default: /* rod */
2181        vext_vv_rm_1(vd, v0, vs1, vs2,
2182                     env, vl, vm, mlen, 3, fn);
2183        break;
2184    }
2185
2186    clearfn(vd, vl, vl * dsz, vlmax * dsz);
2187}
2188
2189/* generate helpers for fixed point instructions with OPIVV format */
2190#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ, CLEAR_FN)                \
2191void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
2192                  CPURISCVState *env, uint32_t desc)            \
2193{                                                               \
2194    vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,         \
2195                 do_##NAME, CLEAR_FN);                          \
2196}
2197
2198static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2199{
2200    uint8_t res = a + b;
2201    if (res < a) {
2202        res = UINT8_MAX;
2203        env->vxsat = 0x1;
2204    }
2205    return res;
2206}
2207
2208static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2209                               uint16_t b)
2210{
2211    uint16_t res = a + b;
2212    if (res < a) {
2213        res = UINT16_MAX;
2214        env->vxsat = 0x1;
2215    }
2216    return res;
2217}
2218
2219static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2220                               uint32_t b)
2221{
2222    uint32_t res = a + b;
2223    if (res < a) {
2224        res = UINT32_MAX;
2225        env->vxsat = 0x1;
2226    }
2227    return res;
2228}
2229
2230static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2231                               uint64_t b)
2232{
2233    uint64_t res = a + b;
2234    if (res < a) {
2235        res = UINT64_MAX;
2236        env->vxsat = 0x1;
2237    }
2238    return res;
2239}
2240
2241RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2242RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2243RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2244RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2245GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1, clearb)
2246GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2, clearh)
2247GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4, clearl)
2248GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8, clearq)
2249
2250typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2251                          CPURISCVState *env, int vxrm);
2252
2253#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
2254static inline void                                                  \
2255do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
2256          CPURISCVState *env, int vxrm)                             \
2257{                                                                   \
2258    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
2259    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
2260}
2261
2262static inline void
2263vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2264             CPURISCVState *env,
2265             uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm,
2266             opivx2_rm_fn *fn)
2267{
2268    for (uint32_t i = 0; i < vl; i++) {
2269        if (!vm && !vext_elem_mask(v0, mlen, i)) {
2270            continue;
2271        }
2272        fn(vd, s1, vs2, i, env, vxrm);
2273    }
2274}
2275
2276static inline void
2277vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2278             CPURISCVState *env,
2279             uint32_t desc, uint32_t esz, uint32_t dsz,
2280             opivx2_rm_fn *fn, clear_fn *clearfn)
2281{
2282    uint32_t vlmax = vext_maxsz(desc) / esz;
2283    uint32_t mlen = vext_mlen(desc);
2284    uint32_t vm = vext_vm(desc);
2285    uint32_t vl = env->vl;
2286
2287    switch (env->vxrm) {
2288    case 0: /* rnu */
2289        vext_vx_rm_1(vd, v0, s1, vs2,
2290                     env, vl, vm, mlen, 0, fn);
2291        break;
2292    case 1: /* rne */
2293        vext_vx_rm_1(vd, v0, s1, vs2,
2294                     env, vl, vm, mlen, 1, fn);
2295        break;
2296    case 2: /* rdn */
2297        vext_vx_rm_1(vd, v0, s1, vs2,
2298                     env, vl, vm, mlen, 2, fn);
2299        break;
2300    default: /* rod */
2301        vext_vx_rm_1(vd, v0, s1, vs2,
2302                     env, vl, vm, mlen, 3, fn);
2303        break;
2304    }
2305
2306    clearfn(vd, vl, vl * dsz, vlmax * dsz);
2307}
2308
2309/* generate helpers for fixed point instructions with OPIVX format */
2310#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ, CLEAR_FN)          \
2311void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
2312        void *vs2, CPURISCVState *env, uint32_t desc)     \
2313{                                                         \
2314    vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ,    \
2315                 do_##NAME, CLEAR_FN);                    \
2316}
2317
2318RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2319RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2320RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2321RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2322GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1, clearb)
2323GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2, clearh)
2324GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4, clearl)
2325GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8, clearq)
2326
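/*
 * Signed saturating add (shown for sadd8; the wider variants follow the
 * same pattern): (res ^ a) & (res ^ b) has its sign bit set exactly when
 * a and b share a sign and res does not, i.e. on two's-complement
 * overflow, and the result then saturates towards the operands' sign.
 * E.g. a = b = 100: res = -56, both XORs have the sign bit set, so the
 * sum saturates to INT8_MAX.
 */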
2327static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2328{
2329    int8_t res = a + b;
2330    if ((res ^ a) & (res ^ b) & INT8_MIN) {
2331        res = a > 0 ? INT8_MAX : INT8_MIN;
2332        env->vxsat = 0x1;
2333    }
2334    return res;
2335}
2336
2337static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2338{
2339    int16_t res = a + b;
2340    if ((res ^ a) & (res ^ b) & INT16_MIN) {
2341        res = a > 0 ? INT16_MAX : INT16_MIN;
2342        env->vxsat = 0x1;
2343    }
2344    return res;
2345}
2346
2347static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2348{
2349    int32_t res = a + b;
2350    if ((res ^ a) & (res ^ b) & INT32_MIN) {
2351        res = a > 0 ? INT32_MAX : INT32_MIN;
2352        env->vxsat = 0x1;
2353    }
2354    return res;
2355}
2356
2357static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2358{
2359    int64_t res = a + b;
2360    if ((res ^ a) & (res ^ b) & INT64_MIN) {
2361        res = a > 0 ? INT64_MAX : INT64_MIN;
2362        env->vxsat = 0x1;
2363    }
2364    return res;
2365}
2366
2367RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2368RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2369RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2370RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2371GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1, clearb)
2372GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2, clearh)
2373GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4, clearl)
2374GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8, clearq)
2375
2376RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2377RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2378RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2379RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2380GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1, clearb)
2381GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2, clearh)
2382GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4, clearl)
2383GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8, clearq)
2384
2385static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2386{
2387    uint8_t res = a - b;
2388    if (res > a) {
2389        res = 0;
2390        env->vxsat = 0x1;
2391    }
2392    return res;
2393}
2394
2395static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2396                               uint16_t b)
2397{
2398    uint16_t res = a - b;
2399    if (res > a) {
2400        res = 0;
2401        env->vxsat = 0x1;
2402    }
2403    return res;
2404}
2405
2406static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2407                               uint32_t b)
2408{
2409    uint32_t res = a - b;
2410    if (res > a) {
2411        res = 0;
2412        env->vxsat = 0x1;
2413    }
2414    return res;
2415}
2416
2417static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2418                               uint64_t b)
2419{
2420    uint64_t res = a - b;
2421    if (res > a) {
2422        res = 0;
2423        env->vxsat = 0x1;
2424    }
2425    return res;
2426}
2427
2428RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2429RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2430RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2431RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2432GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1, clearb)
2433GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2, clearh)
2434GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4, clearl)
2435GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8, clearq)
2436
2437RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2438RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2439RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2440RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2441GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1, clearb)
2442GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2, clearh)
2443GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4, clearl)
2444GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8, clearq)
2445
2446static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2447{
2448    int8_t res = a - b;
2449    if ((res ^ a) & (a ^ b) & INT8_MIN) {
2450        res = a >= 0 ? INT8_MAX : INT8_MIN;
2451        env->vxsat = 0x1;
2452    }
2453    return res;
2454}
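/*
 * Signed overflow on a - b can only happen when a and b have different
 * signs and the result's sign differs from a's, hence the
 * (res ^ a) & (a ^ b) test.  For example, a = -100, b = 100 wraps to
 * res = 56; the test fires and the result saturates to INT8_MIN = -128.
 */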
2455
2456static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2457{
2458    int16_t res = a - b;
2459    if ((res ^ a) & (a ^ b) & INT16_MIN) {
2460        res = a >= 0 ? INT16_MAX : INT16_MIN;
2461        env->vxsat = 0x1;
2462    }
2463    return res;
2464}
2465
2466static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2467{
2468    int32_t res = a - b;
2469    if ((res ^ a) & (a ^ b) & INT32_MIN) {
2470        res = a >= 0 ? INT32_MAX : INT32_MIN;
2471        env->vxsat = 0x1;
2472    }
2473    return res;
2474}
2475
2476static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2477{
2478    int64_t res = a - b;
2479    if ((res ^ a) & (a ^ b) & INT64_MIN) {
2480        res = a >= 0 ? INT64_MAX : INT64_MIN;
2481        env->vxsat = 0x1;
2482    }
2483    return res;
2484}
2485
2486RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2487RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2488RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2489RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2490GEN_VEXT_VV_RM(vssub_vv_b, 1, 1, clearb)
2491GEN_VEXT_VV_RM(vssub_vv_h, 2, 2, clearh)
2492GEN_VEXT_VV_RM(vssub_vv_w, 4, 4, clearl)
2493GEN_VEXT_VV_RM(vssub_vv_d, 8, 8, clearq)
2494
2495RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2496RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2497RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2498RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2499GEN_VEXT_VX_RM(vssub_vx_b, 1, 1, clearb)
2500GEN_VEXT_VX_RM(vssub_vx_h, 2, 2, clearh)
2501GEN_VEXT_VX_RM(vssub_vx_w, 4, 4, clearl)
2502GEN_VEXT_VX_RM(vssub_vx_d, 8, 8, clearq)
2503
2504/* Vector Single-Width Averaging Add and Subtract */
2505static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2506{
2507    uint8_t d = extract64(v, shift, 1);
2508    uint8_t d1;
2509    uint64_t D1, D2;
2510
2511    if (shift == 0 || shift > 64) {
2512        return 0;
2513    }
2514
2515    d1 = extract64(v, shift - 1, 1);
2516    D1 = extract64(v, 0, shift);
2517    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2518        return d1;
2519    } else if (vxrm == 1) { /* round-to-nearest-even */
2520        if (shift > 1) {
2521            D2 = extract64(v, 0, shift - 1);
2522            return d1 & ((D2 != 0) | d);
2523        } else {
2524            return d1 & d;
2525        }
2526    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2527        return !d & (D1 != 0);
2528    }
2529    return 0; /* round-down (truncate) */
2530}
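/*
 * Worked example: v = 2 (0b10), shift = 2, i.e. the exact value is 0.5.
 * Then d = 0 (the bit that becomes the new LSB), d1 = 1 (the first
 * discarded bit), D1 = 0b10, D2 = 0, and the increment returned is:
 *   vxrm = 0 (rnu): d1 = 1                   -> result 1 (round half up)
 *   vxrm = 1 (rne): d1 & ((D2 != 0) | d) = 0 -> result 0 (ties to even)
 *   vxrm = 2 (rdn): 0                        -> result 0 (truncate)
 *   vxrm = 3 (rod): !d & (D1 != 0) = 1       -> result 1 (forced odd)
 */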
2531
2532static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2533{
2534    int64_t res = (int64_t)a + b;
2535    uint8_t round = get_round(vxrm, res, 1);
2536
2537    return (res >> 1) + round;
2538}
2539
2540static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2541{
2542    int64_t res = a + b;
2543    uint8_t round = get_round(vxrm, res, 1);
2544    int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2545
2546    /* With signed overflow, bit 64 is inverse of bit 63. */
2547    return ((res >> 1) ^ over) + round;
2548}
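/*
 * Example of the 65-bit trick above: a = b = INT64_MAX.  The wrapped sum
 * is res = -2 and over = INT64_MIN, so (res >> 1) ^ over recovers bits
 * [64:1] of the exact 65-bit sum, giving INT64_MAX.  Bit 0 of res is not
 * affected by the wrap, so get_round() still sees the correct rounding
 * bit.
 */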
2549
2550RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2551RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2552RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2553RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2554GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1, clearb)
2555GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2, clearh)
2556GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4, clearl)
2557GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8, clearq)
2558
2559RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2560RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2561RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2562RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2563GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1, clearb)
2564GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2, clearh)
2565GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4, clearl)
2566GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8, clearq)
2567
2568static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2569{
2570    int64_t res = (int64_t)a - b;
2571    uint8_t round = get_round(vxrm, res, 1);
2572
2573    return (res >> 1) + round;
2574}
2575
2576static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2577{
2578    int64_t res = (int64_t)a - b;
2579    uint8_t round = get_round(vxrm, res, 1);
2580    int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2581
2582    /* With signed overflow, bit 64 is inverse of bit 63. */
2583    return ((res >> 1) ^ over) + round;
2584}
2585
2586RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2587RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2588RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2589RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2590GEN_VEXT_VV_RM(vasub_vv_b, 1, 1, clearb)
2591GEN_VEXT_VV_RM(vasub_vv_h, 2, 2, clearh)
2592GEN_VEXT_VV_RM(vasub_vv_w, 4, 4, clearl)
2593GEN_VEXT_VV_RM(vasub_vv_d, 8, 8, clearq)
2594
2595RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2596RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2597RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2598RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2599GEN_VEXT_VX_RM(vasub_vx_b, 1, 1, clearb)
2600GEN_VEXT_VX_RM(vasub_vx_h, 2, 2, clearh)
2601GEN_VEXT_VX_RM(vasub_vx_w, 4, 4, clearl)
2602GEN_VEXT_VX_RM(vasub_vx_d, 8, 8, clearq)
2603
2604/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2605static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2606{
2607    uint8_t round;
2608    int16_t res;
2609
2610    res = (int16_t)a * (int16_t)b;
2611    round = get_round(vxrm, res, 7);
2612    res   = (res >> 7) + round;
2613
2614    if (res > INT8_MAX) {
2615        env->vxsat = 0x1;
2616        return INT8_MAX;
2617    } else if (res < INT8_MIN) {
2618        env->vxsat = 0x1;
2619        return INT8_MIN;
2620    } else {
2621        return res;
2622    }
2623}
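/*
 * vsmul treats the operands as signed fixed-point fractions with SEW-1
 * fraction bits.  E.g. for SEW = 8, a = b = 0x40 (0.5): the product is
 * 0x1000 and (0x1000 >> 7) = 0x20 (0.25).  a = b = 0x80 (-1.0) gives
 * 16384 >> 7 = 128, which does not fit in int8_t, so the result
 * saturates to INT8_MAX and vxsat is set.
 */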
2624
2625static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2626{
2627    uint8_t round;
2628    int32_t res;
2629
2630    res = (int32_t)a * (int32_t)b;
2631    round = get_round(vxrm, res, 15);
2632    res   = (res >> 15) + round;
2633
2634    if (res > INT16_MAX) {
2635        env->vxsat = 0x1;
2636        return INT16_MAX;
2637    } else if (res < INT16_MIN) {
2638        env->vxsat = 0x1;
2639        return INT16_MIN;
2640    } else {
2641        return res;
2642    }
2643}
2644
2645static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2646{
2647    uint8_t round;
2648    int64_t res;
2649
2650    res = (int64_t)a * (int64_t)b;
2651    round = get_round(vxrm, res, 31);
2652    res   = (res >> 31) + round;
2653
2654    if (res > INT32_MAX) {
2655        env->vxsat = 0x1;
2656        return INT32_MAX;
2657    } else if (res < INT32_MIN) {
2658        env->vxsat = 0x1;
2659        return INT32_MIN;
2660    } else {
2661        return res;
2662    }
2663}
2664
2665static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2666{
2667    uint8_t round;
2668    uint64_t hi_64, lo_64;
2669    int64_t res;
2670
2671    if (a == INT64_MIN && b == INT64_MIN) {
2672        env->vxsat = 1;
2673        return INT64_MAX;
2674    }
2675
2676    muls64(&lo_64, &hi_64, a, b);
2677    round = get_round(vxrm, lo_64, 63);
2678    /*
2679     * Cannot overflow, as there are always
2680     * 2 sign bits after multiply.
2681     */
2682    res = (hi_64 << 1) | (lo_64 >> 63);
2683    if (round) {
2684        if (res == INT64_MAX) {
2685            env->vxsat = 1;
2686        } else {
2687            res += 1;
2688        }
2689    }
2690    return res;
2691}
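/*
 * For SEW = 64 the 128-bit product from muls64() sits in hi_64:lo_64,
 * and the Q63 result is the product shifted right by 63, i.e. bits
 * [126:63], which is exactly (hi_64 << 1) | (lo_64 >> 63).  Apart from
 * the INT64_MIN * INT64_MIN case handled above, the product has two
 * equal sign bits, so this value fits in int64_t; only the final
 * rounding increment can still push it past INT64_MAX.
 */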
2692
2693RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2694RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2695RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2696RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2697GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1, clearb)
2698GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2, clearh)
2699GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4, clearl)
2700GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8, clearq)
2701
2702RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2703RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2704RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2705RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2706GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1, clearb)
2707GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2, clearh)
2708GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4, clearl)
2709GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8, clearq)
2710
2711/* Vector Widening Saturating Scaled Multiply-Add */
2712static inline uint16_t
2713vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b,
2714          uint16_t c)
2715{
2716    uint8_t round;
2717    uint16_t res = (uint16_t)a * b;
2718
2719    round = get_round(vxrm, res, 4);
2720    res   = (res >> 4) + round;
2721    return saddu16(env, vxrm, c, res);
2722}
2723
2724static inline uint32_t
2725vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b,
2726           uint32_t c)
2727{
2728    uint8_t round;
2729    uint32_t res = (uint32_t)a * b;
2730
2731    round = get_round(vxrm, res, 8);
2732    res   = (res >> 8) + round;
2733    return saddu32(env, vxrm, c, res);
2734}
2735
2736static inline uint64_t
2737vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b,
2738           uint64_t c)
2739{
2740    uint8_t round;
2741    uint64_t res = (uint64_t)a * b;
2742
2743    round = get_round(vxrm, res, 16);
2744    res   = (res >> 16) + round;
2745    return saddu64(env, vxrm, c, res);
2746}
2747
2748#define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
2749static inline void                                                 \
2750do_##NAME(void *vd, void *vs1, void *vs2, int i,                   \
2751          CPURISCVState *env, int vxrm)                            \
2752{                                                                  \
2753    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
2754    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
2755    TD d = *((TD *)vd + HD(i));                                    \
2756    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d);                \
2757}
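/*
 * For reference, a sketch of one instantiation: assuming WOP_UUU_B
 * supplies (uint16_t, uint8_t, uint8_t, uint16_t, uint16_t), as for the
 * other widening unsigned ops in this file, the
 * RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, ...) line below expands to roughly:
 *
 *   static inline void
 *   do_vwsmaccu_vv_b(void *vd, void *vs1, void *vs2, int i,
 *                    CPURISCVState *env, int vxrm)
 *   {
 *       uint16_t s1 = *((uint8_t *)vs1 + H1(i));
 *       uint16_t s2 = *((uint8_t *)vs2 + H1(i));
 *       uint16_t d = *((uint16_t *)vd + H2(i));
 *       *((uint16_t *)vd + H2(i)) = vwsmaccu8(env, vxrm, s2, s1, d);
 *   }
 */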
2758
2759RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8)
2760RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16)
2761RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32)
2762GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2, clearh)
2763GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4, clearl)
2764GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8, clearq)
2765
2766#define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)         \
2767static inline void                                                 \
2768do_##NAME(void *vd, target_long s1, void *vs2, int i,              \
2769          CPURISCVState *env, int vxrm)                            \
2770{                                                                  \
2771    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
2772    TD d = *((TD *)vd + HD(i));                                    \
2773    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d);       \
2774}
2775
2776RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8)
2777RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16)
2778RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32)
2779GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2, clearh)
2780GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4, clearl)
2781GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8, clearq)
2782
2783static inline int16_t
2784vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c)
2785{
2786    uint8_t round;
2787    int16_t res = (int16_t)a * b;
2788
2789    round = get_round(vxrm, res, 4);
2790    res   = (res >> 4) + round;
2791    return sadd16(env, vxrm, c, res);
2792}
2793
2794static inline int32_t
2795vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c)
2796{
2797    uint8_t round;
2798    int32_t res = (int32_t)a * b;
2799
2800    round = get_round(vxrm, res, 8);
2801    res   = (res >> 8) + round;
2802    return sadd32(env, vxrm, c, res);
2803
2804}
2805
2806static inline int64_t
2807vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c)
2808{
2809    uint8_t round;
2810    int64_t res = (int64_t)a * b;
2811
2812    round = get_round(vxrm, res, 16);
2813    res   = (res >> 16) + round;
2814    return sadd64(env, vxrm, c, res);
2815}
2816
2817RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8)
2818RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16)
2819RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32)
2820GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2, clearh)
2821GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4, clearl)
2822GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8, clearq)
2823RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8)
2824RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16)
2825RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32)
2826GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2, clearh)
2827GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4, clearl)
2828GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8, clearq)
2829
2830static inline int16_t
2831vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c)
2832{
2833    uint8_t round;
2834    int16_t res = a * (int16_t)b;
2835
2836    round = get_round(vxrm, res, 4);
2837    res   = (res >> 4) + round;
2838    return ssub16(env, vxrm, c, res);
2839}
2840
2841static inline int32_t
2842vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, int32_t c)
2843{
2844    uint8_t round;
2845    int32_t res = a * (int32_t)b;
2846
2847    round = get_round(vxrm, res, 8);
2848    res   = (res >> 8) + round;
2849    return ssub32(env, vxrm, c, res);
2850}
2851
2852static inline int64_t
2853vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c)
2854{
2855    uint8_t round;
2856    int64_t res = a * (int64_t)b;
2857
2858    round = get_round(vxrm, res, 16);
2859    res   = (res >> 16) + round;
2860    return ssub64(env, vxrm, c, res);
2861}
2862
2863RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8)
2864RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16)
2865RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32)
2866GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2, clearh)
2867GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4, clearl)
2868GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8, clearq)
2869RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8)
2870RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16)
2871RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32)
2872GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2, clearh)
2873GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4, clearl)
2874GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8, clearq)
2875
2876static inline int16_t
2877vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c)
2878{
2879    uint8_t round;
2880    int16_t res = (int16_t)a * b;
2881
2882    round = get_round(vxrm, res, 4);
2883    res   = (res >> 4) + round;
2884    return ssub16(env, vxrm, c, res);
2885}
2886
2887static inline int32_t
2888vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c)
2889{
2890    uint8_t round;
2891    int32_t res = (int32_t)a * b;
2892
2893    round = get_round(vxrm, res, 8);
2894    res   = (res >> 8) + round;
2895    return ssub32(env, vxrm, c, res);
2896}
2897
2898static inline int64_t
2899vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c)
2900{
2901    uint8_t round;
2902    int64_t res = (int64_t)a * b;
2903
2904    round = get_round(vxrm, res, 16);
2905    res   = (res >> 16) + round;
2906    return ssub64(env, vxrm, c, res);
2907}
2908
2909RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8)
2910RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16)
2911RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32)
2912GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2, clearh)
2913GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4, clearl)
2914GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8, clearq)
2915
2916/* Vector Single-Width Scaling Shift Instructions */
2917static inline uint8_t
2918vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2919{
2920    uint8_t round, shift = b & 0x7;
2921    uint8_t res;
2922
2923    round = get_round(vxrm, a, shift);
2924    res   = (a >> shift)  + round;
2925    return res;
2926}
2927static inline uint16_t
2928vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2929{
2930    uint8_t round, shift = b & 0xf;
2931    uint16_t res;
2932
2933    round = get_round(vxrm, a, shift);
2934    res   = (a >> shift)  + round;
2935    return res;
2936}
2937static inline uint32_t
2938vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2939{
2940    uint8_t round, shift = b & 0x1f;
2941    uint32_t res;
2942
2943    round = get_round(vxrm, a, shift);
2944    res   = (a >> shift)  + round;
2945    return res;
2946}
2947static inline uint64_t
2948vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2949{
2950    uint8_t round, shift = b & 0x3f;
2951    uint64_t res;
2952
2953    round = get_round(vxrm, a, shift);
2954    res   = (a >> shift)  + round;
2955    return res;
2956}
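/*
 * Example: vssrl8 with a = 0x97 (151), b = 3, vxrm = 0 (rnu): shift = 3,
 * get_round() returns 1 (bit 2 of a is set), so the result is
 * (151 >> 3) + 1 = 19, i.e. 151/8 = 18.875 rounded half-up.
 */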
2957RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2958RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2959RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2960RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2961GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1, clearb)
2962GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2, clearh)
2963GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4, clearl)
2964GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8, clearq)
2965
2966RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2967RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2968RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2969RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2970GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1, clearb)
2971GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2, clearh)
2972GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4, clearl)
2973GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8, clearq)
2974
2975static inline int8_t
2976vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2977{
2978    uint8_t round, shift = b & 0x7;
2979    int8_t res;
2980
2981    round = get_round(vxrm, a, shift);
2982    res   = (a >> shift)  + round;
2983    return res;
2984}
2985static inline int16_t
2986vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2987{
2988    uint8_t round, shift = b & 0xf;
2989    int16_t res;
2990
2991    round = get_round(vxrm, a, shift);
2992    res   = (a >> shift)  + round;
2993    return res;
2994}
2995static inline int32_t
2996vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2997{
2998    uint8_t round, shift = b & 0x1f;
2999    int32_t res;
3000
3001    round = get_round(vxrm, a, shift);
3002    res   = (a >> shift)  + round;
3003    return res;
3004}
3005static inline int64_t
3006vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
3007{
3008    uint8_t round, shift = b & 0x3f;
3009    int64_t res;
3010
3011    round = get_round(vxrm, a, shift);
3012    res   = (a >> shift)  + round;
3013    return res;
3014}
3015
3016RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
3017RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
3018RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
3019RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
3020GEN_VEXT_VV_RM(vssra_vv_b, 1, 1, clearb)
3021GEN_VEXT_VV_RM(vssra_vv_h, 2, 2, clearh)
3022GEN_VEXT_VV_RM(vssra_vv_w, 4, 4, clearl)
3023GEN_VEXT_VV_RM(vssra_vv_d, 8, 8, clearq)
3024
3025RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
3026RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
3027RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
3028RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
3029GEN_VEXT_VX_RM(vssra_vx_b, 1, 1, clearb)
3030GEN_VEXT_VX_RM(vssra_vx_h, 2, 2, clearh)
3031GEN_VEXT_VX_RM(vssra_vx_w, 4, 4, clearl)
3032GEN_VEXT_VX_RM(vssra_vx_d, 8, 8, clearq)
3033
3034/* Vector Narrowing Fixed-Point Clip Instructions */
3035static inline int8_t
3036vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
3037{
3038    uint8_t round, shift = b & 0xf;
3039    int16_t res;
3040
3041    round = get_round(vxrm, a, shift);
3042    res   = (a >> shift)  + round;
3043    if (res > INT8_MAX) {
3044        env->vxsat = 0x1;
3045        return INT8_MAX;
3046    } else if (res < INT8_MIN) {
3047        env->vxsat = 0x1;
3048        return INT8_MIN;
3049    } else {
3050        return res;
3051    }
3052}
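/*
 * Example: vnclip8 with a = 0x1234, b = 4, vxrm = 0: the rounded shift
 * gives 0x1234 >> 4 = 291, which does not fit in int8_t, so the result
 * saturates to INT8_MAX = 127 and vxsat is set.
 */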
3053
3054static inline int16_t
3055vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
3056{
3057    uint8_t round, shift = b & 0x1f;
3058    int32_t res;
3059
3060    round = get_round(vxrm, a, shift);
3061    res   = (a >> shift)  + round;
3062    if (res > INT16_MAX) {
3063        env->vxsat = 0x1;
3064        return INT16_MAX;
3065    } else if (res < INT16_MIN) {
3066        env->vxsat = 0x1;
3067        return INT16_MIN;
3068    } else {
3069        return res;
3070    }
3071}
3072
3073static inline int32_t
3074vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
3075{
3076    uint8_t round, shift = b & 0x3f;
3077    int64_t res;
3078
3079    round = get_round(vxrm, a, shift);
3080    res   = (a >> shift)  + round;
3081    if (res > INT32_MAX) {
3082        env->vxsat = 0x1;
3083        return INT32_MAX;
3084    } else if (res < INT32_MIN) {
3085        env->vxsat = 0x1;
3086        return INT32_MIN;
3087    } else {
3088        return res;
3089    }
3090}
3091
3092RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
3093RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
3094RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
3095GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1, clearb)
3096GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2, clearh)
3097GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4, clearl)
3098
3099RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8)
3100RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16)
3101RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32)
3102GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1, clearb)
3103GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2, clearh)
3104GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4, clearl)
3105
3106static inline uint8_t
3107vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
3108{
3109    uint8_t round, shift = b & 0xf;
3110    uint16_t res;
3111
3112    round = get_round(vxrm, a, shift);
3113    res   = (a >> shift)  + round;
3114    if (res > UINT8_MAX) {
3115        env->vxsat = 0x1;
3116        return UINT8_MAX;
3117    } else {
3118        return res;
3119    }
3120}
3121
3122static inline uint16_t
3123vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
3124{
3125    uint8_t round, shift = b & 0x1f;
3126    uint32_t res;
3127
3128    round = get_round(vxrm, a, shift);
3129    res   = (a >> shift)  + round;
3130    if (res > UINT16_MAX) {
3131        env->vxsat = 0x1;
3132        return UINT16_MAX;
3133    } else {
3134        return res;
3135    }
3136}
3137
3138static inline uint32_t
3139vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
3140{
3141    uint8_t round, shift = b & 0x3f;
3142    uint64_t res; /* unsigned, so the UINT32_MAX comparison below cannot go negative */
3143
3144    round = get_round(vxrm, a, shift);
3145    res   = (a >> shift)  + round;
3146    if (res > UINT32_MAX) {
3147        env->vxsat = 0x1;
3148        return UINT32_MAX;
3149    } else {
3150        return res;
3151    }
3152}
3153
3154RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
3155RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
3156RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
3157GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1, clearb)
3158GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2, clearh)
3159GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4, clearl)
3160
3161RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8)
3162RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16)
3163RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32)
3164GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1, clearb)
3165GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2, clearh)
3166GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4, clearl)
3167
3168/*
3169 *** Vector Floating-Point Arithmetic Instructions
3170 */
3171/* Vector Single-Width Floating-Point Add/Subtract Instructions */
3172#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
3173static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
3174                      CPURISCVState *env)                      \
3175{                                                              \
3176    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
3177    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
3178    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
3179}
3180
3181#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ, CLEAR_FN)         \
3182void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
3183                  void *vs2, CPURISCVState *env,          \
3184                  uint32_t desc)                          \
3185{                                                         \
3186    uint32_t vlmax = vext_maxsz(desc) / ESZ;              \
3187    uint32_t mlen = vext_mlen(desc);                      \
3188    uint32_t vm = vext_vm(desc);                          \
3189    uint32_t vl = env->vl;                                \
3190    uint32_t i;                                           \
3191                                                          \
3192    for (i = 0; i < vl; i++) {                            \
3193        if (!vm && !vext_elem_mask(v0, mlen, i)) {        \
3194            continue;                                     \
3195        }                                                 \
3196        do_##NAME(vd, vs1, vs2, i, env);                  \
3197    }                                                     \
3198    CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);             \
3199}
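/*
 * As with the integer helpers earlier in this file, the generated
 * floating-point helpers only touch the first vl elements, leave
 * masked-off elements (vm == 0 and mask bit clear) unchanged, and then
 * use CLEAR_FN to zero the tail from element vl up to vlmax.
 */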
3200
3201RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
3202RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
3203RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
3204GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2, clearh)
3205GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4, clearl)
3206GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8, clearq)
3207
3208#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
3209static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3210                      CPURISCVState *env)                      \
3211{                                                              \
3212    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
3213    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
3214}
3215
3216#define GEN_VEXT_VF(NAME, ESZ, DSZ, CLEAR_FN)             \
3217void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
3218                  void *vs2, CPURISCVState *env,          \
3219                  uint32_t desc)                          \
3220{                                                         \
3221    uint32_t vlmax = vext_maxsz(desc) / ESZ;              \
3222    uint32_t mlen = vext_mlen(desc);                      \
3223    uint32_t vm = vext_vm(desc);                          \
3224    uint32_t vl = env->vl;                                \
3225    uint32_t i;                                           \
3226                                                          \
3227    for (i = 0; i < vl; i++) {                            \
3228        if (!vm && !vext_elem_mask(v0, mlen, i)) {        \
3229            continue;                                     \
3230        }                                                 \
3231        do_##NAME(vd, s1, vs2, i, env);                   \
3232    }                                                     \
3233    CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);             \
3234}
3235
3236RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
3237RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
3238RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
3239GEN_VEXT_VF(vfadd_vf_h, 2, 2, clearh)
3240GEN_VEXT_VF(vfadd_vf_w, 4, 4, clearl)
3241GEN_VEXT_VF(vfadd_vf_d, 8, 8, clearq)
3242
3243RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
3244RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
3245RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
3246GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2, clearh)
3247GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4, clearl)
3248GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8, clearq)
3249RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
3250RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
3251RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
3252GEN_VEXT_VF(vfsub_vf_h, 2, 2, clearh)
3253GEN_VEXT_VF(vfsub_vf_w, 4, 4, clearl)
3254GEN_VEXT_VF(vfsub_vf_d, 8, 8, clearq)
3255
3256static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
3257{
3258    return float16_sub(b, a, s);
3259}
3260
3261static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
3262{
3263    return float32_sub(b, a, s);
3264}
3265
3266static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
3267{
3268    return float64_sub(b, a, s);
3269}
3270
3271RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
3272RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
3273RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
3274GEN_VEXT_VF(vfrsub_vf_h, 2, 2, clearh)
3275GEN_VEXT_VF(vfrsub_vf_w, 4, 4, clearl)
3276GEN_VEXT_VF(vfrsub_vf_d, 8, 8, clearq)
3277
3278/* Vector Widening Floating-Point Add/Subtract Instructions */
3279static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
3280{
3281    return float32_add(float16_to_float32(a, true, s),
3282            float16_to_float32(b, true, s), s);
3283}
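/*
 * The 'true' passed to float16_to_float32() in these widening helpers
 * is the softfloat 'ieee' flag, selecting standard IEEE half-precision
 * rather than the alternative half format.
 */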
3284
3285static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3286{
3287    return float64_add(float32_to_float64(a, s),
3288            float32_to_float64(b, s), s);
3289
3290}
3291
3292RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3293RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
3294GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4, clearl)
3295GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8, clearq)
3296RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3297RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
3298GEN_VEXT_VF(vfwadd_vf_h, 2, 4, clearl)
3299GEN_VEXT_VF(vfwadd_vf_w, 4, 8, clearq)
3300
3301static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3302{
3303    return float32_sub(float16_to_float32(a, true, s),
3304            float16_to_float32(b, true, s), s);
3305}
3306
3307static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3308{
3309    return float64_sub(float32_to_float64(a, s),
3310            float32_to_float64(b, s), s);
3311
3312}
3313
3314RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3315RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
3316GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4, clearl)
3317GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8, clearq)
3318RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3319RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
3320GEN_VEXT_VF(vfwsub_vf_h, 2, 4, clearl)
3321GEN_VEXT_VF(vfwsub_vf_w, 4, 8, clearq)
3322
3323static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3324{
3325    return float32_add(a, float16_to_float32(b, true, s), s);
3326}
3327
3328static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3329{
3330    return float64_add(a, float32_to_float64(b, s), s);
3331}
3332
3333RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3334RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
3335GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4, clearl)
3336GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8, clearq)
3337RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3338RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
3339GEN_VEXT_VF(vfwadd_wf_h, 2, 4, clearl)
3340GEN_VEXT_VF(vfwadd_wf_w, 4, 8, clearq)
3341
3342static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3343{
3344    return float32_sub(a, float16_to_float32(b, true, s), s);
3345}
3346
3347static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3348{
3349    return float64_sub(a, float32_to_float64(b, s), s);
3350}
3351
3352RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3353RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
3354GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4, clearl)
3355GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8, clearq)
3356RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3357RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
3358GEN_VEXT_VF(vfwsub_wf_h, 2, 4, clearl)
3359GEN_VEXT_VF(vfwsub_wf_w, 4, 8, clearq)
3360
3361/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3362RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3363RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3364RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
3365GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2, clearh)
3366GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4, clearl)
3367GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8, clearq)
3368RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3369RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3370RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
3371GEN_VEXT_VF(vfmul_vf_h, 2, 2, clearh)
3372GEN_VEXT_VF(vfmul_vf_w, 4, 4, clearl)
3373GEN_VEXT_VF(vfmul_vf_d, 8, 8, clearq)
3374
3375RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3376RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3377RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3378GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2, clearh)
3379GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4, clearl)
3380GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8, clearq)
3381RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3382RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3383RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3384GEN_VEXT_VF(vfdiv_vf_h, 2, 2, clearh)
3385GEN_VEXT_VF(vfdiv_vf_w, 4, 4, clearl)
3386GEN_VEXT_VF(vfdiv_vf_d, 8, 8, clearq)
3387
3388static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3389{
3390    return float16_div(b, a, s);
3391}
3392
3393static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3394{
3395    return float32_div(b, a, s);
3396}
3397
3398static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3399{
3400    return float64_div(b, a, s);
3401}
3402
3403RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3404RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3405RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3406GEN_VEXT_VF(vfrdiv_vf_h, 2, 2, clearh)
3407GEN_VEXT_VF(vfrdiv_vf_w, 4, 4, clearl)
3408GEN_VEXT_VF(vfrdiv_vf_d, 8, 8, clearq)
3409
3410/* Vector Widening Floating-Point Multiply */
3411static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3412{
3413    return float32_mul(float16_to_float32(a, true, s),
3414            float16_to_float32(b, true, s), s);
3415}
3416
3417static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3418{
3419    return float64_mul(float32_to_float64(a, s),
3420            float32_to_float64(b, s), s);
3421
3422}
3423RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3424RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3425GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4, clearl)
3426GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8, clearq)
3427RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3428RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3429GEN_VEXT_VF(vfwmul_vf_h, 2, 4, clearl)
3430GEN_VEXT_VF(vfwmul_vf_w, 4, 8, clearq)
3431
3432/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3433#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
3434static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
3435        CPURISCVState *env)                                        \
3436{                                                                  \
3437    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
3438    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
3439    TD d = *((TD *)vd + HD(i));                                    \
3440    *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);          \
3441}
3442
3443static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3444{
3445    return float16_muladd(a, b, d, 0, s);
3446}
3447
3448static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3449{
3450    return float32_muladd(a, b, d, 0, s);
3451}
3452
3453static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3454{
3455    return float64_muladd(a, b, d, 0, s);
3456}
3457
3458RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3459RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3460RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3461GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2, clearh)
3462GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4, clearl)
3463GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8, clearq)
3464
3465#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
3466static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
3467        CPURISCVState *env)                                       \
3468{                                                                 \
3469    TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
3470    TD d = *((TD *)vd + HD(i));                                   \
3471    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3472}
3473
3474RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3475RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3476RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3477GEN_VEXT_VF(vfmacc_vf_h, 2, 2, clearh)
3478GEN_VEXT_VF(vfmacc_vf_w, 4, 4, clearl)
3479GEN_VEXT_VF(vfmacc_vf_d, 8, 8, clearq)
3480
3481static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3482{
3483    return float16_muladd(a, b, d,
3484            float_muladd_negate_c | float_muladd_negate_product, s);
3485}
3486
3487static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3488{
3489    return float32_muladd(a, b, d,
3490            float_muladd_negate_c | float_muladd_negate_product, s);
3491}
3492
3493static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3494{
3495    return float64_muladd(a, b, d,
3496            float_muladd_negate_c | float_muladd_negate_product, s);
3497}
3498
3499RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3500RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3501RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3502GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2, clearh)
3503GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4, clearl)
3504GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8, clearq)
3505RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3506RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3507RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3508GEN_VEXT_VF(vfnmacc_vf_h, 2, 2, clearh)
3509GEN_VEXT_VF(vfnmacc_vf_w, 4, 4, clearl)
3510GEN_VEXT_VF(vfnmacc_vf_d, 8, 8, clearq)
3511
3512static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3513{
3514    return float16_muladd(a, b, d, float_muladd_negate_c, s);
3515}
3516
3517static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3518{
3519    return float32_muladd(a, b, d, float_muladd_negate_c, s);
3520}
3521
3522static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3523{
3524    return float64_muladd(a, b, d, float_muladd_negate_c, s);
3525}
3526
3527RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3528RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3529RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3530GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2, clearh)
3531GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4, clearl)
3532GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8, clearq)
3533RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3534RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3535RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3536GEN_VEXT_VF(vfmsac_vf_h, 2, 2, clearh)
3537GEN_VEXT_VF(vfmsac_vf_w, 4, 4, clearl)
3538GEN_VEXT_VF(vfmsac_vf_d, 8, 8, clearq)
3539
3540static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3541{
3542    return float16_muladd(a, b, d, float_muladd_negate_product, s);
3543}
3544
3545static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3546{
3547    return float32_muladd(a, b, d, float_muladd_negate_product, s);
3548}
3549
3550static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3551{
3552    return float64_muladd(a, b, d, float_muladd_negate_product, s);
3553}
3554
3555RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3556RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3557RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3558GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2, clearh)
3559GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4, clearl)
3560GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8, clearq)
3561RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3562RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3563RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3564GEN_VEXT_VF(vfnmsac_vf_h, 2, 2, clearh)
3565GEN_VEXT_VF(vfnmsac_vf_w, 4, 4, clearl)
3566GEN_VEXT_VF(vfnmsac_vf_d, 8, 8, clearq)
3567
3568static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3569{
3570    return float16_muladd(d, b, a, 0, s);
3571}
3572
3573static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3574{
3575    return float32_muladd(d, b, a, 0, s);
3576}
3577
3578static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3579{
3580    return float64_muladd(d, b, a, 0, s);
3581}
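/*
 * Note the operand order: the fmacc/fmsac helpers above compute
 * OP(s2, s1, d) = vs2 * vs1 + vd, while the fmadd/fmsub helpers pass
 * the accumulator as a multiplicand, muladd(d, b, a) = vd * vs1 + vs2,
 * matching the vfmacc/vfmadd distinction in the vector spec.
 */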
3582
3583RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3584RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3585RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3586GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2, clearh)
3587GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4, clearl)
3588GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8, clearq)
3589RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3590RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3591RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3592GEN_VEXT_VF(vfmadd_vf_h, 2, 2, clearh)
3593GEN_VEXT_VF(vfmadd_vf_w, 4, 4, clearl)
3594GEN_VEXT_VF(vfmadd_vf_d, 8, 8, clearq)
3595
3596static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3597{
3598    return float16_muladd(d, b, a,
3599            float_muladd_negate_c | float_muladd_negate_product, s);
3600}
3601
3602static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3603{
3604    return float32_muladd(d, b, a,
3605            float_muladd_negate_c | float_muladd_negate_product, s);
3606}
3607
3608static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3609{
3610    return float64_muladd(d, b, a,
3611            float_muladd_negate_c | float_muladd_negate_product, s);
3612}
3613
3614RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3615RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3616RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3617GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2, clearh)
3618GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4, clearl)
3619GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8, clearq)
3620RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3621RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3622RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3623GEN_VEXT_VF(vfnmadd_vf_h, 2, 2, clearh)
3624GEN_VEXT_VF(vfnmadd_vf_w, 4, 4, clearl)
3625GEN_VEXT_VF(vfnmadd_vf_d, 8, 8, clearq)
3626
3627static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3628{
3629    return float16_muladd(d, b, a, float_muladd_negate_c, s);
3630}
3631
3632static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3633{
3634    return float32_muladd(d, b, a, float_muladd_negate_c, s);
3635}
3636
3637static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3638{
3639    return float64_muladd(d, b, a, float_muladd_negate_c, s);
3640}
3641
3642RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3643RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3644RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3645GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2, clearh)
3646GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4, clearl)
3647GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8, clearq)
3648RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3649RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3650RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3651GEN_VEXT_VF(vfmsub_vf_h, 2, 2, clearh)
3652GEN_VEXT_VF(vfmsub_vf_w, 4, 4, clearl)
3653GEN_VEXT_VF(vfmsub_vf_d, 8, 8, clearq)
3654
3655static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3656{
3657    return float16_muladd(d, b, a, float_muladd_negate_product, s);
3658}
3659
3660static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3661{
3662    return float32_muladd(d, b, a, float_muladd_negate_product, s);
3663}
3664
3665static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3666{
3667    return float64_muladd(d, b, a, float_muladd_negate_product, s);
3668}
3669
3670RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3671RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3672RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3673GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2, clearh)
3674GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4, clearl)
3675GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8, clearq)
3676RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3677RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3678RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3679GEN_VEXT_VF(vfnmsub_vf_h, 2, 2, clearh)
3680GEN_VEXT_VF(vfnmsub_vf_w, 4, 4, clearl)
3681GEN_VEXT_VF(vfnmsub_vf_d, 8, 8, clearq)
3682
3683/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3684static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3685{
3686    return float32_muladd(float16_to_float32(a, true, s),
3687                        float16_to_float32(b, true, s), d, 0, s);
3688}
3689
3690static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3691{
3692    return float64_muladd(float32_to_float64(a, s),
3693                        float32_to_float64(b, s), d, 0, s);
3694}
3695
3696RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3697RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3698GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4, clearl)
3699GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8, clearq)
3700RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3701RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3702GEN_VEXT_VF(vfwmacc_vf_h, 2, 4, clearl)
3703GEN_VEXT_VF(vfwmacc_vf_w, 4, 8, clearq)
3704
3705static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3706{
3707    return float32_muladd(float16_to_float32(a, true, s),
3708                        float16_to_float32(b, true, s), d,
3709                        float_muladd_negate_c | float_muladd_negate_product, s);
3710}
3711
3712static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3713{
3714    return float64_muladd(float32_to_float64(a, s),
3715                        float32_to_float64(b, s), d,
3716                        float_muladd_negate_c | float_muladd_negate_product, s);
3717}
3718
3719RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3720RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3721GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4, clearl)
3722GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8, clearq)
3723RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3724RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3725GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4, clearl)
3726GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8, clearq)
3727
3728static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3729{
3730    return float32_muladd(float16_to_float32(a, true, s),
3731                        float16_to_float32(b, true, s), d,
3732                        float_muladd_negate_c, s);
3733}
3734
3735static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3736{
3737    return float64_muladd(float32_to_float64(a, s),
3738                        float32_to_float64(b, s), d,
3739                        float_muladd_negate_c, s);
3740}
3741
3742RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3743RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3744GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4, clearl)
3745GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8, clearq)
3746RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3747RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3748GEN_VEXT_VF(vfwmsac_vf_h, 2, 4, clearl)
3749GEN_VEXT_VF(vfwmsac_vf_w, 4, 8, clearq)
3750
3751static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3752{
3753    return float32_muladd(float16_to_float32(a, true, s),
3754                        float16_to_float32(b, true, s), d,
3755                        float_muladd_negate_product, s);
3756}
3757
3758static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3759{
3760    return float64_muladd(float32_to_float64(a, s),
3761                        float32_to_float64(b, s), d,
3762                        float_muladd_negate_product, s);
3763}
3764
3765RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3766RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3767GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4, clearl)
3768GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8, clearq)
3769RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3770RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3771GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4, clearl)
3772GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8, clearq)
3773
3774/* Vector Floating-Point Square-Root Instruction */
3775/* (TD, T2, TX2) */
3776#define OP_UU_H uint16_t, uint16_t, uint16_t
3777#define OP_UU_W uint32_t, uint32_t, uint32_t
3778#define OP_UU_D uint64_t, uint64_t, uint64_t
3779
3780#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
3781static void do_##NAME(void *vd, void *vs2, int i,      \
3782        CPURISCVState *env)                            \
3783{                                                      \
3784    TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3785    *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
3786}
3787
3788#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ, CLEAR_FN)       \
3789void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3790        CPURISCVState *env, uint32_t desc)             \
3791{                                                      \
3792    uint32_t vlmax = vext_maxsz(desc) / ESZ;           \
3793    uint32_t mlen = vext_mlen(desc);                   \
3794    uint32_t vm = vext_vm(desc);                       \
3795    uint32_t vl = env->vl;                             \
3796    uint32_t i;                                        \
3797                                                       \
3798    if (vl == 0) {                                     \
3799        return;                                        \
3800    }                                                  \
3801    for (i = 0; i < vl; i++) {                         \
3802        if (!vm && !vext_elem_mask(v0, mlen, i)) {     \
3803            continue;                                  \
3804        }                                              \
3805        do_##NAME(vd, vs2, i, env);                    \
3806    }                                                  \
3807    CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);          \
3808}
3809
3810RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3811RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3812RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3813GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2, clearh)
3814GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4, clearl)
3815GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8, clearq)
3816
3817/* Vector Floating-Point MIN/MAX Instructions */
3818RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum)
3819RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum)
3820RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum)
3821GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2, clearh)
3822GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4, clearl)
3823GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8, clearq)
3824RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum)
3825RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum)
3826RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum)
3827GEN_VEXT_VF(vfmin_vf_h, 2, 2, clearh)
3828GEN_VEXT_VF(vfmin_vf_w, 4, 4, clearl)
3829GEN_VEXT_VF(vfmin_vf_d, 8, 8, clearq)
3830
3831RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum)
3832RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum)
3833RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum)
3834GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2, clearh)
3835GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4, clearl)
3836GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8, clearq)
3837RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum)
3838RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum)
3839RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum)
3840GEN_VEXT_VF(vfmax_vf_h, 2, 2, clearh)
3841GEN_VEXT_VF(vfmax_vf_w, 4, 4, clearl)
3842GEN_VEXT_VF(vfmax_vf_d, 8, 8, clearq)
3843
3844/* Vector Floating-Point Sign-Injection Instructions */
3845static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3846{
3847    return deposit64(b, 0, 15, a);
3848}
3849
3850static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3851{
3852    return deposit64(b, 0, 31, a);
3853}
3854
3855static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3856{
3857    return deposit64(b, 0, 63, a);
3858}
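/*
 * With OPFVV2/OPFVF2 the helpers are called as OP(s2, s1), so 'a' holds
 * the vs2 element and 'b' the vs1 element (or the f-register scalar).
 * deposit64() therefore keeps the exponent and mantissa of vs2 and takes
 * only the sign bit from vs1/rs1, as vfsgnj requires; fsgnjn and fsgnjx
 * below use the inverted sign and the XOR of the two signs, respectively.
 */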
3859
3860RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3861RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3862RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3863GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2, clearh)
3864GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4, clearl)
3865GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8, clearq)
3866RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3867RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3868RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3869GEN_VEXT_VF(vfsgnj_vf_h, 2, 2, clearh)
3870GEN_VEXT_VF(vfsgnj_vf_w, 4, 4, clearl)
3871GEN_VEXT_VF(vfsgnj_vf_d, 8, 8, clearq)
3872
3873static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3874{
3875    return deposit64(~b, 0, 15, a);
3876}
3877
3878static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3879{
3880    return deposit64(~b, 0, 31, a);
3881}
3882
3883static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3884{
3885    return deposit64(~b, 0, 63, a);
3886}
3887
3888RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3889RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3890RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3891GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2, clearh)
3892GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4, clearl)
3893GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8, clearq)
3894RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3895RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3896RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3897GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2, clearh)
3898GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4, clearl)
3899GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8, clearq)
3900
3901static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3902{
3903    return deposit64(b ^ a, 0, 15, a);
3904}
3905
3906static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3907{
3908    return deposit64(b ^ a, 0, 31, a);
3909}
3910
3911static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3912{
3913    return deposit64(b ^ a, 0, 63, a);
3914}
3915
3916RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3917RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3918RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
3919GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2, clearh)
3920GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4, clearl)
3921GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8, clearq)
3922RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3923RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3924RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
3925GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2, clearh)
3926GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4, clearl)
3927GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8, clearq)
3928
3929/* Vector Floating-Point Compare Instructions */
3930#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
3931void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
3932                  CPURISCVState *env, uint32_t desc)          \
3933{                                                             \
3934    uint32_t mlen = vext_mlen(desc);                          \
3935    uint32_t vm = vext_vm(desc);                              \
3936    uint32_t vl = env->vl;                                    \
3937    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);        \
3938    uint32_t i;                                               \
3939                                                              \
3940    for (i = 0; i < vl; i++) {                                \
3941        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
3942        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3943        if (!vm && !vext_elem_mask(v0, mlen, i)) {            \
3944            continue;                                         \
3945        }                                                     \
3946        vext_set_elem_mask(vd, mlen, i,                       \
3947                           DO_OP(s2, s1, &env->fp_status));   \
3948    }                                                         \
3949    for (; i < vlmax; i++) {                                  \
3950        vext_set_elem_mask(vd, mlen, i, 0);                   \
3951    }                                                         \
3952}
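/*
 * Illustrative only: for an active element i, the expansion of
 * GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
 * amounts to
 *
 *     uint16_t s1 = *((uint16_t *)vs1 + H2(i));
 *     uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *     vext_set_elem_mask(vd, mlen, i,
 *                        float16_eq_quiet(s2, s1, &env->fp_status));
 *
 * Inactive elements keep their previous mask bit in vd, while mask bits
 * from vl up to VLMAX are cleared.  The _VF form below differs only in
 * using the scalar rs1 operand instead of vs1[i].
 */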
3953
3954GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
3955GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
3956GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3957
3958#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
3959void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
3960                  CPURISCVState *env, uint32_t desc)                \
3961{                                                                   \
3962    uint32_t mlen = vext_mlen(desc);                                \
3963    uint32_t vm = vext_vm(desc);                                    \
3964    uint32_t vl = env->vl;                                          \
3965    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);              \
3966    uint32_t i;                                                     \
3967                                                                    \
3968    for (i = 0; i < vl; i++) {                                      \
3969        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
3970        if (!vm && !vext_elem_mask(v0, mlen, i)) {                  \
3971            continue;                                               \
3972        }                                                           \
3973        vext_set_elem_mask(vd, mlen, i,                             \
3974                           DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
3975    }                                                               \
3976    for (; i < vlmax; i++) {                                        \
3977        vext_set_elem_mask(vd, mlen, i, 0);                         \
3978    }                                                               \
3979}
3980
3981GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
3982GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
3983GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3984
3985static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3986{
3987    FloatRelation compare = float16_compare_quiet(a, b, s);
3988    return compare != float_relation_equal;
3989}
3990
3991static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3992{
3993    FloatRelation compare = float32_compare_quiet(a, b, s);
3994    return compare != float_relation_equal;
3995}
3996
3997static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3998{
3999    FloatRelation compare = float64_compare_quiet(a, b, s);
4000    return compare != float_relation_equal;
4001}
4002
4003GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4004GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4005GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4006GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4007GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4008GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4009
4010GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4011GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4012GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4013GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4014GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4015GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4016
4017GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4018GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4019GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4020GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4021GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4022GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
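/*
 * As with the scalar FEQ/FLT/FLE instructions, the equality compares above
 * use the quiet softfloat comparisons, while vmflt/vmfle (and vmfgt/vmfge
 * below) use the signaling variants, which also set the invalid flag when
 * a quiet NaN operand is seen.
 */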
4023
4024static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4025{
4026    FloatRelation compare = float16_compare(a, b, s);
4027    return compare == float_relation_greater;
4028}
4029
4030static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4031{
4032    FloatRelation compare = float32_compare(a, b, s);
4033    return compare == float_relation_greater;
4034}
4035
4036static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4037{
4038    FloatRelation compare = float64_compare(a, b, s);
4039    return compare == float_relation_greater;
4040}
4041
4042GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4043GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4044GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4045
4046static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4047{
4048    FloatRelation compare = float16_compare(a, b, s);
4049    return compare == float_relation_greater ||
4050           compare == float_relation_equal;
4051}
4052
4053static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4054{
4055    FloatRelation compare = float32_compare(a, b, s);
4056    return compare == float_relation_greater ||
4057           compare == float_relation_equal;
4058}
4059
4060static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4061{
4062    FloatRelation compare = float64_compare(a, b, s);
4063    return compare == float_relation_greater ||
4064           compare == float_relation_equal;
4065}
4066
4067GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4068GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4069GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4070
4071GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet)
4072GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet)
4073GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet)
4074GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet)
4075GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet)
4076GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet)
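/*
 * DO_OP is substituted textually by the generator macros, so the leading
 * '!' above applies to the result of the float*_unordered_quiet() call,
 * turning it into an "ordered" compare.
 */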
4077
4078/* Vector Floating-Point Classify Instruction */
4079#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
4080static void do_##NAME(void *vd, void *vs2, int i)      \
4081{                                                      \
4082    TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
4083    *((TD *)vd + HD(i)) = OP(s2);                      \
4084}
4085
4086#define GEN_VEXT_V(NAME, ESZ, DSZ, CLEAR_FN)           \
4087void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
4088                  CPURISCVState *env, uint32_t desc)   \
4089{                                                      \
4090    uint32_t vlmax = vext_maxsz(desc) / ESZ;           \
4091    uint32_t mlen = vext_mlen(desc);                   \
4092    uint32_t vm = vext_vm(desc);                       \
4093    uint32_t vl = env->vl;                             \
4094    uint32_t i;                                        \
4095                                                       \
4096    for (i = 0; i < vl; i++) {                         \
4097        if (!vm && !vext_elem_mask(v0, mlen, i)) {     \
4098            continue;                                  \
4099        }                                              \
4100        do_##NAME(vd, vs2, i);                         \
4101    }                                                  \
4102    CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);          \
4103}
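/*
 * GEN_VEXT_V mirrors GEN_VEXT_V_ENV but the per-element operation takes no
 * CPU state: vfclass never raises floating-point exceptions, so no
 * fp_status is needed.
 */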
4104
4105target_ulong fclass_h(uint64_t frs1)
4106{
4107    float16 f = frs1;
4108    bool sign = float16_is_neg(f);
4109
4110    if (float16_is_infinity(f)) {
4111        return sign ? 1 << 0 : 1 << 7;
4112    } else if (float16_is_zero(f)) {
4113        return sign ? 1 << 3 : 1 << 4;
4114    } else if (float16_is_zero_or_denormal(f)) {
4115        return sign ? 1 << 2 : 1 << 5;
4116    } else if (float16_is_any_nan(f)) {
4117        float_status s = { }; /* for snan_bit_is_one */
4118        return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4119    } else {
4120        return sign ? 1 << 1 : 1 << 6;
4121    }
4122}
4123
4124target_ulong fclass_s(uint64_t frs1)
4125{
4126    float32 f = frs1;
4127    bool sign = float32_is_neg(f);
4128
4129    if (float32_is_infinity(f)) {
4130        return sign ? 1 << 0 : 1 << 7;
4131    } else if (float32_is_zero(f)) {
4132        return sign ? 1 << 3 : 1 << 4;
4133    } else if (float32_is_zero_or_denormal(f)) {
4134        return sign ? 1 << 2 : 1 << 5;
4135    } else if (float32_is_any_nan(f)) {
4136        float_status s = { }; /* for snan_bit_is_one */
4137        return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4138    } else {
4139        return sign ? 1 << 1 : 1 << 6;
4140    }
4141}
4142
4143target_ulong fclass_d(uint64_t frs1)
4144{
4145    float64 f = frs1;
4146    bool sign = float64_is_neg(f);
4147
4148    if (float64_is_infinity(f)) {
4149        return sign ? 1 << 0 : 1 << 7;
4150    } else if (float64_is_zero(f)) {
4151        return sign ? 1 << 3 : 1 << 4;
4152    } else if (float64_is_zero_or_denormal(f)) {
4153        return sign ? 1 << 2 : 1 << 5;
4154    } else if (float64_is_any_nan(f)) {
4155        float_status s = { }; /* for snan_bit_is_one */
4156        return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4157    } else {
4158        return sign ? 1 << 1 : 1 << 6;
4159    }
4160}
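/*
 * The fclass_* helpers return the same ten-bit classification mask as the
 * scalar FCLASS instructions:
 *   bit 0: -infinity            bit 5: positive subnormal
 *   bit 1: negative normal      bit 6: positive normal
 *   bit 2: negative subnormal   bit 7: +infinity
 *   bit 3: -0                   bit 8: signaling NaN
 *   bit 4: +0                   bit 9: quiet NaN
 */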
4161
4162RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4163RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4164RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4165GEN_VEXT_V(vfclass_v_h, 2, 2, clearh)
4166GEN_VEXT_V(vfclass_v_w, 4, 4, clearl)
4167GEN_VEXT_V(vfclass_v_d, 8, 8, clearq)
4168
4169/* Vector Floating-Point Merge Instruction */
4170#define GEN_VFMERGE_VF(NAME, ETYPE, H, CLEAR_FN)              \
4171void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4172                  CPURISCVState *env, uint32_t desc)          \
4173{                                                             \
4174    uint32_t mlen = vext_mlen(desc);                          \
4175    uint32_t vm = vext_vm(desc);                              \
4176    uint32_t vl = env->vl;                                    \
4177    uint32_t esz = sizeof(ETYPE);                             \
4178    uint32_t vlmax = vext_maxsz(desc) / esz;                  \
4179    uint32_t i;                                               \
4180                                                              \
4181    for (i = 0; i < vl; i++) {                                \
4182        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
4183        *((ETYPE *)vd + H(i))                                 \
4184          = (!vm && !vext_elem_mask(v0, mlen, i) ? s2 : s1);  \
4185    }                                                         \
4186    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                  \
4187}
4188
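/* vd[i] = v0.mask[i] ? f[rs1] : vs2[i] */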
4189GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2, clearh)
4190GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4, clearl)
4191GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8, clearq)
4192
4193/* Single-Width Floating-Point/Integer Type-Convert Instructions */
4194/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4195RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4196RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4197RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4198GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2, clearh)
4199GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4, clearl)
4200GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8, clearq)
4201
4202/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4203RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4204RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4205RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4206GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2, clearh)
4207GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4, clearl)
4208GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8, clearq)
4209
4210/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4211RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4212RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4213RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4214GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2, clearh)
4215GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4, clearl)
4216GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8, clearq)
4217
4218/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4219RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4220RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4221RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4222GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2, clearh)
4223GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4, clearl)
4224GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8, clearq)
4225
4226/* Widening Floating-Point/Integer Type-Convert Instructions */
4227/* (TD, T2, TX2) */
4228#define WOP_UU_H uint32_t, uint16_t, uint16_t
4229#define WOP_UU_W uint64_t, uint32_t, uint32_t
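/*
 * For the widening converts, ESZ below is the single-width source element
 * size (used to derive VLMAX) and DSZ is the double-width destination
 * element size (used for the tail clear), hence e.g. (2, 4) for the
 * half-to-single variants.
 */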
4230/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
4231RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4232RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4233GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4, clearl)
4234GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8, clearq)
4235
4236/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4237RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4238RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4239GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4, clearl)
4240GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8, clearq)
4241
4242/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
4243RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4244RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4245GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4, clearl)
4246GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8, clearq)
4247
4248/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4249RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4250RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4251GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4, clearl)
4252GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8, clearq)
4253
4254/*
4255 * vfwcvt.f.f.v vd, vs2, vm #
4256 * Convert single-width float to double-width float.
4257 */
4258static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4259{
4260    return float16_to_float32(a, true, s);
4261}
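/*
 * The boolean argument selects IEEE half-precision handling in
 * float16_to_float32() (as opposed to the ARM alternative half-precision
 * format); vfncvtffv16 below passes the same flag to float32_to_float16().
 */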
4262
4263RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4264RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4265GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4, clearl)
4266GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8, clearq)
4267
4268/* Narrowing Floating-Point/Integer Type-Convert Instructions */
4269/* (TD, T2, TX2) */
4270#define NOP_UU_H uint16_t, uint32_t, uint32_t
4271#define NOP_UU_W uint32_t, uint64_t, uint64_t
4272/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
4273RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16)
4274RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32)
4275GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2, clearh)
4276GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4, clearl)
4277
4278/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
4279RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16)
4280RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32)
4281GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2, clearh)
4282GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4, clearl)
4283
4284/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
4285RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16)
4286RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32)
4287GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2, clearh)
4288GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4, clearl)
4289
4290/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
4291RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16)
4292RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32)
4293GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2, clearh)
4294GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4, clearl)
4295
4296/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4297static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4298{
4299    return float32_to_float16(a, true, s);
4300}
4301
4302RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16)
4303RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32)
4304GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2, clearh)
4305GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4, clearl)
4306
4307/*
4308 *** Vector Reduction Operations
4309 */
4310/* Vector Single-Width Integer Reduction Instructions */
4311#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\
4312void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4313        void *vs2, CPURISCVState *env, uint32_t desc)     \
4314{                                                         \
4315    uint32_t mlen = vext_mlen(desc);                      \
4316    uint32_t vm = vext_vm(desc);                          \
4317    uint32_t vl = env->vl;                                \
4318    uint32_t i;                                           \
4319    uint32_t tot = env_archcpu(env)->cfg.vlen / 8;        \
4320    TD s1 =  *((TD *)vs1 + HD(0));                        \
4321                                                          \
4322    for (i = 0; i < vl; i++) {                            \
4323        TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
4324        if (!vm && !vext_elem_mask(v0, mlen, i)) {        \
4325            continue;                                     \
4326        }                                                 \
4327        s1 = OP(s1, (TD)s2);                              \
4328    }                                                     \
4329    *((TD *)vd + HD(0)) = s1;                             \
4330    CLEAR_FN(vd, 1, sizeof(TD), tot);                     \
4331}
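/*
 * For example, GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD,
 * clearl) yields a helper that folds every active vs2 element into an
 * accumulator seeded with vs1[0], stores the result to vd[0], and zeroes
 * the rest of the destination register (tot is VLEN in bytes).
 */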
4332
4333/* vd[0] = sum(vs1[0], vs2[*]) */
4334GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD, clearb)
4335GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD, clearh)
4336GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD, clearl)
4337GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD, clearq)
4338
4339/* vd[0] = maxu(vs1[0], vs2[*]) */
4340GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX, clearb)
4341GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX, clearh)
4342GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX, clearl)
4343GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX, clearq)
4344
4345/* vd[0] = max(vs1[0], vs2[*]) */
4346GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX, clearb)
4347GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX, clearh)
4348GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX, clearl)
4349GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX, clearq)
4350
4351/* vd[0] = minu(vs1[0], vs2[*]) */
4352GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN, clearb)
4353GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN, clearh)
4354GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN, clearl)
4355GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN, clearq)
4356
4357/* vd[0] = min(vs1[0], vs2[*]) */
4358GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN, clearb)
4359GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN, clearh)
4360GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN, clearl)
4361GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN, clearq)
4362
4363/* vd[0] = and(vs1[0], vs2[*]) */
4364GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND, clearb)
4365GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND, clearh)
4366GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND, clearl)
4367GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND, clearq)
4368
4369/* vd[0] = or(vs1[0], vs2[*]) */
4370GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR, clearb)
4371GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR, clearh)
4372GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR, clearl)
4373GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR, clearq)
4374
4375/* vd[0] = xor(vs1[0], vs2[*]) */
4376GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR, clearb)
4377GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR, clearh)
4378GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR, clearl)
4379GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR, clearq)
4380
4381/* Vector Widening Integer Reduction Instructions */
4382/* Signed sum reduction into double-width accumulator */
4383GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD, clearh)
4384GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD, clearl)
4385GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD, clearq)
4386
4387/* Unsigned sum reduction into double-width accumulator */
4388GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD, clearh)
4389GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD, clearl)
4390GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD, clearq)
4391
4392/* Vector Single-Width Floating-Point Reduction Instructions */
4393#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\
4394void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
4395                  void *vs2, CPURISCVState *env,           \
4396                  uint32_t desc)                           \
4397{                                                          \
4398    uint32_t mlen = vext_mlen(desc);                       \
4399    uint32_t vm = vext_vm(desc);                           \
4400    uint32_t vl = env->vl;                                 \
4401    uint32_t i;                                            \
4402    uint32_t tot = env_archcpu(env)->cfg.vlen / 8;         \
4403    TD s1 =  *((TD *)vs1 + HD(0));                         \
4404                                                           \
4405    for (i = 0; i < vl; i++) {                             \
4406        TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
4407        if (!vm && !vext_elem_mask(v0, mlen, i)) {         \
4408            continue;                                      \
4409        }                                                  \
4410        s1 = OP(s1, (TD)s2, &env->fp_status);              \
4411    }                                                      \
4412    *((TD *)vd + HD(0)) = s1;                              \
4413    CLEAR_FN(vd, 1, sizeof(TD), tot);                      \
4414}
4415
4416/* Unordered sum */
4417GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add, clearh)
4418GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add, clearl)
4419GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add, clearq)
4420
4421/* Maximum value */
4422GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum, clearh)
4423GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum, clearl)
4424GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum, clearq)
4425
4426/* Minimum value */
4427GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum, clearh)
4428GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum, clearl)
4429GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum, clearq)
4430
4431/* Vector Widening Floating-Point Reduction Instructions */
4432/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4433void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4434                            void *vs2, CPURISCVState *env, uint32_t desc)
4435{
4436    uint32_t mlen = vext_mlen(desc);
4437    uint32_t vm = vext_vm(desc);
4438    uint32_t vl = env->vl;
4439    uint32_t i;
4440    uint32_t tot = env_archcpu(env)->cfg.vlen / 8;
4441    uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
4442
4443    for (i = 0; i < vl; i++) {
4444        uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4445        if (!vm && !vext_elem_mask(v0, mlen, i)) {
4446            continue;
4447        }
4448        s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4449                         &env->fp_status);
4450    }
4451    *((uint32_t *)vd + H4(0)) = s1;
4452    clearl(vd, 1, sizeof(uint32_t), tot);
4453}
4454
4455void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4456                            void *vs2, CPURISCVState *env, uint32_t desc)
4457{
4458    uint32_t mlen = vext_mlen(desc);
4459    uint32_t vm = vext_vm(desc);
4460    uint32_t vl = env->vl;
4461    uint32_t i;
4462    uint32_t tot = env_archcpu(env)->cfg.vlen / 8;
4463    uint64_t s1 =  *((uint64_t *)vs1);
4464
4465    for (i = 0; i < vl; i++) {
4466        uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4467        if (!vm && !vext_elem_mask(v0, mlen, i)) {
4468            continue;
4469        }
4470        s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4471                         &env->fp_status);
4472    }
4473    *((uint64_t *)vd) = s1;
4474    clearq(vd, 1, sizeof(uint64_t), tot);
4475}
4476
4477/*
4478 *** Vector Mask Operations
4479 */
4480/* Vector Mask-Register Logical Instructions */
4481#define GEN_VEXT_MASK_VV(NAME, OP)                        \
4482void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4483                  void *vs2, CPURISCVState *env,          \
4484                  uint32_t desc)                          \
4485{                                                         \
4486    uint32_t mlen = vext_mlen(desc);                      \
4487    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;   \
4488    uint32_t vl = env->vl;                                \
4489    uint32_t i;                                           \
4490    int a, b;                                             \
4491                                                          \
4492    for (i = 0; i < vl; i++) {                            \
4493        a = vext_elem_mask(vs1, mlen, i);                 \
4494        b = vext_elem_mask(vs2, mlen, i);                 \
4495        vext_set_elem_mask(vd, mlen, i, OP(b, a));        \
4496    }                                                     \
4497    for (; i < vlmax; i++) {                              \
4498        vext_set_elem_mask(vd, mlen, i, 0);               \
4499    }                                                     \
4500}
4501
4502#define DO_NAND(N, M)  (!(N & M))
4503#define DO_ANDNOT(N, M)  (N & !M)
4504#define DO_NOR(N, M)  (!(N | M))
4505#define DO_ORNOT(N, M)  (N | !M)
4506#define DO_XNOR(N, M)  (!(N ^ M))
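/*
 * The operands are single mask bits (0 or 1) as returned by
 * vext_elem_mask(), so logical '!' gives the intended result where a
 * bit-wise '~' would not.
 */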
4507
4508GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4509GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4510GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT)
4511GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4512GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4513GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4514GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT)
4515GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4516
4517/* Vector mask population count vmpopc */
4518target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env,
4519                              uint32_t desc)
4520{
4521    target_ulong cnt = 0;
4522    uint32_t mlen = vext_mlen(desc);
4523    uint32_t vm = vext_vm(desc);
4524    uint32_t vl = env->vl;
4525    int i;
4526
4527    for (i = 0; i < vl; i++) {
4528        if (vm || vext_elem_mask(v0, mlen, i)) {
4529            if (vext_elem_mask(vs2, mlen, i)) {
4530                cnt++;
4531            }
4532        }
4533    }
4534    return cnt;
4535}
4536
4537/* vmfirst find-first-set mask bit */
4538target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4539                               uint32_t desc)
4540{
4541    uint32_t mlen = vext_mlen(desc);
4542    uint32_t vm = vext_vm(desc);
4543    uint32_t vl = env->vl;
4544    int i;
4545
4546    for (i = 0; i < vl; i++) {
4547        if (vm || vext_elem_mask(v0, mlen, i)) {
4548            if (vext_elem_mask(vs2, mlen, i)) {
4549                return i;
4550            }
4551        }
4552    }
4553    return -1LL;
4554}
4555
4556enum set_mask_type {
4557    ONLY_FIRST = 1,
4558    INCLUDE_FIRST,
4559    BEFORE_FIRST,
4560};
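/*
 * vmsetm() implements vmsbf.m (BEFORE_FIRST), vmsif.m (INCLUDE_FIRST) and
 * vmsof.m (ONLY_FIRST); see the helpers below.
 */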
4561
4562static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4563                   uint32_t desc, enum set_mask_type type)
4564{
4565    uint32_t mlen = vext_mlen(desc);
4566    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;
4567    uint32_t vm = vext_vm(desc);
4568    uint32_t vl = env->vl;
4569    int i;
4570    bool first_mask_bit = false;
4571
4572    for (i = 0; i < vl; i++) {
4573        if (!vm && !vext_elem_mask(v0, mlen, i)) {
4574            continue;
4575        }
4576        /* write a zero to all following active elements */
4577        if (first_mask_bit) {
4578            vext_set_elem_mask(vd, mlen, i, 0);
4579            continue;
4580        }
4581        if (vext_elem_mask(vs2, mlen, i)) {
4582            first_mask_bit = true;
4583            if (type == BEFORE_FIRST) {
4584                vext_set_elem_mask(vd, mlen, i, 0);
4585            } else {
4586                vext_set_elem_mask(vd, mlen, i, 1);
4587            }
4588        } else {
4589            if (type == ONLY_FIRST) {
4590                vext_set_elem_mask(vd, mlen, i, 0);
4591            } else {
4592                vext_set_elem_mask(vd, mlen, i, 1);
4593            }
4594        }
4595    }
4596    for (; i < vlmax; i++) {
4597        vext_set_elem_mask(vd, mlen, i, 0);
4598    }
4599}
4600
4601void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4602                     uint32_t desc)
4603{
4604    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4605}
4606
4607void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4608                     uint32_t desc)
4609{
4610    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4611}
4612
4613void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4614                     uint32_t desc)
4615{
4616    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4617}
4618
4619/* Vector Iota Instruction */
4620#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H, CLEAR_FN)                        \
4621void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
4622                  uint32_t desc)                                          \
4623{                                                                         \
4624    uint32_t mlen = vext_mlen(desc);                                      \
4625    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4626    uint32_t vm = vext_vm(desc);                                          \
4627    uint32_t vl = env->vl;                                                \
4628    uint32_t sum = 0;                                                     \
4629    int i;                                                                \
4630                                                                          \
4631    for (i = 0; i < vl; i++) {                                            \
4632        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
4633            continue;                                                     \
4634        }                                                                 \
4635        *((ETYPE *)vd + H(i)) = sum;                                      \
4636        if (vext_elem_mask(vs2, mlen, i)) {                               \
4637            sum++;                                                        \
4638        }                                                                 \
4639    }                                                                     \
4640    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
4641}
4642
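/* vd[i] = number of set bits in vs2 at active element positions < i */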
4643GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1, clearb)
4644GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2, clearh)
4645GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4, clearl)
4646GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8, clearq)
4647
4648/* Vector Element Index Instruction */
4649#define GEN_VEXT_VID_V(NAME, ETYPE, H, CLEAR_FN)                          \
4650void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
4651{                                                                         \
4652    uint32_t mlen = vext_mlen(desc);                                      \
4653    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4654    uint32_t vm = vext_vm(desc);                                          \
4655    uint32_t vl = env->vl;                                                \
4656    int i;                                                                \
4657                                                                          \
4658    for (i = 0; i < vl; i++) {                                            \
4659        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
4660            continue;                                                     \
4661        }                                                                 \
4662        *((ETYPE *)vd + H(i)) = i;                                        \
4663    }                                                                     \
4664    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
4665}
4666
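/* vd[i] = i for active elements; inactive elements are left unchanged */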
4667GEN_VEXT_VID_V(vid_v_b, uint8_t, H1, clearb)
4668GEN_VEXT_VID_V(vid_v_h, uint16_t, H2, clearh)
4669GEN_VEXT_VID_V(vid_v_w, uint32_t, H4, clearl)
4670GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq)
4671
4672/*
4673 *** Vector Permutation Instructions
4674 */
4675
4676/* Vector Slide Instructions */
4677#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H, CLEAR_FN)                    \
4678void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4679                  CPURISCVState *env, uint32_t desc)                      \
4680{                                                                         \
4681    uint32_t mlen = vext_mlen(desc);                                      \
4682    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4683    uint32_t vm = vext_vm(desc);                                          \
4684    uint32_t vl = env->vl;                                                \
4685    target_ulong offset = s1, i;                                          \
4686                                                                          \
4687    for (i = offset; i < vl; i++) {                                       \
4688        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
4689            continue;                                                     \
4690        }                                                                 \
4691        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
4692    }                                                                     \
4693    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
4694}
4695
4696/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
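/*
 * Destination elements below the offset are left untouched: the copy loop
 * starts at i = offset and only the tail past vl is cleared.
 */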
4697GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1, clearb)
4698GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2, clearh)
4699GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4, clearl)
4700GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8, clearq)
4701
4702#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H, CLEAR_FN)                  \
4703void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4704                  CPURISCVState *env, uint32_t desc)                      \
4705{                                                                         \
4706    uint32_t mlen = vext_mlen(desc);                                      \
4707    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4708    uint32_t vm = vext_vm(desc);                                          \
4709    uint32_t vl = env->vl;                                                \
4710    target_ulong offset = s1, i;                                          \
4711                                                                          \
4712    for (i = 0; i < vl; ++i) {                                            \
4713        target_ulong j = i + offset;                                      \
4714        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
4715            continue;                                                     \
4716        }                                                                 \
4717        *((ETYPE *)vd + H(i)) = j >= vlmax ? 0 : *((ETYPE *)vs2 + H(j));  \
4718    }                                                                     \
4719    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
4720}
4721
4722/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4723GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1, clearb)
4724GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2, clearh)
4725GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4, clearl)
4726GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8, clearq)
4727
4728#define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H, CLEAR_FN)                   \
4729void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4730                  CPURISCVState *env, uint32_t desc)                      \
4731{                                                                         \
4732    uint32_t mlen = vext_mlen(desc);                                      \
4733    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4734    uint32_t vm = vext_vm(desc);                                          \
4735    uint32_t vl = env->vl;                                                \
4736    uint32_t i;                                                           \
4737                                                                          \
4738    for (i = 0; i < vl; i++) {                                            \
4739        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
4740            continue;                                                     \
4741        }                                                                 \
4742        if (i == 0) {                                                     \
4743            *((ETYPE *)vd + H(i)) = s1;                                   \
4744        } else {                                                          \
4745            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
4746        }                                                                 \
4747    }                                                                     \
4748    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
4749}
4750
4751/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4752GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t, H1, clearb)
4753GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2, clearh)
4754GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4, clearl)
4755GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8, clearq)
4756
4757#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H, CLEAR_FN)                 \
4758void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4759                  CPURISCVState *env, uint32_t desc)                      \
4760{                                                                         \
4761    uint32_t mlen = vext_mlen(desc);                                      \
4762    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4763    uint32_t vm = vext_vm(desc);                                          \
4764    uint32_t vl = env->vl;                                                \
4765    uint32_t i;                                                           \
4766                                                                          \
4767    for (i = 0; i < vl; i++) {                                            \
4768        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
4769            continue;                                                     \
4770        }                                                                 \
4771        if (i == vl - 1) {                                                \
4772            *((ETYPE *)vd + H(i)) = s1;                                   \
4773        } else {                                                          \
4774            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
4775        }                                                                 \
4776    }                                                                     \
4777    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
4778}
4779
4780/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4781GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1, clearb)
4782GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2, clearh)
4783GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4, clearl)
4784GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8, clearq)
4785
4786/* Vector Register Gather Instruction */
4787#define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H, CLEAR_FN)                    \
4788void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4789                  CPURISCVState *env, uint32_t desc)                      \
4790{                                                                         \
4791    uint32_t mlen = vext_mlen(desc);                                      \
4792    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4793    uint32_t vm = vext_vm(desc);                                          \
4794    uint32_t vl = env->vl;                                                \
4795    uint64_t index;                                                       \
4796    uint32_t i;                                                           \
4797                                                                          \
4798    for (i = 0; i < vl; i++) {                                            \
4799        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
4800            continue;                                                     \
4801        }                                                                 \
4802        index = *((ETYPE *)vs1 + H(i));                                   \
4803        if (index >= vlmax) {                                             \
4804            *((ETYPE *)vd + H(i)) = 0;                                    \
4805        } else {                                                          \
4806            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4807        }                                                                 \
4808    }                                                                     \
4809    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
4810}
4811
4812/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4813GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, H1, clearb)
4814GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2, clearh)
4815GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4, clearl)
4816GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8, clearq)
4817
4818#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H, CLEAR_FN)                    \
4819void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4820                  CPURISCVState *env, uint32_t desc)                      \
4821{                                                                         \
4822    uint32_t mlen = vext_mlen(desc);                                      \
4823    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4824    uint32_t vm = vext_vm(desc);                                          \
4825    uint32_t vl = env->vl;                                                \
4826    uint64_t index = s1;                                                  \
4827    uint32_t i;                                                           \
4828                                                                          \
4829    for (i = 0; i < vl; i++) {                                            \
4830        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
4831            continue;                                                     \
4832        }                                                                 \
4833        if (index >= vlmax) {                                             \
4834            *((ETYPE *)vd + H(i)) = 0;                                    \
4835        } else {                                                          \
4836            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4837        }                                                                 \
4838    }                                                                     \
4839    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
4840}
4841
4842/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
4843GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1, clearb)
4844GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2, clearh)
4845GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4, clearl)
4846GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8, clearq)
4847
4848/* Vector Compress Instruction */
4849#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H, CLEAR_FN)                   \
4850void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4851                  CPURISCVState *env, uint32_t desc)                      \
4852{                                                                         \
4853    uint32_t mlen = vext_mlen(desc);                                      \
4854    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4855    uint32_t vl = env->vl;                                                \
4856    uint32_t num = 0, i;                                                  \
4857                                                                          \
4858    for (i = 0; i < vl; i++) {                                            \
4859        if (!vext_elem_mask(vs1, mlen, i)) {                              \
4860            continue;                                                     \
4861        }                                                                 \
4862        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
4863        num++;                                                            \
4864    }                                                                     \
4865    CLEAR_FN(vd, num, num * sizeof(ETYPE), vlmax * sizeof(ETYPE));        \
4866}
4867
4868/* Compress into vd elements of vs2 where vs1 is enabled */
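/*
 * vcompress is always unmasked: vs1 supplies the mask that selects which
 * elements of vs2 are packed, and destination elements beyond the packed
 * count are cleared.
 */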
4869GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1, clearb)
4870GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2, clearh)
4871GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4, clearl)
4872GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8, clearq)
4873