qemu/target/riscv/vector_helper.c
   1/*
   2 * RISC-V Vector Extension Helpers for QEMU.
   3 *
   4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms and conditions of the GNU General Public License,
   8 * version 2 or later, as published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope it will be useful, but WITHOUT
  11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13 * more details.
  14 *
  15 * You should have received a copy of the GNU General Public License along with
  16 * this program.  If not, see <http://www.gnu.org/licenses/>.
  17 */
  18
  19#include "qemu/osdep.h"
  20#include "cpu.h"
  21#include "exec/memop.h"
  22#include "exec/exec-all.h"
  23#include "exec/helper-proto.h"
  24#include "fpu/softfloat.h"
  25#include "tcg/tcg-gvec-desc.h"
  26#include "internals.h"
  27#include <math.h>
  28
  29target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
  30                            target_ulong s2)
  31{
  32    int vlmax, vl;
  33    RISCVCPU *cpu = env_archcpu(env);
  34    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
  35    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
  36    bool vill = FIELD_EX64(s2, VTYPE, VILL);
  37    target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);
  38
  39    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
  40        /* only set vill bit. */
  41        env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
  42        env->vl = 0;
  43        env->vstart = 0;
  44        return 0;
  45    }
  46
  47    vlmax = vext_get_vlmax(cpu, s2);
  48    if (s1 <= vlmax) {
  49        vl = s1;
  50    } else {
  51        vl = vlmax;
  52    }
  53    env->vl = vl;
  54    env->vtype = s2;
  55    env->vstart = 0;
  56    return vl;
  57}
  58
  59/*
  60 * Note that vector data is stored in host-endian 64-bit chunks,
  61 * so addressing units smaller than that need a host-endian fixup.
  62 */
  63#ifdef HOST_WORDS_BIGENDIAN
  64#define H1(x)   ((x) ^ 7)
  65#define H1_2(x) ((x) ^ 6)
  66#define H1_4(x) ((x) ^ 4)
  67#define H2(x)   ((x) ^ 3)
  68#define H4(x)   ((x) ^ 1)
  69#define H8(x)   ((x))
  70#else
  71#define H1(x)   (x)
  72#define H1_2(x) (x)
  73#define H1_4(x) (x)
  74#define H2(x)   (x)
  75#define H4(x)   (x)
  76#define H8(x)   (x)
  77#endif
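/*
 * Illustrative sketch (not part of the original file): the H*() macros
 * remap an element index to its byte/half/word position inside the
 * host-endian uint64_t chunks.  On a big-endian host, H1(0) == 7 steers
 * element 0 to the least significant byte of the first chunk and
 * H2(0) == 3 does the same for 16-bit elements, while 64-bit elements
 * need no fixup.  A hypothetical accessor for one byte-sized element:
 */
static inline uint8_t example_get_byte_elem(const void *vreg, int idx)
{
    /* H1() converts the logical element index into a host byte offset */
    return ((const uint8_t *)vreg)[H1(idx)];
}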
  78
  79static inline uint32_t vext_nf(uint32_t desc)
  80{
  81    return FIELD_EX32(simd_data(desc), VDATA, NF);
  82}
  83
  84static inline uint32_t vext_mlen(uint32_t desc)
  85{
  86    return FIELD_EX32(simd_data(desc), VDATA, MLEN);
  87}
  88
  89static inline uint32_t vext_vm(uint32_t desc)
  90{
  91    return FIELD_EX32(simd_data(desc), VDATA, VM);
  92}
  93
  94static inline uint32_t vext_lmul(uint32_t desc)
  95{
  96    return FIELD_EX32(simd_data(desc), VDATA, LMUL);
  97}
  98
  99static uint32_t vext_wd(uint32_t desc)
 100{
 101    return (simd_data(desc) >> 11) & 0x1;
 102}
 103
 104/*
 105 * Get vector group length in bytes. Its range is [64, 2048].
 106 *
 107 * As simd_desc supports at most 256, the max vlen is 512 bits.
 108 * So vlen in bytes is encoded as maxsz.
 109 */
 110static inline uint32_t vext_maxsz(uint32_t desc)
 111{
 112    return simd_maxsz(desc) << vext_lmul(desc);
 113}
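/*
 * Worked example (illustrative, hypothetical numbers): if simd_maxsz(desc)
 * decodes to 64 bytes (a 512-bit vlen) and the LMUL field holds 3
 * (LMUL = 8), the group length is 64 << 3 = 512 bytes, i.e. eight
 * 64-byte vector registers grouped together.
 */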
 114
 115/*
 116 * This function checks watchpoint before real load operation.
 117 *
 118 * In softmmu mode, the TLB API probe_access is enough for watchpoint check.
 119 * In user mode, there is no watchpoint support now.
 120 *
 121 * It will trigger an exception if there is no mapping in the TLB
 122 * and the page table walk can't fill the TLB entry. Then the guest
 123 * software can return here after processing the exception, or never return.
 124 */
 125static void probe_pages(CPURISCVState *env, target_ulong addr,
 126                        target_ulong len, uintptr_t ra,
 127                        MMUAccessType access_type)
 128{
 129    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
 130    target_ulong curlen = MIN(pagelen, len);
 131
 132    probe_access(env, addr, curlen, access_type,
 133                 cpu_mmu_index(env, false), ra);
 134    if (len > curlen) {
 135        addr += curlen;
 136        curlen = len - curlen;
 137        probe_access(env, addr, curlen, access_type,
 138                     cpu_mmu_index(env, false), ra);
 139    }
 140}
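/*
 * Worked example (illustrative): with 4 KiB pages, probing addr = 0x1ff8
 * with len = 16 gives pagelen = -(0x1ff8 | TARGET_PAGE_MASK) = 8, so the
 * first probe_access() covers [0x1ff8, 0x2000) and the second covers the
 * remaining 8 bytes starting at the next page, 0x2000.  An access that
 * fits in a single page only takes the first probe.
 */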
 141
 142#ifdef HOST_WORDS_BIGENDIAN
 143static void vext_clear(void *tail, uint32_t cnt, uint32_t tot)
 144{
 145    /*
 146     * Split the remaining range into two parts.
 147     * The first part is in the last uint64_t unit.
 148     * The second part starts from the next uint64_t unit.
 149     */
 150    int part1 = 0, part2 = tot - cnt;
 151    if (cnt % 8) {
 152        part1 = 8 - (cnt % 8);
 153        part2 = tot - cnt - part1;
 154        memset(QEMU_ALIGN_PTR_DOWN(tail, 8), 0, part1);
 155        memset(QEMU_ALIGN_PTR_UP(tail, 8), 0, part2);
 156    } else {
 157        memset(tail, 0, part2);
 158    }
 159}
 160#else
 161static void vext_clear(void *tail, uint32_t cnt, uint32_t tot)
 162{
 163    memset(tail, 0, tot - cnt);
 164}
 165#endif
 166
 167static void clearb(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
 168{
 169    int8_t *cur = ((int8_t *)vd + H1(idx));
 170    vext_clear(cur, cnt, tot);
 171}
 172
 173static void clearh(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
 174{
 175    int16_t *cur = ((int16_t *)vd + H2(idx));
 176    vext_clear(cur, cnt, tot);
 177}
 178
 179static void clearl(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
 180{
 181    int32_t *cur = ((int32_t *)vd + H4(idx));
 182    vext_clear(cur, cnt, tot);
 183}
 184
 185static void clearq(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
 186{
 187    int64_t *cur = (int64_t *)vd + idx;
 188    vext_clear(cur, cnt, tot);
 189}
 190
 191static inline void vext_set_elem_mask(void *v0, int mlen, int index,
 192        uint8_t value)
 193{
 194    int idx = (index * mlen) / 64;
 195    int pos = (index * mlen) % 64;
 196    uint64_t old = ((uint64_t *)v0)[idx];
 197    ((uint64_t *)v0)[idx] = deposit64(old, pos, mlen, value);
 198}
 199
 200static inline int vext_elem_mask(void *v0, int mlen, int index)
 201{
 202    int idx = (index * mlen) / 64;
 203    int pos = (index * mlen) % 64;
 204    return (((uint64_t *)v0)[idx] >> pos) & 1;
 205}
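/*
 * Illustrative example: every mask element occupies mlen bits of v0, and
 * only the least significant bit of that field carries the mask value.
 * With mlen = 8, element 9 sits at bit offset 9 * 8 = 72, so
 * vext_elem_mask(v0, 8, 9) reads (((uint64_t *)v0)[1] >> 8) & 1.
 */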
 206
 207/* element operations for load and store */
 208typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
 209                               uint32_t idx, void *vd, uintptr_t retaddr);
 210typedef void clear_fn(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot);
 211
 212#define GEN_VEXT_LD_ELEM(NAME, MTYPE, ETYPE, H, LDSUF)     \
 213static void NAME(CPURISCVState *env, abi_ptr addr,         \
 214                 uint32_t idx, void *vd, uintptr_t retaddr)\
 215{                                                          \
 216    MTYPE data;                                            \
 217    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
 218    data = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
 219    *cur = data;                                           \
 220}
 221
 222GEN_VEXT_LD_ELEM(ldb_b, int8_t,  int8_t,  H1, ldsb)
 223GEN_VEXT_LD_ELEM(ldb_h, int8_t,  int16_t, H2, ldsb)
 224GEN_VEXT_LD_ELEM(ldb_w, int8_t,  int32_t, H4, ldsb)
 225GEN_VEXT_LD_ELEM(ldb_d, int8_t,  int64_t, H8, ldsb)
 226GEN_VEXT_LD_ELEM(ldh_h, int16_t, int16_t, H2, ldsw)
 227GEN_VEXT_LD_ELEM(ldh_w, int16_t, int32_t, H4, ldsw)
 228GEN_VEXT_LD_ELEM(ldh_d, int16_t, int64_t, H8, ldsw)
 229GEN_VEXT_LD_ELEM(ldw_w, int32_t, int32_t, H4, ldl)
 230GEN_VEXT_LD_ELEM(ldw_d, int32_t, int64_t, H8, ldl)
 231GEN_VEXT_LD_ELEM(lde_b, int8_t,  int8_t,  H1, ldsb)
 232GEN_VEXT_LD_ELEM(lde_h, int16_t, int16_t, H2, ldsw)
 233GEN_VEXT_LD_ELEM(lde_w, int32_t, int32_t, H4, ldl)
 234GEN_VEXT_LD_ELEM(lde_d, int64_t, int64_t, H8, ldq)
 235GEN_VEXT_LD_ELEM(ldbu_b, uint8_t,  uint8_t,  H1, ldub)
 236GEN_VEXT_LD_ELEM(ldbu_h, uint8_t,  uint16_t, H2, ldub)
 237GEN_VEXT_LD_ELEM(ldbu_w, uint8_t,  uint32_t, H4, ldub)
 238GEN_VEXT_LD_ELEM(ldbu_d, uint8_t,  uint64_t, H8, ldub)
 239GEN_VEXT_LD_ELEM(ldhu_h, uint16_t, uint16_t, H2, lduw)
 240GEN_VEXT_LD_ELEM(ldhu_w, uint16_t, uint32_t, H4, lduw)
 241GEN_VEXT_LD_ELEM(ldhu_d, uint16_t, uint64_t, H8, lduw)
 242GEN_VEXT_LD_ELEM(ldwu_w, uint32_t, uint32_t, H4, ldl)
 243GEN_VEXT_LD_ELEM(ldwu_d, uint32_t, uint64_t, H8, ldl)
 244
 245#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
 246static void NAME(CPURISCVState *env, abi_ptr addr,         \
 247                 uint32_t idx, void *vd, uintptr_t retaddr)\
 248{                                                          \
 249    ETYPE data = *((ETYPE *)vd + H(idx));                  \
 250    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
 251}
 252
 253GEN_VEXT_ST_ELEM(stb_b, int8_t,  H1, stb)
 254GEN_VEXT_ST_ELEM(stb_h, int16_t, H2, stb)
 255GEN_VEXT_ST_ELEM(stb_w, int32_t, H4, stb)
 256GEN_VEXT_ST_ELEM(stb_d, int64_t, H8, stb)
 257GEN_VEXT_ST_ELEM(sth_h, int16_t, H2, stw)
 258GEN_VEXT_ST_ELEM(sth_w, int32_t, H4, stw)
 259GEN_VEXT_ST_ELEM(sth_d, int64_t, H8, stw)
 260GEN_VEXT_ST_ELEM(stw_w, int32_t, H4, stl)
 261GEN_VEXT_ST_ELEM(stw_d, int64_t, H8, stl)
 262GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
 263GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
 264GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
 265GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
 266
 267/*
 268 *** stride: access vector element from strided memory
 269 */
 270static void
 271vext_ldst_stride(void *vd, void *v0, target_ulong base,
 272                 target_ulong stride, CPURISCVState *env,
 273                 uint32_t desc, uint32_t vm,
 274                 vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem,
 275                 uint32_t esz, uint32_t msz, uintptr_t ra,
 276                 MMUAccessType access_type)
 277{
 278    uint32_t i, k;
 279    uint32_t nf = vext_nf(desc);
 280    uint32_t mlen = vext_mlen(desc);
 281    uint32_t vlmax = vext_maxsz(desc) / esz;
 282
 283    /* probe every access */
 284    for (i = 0; i < env->vl; i++) {
 285        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 286            continue;
 287        }
 288        probe_pages(env, base + stride * i, nf * msz, ra, access_type);
 289    }
 290    /* do real access */
 291    for (i = 0; i < env->vl; i++) {
 292        k = 0;
 293        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 294            continue;
 295        }
 296        while (k < nf) {
 297            target_ulong addr = base + stride * i + k * msz;
 298            ldst_elem(env, addr, i + k * vlmax, vd, ra);
 299            k++;
 300        }
 301    }
 302    /* clear tail elements */
 303    if (clear_elem) {
 304        for (k = 0; k < nf; k++) {
 305            clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
 306        }
 307    }
 308}
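/*
 * Layout note (illustrative): for a segment access with nf fields, field k
 * of element i is written to vd element i + k * vlmax, so each field gets
 * its own register group.  E.g. with vlmax = 4 and nf = 2, element 1 puts
 * field 0 at index 1 and field 1 at index 5, loaded from
 * base + stride * 1 and base + stride * 1 + msz respectively.
 */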
 309
 310#define GEN_VEXT_LD_STRIDE(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN)       \
 311void HELPER(NAME)(void *vd, void * v0, target_ulong base,               \
 312                  target_ulong stride, CPURISCVState *env,              \
 313                  uint32_t desc)                                        \
 314{                                                                       \
 315    uint32_t vm = vext_vm(desc);                                        \
 316    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
 317                     CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),            \
 318                     GETPC(), MMU_DATA_LOAD);                           \
 319}
 320
 321GEN_VEXT_LD_STRIDE(vlsb_v_b,  int8_t,   int8_t,   ldb_b,  clearb)
 322GEN_VEXT_LD_STRIDE(vlsb_v_h,  int8_t,   int16_t,  ldb_h,  clearh)
 323GEN_VEXT_LD_STRIDE(vlsb_v_w,  int8_t,   int32_t,  ldb_w,  clearl)
 324GEN_VEXT_LD_STRIDE(vlsb_v_d,  int8_t,   int64_t,  ldb_d,  clearq)
 325GEN_VEXT_LD_STRIDE(vlsh_v_h,  int16_t,  int16_t,  ldh_h,  clearh)
 326GEN_VEXT_LD_STRIDE(vlsh_v_w,  int16_t,  int32_t,  ldh_w,  clearl)
 327GEN_VEXT_LD_STRIDE(vlsh_v_d,  int16_t,  int64_t,  ldh_d,  clearq)
 328GEN_VEXT_LD_STRIDE(vlsw_v_w,  int32_t,  int32_t,  ldw_w,  clearl)
 329GEN_VEXT_LD_STRIDE(vlsw_v_d,  int32_t,  int64_t,  ldw_d,  clearq)
 330GEN_VEXT_LD_STRIDE(vlse_v_b,  int8_t,   int8_t,   lde_b,  clearb)
 331GEN_VEXT_LD_STRIDE(vlse_v_h,  int16_t,  int16_t,  lde_h,  clearh)
 332GEN_VEXT_LD_STRIDE(vlse_v_w,  int32_t,  int32_t,  lde_w,  clearl)
 333GEN_VEXT_LD_STRIDE(vlse_v_d,  int64_t,  int64_t,  lde_d,  clearq)
 334GEN_VEXT_LD_STRIDE(vlsbu_v_b, uint8_t,  uint8_t,  ldbu_b, clearb)
 335GEN_VEXT_LD_STRIDE(vlsbu_v_h, uint8_t,  uint16_t, ldbu_h, clearh)
 336GEN_VEXT_LD_STRIDE(vlsbu_v_w, uint8_t,  uint32_t, ldbu_w, clearl)
 337GEN_VEXT_LD_STRIDE(vlsbu_v_d, uint8_t,  uint64_t, ldbu_d, clearq)
 338GEN_VEXT_LD_STRIDE(vlshu_v_h, uint16_t, uint16_t, ldhu_h, clearh)
 339GEN_VEXT_LD_STRIDE(vlshu_v_w, uint16_t, uint32_t, ldhu_w, clearl)
 340GEN_VEXT_LD_STRIDE(vlshu_v_d, uint16_t, uint64_t, ldhu_d, clearq)
 341GEN_VEXT_LD_STRIDE(vlswu_v_w, uint32_t, uint32_t, ldwu_w, clearl)
 342GEN_VEXT_LD_STRIDE(vlswu_v_d, uint32_t, uint64_t, ldwu_d, clearq)
 343
 344#define GEN_VEXT_ST_STRIDE(NAME, MTYPE, ETYPE, STORE_FN)                \
 345void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
 346                  target_ulong stride, CPURISCVState *env,              \
 347                  uint32_t desc)                                        \
 348{                                                                       \
 349    uint32_t vm = vext_vm(desc);                                        \
 350    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
 351                     NULL, sizeof(ETYPE), sizeof(MTYPE),                \
 352                     GETPC(), MMU_DATA_STORE);                          \
 353}
 354
 355GEN_VEXT_ST_STRIDE(vssb_v_b, int8_t,  int8_t,  stb_b)
 356GEN_VEXT_ST_STRIDE(vssb_v_h, int8_t,  int16_t, stb_h)
 357GEN_VEXT_ST_STRIDE(vssb_v_w, int8_t,  int32_t, stb_w)
 358GEN_VEXT_ST_STRIDE(vssb_v_d, int8_t,  int64_t, stb_d)
 359GEN_VEXT_ST_STRIDE(vssh_v_h, int16_t, int16_t, sth_h)
 360GEN_VEXT_ST_STRIDE(vssh_v_w, int16_t, int32_t, sth_w)
 361GEN_VEXT_ST_STRIDE(vssh_v_d, int16_t, int64_t, sth_d)
 362GEN_VEXT_ST_STRIDE(vssw_v_w, int32_t, int32_t, stw_w)
 363GEN_VEXT_ST_STRIDE(vssw_v_d, int32_t, int64_t, stw_d)
 364GEN_VEXT_ST_STRIDE(vsse_v_b, int8_t,  int8_t,  ste_b)
 365GEN_VEXT_ST_STRIDE(vsse_v_h, int16_t, int16_t, ste_h)
 366GEN_VEXT_ST_STRIDE(vsse_v_w, int32_t, int32_t, ste_w)
 367GEN_VEXT_ST_STRIDE(vsse_v_d, int64_t, int64_t, ste_d)
 368
 369/*
 370 *** unit-stride: access elements stored contiguously in memory
 371 */
 372
 373/* unmasked unit-stride load and store operation */
 374static void
 375vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
 376             vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem,
 377             uint32_t esz, uint32_t msz, uintptr_t ra,
 378             MMUAccessType access_type)
 379{
 380    uint32_t i, k;
 381    uint32_t nf = vext_nf(desc);
 382    uint32_t vlmax = vext_maxsz(desc) / esz;
 383
 384    /* probe every access */
 385    probe_pages(env, base, env->vl * nf * msz, ra, access_type);
 386    /* load or store bytes from/to guest memory */
 387    for (i = 0; i < env->vl; i++) {
 388        k = 0;
 389        while (k < nf) {
 390            target_ulong addr = base + (i * nf + k) * msz;
 391            ldst_elem(env, addr, i + k * vlmax, vd, ra);
 392            k++;
 393        }
 394    }
 395    /* clear tail elements */
 396    if (clear_elem) {
 397        for (k = 0; k < nf; k++) {
 398            clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
 399        }
 400    }
 401}
 402
 403/*
 404 * A masked unit-stride load or store is handled as a special case of the
 405 * strided access, with stride = NF * sizeof(MTYPE).
 406 */
 407
 408#define GEN_VEXT_LD_US(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN)           \
 409void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
 410                         CPURISCVState *env, uint32_t desc)             \
 411{                                                                       \
 412    uint32_t stride = vext_nf(desc) * sizeof(MTYPE);                    \
 413    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
 414                     CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),            \
 415                     GETPC(), MMU_DATA_LOAD);                           \
 416}                                                                       \
 417                                                                        \
 418void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
 419                  CPURISCVState *env, uint32_t desc)                    \
 420{                                                                       \
 421    vext_ldst_us(vd, base, env, desc, LOAD_FN, CLEAR_FN,                \
 422                 sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_LOAD); \
 423}
 424
 425GEN_VEXT_LD_US(vlb_v_b,  int8_t,   int8_t,   ldb_b,  clearb)
 426GEN_VEXT_LD_US(vlb_v_h,  int8_t,   int16_t,  ldb_h,  clearh)
 427GEN_VEXT_LD_US(vlb_v_w,  int8_t,   int32_t,  ldb_w,  clearl)
 428GEN_VEXT_LD_US(vlb_v_d,  int8_t,   int64_t,  ldb_d,  clearq)
 429GEN_VEXT_LD_US(vlh_v_h,  int16_t,  int16_t,  ldh_h,  clearh)
 430GEN_VEXT_LD_US(vlh_v_w,  int16_t,  int32_t,  ldh_w,  clearl)
 431GEN_VEXT_LD_US(vlh_v_d,  int16_t,  int64_t,  ldh_d,  clearq)
 432GEN_VEXT_LD_US(vlw_v_w,  int32_t,  int32_t,  ldw_w,  clearl)
 433GEN_VEXT_LD_US(vlw_v_d,  int32_t,  int64_t,  ldw_d,  clearq)
 434GEN_VEXT_LD_US(vle_v_b,  int8_t,   int8_t,   lde_b,  clearb)
 435GEN_VEXT_LD_US(vle_v_h,  int16_t,  int16_t,  lde_h,  clearh)
 436GEN_VEXT_LD_US(vle_v_w,  int32_t,  int32_t,  lde_w,  clearl)
 437GEN_VEXT_LD_US(vle_v_d,  int64_t,  int64_t,  lde_d,  clearq)
 438GEN_VEXT_LD_US(vlbu_v_b, uint8_t,  uint8_t,  ldbu_b, clearb)
 439GEN_VEXT_LD_US(vlbu_v_h, uint8_t,  uint16_t, ldbu_h, clearh)
 440GEN_VEXT_LD_US(vlbu_v_w, uint8_t,  uint32_t, ldbu_w, clearl)
 441GEN_VEXT_LD_US(vlbu_v_d, uint8_t,  uint64_t, ldbu_d, clearq)
 442GEN_VEXT_LD_US(vlhu_v_h, uint16_t, uint16_t, ldhu_h, clearh)
 443GEN_VEXT_LD_US(vlhu_v_w, uint16_t, uint32_t, ldhu_w, clearl)
 444GEN_VEXT_LD_US(vlhu_v_d, uint16_t, uint64_t, ldhu_d, clearq)
 445GEN_VEXT_LD_US(vlwu_v_w, uint32_t, uint32_t, ldwu_w, clearl)
 446GEN_VEXT_LD_US(vlwu_v_d, uint32_t, uint64_t, ldwu_d, clearq)
 447
 448#define GEN_VEXT_ST_US(NAME, MTYPE, ETYPE, STORE_FN)                    \
 449void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
 450                         CPURISCVState *env, uint32_t desc)             \
 451{                                                                       \
 452    uint32_t stride = vext_nf(desc) * sizeof(MTYPE);                    \
 453    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
 454                     NULL, sizeof(ETYPE), sizeof(MTYPE),                \
 455                     GETPC(), MMU_DATA_STORE);                          \
 456}                                                                       \
 457                                                                        \
 458void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
 459                  CPURISCVState *env, uint32_t desc)                    \
 460{                                                                       \
 461    vext_ldst_us(vd, base, env, desc, STORE_FN, NULL,                   \
 462                 sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_STORE);\
 463}
 464
 465GEN_VEXT_ST_US(vsb_v_b, int8_t,  int8_t,  stb_b)
 466GEN_VEXT_ST_US(vsb_v_h, int8_t,  int16_t, stb_h)
 467GEN_VEXT_ST_US(vsb_v_w, int8_t,  int32_t, stb_w)
 468GEN_VEXT_ST_US(vsb_v_d, int8_t,  int64_t, stb_d)
 469GEN_VEXT_ST_US(vsh_v_h, int16_t, int16_t, sth_h)
 470GEN_VEXT_ST_US(vsh_v_w, int16_t, int32_t, sth_w)
 471GEN_VEXT_ST_US(vsh_v_d, int16_t, int64_t, sth_d)
 472GEN_VEXT_ST_US(vsw_v_w, int32_t, int32_t, stw_w)
 473GEN_VEXT_ST_US(vsw_v_d, int32_t, int64_t, stw_d)
 474GEN_VEXT_ST_US(vse_v_b, int8_t,  int8_t,  ste_b)
 475GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h)
 476GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w)
 477GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d)
 478
 479/*
 480 *** index: access vector element from indexed memory
 481 */
 482typedef target_ulong vext_get_index_addr(target_ulong base,
 483        uint32_t idx, void *vs2);
 484
 485#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
 486static target_ulong NAME(target_ulong base,            \
 487                         uint32_t idx, void *vs2)      \
 488{                                                      \
 489    return (base + *((ETYPE *)vs2 + H(idx)));          \
 490}
 491
 492GEN_VEXT_GET_INDEX_ADDR(idx_b, int8_t,  H1)
 493GEN_VEXT_GET_INDEX_ADDR(idx_h, int16_t, H2)
 494GEN_VEXT_GET_INDEX_ADDR(idx_w, int32_t, H4)
 495GEN_VEXT_GET_INDEX_ADDR(idx_d, int64_t, H8)
 496
 497static inline void
 498vext_ldst_index(void *vd, void *v0, target_ulong base,
 499                void *vs2, CPURISCVState *env, uint32_t desc,
 500                vext_get_index_addr get_index_addr,
 501                vext_ldst_elem_fn *ldst_elem,
 502                clear_fn *clear_elem,
 503                uint32_t esz, uint32_t msz, uintptr_t ra,
 504                MMUAccessType access_type)
 505{
 506    uint32_t i, k;
 507    uint32_t nf = vext_nf(desc);
 508    uint32_t vm = vext_vm(desc);
 509    uint32_t mlen = vext_mlen(desc);
 510    uint32_t vlmax = vext_maxsz(desc) / esz;
 511
 512    /* probe every access */
 513    for (i = 0; i < env->vl; i++) {
 514        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 515            continue;
 516        }
 517        probe_pages(env, get_index_addr(base, i, vs2), nf * msz, ra,
 518                    access_type);
 519    }
 520    /* load or store bytes from/to guest memory */
 521    for (i = 0; i < env->vl; i++) {
 522        k = 0;
 523        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 524            continue;
 525        }
 526        while (k < nf) {
 527            abi_ptr addr = get_index_addr(base, i, vs2) + k * msz;
 528            ldst_elem(env, addr, i + k * vlmax, vd, ra);
 529            k++;
 530        }
 531    }
 532    /* clear tail elements */
 533    if (clear_elem) {
 534        for (k = 0; k < nf; k++) {
 535            clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
 536        }
 537    }
 538}
 539
 540#define GEN_VEXT_LD_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, LOAD_FN, CLEAR_FN) \
 541void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
 542                  void *vs2, CPURISCVState *env, uint32_t desc)            \
 543{                                                                          \
 544    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
 545                    LOAD_FN, CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),       \
 546                    GETPC(), MMU_DATA_LOAD);                               \
 547}
 548
 549GEN_VEXT_LD_INDEX(vlxb_v_b,  int8_t,   int8_t,   idx_b, ldb_b,  clearb)
 550GEN_VEXT_LD_INDEX(vlxb_v_h,  int8_t,   int16_t,  idx_h, ldb_h,  clearh)
 551GEN_VEXT_LD_INDEX(vlxb_v_w,  int8_t,   int32_t,  idx_w, ldb_w,  clearl)
 552GEN_VEXT_LD_INDEX(vlxb_v_d,  int8_t,   int64_t,  idx_d, ldb_d,  clearq)
 553GEN_VEXT_LD_INDEX(vlxh_v_h,  int16_t,  int16_t,  idx_h, ldh_h,  clearh)
 554GEN_VEXT_LD_INDEX(vlxh_v_w,  int16_t,  int32_t,  idx_w, ldh_w,  clearl)
 555GEN_VEXT_LD_INDEX(vlxh_v_d,  int16_t,  int64_t,  idx_d, ldh_d,  clearq)
 556GEN_VEXT_LD_INDEX(vlxw_v_w,  int32_t,  int32_t,  idx_w, ldw_w,  clearl)
 557GEN_VEXT_LD_INDEX(vlxw_v_d,  int32_t,  int64_t,  idx_d, ldw_d,  clearq)
 558GEN_VEXT_LD_INDEX(vlxe_v_b,  int8_t,   int8_t,   idx_b, lde_b,  clearb)
 559GEN_VEXT_LD_INDEX(vlxe_v_h,  int16_t,  int16_t,  idx_h, lde_h,  clearh)
 560GEN_VEXT_LD_INDEX(vlxe_v_w,  int32_t,  int32_t,  idx_w, lde_w,  clearl)
 561GEN_VEXT_LD_INDEX(vlxe_v_d,  int64_t,  int64_t,  idx_d, lde_d,  clearq)
 562GEN_VEXT_LD_INDEX(vlxbu_v_b, uint8_t,  uint8_t,  idx_b, ldbu_b, clearb)
 563GEN_VEXT_LD_INDEX(vlxbu_v_h, uint8_t,  uint16_t, idx_h, ldbu_h, clearh)
 564GEN_VEXT_LD_INDEX(vlxbu_v_w, uint8_t,  uint32_t, idx_w, ldbu_w, clearl)
 565GEN_VEXT_LD_INDEX(vlxbu_v_d, uint8_t,  uint64_t, idx_d, ldbu_d, clearq)
 566GEN_VEXT_LD_INDEX(vlxhu_v_h, uint16_t, uint16_t, idx_h, ldhu_h, clearh)
 567GEN_VEXT_LD_INDEX(vlxhu_v_w, uint16_t, uint32_t, idx_w, ldhu_w, clearl)
 568GEN_VEXT_LD_INDEX(vlxhu_v_d, uint16_t, uint64_t, idx_d, ldhu_d, clearq)
 569GEN_VEXT_LD_INDEX(vlxwu_v_w, uint32_t, uint32_t, idx_w, ldwu_w, clearl)
 570GEN_VEXT_LD_INDEX(vlxwu_v_d, uint32_t, uint64_t, idx_d, ldwu_d, clearq)
 571
 572#define GEN_VEXT_ST_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, STORE_FN)\
 573void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
 574                  void *vs2, CPURISCVState *env, uint32_t desc)  \
 575{                                                                \
 576    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
 577                    STORE_FN, NULL, sizeof(ETYPE), sizeof(MTYPE),\
 578                    GETPC(), MMU_DATA_STORE);                    \
 579}
 580
 581GEN_VEXT_ST_INDEX(vsxb_v_b, int8_t,  int8_t,  idx_b, stb_b)
 582GEN_VEXT_ST_INDEX(vsxb_v_h, int8_t,  int16_t, idx_h, stb_h)
 583GEN_VEXT_ST_INDEX(vsxb_v_w, int8_t,  int32_t, idx_w, stb_w)
 584GEN_VEXT_ST_INDEX(vsxb_v_d, int8_t,  int64_t, idx_d, stb_d)
 585GEN_VEXT_ST_INDEX(vsxh_v_h, int16_t, int16_t, idx_h, sth_h)
 586GEN_VEXT_ST_INDEX(vsxh_v_w, int16_t, int32_t, idx_w, sth_w)
 587GEN_VEXT_ST_INDEX(vsxh_v_d, int16_t, int64_t, idx_d, sth_d)
 588GEN_VEXT_ST_INDEX(vsxw_v_w, int32_t, int32_t, idx_w, stw_w)
 589GEN_VEXT_ST_INDEX(vsxw_v_d, int32_t, int64_t, idx_d, stw_d)
 590GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t,  int8_t,  idx_b, ste_b)
 591GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h)
 592GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w)
 593GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d)
 594
 595/*
 596 *** unit-stride fault-only-first load instructions
 597 */
 598static inline void
 599vext_ldff(void *vd, void *v0, target_ulong base,
 600          CPURISCVState *env, uint32_t desc,
 601          vext_ldst_elem_fn *ldst_elem,
 602          clear_fn *clear_elem,
 603          uint32_t esz, uint32_t msz, uintptr_t ra)
 604{
 605    void *host;
 606    uint32_t i, k, vl = 0;
 607    uint32_t mlen = vext_mlen(desc);
 608    uint32_t nf = vext_nf(desc);
 609    uint32_t vm = vext_vm(desc);
 610    uint32_t vlmax = vext_maxsz(desc) / esz;
 611    target_ulong addr, offset, remain;
 612
 613    /* probe every access */
 614    for (i = 0; i < env->vl; i++) {
 615        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 616            continue;
 617        }
 618        addr = base + nf * i * msz;
 619        if (i == 0) {
 620            probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
 621        } else {
 622            /* if it triggers an exception, no need to check watchpoint */
 623            remain = nf * msz;
 624            while (remain > 0) {
 625                offset = -(addr | TARGET_PAGE_MASK);
 626                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
 627                                         cpu_mmu_index(env, false));
 628                if (host) {
 629#ifdef CONFIG_USER_ONLY
 630                    if (page_check_range(addr, nf * msz, PAGE_READ) < 0) {
 631                        vl = i;
 632                        goto ProbeSuccess;
 633                    }
 634#else
 635                    probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
 636#endif
 637                } else {
 638                    vl = i;
 639                    goto ProbeSuccess;
 640                }
 641                if (remain <= offset) {
 642                    break;
 643                }
 644                remain -= offset;
 645                addr += offset;
 646            }
 647        }
 648    }
 649ProbeSuccess:
 650    /* load bytes from guest memory */
 651    if (vl != 0) {
 652        env->vl = vl;
 653    }
 654    for (i = 0; i < env->vl; i++) {
 655        k = 0;
 656        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 657            continue;
 658        }
 659        while (k < nf) {
 660            target_ulong addr = base + (i * nf + k) * msz;
 661            ldst_elem(env, addr, i + k * vlmax, vd, ra);
 662            k++;
 663        }
 664    }
 665    /* clear tail elements */
 666    if (vl != 0) {
 667        return;
 668    }
 669    for (k = 0; k < nf; k++) {
 670        clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
 671    }
 672}
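/*
 * Illustrative note on the fault-only-first behaviour implemented above:
 * only element 0 is probed with a faulting access.  Later elements are
 * checked with tlb_vaddr_to_host(); if one of their pages turns out to be
 * inaccessible, vl is truncated to that element index instead of raising
 * an exception, and only the surviving elements are loaded.
 */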
 673
 674#define GEN_VEXT_LDFF(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN)     \
 675void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
 676                  CPURISCVState *env, uint32_t desc)             \
 677{                                                                \
 678    vext_ldff(vd, v0, base, env, desc, LOAD_FN, CLEAR_FN,        \
 679              sizeof(ETYPE), sizeof(MTYPE), GETPC());            \
 680}
 681
 682GEN_VEXT_LDFF(vlbff_v_b,  int8_t,   int8_t,   ldb_b,  clearb)
 683GEN_VEXT_LDFF(vlbff_v_h,  int8_t,   int16_t,  ldb_h,  clearh)
 684GEN_VEXT_LDFF(vlbff_v_w,  int8_t,   int32_t,  ldb_w,  clearl)
 685GEN_VEXT_LDFF(vlbff_v_d,  int8_t,   int64_t,  ldb_d,  clearq)
 686GEN_VEXT_LDFF(vlhff_v_h,  int16_t,  int16_t,  ldh_h,  clearh)
 687GEN_VEXT_LDFF(vlhff_v_w,  int16_t,  int32_t,  ldh_w,  clearl)
 688GEN_VEXT_LDFF(vlhff_v_d,  int16_t,  int64_t,  ldh_d,  clearq)
 689GEN_VEXT_LDFF(vlwff_v_w,  int32_t,  int32_t,  ldw_w,  clearl)
 690GEN_VEXT_LDFF(vlwff_v_d,  int32_t,  int64_t,  ldw_d,  clearq)
 691GEN_VEXT_LDFF(vleff_v_b,  int8_t,   int8_t,   lde_b,  clearb)
 692GEN_VEXT_LDFF(vleff_v_h,  int16_t,  int16_t,  lde_h,  clearh)
 693GEN_VEXT_LDFF(vleff_v_w,  int32_t,  int32_t,  lde_w,  clearl)
 694GEN_VEXT_LDFF(vleff_v_d,  int64_t,  int64_t,  lde_d,  clearq)
 695GEN_VEXT_LDFF(vlbuff_v_b, uint8_t,  uint8_t,  ldbu_b, clearb)
 696GEN_VEXT_LDFF(vlbuff_v_h, uint8_t,  uint16_t, ldbu_h, clearh)
 697GEN_VEXT_LDFF(vlbuff_v_w, uint8_t,  uint32_t, ldbu_w, clearl)
 698GEN_VEXT_LDFF(vlbuff_v_d, uint8_t,  uint64_t, ldbu_d, clearq)
 699GEN_VEXT_LDFF(vlhuff_v_h, uint16_t, uint16_t, ldhu_h, clearh)
 700GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w, clearl)
 701GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d, clearq)
 702GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w, clearl)
 703GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d, clearq)
 704
 705/*
 706 *** Vector AMO Operations (Zvamo)
 707 */
 708typedef void vext_amo_noatomic_fn(void *vs3, target_ulong addr,
 709                                  uint32_t wd, uint32_t idx, CPURISCVState *env,
 710                                  uintptr_t retaddr);
 711
 712/* no atomic operation for vector atomic instructions */
 713#define DO_SWAP(N, M) (M)
 714#define DO_AND(N, M)  (N & M)
 715#define DO_XOR(N, M)  (N ^ M)
 716#define DO_OR(N, M)   (N | M)
 717#define DO_ADD(N, M)  (N + M)
 718
 719#define GEN_VEXT_AMO_NOATOMIC_OP(NAME, ESZ, MSZ, H, DO_OP, SUF) \
 720static void                                                     \
 721vext_##NAME##_noatomic_op(void *vs3, target_ulong addr,         \
 722                          uint32_t wd, uint32_t idx,            \
 723                          CPURISCVState *env, uintptr_t retaddr)\
 724{                                                               \
 725    typedef int##ESZ##_t ETYPE;                                 \
 726    typedef int##MSZ##_t MTYPE;                                 \
 727    typedef uint##MSZ##_t UMTYPE __attribute__((unused));       \
 728    ETYPE *pe3 = (ETYPE *)vs3 + H(idx);                         \
 729    MTYPE  a = cpu_ld##SUF##_data(env, addr), b = *pe3;         \
 730                                                                \
 731    cpu_st##SUF##_data(env, addr, DO_OP(a, b));                 \
 732    if (wd) {                                                   \
 733        *pe3 = a;                                               \
 734    }                                                           \
 735}
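/*
 * Illustrative note: "wd" comes from vext_wd(desc) and mirrors the
 * instruction's write-destination flag.  The helper always updates memory
 * with DO_OP(memory value, register value); when wd is set it also writes
 * the original memory value back into the vd/vs3 element, otherwise the
 * register operand is left unchanged.
 */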
 736
 737/* Signed min/max */
 738#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
 739#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))
 740
 741/* Unsigned min/max */
 742#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
 743#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
 744
 745GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_w, 32, 32, H4, DO_SWAP, l)
 746GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_w,  32, 32, H4, DO_ADD,  l)
 747GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_w,  32, 32, H4, DO_XOR,  l)
 748GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_w,  32, 32, H4, DO_AND,  l)
 749GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_w,   32, 32, H4, DO_OR,   l)
 750GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_w,  32, 32, H4, DO_MIN,  l)
 751GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_w,  32, 32, H4, DO_MAX,  l)
 752GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_w, 32, 32, H4, DO_MINU, l)
 753GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_w, 32, 32, H4, DO_MAXU, l)
 754#ifdef TARGET_RISCV64
 755GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_d, 64, 32, H8, DO_SWAP, l)
 756GEN_VEXT_AMO_NOATOMIC_OP(vamoswapd_v_d, 64, 64, H8, DO_SWAP, q)
 757GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_d,  64, 32, H8, DO_ADD,  l)
 758GEN_VEXT_AMO_NOATOMIC_OP(vamoaddd_v_d,  64, 64, H8, DO_ADD,  q)
 759GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_d,  64, 32, H8, DO_XOR,  l)
 760GEN_VEXT_AMO_NOATOMIC_OP(vamoxord_v_d,  64, 64, H8, DO_XOR,  q)
 761GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_d,  64, 32, H8, DO_AND,  l)
 762GEN_VEXT_AMO_NOATOMIC_OP(vamoandd_v_d,  64, 64, H8, DO_AND,  q)
 763GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_d,   64, 32, H8, DO_OR,   l)
 764GEN_VEXT_AMO_NOATOMIC_OP(vamoord_v_d,   64, 64, H8, DO_OR,   q)
 765GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_d,  64, 32, H8, DO_MIN,  l)
 766GEN_VEXT_AMO_NOATOMIC_OP(vamomind_v_d,  64, 64, H8, DO_MIN,  q)
 767GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_d,  64, 32, H8, DO_MAX,  l)
 768GEN_VEXT_AMO_NOATOMIC_OP(vamomaxd_v_d,  64, 64, H8, DO_MAX,  q)
 769GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_d, 64, 32, H8, DO_MINU, l)
 770GEN_VEXT_AMO_NOATOMIC_OP(vamominud_v_d, 64, 64, H8, DO_MINU, q)
 771GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_d, 64, 32, H8, DO_MAXU, l)
 772GEN_VEXT_AMO_NOATOMIC_OP(vamomaxud_v_d, 64, 64, H8, DO_MAXU, q)
 773#endif
 774
 775static inline void
 776vext_amo_noatomic(void *vs3, void *v0, target_ulong base,
 777                  void *vs2, CPURISCVState *env, uint32_t desc,
 778                  vext_get_index_addr get_index_addr,
 779                  vext_amo_noatomic_fn *noatomic_op,
 780                  clear_fn *clear_elem,
 781                  uint32_t esz, uint32_t msz, uintptr_t ra)
 782{
 783    uint32_t i;
 784    target_long addr;
 785    uint32_t wd = vext_wd(desc);
 786    uint32_t vm = vext_vm(desc);
 787    uint32_t mlen = vext_mlen(desc);
 788    uint32_t vlmax = vext_maxsz(desc) / esz;
 789
 790    for (i = 0; i < env->vl; i++) {
 791        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 792            continue;
 793        }
 794        probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_LOAD);
 795        probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_STORE);
 796    }
 797    for (i = 0; i < env->vl; i++) {
 798        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 799            continue;
 800        }
 801        addr = get_index_addr(base, i, vs2);
 802        noatomic_op(vs3, addr, wd, i, env, ra);
 803    }
 804    clear_elem(vs3, env->vl, env->vl * esz, vlmax * esz);
 805}
 806
 807#define GEN_VEXT_AMO(NAME, MTYPE, ETYPE, INDEX_FN, CLEAR_FN)    \
 808void HELPER(NAME)(void *vs3, void *v0, target_ulong base,       \
 809                  void *vs2, CPURISCVState *env, uint32_t desc) \
 810{                                                               \
 811    vext_amo_noatomic(vs3, v0, base, vs2, env, desc,            \
 812                      INDEX_FN, vext_##NAME##_noatomic_op,      \
 813                      CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),   \
 814                      GETPC());                                 \
 815}
 816
 817#ifdef TARGET_RISCV64
 818GEN_VEXT_AMO(vamoswapw_v_d, int32_t,  int64_t,  idx_d, clearq)
 819GEN_VEXT_AMO(vamoswapd_v_d, int64_t,  int64_t,  idx_d, clearq)
 820GEN_VEXT_AMO(vamoaddw_v_d,  int32_t,  int64_t,  idx_d, clearq)
 821GEN_VEXT_AMO(vamoaddd_v_d,  int64_t,  int64_t,  idx_d, clearq)
 822GEN_VEXT_AMO(vamoxorw_v_d,  int32_t,  int64_t,  idx_d, clearq)
 823GEN_VEXT_AMO(vamoxord_v_d,  int64_t,  int64_t,  idx_d, clearq)
 824GEN_VEXT_AMO(vamoandw_v_d,  int32_t,  int64_t,  idx_d, clearq)
 825GEN_VEXT_AMO(vamoandd_v_d,  int64_t,  int64_t,  idx_d, clearq)
 826GEN_VEXT_AMO(vamoorw_v_d,   int32_t,  int64_t,  idx_d, clearq)
 827GEN_VEXT_AMO(vamoord_v_d,   int64_t,  int64_t,  idx_d, clearq)
 828GEN_VEXT_AMO(vamominw_v_d,  int32_t,  int64_t,  idx_d, clearq)
 829GEN_VEXT_AMO(vamomind_v_d,  int64_t,  int64_t,  idx_d, clearq)
 830GEN_VEXT_AMO(vamomaxw_v_d,  int32_t,  int64_t,  idx_d, clearq)
 831GEN_VEXT_AMO(vamomaxd_v_d,  int64_t,  int64_t,  idx_d, clearq)
 832GEN_VEXT_AMO(vamominuw_v_d, uint32_t, uint64_t, idx_d, clearq)
 833GEN_VEXT_AMO(vamominud_v_d, uint64_t, uint64_t, idx_d, clearq)
 834GEN_VEXT_AMO(vamomaxuw_v_d, uint32_t, uint64_t, idx_d, clearq)
 835GEN_VEXT_AMO(vamomaxud_v_d, uint64_t, uint64_t, idx_d, clearq)
 836#endif
 837GEN_VEXT_AMO(vamoswapw_v_w, int32_t,  int32_t,  idx_w, clearl)
 838GEN_VEXT_AMO(vamoaddw_v_w,  int32_t,  int32_t,  idx_w, clearl)
 839GEN_VEXT_AMO(vamoxorw_v_w,  int32_t,  int32_t,  idx_w, clearl)
 840GEN_VEXT_AMO(vamoandw_v_w,  int32_t,  int32_t,  idx_w, clearl)
 841GEN_VEXT_AMO(vamoorw_v_w,   int32_t,  int32_t,  idx_w, clearl)
 842GEN_VEXT_AMO(vamominw_v_w,  int32_t,  int32_t,  idx_w, clearl)
 843GEN_VEXT_AMO(vamomaxw_v_w,  int32_t,  int32_t,  idx_w, clearl)
 844GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl)
 845GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl)
 846
 847/*
 848 *** Vector Integer Arithmetic Instructions
 849 */
 850
 851/* expand macro args before macro */
 852#define RVVCALL(macro, ...)  macro(__VA_ARGS__)
 853
 854/* (TD, T1, T2, TX1, TX2) */
 855#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
 856#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
 857#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
 858#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
 859#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
 860#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
 861#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
 862#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
 863#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
 864#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
 865#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
 866#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
 867#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
 868#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
 869#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
 870#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
 871#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
 872#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
 873#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
 874#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
 875#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
 876#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
 877#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
 878#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
 879#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
 880#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
 881#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
 882#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
 883#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
 884#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
 885
 886/* operation of two vector elements */
 887typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
 888
 889#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
 890static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
 891{                                                               \
 892    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
 893    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
 894    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
 895}
 896#define DO_SUB(N, M) (N - M)
 897#define DO_RSUB(N, M) (M - N)
 898
 899RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
 900RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
 901RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
 902RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
 903RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
 904RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
 905RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
 906RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
 907
 908static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
 909                       CPURISCVState *env, uint32_t desc,
 910                       uint32_t esz, uint32_t dsz,
 911                       opivv2_fn *fn, clear_fn *clearfn)
 912{
 913    uint32_t vlmax = vext_maxsz(desc) / esz;
 914    uint32_t mlen = vext_mlen(desc);
 915    uint32_t vm = vext_vm(desc);
 916    uint32_t vl = env->vl;
 917    uint32_t i;
 918
 919    for (i = 0; i < vl; i++) {
 920        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 921            continue;
 922        }
 923        fn(vd, vs1, vs2, i);
 924    }
 925    clearfn(vd, vl, vl * dsz,  vlmax * dsz);
 926}
 927
 928/* generate the helpers for OPIVV */
 929#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN)             \
 930void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
 931                  void *vs2, CPURISCVState *env,          \
 932                  uint32_t desc)                          \
 933{                                                         \
 934    do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
 935               do_##NAME, CLEAR_FN);                      \
 936}
 937
 938GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb)
 939GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh)
 940GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl)
 941GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq)
 942GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb)
 943GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh)
 944GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl)
 945GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq)
 946
 947typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
 948
 949/*
 950 * (T1)s1 gives the real operand type.
 951 * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
 952 */
 953#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
 954static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
 955{                                                                   \
 956    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
 957    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
 958}
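/*
 * Illustrative expansion (not in the original file): for the widening
 * vwadd_vx_b case further below, T1 is int8_t and TX1 is int16_t, so
 * (TX1)(T1)s1 first truncates the target_long scalar to 8 bits and then
 * sign-extends it to the 16-bit operand width before DO_ADD is applied.
 */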
 959
 960RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
 961RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
 962RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
 963RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
 964RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
 965RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
 966RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
 967RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
 968RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
 969RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
 970RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
 971RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
 972
 973static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
 974                       CPURISCVState *env, uint32_t desc,
 975                       uint32_t esz, uint32_t dsz,
 976                       opivx2_fn fn, clear_fn *clearfn)
 977{
 978    uint32_t vlmax = vext_maxsz(desc) / esz;
 979    uint32_t mlen = vext_mlen(desc);
 980    uint32_t vm = vext_vm(desc);
 981    uint32_t vl = env->vl;
 982    uint32_t i;
 983
 984    for (i = 0; i < vl; i++) {
 985        if (!vm && !vext_elem_mask(v0, mlen, i)) {
 986            continue;
 987        }
 988        fn(vd, s1, vs2, i);
 989    }
 990    clearfn(vd, vl, vl * dsz,  vlmax * dsz);
 991}
 992
 993/* generate the helpers for OPIVX */
 994#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN)             \
 995void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
 996                  void *vs2, CPURISCVState *env,          \
 997                  uint32_t desc)                          \
 998{                                                         \
 999    do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,      \
1000               do_##NAME, CLEAR_FN);                      \
1001}
1002
1003GEN_VEXT_VX(vadd_vx_b, 1, 1, clearb)
1004GEN_VEXT_VX(vadd_vx_h, 2, 2, clearh)
1005GEN_VEXT_VX(vadd_vx_w, 4, 4, clearl)
1006GEN_VEXT_VX(vadd_vx_d, 8, 8, clearq)
1007GEN_VEXT_VX(vsub_vx_b, 1, 1, clearb)
1008GEN_VEXT_VX(vsub_vx_h, 2, 2, clearh)
1009GEN_VEXT_VX(vsub_vx_w, 4, 4, clearl)
1010GEN_VEXT_VX(vsub_vx_d, 8, 8, clearq)
1011GEN_VEXT_VX(vrsub_vx_b, 1, 1, clearb)
1012GEN_VEXT_VX(vrsub_vx_h, 2, 2, clearh)
1013GEN_VEXT_VX(vrsub_vx_w, 4, 4, clearl)
1014GEN_VEXT_VX(vrsub_vx_d, 8, 8, clearq)
1015
1016void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
1017{
1018    intptr_t oprsz = simd_oprsz(desc);
1019    intptr_t i;
1020
1021    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1022        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
1023    }
1024}
1025
1026void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
1027{
1028    intptr_t oprsz = simd_oprsz(desc);
1029    intptr_t i;
1030
1031    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1032        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
1033    }
1034}
1035
1036void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
1037{
1038    intptr_t oprsz = simd_oprsz(desc);
1039    intptr_t i;
1040
1041    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1042        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
1043    }
1044}
1045
1046void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
1047{
1048    intptr_t oprsz = simd_oprsz(desc);
1049    intptr_t i;
1050
1051    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1052        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
1053    }
1054}
1055
1056/* Vector Widening Integer Add/Subtract */
1057#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
1058#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
1059#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
1060#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
1061#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
1062#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
1063#define WOP_WUUU_B  uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
1064#define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
1065#define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
1066#define WOP_WSSS_B  int16_t, int8_t, int16_t, int16_t, int16_t
1067#define WOP_WSSS_H  int32_t, int16_t, int32_t, int32_t, int32_t
1068#define WOP_WSSS_W  int64_t, int32_t, int64_t, int64_t, int64_t
1069RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
1070RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
1071RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
1072RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
1073RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
1074RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
1075RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
1076RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
1077RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
1078RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
1079RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
1080RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
1081RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
1082RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
1083RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
1084RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
1085RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
1086RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
1087RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
1088RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
1089RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
1090RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
1091RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
1092RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
1093GEN_VEXT_VV(vwaddu_vv_b, 1, 2, clearh)
1094GEN_VEXT_VV(vwaddu_vv_h, 2, 4, clearl)
1095GEN_VEXT_VV(vwaddu_vv_w, 4, 8, clearq)
1096GEN_VEXT_VV(vwsubu_vv_b, 1, 2, clearh)
1097GEN_VEXT_VV(vwsubu_vv_h, 2, 4, clearl)
1098GEN_VEXT_VV(vwsubu_vv_w, 4, 8, clearq)
1099GEN_VEXT_VV(vwadd_vv_b, 1, 2, clearh)
1100GEN_VEXT_VV(vwadd_vv_h, 2, 4, clearl)
1101GEN_VEXT_VV(vwadd_vv_w, 4, 8, clearq)
1102GEN_VEXT_VV(vwsub_vv_b, 1, 2, clearh)
1103GEN_VEXT_VV(vwsub_vv_h, 2, 4, clearl)
1104GEN_VEXT_VV(vwsub_vv_w, 4, 8, clearq)
1105GEN_VEXT_VV(vwaddu_wv_b, 1, 2, clearh)
1106GEN_VEXT_VV(vwaddu_wv_h, 2, 4, clearl)
1107GEN_VEXT_VV(vwaddu_wv_w, 4, 8, clearq)
1108GEN_VEXT_VV(vwsubu_wv_b, 1, 2, clearh)
1109GEN_VEXT_VV(vwsubu_wv_h, 2, 4, clearl)
1110GEN_VEXT_VV(vwsubu_wv_w, 4, 8, clearq)
1111GEN_VEXT_VV(vwadd_wv_b, 1, 2, clearh)
1112GEN_VEXT_VV(vwadd_wv_h, 2, 4, clearl)
1113GEN_VEXT_VV(vwadd_wv_w, 4, 8, clearq)
1114GEN_VEXT_VV(vwsub_wv_b, 1, 2, clearh)
1115GEN_VEXT_VV(vwsub_wv_h, 2, 4, clearl)
1116GEN_VEXT_VV(vwsub_wv_w, 4, 8, clearq)
1117
1118RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
1119RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
1120RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
1121RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
1122RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
1123RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
1124RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
1125RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
1126RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
1127RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
1128RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
1129RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
1130RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
1131RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
1132RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
1133RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
1134RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
1135RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
1136RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
1137RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
1138RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
1139RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
1140RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
1141RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
1142GEN_VEXT_VX(vwaddu_vx_b, 1, 2, clearh)
1143GEN_VEXT_VX(vwaddu_vx_h, 2, 4, clearl)
1144GEN_VEXT_VX(vwaddu_vx_w, 4, 8, clearq)
1145GEN_VEXT_VX(vwsubu_vx_b, 1, 2, clearh)
1146GEN_VEXT_VX(vwsubu_vx_h, 2, 4, clearl)
1147GEN_VEXT_VX(vwsubu_vx_w, 4, 8, clearq)
1148GEN_VEXT_VX(vwadd_vx_b, 1, 2, clearh)
1149GEN_VEXT_VX(vwadd_vx_h, 2, 4, clearl)
1150GEN_VEXT_VX(vwadd_vx_w, 4, 8, clearq)
1151GEN_VEXT_VX(vwsub_vx_b, 1, 2, clearh)
1152GEN_VEXT_VX(vwsub_vx_h, 2, 4, clearl)
1153GEN_VEXT_VX(vwsub_vx_w, 4, 8, clearq)
1154GEN_VEXT_VX(vwaddu_wx_b, 1, 2, clearh)
1155GEN_VEXT_VX(vwaddu_wx_h, 2, 4, clearl)
1156GEN_VEXT_VX(vwaddu_wx_w, 4, 8, clearq)
1157GEN_VEXT_VX(vwsubu_wx_b, 1, 2, clearh)
1158GEN_VEXT_VX(vwsubu_wx_h, 2, 4, clearl)
1159GEN_VEXT_VX(vwsubu_wx_w, 4, 8, clearq)
1160GEN_VEXT_VX(vwadd_wx_b, 1, 2, clearh)
1161GEN_VEXT_VX(vwadd_wx_h, 2, 4, clearl)
1162GEN_VEXT_VX(vwadd_wx_w, 4, 8, clearq)
1163GEN_VEXT_VX(vwsub_wx_b, 1, 2, clearh)
1164GEN_VEXT_VX(vwsub_wx_h, 2, 4, clearl)
1165GEN_VEXT_VX(vwsub_wx_w, 4, 8, clearq)
1166
1167/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
1168#define DO_VADC(N, M, C) (N + M + C)
1169#define DO_VSBC(N, M, C) (N - M - C)
1170
1171#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP, CLEAR_FN)    \
1172void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1173                  CPURISCVState *env, uint32_t desc)          \
1174{                                                             \
1175    uint32_t mlen = vext_mlen(desc);                          \
1176    uint32_t vl = env->vl;                                    \
1177    uint32_t esz = sizeof(ETYPE);                             \
1178    uint32_t vlmax = vext_maxsz(desc) / esz;                  \
1179    uint32_t i;                                               \
1180                                                              \
1181    for (i = 0; i < vl; i++) {                                \
1182        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1183        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1184        uint8_t carry = vext_elem_mask(v0, mlen, i);          \
1185                                                              \
1186        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
1187    }                                                         \
1188    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                  \
1189}
1190
1191GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC, clearb)
1192GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC, clearh)
1193GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC, clearl)
1194GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC, clearq)
1195
1196GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC, clearb)
1197GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC, clearh)
1198GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC, clearl)
1199GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC, clearq)
1200
1201#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP, CLEAR_FN)               \
1202void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
1203                  CPURISCVState *env, uint32_t desc)                     \
1204{                                                                        \
1205    uint32_t mlen = vext_mlen(desc);                                     \
1206    uint32_t vl = env->vl;                                               \
1207    uint32_t esz = sizeof(ETYPE);                                        \
1208    uint32_t vlmax = vext_maxsz(desc) / esz;                             \
1209    uint32_t i;                                                          \
1210                                                                         \
1211    for (i = 0; i < vl; i++) {                                           \
1212        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
1213        uint8_t carry = vext_elem_mask(v0, mlen, i);                     \
1214                                                                         \
1215        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1216    }                                                                    \
1217    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                             \
1218}
1219
1220GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC, clearb)
1221GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC, clearh)
1222GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC, clearl)
1223GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC, clearq)
1224
1225GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC, clearb)
1226GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC, clearh)
1227GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC, clearl)
1228GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC, clearq)
1229
1230#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
1231                          (__typeof(N))(N + M) < N)
1232#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
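
/*
 * A minimal sketch (hypothetical helper, not used elsewhere in this file)
 * of how DO_MADC/DO_MSBC detect carry-out and borrow-out without a wider
 * type: the sum or difference is evaluated in the element type and
 * compared against an operand.  Shown here for the 8-bit case only.
 */
static inline bool madc_msbc_example(void)
{
    uint8_t n = 0xff, m = 0x01;

    /* 0xff + 0x01 wraps to 0x00, so a carry-out is produced. */
    bool carry_out = DO_MADC(n, m, 0);
    /* 0x01 - 0xff borrows, since N < M. */
    bool borrow_out = DO_MSBC(m, n, 0);

    return carry_out && borrow_out;
}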
1233
1234#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
1235void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1236                  CPURISCVState *env, uint32_t desc)          \
1237{                                                             \
1238    uint32_t mlen = vext_mlen(desc);                          \
1239    uint32_t vl = env->vl;                                    \
1240    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);        \
1241    uint32_t i;                                               \
1242                                                              \
1243    for (i = 0; i < vl; i++) {                                \
1244        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1245        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1246        uint8_t carry = vext_elem_mask(v0, mlen, i);          \
1247                                                              \
1248        vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1, carry));\
1249    }                                                         \
1250    for (; i < vlmax; i++) {                                  \
1251        vext_set_elem_mask(vd, mlen, i, 0);                   \
1252    }                                                         \
1253}
1254
1255GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
1256GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1257GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1258GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1259
1260GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
1261GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1262GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1263GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1264
1265#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
1266void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
1267                  void *vs2, CPURISCVState *env, uint32_t desc) \
1268{                                                               \
1269    uint32_t mlen = vext_mlen(desc);                            \
1270    uint32_t vl = env->vl;                                      \
1271    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);          \
1272    uint32_t i;                                                 \
1273                                                                \
1274    for (i = 0; i < vl; i++) {                                  \
1275        ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
1276        uint8_t carry = vext_elem_mask(v0, mlen, i);            \
1277                                                                \
1278        vext_set_elem_mask(vd, mlen, i,                         \
1279                DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
1280    }                                                           \
1281    for (; i < vlmax; i++) {                                    \
1282        vext_set_elem_mask(vd, mlen, i, 0);                     \
1283    }                                                           \
1284}
1285
1286GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
1287GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1288GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1289GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1290
1291GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
1292GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1293GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1294GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1295
1296/* Vector Bitwise Logical Instructions */
1297RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1298RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1299RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1300RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1301RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1302RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1303RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1304RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1305RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1306RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1307RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1308RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1309GEN_VEXT_VV(vand_vv_b, 1, 1, clearb)
1310GEN_VEXT_VV(vand_vv_h, 2, 2, clearh)
1311GEN_VEXT_VV(vand_vv_w, 4, 4, clearl)
1312GEN_VEXT_VV(vand_vv_d, 8, 8, clearq)
1313GEN_VEXT_VV(vor_vv_b, 1, 1, clearb)
1314GEN_VEXT_VV(vor_vv_h, 2, 2, clearh)
1315GEN_VEXT_VV(vor_vv_w, 4, 4, clearl)
1316GEN_VEXT_VV(vor_vv_d, 8, 8, clearq)
1317GEN_VEXT_VV(vxor_vv_b, 1, 1, clearb)
1318GEN_VEXT_VV(vxor_vv_h, 2, 2, clearh)
1319GEN_VEXT_VV(vxor_vv_w, 4, 4, clearl)
1320GEN_VEXT_VV(vxor_vv_d, 8, 8, clearq)
1321
1322RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1323RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1324RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1325RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1326RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1327RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1328RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1329RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1330RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1331RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1332RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1333RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1334GEN_VEXT_VX(vand_vx_b, 1, 1, clearb)
1335GEN_VEXT_VX(vand_vx_h, 2, 2, clearh)
1336GEN_VEXT_VX(vand_vx_w, 4, 4, clearl)
1337GEN_VEXT_VX(vand_vx_d, 8, 8, clearq)
1338GEN_VEXT_VX(vor_vx_b, 1, 1, clearb)
1339GEN_VEXT_VX(vor_vx_h, 2, 2, clearh)
1340GEN_VEXT_VX(vor_vx_w, 4, 4, clearl)
1341GEN_VEXT_VX(vor_vx_d, 8, 8, clearq)
1342GEN_VEXT_VX(vxor_vx_b, 1, 1, clearb)
1343GEN_VEXT_VX(vxor_vx_h, 2, 2, clearh)
1344GEN_VEXT_VX(vxor_vx_w, 4, 4, clearl)
1345GEN_VEXT_VX(vxor_vx_d, 8, 8, clearq)
1346
1347/* Vector Single-Width Bit Shift Instructions */
1348#define DO_SLL(N, M)  (N << (M))
1349#define DO_SRL(N, M)  (N >> (M))
1350
1351/* generate the helpers for shift instructions with two vector operands */
1352#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK, CLEAR_FN)   \
1353void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
1354                  void *vs2, CPURISCVState *env, uint32_t desc)           \
1355{                                                                         \
1356    uint32_t mlen = vext_mlen(desc);                                      \
1357    uint32_t vm = vext_vm(desc);                                          \
1358    uint32_t vl = env->vl;                                                \
1359    uint32_t esz = sizeof(TS1);                                           \
1360    uint32_t vlmax = vext_maxsz(desc) / esz;                              \
1361    uint32_t i;                                                           \
1362                                                                          \
1363    for (i = 0; i < vl; i++) {                                            \
1364        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
1365            continue;                                                     \
1366        }                                                                 \
1367        TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
1368        TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
1369        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
1370    }                                                                     \
1371    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                              \
1372}
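
/*
 * The MASK argument keeps only the low log2(SEW) bits of the shift amount
 * (the spec ignores the upper bits), and vsra reuses DO_SRL simply by
 * giving the source element a signed type, relying on signed >> being an
 * arithmetic shift on the supported compilers.  A hypothetical scalar
 * illustration for SEW=8:
 */
static inline uint8_t srl8_shift_amount_example(uint8_t val, uint8_t amount)
{
    /* An amount of 9 behaves like 1, since only bits [2:0] are used. */
    return DO_SRL(val, amount & 0x7);
}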
1373
1374GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7, clearb)
1375GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf, clearh)
1376GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f, clearl)
1377GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f, clearq)
1378
1379GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb)
1380GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh)
1381GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl)
1382GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq)
1383
1384GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7, clearb)
1385GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh)
1386GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl)
1387GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq)
1388
1389/* generate the helpers for shift instructions with one vector and one scalar operand */
1390#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK, CLEAR_FN) \
1391void HELPER(NAME)(void *vd, void *v0, target_ulong s1,                \
1392        void *vs2, CPURISCVState *env, uint32_t desc)                 \
1393{                                                                     \
1394    uint32_t mlen = vext_mlen(desc);                                  \
1395    uint32_t vm = vext_vm(desc);                                      \
1396    uint32_t vl = env->vl;                                            \
1397    uint32_t esz = sizeof(TD);                                        \
1398    uint32_t vlmax = vext_maxsz(desc) / esz;                          \
1399    uint32_t i;                                                       \
1400                                                                      \
1401    for (i = 0; i < vl; i++) {                                        \
1402        if (!vm && !vext_elem_mask(v0, mlen, i)) {                    \
1403            continue;                                                 \
1404        }                                                             \
1405        TS2 s2 = *((TS2 *)vs2 + HS2(i));                              \
1406        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);                      \
1407    }                                                                 \
1408    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                          \
1409}
1410
1411GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7, clearb)
1412GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf, clearh)
1413GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f, clearl)
1414GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f, clearq)
1415
1416GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb)
1417GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh)
1418GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl)
1419GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq)
1420
1421GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb)
1422GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh)
1423GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl)
1424GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq)
1425
1426/* Vector Narrowing Integer Right Shift Instructions */
1427GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf, clearb)
1428GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh)
1429GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl)
1430GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf, clearb)
1431GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh)
1432GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl)
1433GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb)
1434GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh)
1435GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl)
1436GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb)
1437GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh)
1438GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl)
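
/*
 * For the narrowing shifts above, the source element is 2*SEW wide while
 * the destination is SEW wide, so destination and source use different H
 * macros and the shift amount keeps log2(2*SEW) bits (e.g. 0xf for a
 * byte-wide destination).
 */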
1439
1440/* Vector Integer Comparison Instructions */
1441#define DO_MSEQ(N, M) (N == M)
1442#define DO_MSNE(N, M) (N != M)
1443#define DO_MSLT(N, M) (N < M)
1444#define DO_MSLE(N, M) (N <= M)
1445#define DO_MSGT(N, M) (N > M)
1446
1447#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
1448void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1449                  CPURISCVState *env, uint32_t desc)          \
1450{                                                             \
1451    uint32_t mlen = vext_mlen(desc);                          \
1452    uint32_t vm = vext_vm(desc);                              \
1453    uint32_t vl = env->vl;                                    \
1454    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);        \
1455    uint32_t i;                                               \
1456                                                              \
1457    for (i = 0; i < vl; i++) {                                \
1458        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1459        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1460        if (!vm && !vext_elem_mask(v0, mlen, i)) {            \
1461            continue;                                         \
1462        }                                                     \
1463        vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1));       \
1464    }                                                         \
1465    for (; i < vlmax; i++) {                                  \
1466        vext_set_elem_mask(vd, mlen, i, 0);                   \
1467    }                                                         \
1468}
1469
1470GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
1471GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1472GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1473GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1474
1475GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
1476GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1477GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1478GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1479
1480GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
1481GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1482GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1483GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1484
1485GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
1486GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1487GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1488GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1489
1490GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
1491GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1492GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1493GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1494
1495GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
1496GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1497GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1498GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1499
1500#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
1501void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
1502                  CPURISCVState *env, uint32_t desc)                \
1503{                                                                   \
1504    uint32_t mlen = vext_mlen(desc);                                \
1505    uint32_t vm = vext_vm(desc);                                    \
1506    uint32_t vl = env->vl;                                          \
1507    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);              \
1508    uint32_t i;                                                     \
1509                                                                    \
1510    for (i = 0; i < vl; i++) {                                      \
1511        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
1512        if (!vm && !vext_elem_mask(v0, mlen, i)) {                  \
1513            continue;                                               \
1514        }                                                           \
1515        vext_set_elem_mask(vd, mlen, i,                             \
1516                DO_OP(s2, (ETYPE)(target_long)s1));                 \
1517    }                                                               \
1518    for (; i < vlmax; i++) {                                        \
1519        vext_set_elem_mask(vd, mlen, i, 0);                         \
1520    }                                                               \
1521}
1522
1523GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
1524GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1525GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1526GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1527
1528GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
1529GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1530GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1531GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1532
1533GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
1534GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1535GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1536GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1537
1538GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
1539GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1540GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1541GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1542
1543GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
1544GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1545GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1546GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1547
1548GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
1549GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1550GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1551GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1552
1553GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
1554GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1555GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1556GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1557
1558GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
1559GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1560GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1561GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1562
1563/* Vector Integer Min/Max Instructions */
1564RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1565RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1566RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1567RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1568RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1569RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1570RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1571RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1572RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1573RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1574RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1575RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1576RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1577RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1578RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1579RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1580GEN_VEXT_VV(vminu_vv_b, 1, 1, clearb)
1581GEN_VEXT_VV(vminu_vv_h, 2, 2, clearh)
1582GEN_VEXT_VV(vminu_vv_w, 4, 4, clearl)
1583GEN_VEXT_VV(vminu_vv_d, 8, 8, clearq)
1584GEN_VEXT_VV(vmin_vv_b, 1, 1, clearb)
1585GEN_VEXT_VV(vmin_vv_h, 2, 2, clearh)
1586GEN_VEXT_VV(vmin_vv_w, 4, 4, clearl)
1587GEN_VEXT_VV(vmin_vv_d, 8, 8, clearq)
1588GEN_VEXT_VV(vmaxu_vv_b, 1, 1, clearb)
1589GEN_VEXT_VV(vmaxu_vv_h, 2, 2, clearh)
1590GEN_VEXT_VV(vmaxu_vv_w, 4, 4, clearl)
1591GEN_VEXT_VV(vmaxu_vv_d, 8, 8, clearq)
1592GEN_VEXT_VV(vmax_vv_b, 1, 1, clearb)
1593GEN_VEXT_VV(vmax_vv_h, 2, 2, clearh)
1594GEN_VEXT_VV(vmax_vv_w, 4, 4, clearl)
1595GEN_VEXT_VV(vmax_vv_d, 8, 8, clearq)
1596
1597RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1598RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1599RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1600RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1601RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1602RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1603RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1604RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1605RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1606RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1607RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1608RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1609RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1610RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1611RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1612RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1613GEN_VEXT_VX(vminu_vx_b, 1, 1, clearb)
1614GEN_VEXT_VX(vminu_vx_h, 2, 2, clearh)
1615GEN_VEXT_VX(vminu_vx_w, 4, 4, clearl)
1616GEN_VEXT_VX(vminu_vx_d, 8, 8, clearq)
1617GEN_VEXT_VX(vmin_vx_b, 1, 1, clearb)
1618GEN_VEXT_VX(vmin_vx_h, 2, 2, clearh)
1619GEN_VEXT_VX(vmin_vx_w, 4, 4, clearl)
1620GEN_VEXT_VX(vmin_vx_d, 8, 8, clearq)
1621GEN_VEXT_VX(vmaxu_vx_b, 1, 1, clearb)
1622GEN_VEXT_VX(vmaxu_vx_h, 2, 2, clearh)
1623GEN_VEXT_VX(vmaxu_vx_w, 4, 4, clearl)
1624GEN_VEXT_VX(vmaxu_vx_d, 8, 8, clearq)
1625GEN_VEXT_VX(vmax_vx_b, 1, 1, clearb)
1626GEN_VEXT_VX(vmax_vx_h, 2, 2, clearh)
1627GEN_VEXT_VX(vmax_vx_w, 4, 4, clearl)
1628GEN_VEXT_VX(vmax_vx_d, 8, 8, clearq)
1629
1630/* Vector Single-Width Integer Multiply Instructions */
1631#define DO_MUL(N, M) (N * M)
1632RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1633RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1634RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1635RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1636GEN_VEXT_VV(vmul_vv_b, 1, 1, clearb)
1637GEN_VEXT_VV(vmul_vv_h, 2, 2, clearh)
1638GEN_VEXT_VV(vmul_vv_w, 4, 4, clearl)
1639GEN_VEXT_VV(vmul_vv_d, 8, 8, clearq)
1640
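/*
 * The high-half multiply helpers below widen to the next C integer type
 * where one exists; the 64-bit variants use QEMU's muls64()/mulu64()
 * 64x64->128 routines instead.
 */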
1641static int8_t do_mulh_b(int8_t s2, int8_t s1)
1642{
1643    return (int16_t)s2 * (int16_t)s1 >> 8;
1644}
1645
1646static int16_t do_mulh_h(int16_t s2, int16_t s1)
1647{
1648    return (int32_t)s2 * (int32_t)s1 >> 16;
1649}
1650
1651static int32_t do_mulh_w(int32_t s2, int32_t s1)
1652{
1653    return (int64_t)s2 * (int64_t)s1 >> 32;
1654}
1655
1656static int64_t do_mulh_d(int64_t s2, int64_t s1)
1657{
1658    uint64_t hi_64, lo_64;
1659
1660    muls64(&lo_64, &hi_64, s1, s2);
1661    return hi_64;
1662}
1663
1664static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1665{
1666    return (uint16_t)s2 * (uint16_t)s1 >> 8;
1667}
1668
1669static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1670{
1671    return (uint32_t)s2 * (uint32_t)s1 >> 16;
1672}
1673
1674static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1675{
1676    return (uint64_t)s2 * (uint64_t)s1 >> 32;
1677}
1678
1679static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1680{
1681    uint64_t hi_64, lo_64;
1682
1683    mulu64(&lo_64, &hi_64, s2, s1);
1684    return hi_64;
1685}
1686
1687static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1688{
1689    return (int16_t)s2 * (uint16_t)s1 >> 8;
1690}
1691
1692static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1693{
1694    return (int32_t)s2 * (uint32_t)s1 >> 16;
1695}
1696
1697static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1698{
1699    return (int64_t)s2 * (uint64_t)s1 >> 32;
1700}
1701
1702/*
1703 * Let  A = the signed operand's 64-bit pattern, taken as unsigned,
1704 *      B = the unsigned operand,
1705 *      P = mulu64(A, B), the 128-bit unsigned product.
1706 *
1707 * When the sign bit of A is set, the signed value it encodes is
1708 *      -(2 ** 64 - A), so with SP = the signed x unsigned product:
1709 *
1710 *      IF A's sign bit is set
1711 *          SP = -(2 ** 64 - A) * B
1712 *             = A * B - 2 ** 64 * B
1713 *             = P - 2 ** 64 * B
1714 *      ELSE
1715 *          SP = P
1716 *
1717 * so the high half of SP is the high half of P, minus B when the signed
1718 *      operand is negative:  HI_P -= (A < 0 ? B : 0)
1719 */
1720
1721static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1722{
1723    uint64_t hi_64, lo_64;
1724
1725    mulu64(&lo_64, &hi_64, s2, s1);
1726
1727    hi_64 -= s2 < 0 ? s1 : 0;
1728    return hi_64;
1729}
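
/*
 * A hypothetical spot-check of the derivation above (not used elsewhere in
 * this file): with s2 = -3 and s1 = 2^64 - 1 the product is -3 * 2^64 + 3,
 * so the signed high half must be -3, i.e. the unsigned high half
 * (2^64 - 4) minus s1.
 */
static inline bool mulhsu_d_spot_check(void)
{
    return do_mulhsu_d(-3, UINT64_MAX) == -3;
}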
1730
1731RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1732RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1733RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1734RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1735RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1736RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1737RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1738RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1739RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1740RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1741RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1742RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1743GEN_VEXT_VV(vmulh_vv_b, 1, 1, clearb)
1744GEN_VEXT_VV(vmulh_vv_h, 2, 2, clearh)
1745GEN_VEXT_VV(vmulh_vv_w, 4, 4, clearl)
1746GEN_VEXT_VV(vmulh_vv_d, 8, 8, clearq)
1747GEN_VEXT_VV(vmulhu_vv_b, 1, 1, clearb)
1748GEN_VEXT_VV(vmulhu_vv_h, 2, 2, clearh)
1749GEN_VEXT_VV(vmulhu_vv_w, 4, 4, clearl)
1750GEN_VEXT_VV(vmulhu_vv_d, 8, 8, clearq)
1751GEN_VEXT_VV(vmulhsu_vv_b, 1, 1, clearb)
1752GEN_VEXT_VV(vmulhsu_vv_h, 2, 2, clearh)
1753GEN_VEXT_VV(vmulhsu_vv_w, 4, 4, clearl)
1754GEN_VEXT_VV(vmulhsu_vv_d, 8, 8, clearq)
1755
1756RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1757RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1758RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1759RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1760RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1761RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1762RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1763RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1764RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1765RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1766RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1767RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1768RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1769RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1770RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1771RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1772GEN_VEXT_VX(vmul_vx_b, 1, 1, clearb)
1773GEN_VEXT_VX(vmul_vx_h, 2, 2, clearh)
1774GEN_VEXT_VX(vmul_vx_w, 4, 4, clearl)
1775GEN_VEXT_VX(vmul_vx_d, 8, 8, clearq)
1776GEN_VEXT_VX(vmulh_vx_b, 1, 1, clearb)
1777GEN_VEXT_VX(vmulh_vx_h, 2, 2, clearh)
1778GEN_VEXT_VX(vmulh_vx_w, 4, 4, clearl)
1779GEN_VEXT_VX(vmulh_vx_d, 8, 8, clearq)
1780GEN_VEXT_VX(vmulhu_vx_b, 1, 1, clearb)
1781GEN_VEXT_VX(vmulhu_vx_h, 2, 2, clearh)
1782GEN_VEXT_VX(vmulhu_vx_w, 4, 4, clearl)
1783GEN_VEXT_VX(vmulhu_vx_d, 8, 8, clearq)
1784GEN_VEXT_VX(vmulhsu_vx_b, 1, 1, clearb)
1785GEN_VEXT_VX(vmulhsu_vx_h, 2, 2, clearh)
1786GEN_VEXT_VX(vmulhsu_vx_w, 4, 4, clearl)
1787GEN_VEXT_VX(vmulhsu_vx_d, 8, 8, clearq)
1788
1789/* Vector Integer Divide Instructions */
1790#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1791#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1792#define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) :\
1793        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1794#define DO_REM(N, M)  (unlikely(M == 0) ? N :\
1795        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
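
/*
 * A minimal illustration (hypothetical helper) of the corner cases encoded
 * above, which mirror the results the RISC-V spec mandates: division by
 * zero yields all ones and a remainder equal to the dividend, while the
 * signed overflow case INT64_MIN / -1 yields INT64_MIN with remainder 0.
 */
static inline bool div_corner_case_example(void)
{
    int64_t min = INT64_MIN, neg1 = -1, szero = 0, seven = 7;
    uint64_t uzero = 0;

    return DO_DIVU((uint64_t)7, uzero) == UINT64_MAX && /* x / 0 -> all ones */
           DO_REM(seven, szero) == 7 &&                 /* x % 0 -> x */
           DO_DIV(min, neg1) == INT64_MIN &&            /* overflow case */
           DO_REM(min, neg1) == 0;
}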
1796
1797RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1798RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1799RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1800RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1801RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1802RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1803RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1804RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1805RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1806RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1807RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1808RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1809RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1810RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1811RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1812RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1813GEN_VEXT_VV(vdivu_vv_b, 1, 1, clearb)
1814GEN_VEXT_VV(vdivu_vv_h, 2, 2, clearh)
1815GEN_VEXT_VV(vdivu_vv_w, 4, 4, clearl)
1816GEN_VEXT_VV(vdivu_vv_d, 8, 8, clearq)
1817GEN_VEXT_VV(vdiv_vv_b, 1, 1, clearb)
1818GEN_VEXT_VV(vdiv_vv_h, 2, 2, clearh)
1819GEN_VEXT_VV(vdiv_vv_w, 4, 4, clearl)
1820GEN_VEXT_VV(vdiv_vv_d, 8, 8, clearq)
1821GEN_VEXT_VV(vremu_vv_b, 1, 1, clearb)
1822GEN_VEXT_VV(vremu_vv_h, 2, 2, clearh)
1823GEN_VEXT_VV(vremu_vv_w, 4, 4, clearl)
1824GEN_VEXT_VV(vremu_vv_d, 8, 8, clearq)
1825GEN_VEXT_VV(vrem_vv_b, 1, 1, clearb)
1826GEN_VEXT_VV(vrem_vv_h, 2, 2, clearh)
1827GEN_VEXT_VV(vrem_vv_w, 4, 4, clearl)
1828GEN_VEXT_VV(vrem_vv_d, 8, 8, clearq)
1829
1830RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1831RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1832RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1833RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1834RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1835RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1836RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1837RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1838RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1839RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1840RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1841RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1842RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1843RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1844RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1845RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1846GEN_VEXT_VX(vdivu_vx_b, 1, 1, clearb)
1847GEN_VEXT_VX(vdivu_vx_h, 2, 2, clearh)
1848GEN_VEXT_VX(vdivu_vx_w, 4, 4, clearl)
1849GEN_VEXT_VX(vdivu_vx_d, 8, 8, clearq)
1850GEN_VEXT_VX(vdiv_vx_b, 1, 1, clearb)
1851GEN_VEXT_VX(vdiv_vx_h, 2, 2, clearh)
1852GEN_VEXT_VX(vdiv_vx_w, 4, 4, clearl)
1853GEN_VEXT_VX(vdiv_vx_d, 8, 8, clearq)
1854GEN_VEXT_VX(vremu_vx_b, 1, 1, clearb)
1855GEN_VEXT_VX(vremu_vx_h, 2, 2, clearh)
1856GEN_VEXT_VX(vremu_vx_w, 4, 4, clearl)
1857GEN_VEXT_VX(vremu_vx_d, 8, 8, clearq)
1858GEN_VEXT_VX(vrem_vx_b, 1, 1, clearb)
1859GEN_VEXT_VX(vrem_vx_h, 2, 2, clearh)
1860GEN_VEXT_VX(vrem_vx_w, 4, 4, clearl)
1861GEN_VEXT_VX(vrem_vx_d, 8, 8, clearq)
1862
1863/* Vector Widening Integer Multiply Instructions */
1864RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1865RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1866RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1867RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1868RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1869RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1870RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1871RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1872RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1873GEN_VEXT_VV(vwmul_vv_b, 1, 2, clearh)
1874GEN_VEXT_VV(vwmul_vv_h, 2, 4, clearl)
1875GEN_VEXT_VV(vwmul_vv_w, 4, 8, clearq)
1876GEN_VEXT_VV(vwmulu_vv_b, 1, 2, clearh)
1877GEN_VEXT_VV(vwmulu_vv_h, 2, 4, clearl)
1878GEN_VEXT_VV(vwmulu_vv_w, 4, 8, clearq)
1879GEN_VEXT_VV(vwmulsu_vv_b, 1, 2, clearh)
1880GEN_VEXT_VV(vwmulsu_vv_h, 2, 4, clearl)
1881GEN_VEXT_VV(vwmulsu_vv_w, 4, 8, clearq)
1882
1883RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1884RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1885RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1886RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1887RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1888RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1889RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1890RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1891RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1892GEN_VEXT_VX(vwmul_vx_b, 1, 2, clearh)
1893GEN_VEXT_VX(vwmul_vx_h, 2, 4, clearl)
1894GEN_VEXT_VX(vwmul_vx_w, 4, 8, clearq)
1895GEN_VEXT_VX(vwmulu_vx_b, 1, 2, clearh)
1896GEN_VEXT_VX(vwmulu_vx_h, 2, 4, clearl)
1897GEN_VEXT_VX(vwmulu_vx_w, 4, 8, clearq)
1898GEN_VEXT_VX(vwmulsu_vx_b, 1, 2, clearh)
1899GEN_VEXT_VX(vwmulsu_vx_h, 2, 4, clearl)
1900GEN_VEXT_VX(vwmulsu_vx_w, 4, 8, clearq)
1901
1902/* Vector Single-Width Integer Multiply-Add Instructions */
1903#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
1904static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
1905{                                                                  \
1906    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
1907    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
1908    TD d = *((TD *)vd + HD(i));                                    \
1909    *((TD *)vd + HD(i)) = OP(s2, s1, d);                           \
1910}
1911
1912#define DO_MACC(N, M, D) (M * N + D)
1913#define DO_NMSAC(N, M, D) (-(M * N) + D)
1914#define DO_MADD(N, M, D) (M * D + N)
1915#define DO_NMSUB(N, M, D) (-(M * D) + N)
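
/*
 * Operand roles, written out for clarity (the macros receive
 * (N, M, D) = (vs2[i], vs1[i] or rs1, vd[i])):
 *   vmacc:  vd[i] =  (vs1[i] * vs2[i]) + vd[i]
 *   vnmsac: vd[i] = -(vs1[i] * vs2[i]) + vd[i]
 *   vmadd:  vd[i] =  (vs1[i] * vd[i])  + vs2[i]
 *   vnmsub: vd[i] = -(vs1[i] * vd[i])  + vs2[i]
 */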
1916RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1917RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1918RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1919RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1920RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1921RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1922RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1923RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1924RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1925RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1926RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1927RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1928RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1929RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1930RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1931RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1932GEN_VEXT_VV(vmacc_vv_b, 1, 1, clearb)
1933GEN_VEXT_VV(vmacc_vv_h, 2, 2, clearh)
1934GEN_VEXT_VV(vmacc_vv_w, 4, 4, clearl)
1935GEN_VEXT_VV(vmacc_vv_d, 8, 8, clearq)
1936GEN_VEXT_VV(vnmsac_vv_b, 1, 1, clearb)
1937GEN_VEXT_VV(vnmsac_vv_h, 2, 2, clearh)
1938GEN_VEXT_VV(vnmsac_vv_w, 4, 4, clearl)
1939GEN_VEXT_VV(vnmsac_vv_d, 8, 8, clearq)
1940GEN_VEXT_VV(vmadd_vv_b, 1, 1, clearb)
1941GEN_VEXT_VV(vmadd_vv_h, 2, 2, clearh)
1942GEN_VEXT_VV(vmadd_vv_w, 4, 4, clearl)
1943GEN_VEXT_VV(vmadd_vv_d, 8, 8, clearq)
1944GEN_VEXT_VV(vnmsub_vv_b, 1, 1, clearb)
1945GEN_VEXT_VV(vnmsub_vv_h, 2, 2, clearh)
1946GEN_VEXT_VV(vnmsub_vv_w, 4, 4, clearl)
1947GEN_VEXT_VV(vnmsub_vv_d, 8, 8, clearq)
1948
1949#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
1950static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
1951{                                                                   \
1952    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1953    TD d = *((TD *)vd + HD(i));                                     \
1954    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
1955}
1956
1957RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1958RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1959RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1960RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1961RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1962RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1963RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1964RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1965RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1966RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1967RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1968RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1969RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1970RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1971RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1972RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1973GEN_VEXT_VX(vmacc_vx_b, 1, 1, clearb)
1974GEN_VEXT_VX(vmacc_vx_h, 2, 2, clearh)
1975GEN_VEXT_VX(vmacc_vx_w, 4, 4, clearl)
1976GEN_VEXT_VX(vmacc_vx_d, 8, 8, clearq)
1977GEN_VEXT_VX(vnmsac_vx_b, 1, 1, clearb)
1978GEN_VEXT_VX(vnmsac_vx_h, 2, 2, clearh)
1979GEN_VEXT_VX(vnmsac_vx_w, 4, 4, clearl)
1980GEN_VEXT_VX(vnmsac_vx_d, 8, 8, clearq)
1981GEN_VEXT_VX(vmadd_vx_b, 1, 1, clearb)
1982GEN_VEXT_VX(vmadd_vx_h, 2, 2, clearh)
1983GEN_VEXT_VX(vmadd_vx_w, 4, 4, clearl)
1984GEN_VEXT_VX(vmadd_vx_d, 8, 8, clearq)
1985GEN_VEXT_VX(vnmsub_vx_b, 1, 1, clearb)
1986GEN_VEXT_VX(vnmsub_vx_h, 2, 2, clearh)
1987GEN_VEXT_VX(vnmsub_vx_w, 4, 4, clearl)
1988GEN_VEXT_VX(vnmsub_vx_d, 8, 8, clearq)
1989
1990/* Vector Widening Integer Multiply-Add Instructions */
1991RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1992RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1993RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1994RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1995RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1996RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1997RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1998RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1999RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
2000GEN_VEXT_VV(vwmaccu_vv_b, 1, 2, clearh)
2001GEN_VEXT_VV(vwmaccu_vv_h, 2, 4, clearl)
2002GEN_VEXT_VV(vwmaccu_vv_w, 4, 8, clearq)
2003GEN_VEXT_VV(vwmacc_vv_b, 1, 2, clearh)
2004GEN_VEXT_VV(vwmacc_vv_h, 2, 4, clearl)
2005GEN_VEXT_VV(vwmacc_vv_w, 4, 8, clearq)
2006GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2, clearh)
2007GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4, clearl)
2008GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8, clearq)
2009
2010RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
2011RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
2012RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
2013RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
2014RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
2015RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
2016RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
2017RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
2018RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
2019RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
2020RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
2021RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
2022GEN_VEXT_VX(vwmaccu_vx_b, 1, 2, clearh)
2023GEN_VEXT_VX(vwmaccu_vx_h, 2, 4, clearl)
2024GEN_VEXT_VX(vwmaccu_vx_w, 4, 8, clearq)
2025GEN_VEXT_VX(vwmacc_vx_b, 1, 2, clearh)
2026GEN_VEXT_VX(vwmacc_vx_h, 2, 4, clearl)
2027GEN_VEXT_VX(vwmacc_vx_w, 4, 8, clearq)
2028GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2, clearh)
2029GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4, clearl)
2030GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8, clearq)
2031GEN_VEXT_VX(vwmaccus_vx_b, 1, 2, clearh)
2032GEN_VEXT_VX(vwmaccus_vx_h, 2, 4, clearl)
2033GEN_VEXT_VX(vwmaccus_vx_w, 4, 8, clearq)
2034
2035/* Vector Integer Merge and Move Instructions */
2036#define GEN_VEXT_VMV_VV(NAME, ETYPE, H, CLEAR_FN)                    \
2037void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
2038                  uint32_t desc)                                     \
2039{                                                                    \
2040    uint32_t vl = env->vl;                                           \
2041    uint32_t esz = sizeof(ETYPE);                                    \
2042    uint32_t vlmax = vext_maxsz(desc) / esz;                         \
2043    uint32_t i;                                                      \
2044                                                                     \
2045    for (i = 0; i < vl; i++) {                                       \
2046        ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
2047        *((ETYPE *)vd + H(i)) = s1;                                  \
2048    }                                                                \
2049    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                         \
2050}
2051
2052GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1, clearb)
2053GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2, clearh)
2054GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4, clearl)
2055GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8, clearq)
2056
2057#define GEN_VEXT_VMV_VX(NAME, ETYPE, H, CLEAR_FN)                    \
2058void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
2059                  uint32_t desc)                                     \
2060{                                                                    \
2061    uint32_t vl = env->vl;                                           \
2062    uint32_t esz = sizeof(ETYPE);                                    \
2063    uint32_t vlmax = vext_maxsz(desc) / esz;                         \
2064    uint32_t i;                                                      \
2065                                                                     \
2066    for (i = 0; i < vl; i++) {                                       \
2067        *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
2068    }                                                                \
2069    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                         \
2070}
2071
2072GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1, clearb)
2073GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2, clearh)
2074GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4, clearl)
2075GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8, clearq)
2076
2077#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H, CLEAR_FN)                 \
2078void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
2079                  CPURISCVState *env, uint32_t desc)                 \
2080{                                                                    \
2081    uint32_t mlen = vext_mlen(desc);                                 \
2082    uint32_t vl = env->vl;                                           \
2083    uint32_t esz = sizeof(ETYPE);                                    \
2084    uint32_t vlmax = vext_maxsz(desc) / esz;                         \
2085    uint32_t i;                                                      \
2086                                                                     \
2087    for (i = 0; i < vl; i++) {                                       \
2088        ETYPE *vt = (!vext_elem_mask(v0, mlen, i) ? vs2 : vs1);      \
2089        *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
2090    }                                                                \
2091    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                         \
2092}
2093
2094GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1, clearb)
2095GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2, clearh)
2096GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4, clearl)
2097GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8, clearq)
2098
2099#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H, CLEAR_FN)                 \
2100void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
2101                  void *vs2, CPURISCVState *env, uint32_t desc)      \
2102{                                                                    \
2103    uint32_t mlen = vext_mlen(desc);                                 \
2104    uint32_t vl = env->vl;                                           \
2105    uint32_t esz = sizeof(ETYPE);                                    \
2106    uint32_t vlmax = vext_maxsz(desc) / esz;                         \
2107    uint32_t i;                                                      \
2108                                                                     \
2109    for (i = 0; i < vl; i++) {                                       \
2110        ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
2111        ETYPE d = (!vext_elem_mask(v0, mlen, i) ? s2 :               \
2112                   (ETYPE)(target_long)s1);                          \
2113        *((ETYPE *)vd + H(i)) = d;                                   \
2114    }                                                                \
2115    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                         \
2116}
2117
2118GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1, clearb)
2119GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2, clearh)
2120GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4, clearl)
2121GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8, clearq)
2122
2123/*
2124 *** Vector Fixed-Point Arithmetic Instructions
2125 */
2126
2127/* Vector Single-Width Saturating Add and Subtract */
2128
2129/*
2130 * Fixed-point instructions typically need a rounding mode and saturation,
2131 * so define the common fixed-point macros here.
2132 */
2133typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
2134                          CPURISCVState *env, int vxrm);
2135
2136#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
2137static inline void                                                  \
2138do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
2139          CPURISCVState *env, int vxrm)                             \
2140{                                                                   \
2141    TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
2142    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
2143    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
2144}
2145
2146static inline void
2147vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2148             CPURISCVState *env,
2149             uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm,
2150             opivv2_rm_fn *fn)
2151{
2152    for (uint32_t i = 0; i < vl; i++) {
2153        if (!vm && !vext_elem_mask(v0, mlen, i)) {
2154            continue;
2155        }
2156        fn(vd, vs1, vs2, i, env, vxrm);
2157    }
2158}
2159
2160static inline void
2161vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
2162             CPURISCVState *env,
2163             uint32_t desc, uint32_t esz, uint32_t dsz,
2164             opivv2_rm_fn *fn, clear_fn *clearfn)
2165{
2166    uint32_t vlmax = vext_maxsz(desc) / esz;
2167    uint32_t mlen = vext_mlen(desc);
2168    uint32_t vm = vext_vm(desc);
2169    uint32_t vl = env->vl;
2170
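    /*
     * Dispatch on the rounding mode here so that each call passes vxrm as
     * an integer literal; presumably this lets the compiler specialize the
     * inlined per-element op for each rounding mode.
     */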
2171    switch (env->vxrm) {
2172    case 0: /* rnu */
2173        vext_vv_rm_1(vd, v0, vs1, vs2,
2174                     env, vl, vm, mlen, 0, fn);
2175        break;
2176    case 1: /* rne */
2177        vext_vv_rm_1(vd, v0, vs1, vs2,
2178                     env, vl, vm, mlen, 1, fn);
2179        break;
2180    case 2: /* rdn */
2181        vext_vv_rm_1(vd, v0, vs1, vs2,
2182                     env, vl, vm, mlen, 2, fn);
2183        break;
2184    default: /* rod */
2185        vext_vv_rm_1(vd, v0, vs1, vs2,
2186                     env, vl, vm, mlen, 3, fn);
2187        break;
2188    }
2189
2190    clearfn(vd, vl, vl * dsz,  vlmax * dsz);
2191}
2192
2193/* generate helpers for fixed point instructions with OPIVV format */
2194#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ, CLEAR_FN)                \
2195void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
2196                  CPURISCVState *env, uint32_t desc)            \
2197{                                                               \
2198    vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,         \
2199                 do_##NAME, CLEAR_FN);                          \
2200}
2201
2202static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2203{
2204    uint8_t res = a + b;
2205    if (res < a) {
2206        res = UINT8_MAX;
2207        env->vxsat = 0x1;
2208    }
2209    return res;
2210}
2211
2212static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2213                               uint16_t b)
2214{
2215    uint16_t res = a + b;
2216    if (res < a) {
2217        res = UINT16_MAX;
2218        env->vxsat = 0x1;
2219    }
2220    return res;
2221}
2222
2223static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2224                               uint32_t b)
2225{
2226    uint32_t res = a + b;
2227    if (res < a) {
2228        res = UINT32_MAX;
2229        env->vxsat = 0x1;
2230    }
2231    return res;
2232}
2233
2234static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2235                               uint64_t b)
2236{
2237    uint64_t res = a + b;
2238    if (res < a) {
2239        res = UINT64_MAX;
2240        env->vxsat = 0x1;
2241    }
2242    return res;
2243}
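
/*
 * A brief illustration (hypothetical helper) of the unsigned saturation
 * test above: an unsigned sum wrapped around exactly when the result is
 * smaller than one of the addends.
 */
static inline bool saddu8_saturation_example(CPURISCVState *env)
{
    /* 250 + 10 wraps to 4 without saturation; saddu8 clamps to 255. */
    return saddu8(env, 0, 250, 10) == UINT8_MAX && env->vxsat == 1;
}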
2244
2245RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2246RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2247RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2248RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2249GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1, clearb)
2250GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2, clearh)
2251GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4, clearl)
2252GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8, clearq)
2253
2254typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2255                          CPURISCVState *env, int vxrm);
2256
2257#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
2258static inline void                                                  \
2259do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
2260          CPURISCVState *env, int vxrm)                             \
2261{                                                                   \
2262    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
2263    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
2264}
2265
2266static inline void
2267vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2268             CPURISCVState *env,
2269             uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm,
2270             opivx2_rm_fn *fn)
2271{
2272    for (uint32_t i = 0; i < vl; i++) {
2273        if (!vm && !vext_elem_mask(v0, mlen, i)) {
2274            continue;
2275        }
2276        fn(vd, s1, vs2, i, env, vxrm);
2277    }
2278}
2279
2280static inline void
2281vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2282             CPURISCVState *env,
2283             uint32_t desc, uint32_t esz, uint32_t dsz,
2284             opivx2_rm_fn *fn, clear_fn *clearfn)
2285{
2286    uint32_t vlmax = vext_maxsz(desc) / esz;
2287    uint32_t mlen = vext_mlen(desc);
2288    uint32_t vm = vext_vm(desc);
2289    uint32_t vl = env->vl;
2290
2291    switch (env->vxrm) {
2292    case 0: /* rnu */
2293        vext_vx_rm_1(vd, v0, s1, vs2,
2294                     env, vl, vm, mlen, 0, fn);
2295        break;
2296    case 1: /* rne */
2297        vext_vx_rm_1(vd, v0, s1, vs2,
2298                     env, vl, vm, mlen, 1, fn);
2299        break;
2300    case 2: /* rdn */
2301        vext_vx_rm_1(vd, v0, s1, vs2,
2302                     env, vl, vm, mlen, 2, fn);
2303        break;
2304    default: /* rod */
2305        vext_vx_rm_1(vd, v0, s1, vs2,
2306                     env, vl, vm, mlen, 3, fn);
2307        break;
2308    }
2309
2310    clearfn(vd, vl, vl * dsz,  vlmax * dsz);
2311}
2312
2313/* generate helpers for fixed point instructions with OPIVX format */
2314#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ, CLEAR_FN)          \
2315void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
2316        void *vs2, CPURISCVState *env, uint32_t desc)     \
2317{                                                         \
2318    vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ,    \
2319                 do_##NAME, CLEAR_FN);                    \
2320}
2321
2322RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2323RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2324RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2325RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2326GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1, clearb)
2327GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2, clearh)
2328GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4, clearl)
2329GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8, clearq)
2330
2331static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2332{
2333    int8_t res = a + b;
2334    if ((res ^ a) & (res ^ b) & INT8_MIN) {
2335        res = a > 0 ? INT8_MAX : INT8_MIN;
2336        env->vxsat = 0x1;
2337    }
2338    return res;
2339}
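
/*
 * The test (res ^ a) & (res ^ b) & INT8_MIN flags signed overflow: it is
 * non-zero only when both addends share a sign and the result's sign
 * differs.  A hypothetical spot-check:
 */
static inline bool sadd8_overflow_example(CPURISCVState *env)
{
    /* 100 + 100 overflows int8_t and must clamp to INT8_MAX. */
    return sadd8(env, 0, 100, 100) == INT8_MAX;
}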
2340
2341static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2342{
2343    int16_t res = a + b;
2344    if ((res ^ a) & (res ^ b) & INT16_MIN) {
2345        res = a > 0 ? INT16_MAX : INT16_MIN;
2346        env->vxsat = 0x1;
2347    }
2348    return res;
2349}
2350
2351static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2352{
2353    int32_t res = a + b;
2354    if ((res ^ a) & (res ^ b) & INT32_MIN) {
2355        res = a > 0 ? INT32_MAX : INT32_MIN;
2356        env->vxsat = 0x1;
2357    }
2358    return res;
2359}
2360
2361static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2362{
2363    int64_t res = a + b;
2364    if ((res ^ a) & (res ^ b) & INT64_MIN) {
2365        res = a > 0 ? INT64_MAX : INT64_MIN;
2366        env->vxsat = 0x1;
2367    }
2368    return res;
2369}
2370
2371RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2372RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2373RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2374RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2375GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1, clearb)
2376GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2, clearh)
2377GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4, clearl)
2378GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8, clearq)
2379
2380RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2381RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2382RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2383RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2384GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1, clearb)
2385GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2, clearh)
2386GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4, clearl)
2387GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8, clearq)
2388
2389static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2390{
2391    uint8_t res = a - b;
2392    if (res > a) {
2393        res = 0;
2394        env->vxsat = 0x1;
2395    }
2396    return res;
2397}
2398
2399static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2400                               uint16_t b)
2401{
2402    uint16_t res = a - b;
2403    if (res > a) {
2404        res = 0;
2405        env->vxsat = 0x1;
2406    }
2407    return res;
2408}
2409
2410static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2411                               uint32_t b)
2412{
2413    uint32_t res = a - b;
2414    if (res > a) {
2415        res = 0;
2416        env->vxsat = 0x1;
2417    }
2418    return res;
2419}
2420
2421static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2422                               uint64_t b)
2423{
2424    uint64_t res = a - b;
2425    if (res > a) {
2426        res = 0;
2427        env->vxsat = 0x1;
2428    }
2429    return res;
2430}
2431
2432RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2433RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2434RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2435RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2436GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1, clearb)
2437GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2, clearh)
2438GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4, clearl)
2439GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8, clearq)
2440
2441RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2442RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2443RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2444RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2445GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1, clearb)
2446GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2, clearh)
2447GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4, clearl)
2448GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8, clearq)
2449
2450static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2451{
2452    int8_t res = a - b;
2453    if ((res ^ a) & (a ^ b) & INT8_MIN) {
2454        res = a > 0 ? INT8_MAX : INT8_MIN;
2455        env->vxsat = 0x1;
2456    }
2457    return res;
2458}
2459
2460static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2461{
2462    int16_t res = a - b;
2463    if ((res ^ a) & (a ^ b) & INT16_MIN) {
2464        res = a > 0 ? INT16_MAX : INT16_MIN;
2465        env->vxsat = 0x1;
2466    }
2467    return res;
2468}
2469
2470static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2471{
2472    int32_t res = a - b;
2473    if ((res ^ a) & (a ^ b) & INT32_MIN) {
2474        res = a > 0 ? INT32_MAX : INT32_MIN;
2475        env->vxsat = 0x1;
2476    }
2477    return res;
2478}
2479
2480static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2481{
2482    int64_t res = a - b;
2483    if ((res ^ a) & (a ^ b) & INT64_MIN) {
2484        res = a > 0 ? INT64_MAX : INT64_MIN;
2485        env->vxsat = 0x1;
2486    }
2487    return res;
2488}
2489
2490RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2491RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2492RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2493RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2494GEN_VEXT_VV_RM(vssub_vv_b, 1, 1, clearb)
2495GEN_VEXT_VV_RM(vssub_vv_h, 2, 2, clearh)
2496GEN_VEXT_VV_RM(vssub_vv_w, 4, 4, clearl)
2497GEN_VEXT_VV_RM(vssub_vv_d, 8, 8, clearq)
2498
2499RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2500RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2501RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2502RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2503GEN_VEXT_VX_RM(vssub_vx_b, 1, 1, clearb)
2504GEN_VEXT_VX_RM(vssub_vx_h, 2, 2, clearh)
2505GEN_VEXT_VX_RM(vssub_vx_w, 4, 4, clearl)
2506GEN_VEXT_VX_RM(vssub_vx_d, 8, 8, clearq)
2507
2508/* Vector Single-Width Averaging Add and Subtract */
2509static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2510{
2511    uint8_t d = extract64(v, shift, 1);
2512    uint8_t d1;
2513    uint64_t D1, D2;
2514
2515    if (shift == 0 || shift > 64) {
2516        return 0;
2517    }
2518
2519    d1 = extract64(v, shift - 1, 1);
2520    D1 = extract64(v, 0, shift);
2521    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2522        return d1;
2523    } else if (vxrm == 1) { /* round-to-nearest-even */
2524        if (shift > 1) {
2525            D2 = extract64(v, 0, shift - 1);
2526            return d1 & ((D2 != 0) | d);
2527        } else {
2528            return d1 & d;
2529        }
2530    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2531        return !d & (D1 != 0);
2532    }
2533    return 0; /* round-down (truncate) */
2534}
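
/*
 * Worked example of the four vxrm modes for v = 20 (0b10100) and
 * v = 28 (0b11100) with shift = 3, so the discarded bits (0b100) are
 * exactly one half of the new LSB:
 *
 *                     v = 20 (20/8 = 2.5)   v = 28 (28/8 = 3.5)
 *   rnu (vxrm = 0):   (20 >> 3) + 1 = 3     (28 >> 3) + 1 = 4
 *   rne (vxrm = 1):   (20 >> 3) + 0 = 2     (28 >> 3) + 1 = 4
 *   rdn (vxrm = 2):   (20 >> 3) + 0 = 2     (28 >> 3) + 0 = 3
 *   rod (vxrm = 3):   (20 >> 3) + 1 = 3     (28 >> 3) + 0 = 3
 *
 * rne breaks the tie towards the even result, while rod forces the
 * truncated result to odd whenever nonzero bits were discarded.
 */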
2535
2536static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2537{
2538    int64_t res = (int64_t)a + b;
2539    uint8_t round = get_round(vxrm, res, 1);
2540
2541    return (res >> 1) + round;
2542}
2543
2544static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2545{
2546    int64_t res = a + b;
2547    uint8_t round = get_round(vxrm, res, 1);
2548    int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2549
2550    /* With signed overflow, bit 64 is inverse of bit 63. */
2551    return ((res >> 1) ^ over) + round;
2552}
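
/*
 * A sketch of why the XOR trick works: when the 64-bit sum wraps, the
 * true bit 63 of the infinitely-wide result is the complement of the
 * wrapped sign bit, so flipping it after the arithmetic shift
 * reconstructs the correct average.  For instance, under rnu:
 *
 *     a = INT64_MAX, b = 1
 *     res wraps to INT64_MIN, over = INT64_MIN (overflow detected)
 *     (res >> 1) ^ over = 0x4000000000000000 = (INT64_MAX + 1) / 2
 */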
2553
2554RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2555RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2556RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2557RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2558GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1, clearb)
2559GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2, clearh)
2560GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4, clearl)
2561GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8, clearq)
2562
2563RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2564RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2565RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2566RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2567GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1, clearb)
2568GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2, clearh)
2569GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4, clearl)
2570GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8, clearq)
2571
2572static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2573{
2574    int64_t res = (int64_t)a - b;
2575    uint8_t round = get_round(vxrm, res, 1);
2576
2577    return (res >> 1) + round;
2578}
2579
2580static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2581{
2582    int64_t res = (int64_t)a - b;
2583    uint8_t round = get_round(vxrm, res, 1);
2584    int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2585
2586    /* With signed overflow, bit 64 is inverse of bit 63. */
2587    return ((res >> 1) ^ over) + round;
2588}
2589
2590RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2591RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2592RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2593RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2594GEN_VEXT_VV_RM(vasub_vv_b, 1, 1, clearb)
2595GEN_VEXT_VV_RM(vasub_vv_h, 2, 2, clearh)
2596GEN_VEXT_VV_RM(vasub_vv_w, 4, 4, clearl)
2597GEN_VEXT_VV_RM(vasub_vv_d, 8, 8, clearq)
2598
2599RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2600RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2601RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2602RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2603GEN_VEXT_VX_RM(vasub_vx_b, 1, 1, clearb)
2604GEN_VEXT_VX_RM(vasub_vx_h, 2, 2, clearh)
2605GEN_VEXT_VX_RM(vasub_vx_w, 4, 4, clearl)
2606GEN_VEXT_VX_RM(vasub_vx_d, 8, 8, clearq)
2607
2608/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2609static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2610{
2611    uint8_t round;
2612    int16_t res;
2613
2614    res = (int16_t)a * (int16_t)b;
2615    round = get_round(vxrm, res, 7);
2616    res   = (res >> 7) + round;
2617
2618    if (res > INT8_MAX) {
2619        env->vxsat = 0x1;
2620        return INT8_MAX;
2621    } else if (res < INT8_MIN) {
2622        env->vxsat = 0x1;
2623        return INT8_MIN;
2624    } else {
2625        return res;
2626    }
2627}
2628
2629static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2630{
2631    uint8_t round;
2632    int32_t res;
2633
2634    res = (int32_t)a * (int32_t)b;
2635    round = get_round(vxrm, res, 15);
2636    res   = (res >> 15) + round;
2637
2638    if (res > INT16_MAX) {
2639        env->vxsat = 0x1;
2640        return INT16_MAX;
2641    } else if (res < INT16_MIN) {
2642        env->vxsat = 0x1;
2643        return INT16_MIN;
2644    } else {
2645        return res;
2646    }
2647}
2648
2649static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2650{
2651    uint8_t round;
2652    int64_t res;
2653
2654    res = (int64_t)a * (int64_t)b;
2655    round = get_round(vxrm, res, 31);
2656    res   = (res >> 31) + round;
2657
2658    if (res > INT32_MAX) {
2659        env->vxsat = 0x1;
2660        return INT32_MAX;
2661    } else if (res < INT32_MIN) {
2662        env->vxsat = 0x1;
2663        return INT32_MIN;
2664    } else {
2665        return res;
2666    }
2667}
2668
2669static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2670{
2671    uint8_t round;
2672    uint64_t hi_64, lo_64;
2673    int64_t res;
2674
2675    if (a == INT64_MIN && b == INT64_MIN) {
2676        env->vxsat = 1;
2677        return INT64_MAX;
2678    }
2679
2680    muls64(&lo_64, &hi_64, a, b);
2681    round = get_round(vxrm, lo_64, 63);
2682    /*
2683     * Cannot overflow, as there are always
2684     * 2 sign bits after multiply.
2685     */
2686    res = (hi_64 << 1) | (lo_64 >> 63);
2687    if (round) {
2688        if (res == INT64_MAX) {
2689            env->vxsat = 1;
2690        } else {
2691            res += 1;
2692        }
2693    }
2694    return res;
2695}
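
/*
 * vsmul treats its operands as signed fixed-point fractions in [-1, 1):
 * the double-width product carries two sign bits, so shifting right by
 * SEW - 1 renormalises it.  E.g. vsmul8 with a = b = 0x40 (0.5 in Q0.7)
 * gives res = 0x1000 and (res >> 7) = 0x20, i.e. 0.25 in Q0.7.  The
 * only product that can overflow after the shift is (-1) * (-1), which
 * is why vsmul64 special-cases INT64_MIN * INT64_MIN and saturates to
 * INT64_MAX ("almost 1.0").
 */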
2696
2697RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2698RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2699RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2700RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2701GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1, clearb)
2702GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2, clearh)
2703GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4, clearl)
2704GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8, clearq)
2705
2706RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2707RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2708RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2709RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2710GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1, clearb)
2711GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2, clearh)
2712GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4, clearl)
2713GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8, clearq)
2714
2715/* Vector Widening Saturating Scaled Multiply-Add */
2716static inline uint16_t
2717vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b,
2718          uint16_t c)
2719{
2720    uint8_t round;
2721    uint16_t res = (uint16_t)a * b;
2722
2723    round = get_round(vxrm, res, 4);
2724    res   = (res >> 4) + round;
2725    return saddu16(env, vxrm, c, res);
2726}
2727
2728static inline uint32_t
2729vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b,
2730           uint32_t c)
2731{
2732    uint8_t round;
2733    uint32_t res = (uint32_t)a * b;
2734
2735    round = get_round(vxrm, res, 8);
2736    res   = (res >> 8) + round;
2737    return saddu32(env, vxrm, c, res);
2738}
2739
2740static inline uint64_t
2741vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b,
2742           uint64_t c)
2743{
2744    uint8_t round;
2745    uint64_t res = (uint64_t)a * b;
2746
2747    round = get_round(vxrm, res, 16);
2748    res   = (res >> 16) + round;
2749    return saddu64(env, vxrm, c, res);
2750}
2751
2752#define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
2753static inline void                                                 \
2754do_##NAME(void *vd, void *vs1, void *vs2, int i,                   \
2755          CPURISCVState *env, int vxrm)                            \
2756{                                                                  \
2757    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
2758    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
2759    TD d = *((TD *)vd + HD(i));                                    \
2760    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d);                \
2761}
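
/*
 * For reference, assuming WOP_UUU_B is the usual widening type tuple
 * defined earlier in this file (TD = uint16_t, T1 = T2 = uint8_t,
 * TX1 = TX2 = uint16_t), the RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, ...)
 * instantiation below expands roughly to:
 *
 *   static inline void
 *   do_vwsmaccu_vv_b(void *vd, void *vs1, void *vs2, int i,
 *                    CPURISCVState *env, int vxrm)
 *   {
 *       uint16_t s1 = *((uint8_t *)vs1 + H1(i));
 *       uint16_t s2 = *((uint8_t *)vs2 + H1(i));
 *       uint16_t d = *((uint16_t *)vd + H2(i));
 *       *((uint16_t *)vd + H2(i)) = vwsmaccu8(env, vxrm, s2, s1, d);
 *   }
 */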
2762
2763RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8)
2764RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16)
2765RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32)
2766GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2, clearh)
2767GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4, clearl)
2768GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8, clearq)
2769
2770#define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)         \
2771static inline void                                                 \
2772do_##NAME(void *vd, target_long s1, void *vs2, int i,              \
2773          CPURISCVState *env, int vxrm)                            \
2774{                                                                  \
2775    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
2776    TD d = *((TD *)vd + HD(i));                                    \
2777    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d);       \
2778}
2779
2780RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8)
2781RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16)
2782RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32)
2783GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2, clearh)
2784GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4, clearl)
2785GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8, clearq)
2786
2787static inline int16_t
2788vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c)
2789{
2790    uint8_t round;
2791    int16_t res = (int16_t)a * b;
2792
2793    round = get_round(vxrm, res, 4);
2794    res   = (res >> 4) + round;
2795    return sadd16(env, vxrm, c, res);
2796}
2797
2798static inline int32_t
2799vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c)
2800{
2801    uint8_t round;
2802    int32_t res = (int32_t)a * b;
2803
2804    round = get_round(vxrm, res, 8);
2805    res   = (res >> 8) + round;
2806    return sadd32(env, vxrm, c, res);
2807
2808}
2809
2810static inline int64_t
2811vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c)
2812{
2813    uint8_t round;
2814    int64_t res = (int64_t)a * b;
2815
2816    round = get_round(vxrm, res, 16);
2817    res   = (res >> 16) + round;
2818    return sadd64(env, vxrm, c, res);
2819}
2820
2821RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8)
2822RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16)
2823RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32)
2824GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2, clearh)
2825GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4, clearl)
2826GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8, clearq)
2827RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8)
2828RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16)
2829RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32)
2830GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2, clearh)
2831GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4, clearl)
2832GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8, clearq)
2833
2834static inline int16_t
2835vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c)
2836{
2837    uint8_t round;
2838    int16_t res = a * (int16_t)b;
2839
2840    round = get_round(vxrm, res, 4);
2841    res   = (res >> 4) + round;
2842    return ssub16(env, vxrm, c, res);
2843}
2844
2845static inline int32_t
2846vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, int32_t c)
2846vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, int32_t c)
2847{
2848    uint8_t round;
2849    int32_t res = a * (int32_t)b;
2850
2851    round = get_round(vxrm, res, 8);
2852    res   = (res >> 8) + round;
2853    return ssub32(env, vxrm, c, res);
2854}
2855
2856static inline int64_t
2857vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c)
2858{
2859    uint8_t round;
2860    int64_t res = a * (int64_t)b;
2861
2862    round = get_round(vxrm, res, 16);
2863    res   = (res >> 16) + round;
2864    return ssub64(env, vxrm, c, res);
2865}
2866
2867RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8)
2868RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16)
2869RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32)
2870GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2, clearh)
2871GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4, clearl)
2872GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8, clearq)
2873RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8)
2874RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16)
2875RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32)
2876GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2, clearh)
2877GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4, clearl)
2878GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8, clearq)
2879
2880static inline int16_t
2881vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c)
2882{
2883    uint8_t round;
2884    int16_t res = (int16_t)a * b;
2885
2886    round = get_round(vxrm, res, 4);
2887    res   = (res >> 4) + round;
2888    return ssub16(env, vxrm, c, res);
2889}
2890
2891static inline int32_t
2892vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c)
2893{
2894    uint8_t round;
2895    int32_t res = (int32_t)a * b;
2896
2897    round = get_round(vxrm, res, 8);
2898    res   = (res >> 8) + round;
2899    return ssub32(env, vxrm, c, res);
2900}
2901
2902static inline int64_t
2903vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c)
2904{
2905    uint8_t round;
2906    int64_t res = (int64_t)a * b;
2907
2908    round = get_round(vxrm, res, 16);
2909    res   = (res >> 16) + round;
2910    return ssub64(env, vxrm, c, res);
2911}
2912
2913RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8)
2914RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16)
2915RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32)
2916GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2, clearh)
2917GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4, clearl)
2918GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8, clearq)
2919
2920/* Vector Single-Width Scaling Shift Instructions */
2921static inline uint8_t
2922vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2923{
2924    uint8_t round, shift = b & 0x7;
2925    uint8_t res;
2926
2927    round = get_round(vxrm, a, shift);
2928    res   = (a >> shift)  + round;
2929    return res;
2930}
2931static inline uint16_t
2932vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2933{
2934    uint8_t round, shift = b & 0xf;
2935    uint16_t res;
2936
2937    round = get_round(vxrm, a, shift);
2938    res   = (a >> shift)  + round;
2939    return res;
2940}
2941static inline uint32_t
2942vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2943{
2944    uint8_t round, shift = b & 0x1f;
2945    uint32_t res;
2946
2947    round = get_round(vxrm, a, shift);
2948    res   = (a >> shift)  + round;
2949    return res;
2950}
2951static inline uint64_t
2952vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2953{
2954    uint8_t round, shift = b & 0x3f;
2955    uint64_t res;
2956
2957    round = get_round(vxrm, a, shift);
2958    res   = (a >> shift)  + round;
2959    return res;
2960}
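
/*
 * The scaling shifts are plain shifts with the rounding increment from
 * get_round() added back in.  For example, vssrl8 with a = 0x8f (143)
 * and shift = 4 under rnu gives (143 >> 4) + 1 = 9, i.e. 143/16 =
 * 8.9375 rounded to nearest.
 */
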
2961RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2962RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2963RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2964RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2965GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1, clearb)
2966GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2, clearh)
2967GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4, clearl)
2968GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8, clearq)
2969
2970RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2971RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2972RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2973RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2974GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1, clearb)
2975GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2, clearh)
2976GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4, clearl)
2977GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8, clearq)
2978
2979static inline int8_t
2980vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2981{
2982    uint8_t round, shift = b & 0x7;
2983    int8_t res;
2984
2985    round = get_round(vxrm, a, shift);
2986    res   = (a >> shift)  + round;
2987    return res;
2988}
2989static inline int16_t
2990vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2991{
2992    uint8_t round, shift = b & 0xf;
2993    int16_t res;
2994
2995    round = get_round(vxrm, a, shift);
2996    res   = (a >> shift)  + round;
2997    return res;
2998}
2999static inline int32_t
3000vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
3001{
3002    uint8_t round, shift = b & 0x1f;
3003    int32_t res;
3004
3005    round = get_round(vxrm, a, shift);
3006    res   = (a >> shift)  + round;
3007    return res;
3008}
3009static inline int64_t
3010vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
3011{
3012    uint8_t round, shift = b & 0x3f;
3013    int64_t res;
3014
3015    round = get_round(vxrm, a, shift);
3016    res   = (a >> shift)  + round;
3017    return res;
3018}
3019
3020RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
3021RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
3022RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
3023RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
3024GEN_VEXT_VV_RM(vssra_vv_b, 1, 1, clearb)
3025GEN_VEXT_VV_RM(vssra_vv_h, 2, 2, clearh)
3026GEN_VEXT_VV_RM(vssra_vv_w, 4, 4, clearl)
3027GEN_VEXT_VV_RM(vssra_vv_d, 8, 8, clearq)
3028
3029RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
3030RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
3031RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
3032RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
3033GEN_VEXT_VX_RM(vssra_vx_b, 1, 1, clearb)
3034GEN_VEXT_VX_RM(vssra_vx_h, 2, 2, clearh)
3035GEN_VEXT_VX_RM(vssra_vx_w, 4, 4, clearl)
3036GEN_VEXT_VX_RM(vssra_vx_d, 8, 8, clearq)
3037
3038/* Vector Narrowing Fixed-Point Clip Instructions */
3039static inline int8_t
3040vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
3041{
3042    uint8_t round, shift = b & 0xf;
3043    int16_t res;
3044
3045    round = get_round(vxrm, a, shift);
3046    res   = (a >> shift)  + round;
3047    if (res > INT8_MAX) {
3048        env->vxsat = 0x1;
3049        return INT8_MAX;
3050    } else if (res < INT8_MIN) {
3051        env->vxsat = 0x1;
3052        return INT8_MIN;
3053    } else {
3054        return res;
3055    }
3056}
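
/*
 * Example: vnclip8 with a = 768 (0x0300), shift = 1 and vxrm = rnu
 * computes (768 >> 1) + 0 = 384, which does not fit in int8_t, so the
 * result saturates to INT8_MAX (127) and vxsat is set.
 */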
3057
3058static inline int16_t
3059vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
3060{
3061    uint8_t round, shift = b & 0x1f;
3062    int32_t res;
3063
3064    round = get_round(vxrm, a, shift);
3065    res   = (a >> shift)  + round;
3066    if (res > INT16_MAX) {
3067        env->vxsat = 0x1;
3068        return INT16_MAX;
3069    } else if (res < INT16_MIN) {
3070        env->vxsat = 0x1;
3071        return INT16_MIN;
3072    } else {
3073        return res;
3074    }
3075}
3076
3077static inline int32_t
3078vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
3079{
3080    uint8_t round, shift = b & 0x3f;
3081    int64_t res;
3082
3083    round = get_round(vxrm, a, shift);
3084    res   = (a >> shift)  + round;
3085    if (res > INT32_MAX) {
3086        env->vxsat = 0x1;
3087        return INT32_MAX;
3088    } else if (res < INT32_MIN) {
3089        env->vxsat = 0x1;
3090        return INT32_MIN;
3091    } else {
3092        return res;
3093    }
3094}
3095
3096RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
3097RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
3098RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
3099GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1, clearb)
3100GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2, clearh)
3101GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4, clearl)
3102
3103RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8)
3104RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16)
3105RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32)
3106GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1, clearb)
3107GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2, clearh)
3108GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4, clearl)
3109
3110static inline uint8_t
3111vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
3112{
3113    uint8_t round, shift = b & 0xf;
3114    uint16_t res;
3115
3116    round = get_round(vxrm, a, shift);
3117    res   = (a >> shift)  + round;
3118    if (res > UINT8_MAX) {
3119        env->vxsat = 0x1;
3120        return UINT8_MAX;
3121    } else {
3122        return res;
3123    }
3124}
3125
3126static inline uint16_t
3127vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
3128{
3129    uint8_t round, shift = b & 0x1f;
3130    uint32_t res;
3131
3132    round = get_round(vxrm, a, shift);
3133    res   = (a >> shift)  + round;
3134    if (res > UINT16_MAX) {
3135        env->vxsat = 0x1;
3136        return UINT16_MAX;
3137    } else {
3138        return res;
3139    }
3140}
3141
3142static inline uint32_t
3143vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
3144{
3145    uint8_t round, shift = b & 0x3f;
3146    uint64_t res;
3147
3148    round = get_round(vxrm, a, shift);
3149    res   = (a >> shift)  + round;
3150    if (res > UINT32_MAX) {
3151        env->vxsat = 0x1;
3152        return UINT32_MAX;
3153    } else {
3154        return res;
3155    }
3156}
3157
3158RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
3159RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
3160RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
3161GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1, clearb)
3162GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2, clearh)
3163GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4, clearl)
3164
3165RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8)
3166RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16)
3167RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32)
3168GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1, clearb)
3169GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2, clearh)
3170GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4, clearl)
3171
3172/*
3173 *** Vector Floating-Point Arithmetic Instructions
3174 */
3175/* Vector Single-Width Floating-Point Add/Subtract Instructions */
3176#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
3177static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
3178                      CPURISCVState *env)                      \
3179{                                                              \
3180    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
3181    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
3182    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
3183}
3184
3185#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ, CLEAR_FN)         \
3186void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
3187                  void *vs2, CPURISCVState *env,          \
3188                  uint32_t desc)                          \
3189{                                                         \
3190    uint32_t vlmax = vext_maxsz(desc) / ESZ;              \
3191    uint32_t mlen = vext_mlen(desc);                      \
3192    uint32_t vm = vext_vm(desc);                          \
3193    uint32_t vl = env->vl;                                \
3194    uint32_t i;                                           \
3195                                                          \
3196    for (i = 0; i < vl; i++) {                            \
3197        if (!vm && !vext_elem_mask(v0, mlen, i)) {        \
3198            continue;                                     \
3199        }                                                 \
3200        do_##NAME(vd, vs1, vs2, i, env);                  \
3201    }                                                     \
3202    CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);             \
3203}
3204
3205RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
3206RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
3207RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
3208GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2, clearh)
3209GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4, clearl)
3210GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8, clearq)
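
/*
 * As a sketch, GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2, clearh) above expands
 * to a helper that walks the body elements, skips inactive elements
 * when vm == 0, and then zeroes the tail:
 *
 *   void HELPER(vfadd_vv_h)(void *vd, void *v0, void *vs1,
 *                           void *vs2, CPURISCVState *env,
 *                           uint32_t desc)
 *   {
 *       uint32_t vlmax = vext_maxsz(desc) / 2;
 *       uint32_t mlen = vext_mlen(desc);
 *       uint32_t vm = vext_vm(desc);
 *       uint32_t vl = env->vl;
 *       uint32_t i;
 *
 *       for (i = 0; i < vl; i++) {
 *           if (!vm && !vext_elem_mask(v0, mlen, i)) {
 *               continue;
 *           }
 *           do_vfadd_vv_h(vd, vs1, vs2, i, env);
 *       }
 *       clearh(vd, vl, vl * 2, vlmax * 2);
 *   }
 */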
3211
3212#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
3213static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3214                      CPURISCVState *env)                      \
3215{                                                              \
3216    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
3217    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
3218}
3219
3220#define GEN_VEXT_VF(NAME, ESZ, DSZ, CLEAR_FN)             \
3221void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
3222                  void *vs2, CPURISCVState *env,          \
3223                  uint32_t desc)                          \
3224{                                                         \
3225    uint32_t vlmax = vext_maxsz(desc) / ESZ;              \
3226    uint32_t mlen = vext_mlen(desc);                      \
3227    uint32_t vm = vext_vm(desc);                          \
3228    uint32_t vl = env->vl;                                \
3229    uint32_t i;                                           \
3230                                                          \
3231    for (i = 0; i < vl; i++) {                            \
3232        if (!vm && !vext_elem_mask(v0, mlen, i)) {        \
3233            continue;                                     \
3234        }                                                 \
3235        do_##NAME(vd, s1, vs2, i, env);                   \
3236    }                                                     \
3237    CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);             \
3238}
3239
3240RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
3241RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
3242RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
3243GEN_VEXT_VF(vfadd_vf_h, 2, 2, clearh)
3244GEN_VEXT_VF(vfadd_vf_w, 4, 4, clearl)
3245GEN_VEXT_VF(vfadd_vf_d, 8, 8, clearq)
3246
3247RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
3248RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
3249RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
3250GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2, clearh)
3251GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4, clearl)
3252GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8, clearq)
3253RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
3254RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
3255RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
3256GEN_VEXT_VF(vfsub_vf_h, 2, 2, clearh)
3257GEN_VEXT_VF(vfsub_vf_w, 4, 4, clearl)
3258GEN_VEXT_VF(vfsub_vf_d, 8, 8, clearq)
3259
3260static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
3261{
3262    return float16_sub(b, a, s);
3263}
3264
3265static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
3266{
3267    return float32_sub(b, a, s);
3268}
3269
3270static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
3271{
3272    return float64_sub(b, a, s);
3273}
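
/*
 * Because OPFVF2 invokes OP(s2, s1, ...), these reversed wrappers make
 * vfrsub.vf compute vd[i] = f[rs1] - vs2[i] rather than
 * vs2[i] - f[rs1].
 */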
3274
3275RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
3276RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
3277RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
3278GEN_VEXT_VF(vfrsub_vf_h, 2, 2, clearh)
3279GEN_VEXT_VF(vfrsub_vf_w, 4, 4, clearl)
3280GEN_VEXT_VF(vfrsub_vf_d, 8, 8, clearq)
3281
3282/* Vector Widening Floating-Point Add/Subtract Instructions */
3283static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
3284{
3285    return float32_add(float16_to_float32(a, true, s),
3286            float16_to_float32(b, true, s), s);
3287}
3288
3289static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3290{
3291    return float64_add(float32_to_float64(a, s),
3292            float32_to_float64(b, s), s);
3293
3294}
3295
3296RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3297RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
3298GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4, clearl)
3299GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8, clearq)
3300RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3301RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
3302GEN_VEXT_VF(vfwadd_vf_h, 2, 4, clearl)
3303GEN_VEXT_VF(vfwadd_vf_w, 4, 8, clearq)
3304
3305static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3306{
3307    return float32_sub(float16_to_float32(a, true, s),
3308            float16_to_float32(b, true, s), s);
3309}
3310
3311static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3312{
3313    return float64_sub(float32_to_float64(a, s),
3314            float32_to_float64(b, s), s);
3315
3316}
3317
3318RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3319RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
3320GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4, clearl)
3321GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8, clearq)
3322RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3323RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
3324GEN_VEXT_VF(vfwsub_vf_h, 2, 4, clearl)
3325GEN_VEXT_VF(vfwsub_vf_w, 4, 8, clearq)
3326
3327static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3328{
3329    return float32_add(a, float16_to_float32(b, true, s), s);
3330}
3331
3332static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3333{
3334    return float64_add(a, float32_to_float64(b, s), s);
3335}
3336
3337RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3338RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
3339GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4, clearl)
3340GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8, clearq)
3341RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3342RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
3343GEN_VEXT_VF(vfwadd_wf_h, 2, 4, clearl)
3344GEN_VEXT_VF(vfwadd_wf_w, 4, 8, clearq)
3345
3346static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3347{
3348    return float32_sub(a, float16_to_float32(b, true, s), s);
3349}
3350
3351static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3352{
3353    return float64_sub(a, float32_to_float64(b, s), s);
3354}
3355
3356RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3357RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
3358GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4, clearl)
3359GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8, clearq)
3360RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3361RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
3362GEN_VEXT_VF(vfwsub_wf_h, 2, 4, clearl)
3363GEN_VEXT_VF(vfwsub_wf_w, 4, 8, clearq)
3364
3365/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3366RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3367RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3368RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
3369GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2, clearh)
3370GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4, clearl)
3371GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8, clearq)
3372RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3373RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3374RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
3375GEN_VEXT_VF(vfmul_vf_h, 2, 2, clearh)
3376GEN_VEXT_VF(vfmul_vf_w, 4, 4, clearl)
3377GEN_VEXT_VF(vfmul_vf_d, 8, 8, clearq)
3378
3379RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3380RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3381RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3382GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2, clearh)
3383GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4, clearl)
3384GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8, clearq)
3385RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3386RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3387RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3388GEN_VEXT_VF(vfdiv_vf_h, 2, 2, clearh)
3389GEN_VEXT_VF(vfdiv_vf_w, 4, 4, clearl)
3390GEN_VEXT_VF(vfdiv_vf_d, 8, 8, clearq)
3391
3392static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3393{
3394    return float16_div(b, a, s);
3395}
3396
3397static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3398{
3399    return float32_div(b, a, s);
3400}
3401
3402static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3403{
3404    return float64_div(b, a, s);
3405}
3406
3407RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3408RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3409RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3410GEN_VEXT_VF(vfrdiv_vf_h, 2, 2, clearh)
3411GEN_VEXT_VF(vfrdiv_vf_w, 4, 4, clearl)
3412GEN_VEXT_VF(vfrdiv_vf_d, 8, 8, clearq)
3413
3414/* Vector Widening Floating-Point Multiply */
3415static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3416{
3417    return float32_mul(float16_to_float32(a, true, s),
3418            float16_to_float32(b, true, s), s);
3419}
3420
3421static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3422{
3423    return float64_mul(float32_to_float64(a, s),
3424            float32_to_float64(b, s), s);
3425
3426}
3427RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3428RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3429GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4, clearl)
3430GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8, clearq)
3431RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3432RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3433GEN_VEXT_VF(vfwmul_vf_h, 2, 4, clearl)
3434GEN_VEXT_VF(vfwmul_vf_w, 4, 8, clearq)
3435
3436/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3437#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
3438static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
3439        CPURISCVState *env)                                        \
3440{                                                                  \
3441    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
3442    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
3443    TD d = *((TD *)vd + HD(i));                                    \
3444    *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);          \
3445}
3446
3447static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3448{
3449    return float16_muladd(a, b, d, 0, s);
3450}
3451
3452static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3453{
3454    return float32_muladd(a, b, d, 0, s);
3455}
3456
3457static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3458{
3459    return float64_muladd(a, b, d, 0, s);
3460}
3461
3462RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3463RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3464RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3465GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2, clearh)
3466GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4, clearl)
3467GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8, clearq)
3468
3469#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
3470static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
3471        CPURISCVState *env)                                       \
3472{                                                                 \
3473    TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
3474    TD d = *((TD *)vd + HD(i));                                   \
3475    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3476}
3477
3478RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3479RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3480RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3481GEN_VEXT_VF(vfmacc_vf_h, 2, 2, clearh)
3482GEN_VEXT_VF(vfmacc_vf_w, 4, 4, clearl)
3483GEN_VEXT_VF(vfmacc_vf_d, 8, 8, clearq)
3484
3485static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3486{
3487    return float16_muladd(a, b, d,
3488            float_muladd_negate_c | float_muladd_negate_product, s);
3489}
3490
3491static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3492{
3493    return float32_muladd(a, b, d,
3494            float_muladd_negate_c | float_muladd_negate_product, s);
3495}
3496
3497static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3498{
3499    return float64_muladd(a, b, d,
3500            float_muladd_negate_c | float_muladd_negate_product, s);
3501}
3502
3503RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3504RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3505RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3506GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2, clearh)
3507GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4, clearl)
3508GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8, clearq)
3509RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3510RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3511RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3512GEN_VEXT_VF(vfnmacc_vf_h, 2, 2, clearh)
3513GEN_VEXT_VF(vfnmacc_vf_w, 4, 4, clearl)
3514GEN_VEXT_VF(vfnmacc_vf_d, 8, 8, clearq)
3515
3516static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3517{
3518    return float16_muladd(a, b, d, float_muladd_negate_c, s);
3519}
3520
3521static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3522{
3523    return float32_muladd(a, b, d, float_muladd_negate_c, s);
3524}
3525
3526static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3527{
3528    return float64_muladd(a, b, d, float_muladd_negate_c, s);
3529}
3530
3531RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3532RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3533RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3534GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2, clearh)
3535GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4, clearl)
3536GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8, clearq)
3537RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3538RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3539RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3540GEN_VEXT_VF(vfmsac_vf_h, 2, 2, clearh)
3541GEN_VEXT_VF(vfmsac_vf_w, 4, 4, clearl)
3542GEN_VEXT_VF(vfmsac_vf_d, 8, 8, clearq)
3543
3544static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3545{
3546    return float16_muladd(a, b, d, float_muladd_negate_product, s);
3547}
3548
3549static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3550{
3551    return float32_muladd(a, b, d, float_muladd_negate_product, s);
3552}
3553
3554static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3555{
3556    return float64_muladd(a, b, d, float_muladd_negate_product, s);
3557}
3558
3559RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3560RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3561RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3562GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2, clearh)
3563GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4, clearl)
3564GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8, clearq)
3565RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3566RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3567RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3568GEN_VEXT_VF(vfnmsac_vf_h, 2, 2, clearh)
3569GEN_VEXT_VF(vfnmsac_vf_w, 4, 4, clearl)
3570GEN_VEXT_VF(vfnmsac_vf_d, 8, 8, clearq)
3571
3572static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3573{
3574    return float16_muladd(d, b, a, 0, s);
3575}
3576
3577static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3578{
3579    return float32_muladd(d, b, a, 0, s);
3580}
3581
3582static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3583{
3584    return float64_muladd(d, b, a, 0, s);
3585}
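
/*
 * Note the operand order: OPFVV3/OPFVF3 call OP(s2, s1, d, ...), so
 * the fmacc flavours use the destination as the addend,
 *
 *     vfmacc:  vd[i] = vs1[i] * vs2[i] + vd[i]
 *
 * while the fmadd and fmsub flavours pass (d, b, a) and multiply by
 * the destination instead:
 *
 *     vfmadd:  vd[i] = vs1[i] * vd[i] + vs2[i]
 */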
3586
3587RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3588RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3589RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3590GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2, clearh)
3591GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4, clearl)
3592GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8, clearq)
3593RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3594RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3595RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3596GEN_VEXT_VF(vfmadd_vf_h, 2, 2, clearh)
3597GEN_VEXT_VF(vfmadd_vf_w, 4, 4, clearl)
3598GEN_VEXT_VF(vfmadd_vf_d, 8, 8, clearq)
3599
3600static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3601{
3602    return float16_muladd(d, b, a,
3603            float_muladd_negate_c | float_muladd_negate_product, s);
3604}
3605
3606static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3607{
3608    return float32_muladd(d, b, a,
3609            float_muladd_negate_c | float_muladd_negate_product, s);
3610}
3611
3612static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3613{
3614    return float64_muladd(d, b, a,
3615            float_muladd_negate_c | float_muladd_negate_product, s);
3616}
3617
3618RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3619RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3620RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3621GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2, clearh)
3622GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4, clearl)
3623GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8, clearq)
3624RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3625RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3626RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3627GEN_VEXT_VF(vfnmadd_vf_h, 2, 2, clearh)
3628GEN_VEXT_VF(vfnmadd_vf_w, 4, 4, clearl)
3629GEN_VEXT_VF(vfnmadd_vf_d, 8, 8, clearq)
3630
3631static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3632{
3633    return float16_muladd(d, b, a, float_muladd_negate_c, s);
3634}
3635
3636static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3637{
3638    return float32_muladd(d, b, a, float_muladd_negate_c, s);
3639}
3640
3641static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3642{
3643    return float64_muladd(d, b, a, float_muladd_negate_c, s);
3644}
3645
3646RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3647RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3648RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3649GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2, clearh)
3650GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4, clearl)
3651GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8, clearq)
3652RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3653RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3654RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3655GEN_VEXT_VF(vfmsub_vf_h, 2, 2, clearh)
3656GEN_VEXT_VF(vfmsub_vf_w, 4, 4, clearl)
3657GEN_VEXT_VF(vfmsub_vf_d, 8, 8, clearq)
3658
3659static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3660{
3661    return float16_muladd(d, b, a, float_muladd_negate_product, s);
3662}
3663
3664static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3665{
3666    return float32_muladd(d, b, a, float_muladd_negate_product, s);
3667}
3668
3669static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3670{
3671    return float64_muladd(d, b, a, float_muladd_negate_product, s);
3672}
3673
3674RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3675RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3676RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3677GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2, clearh)
3678GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4, clearl)
3679GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8, clearq)
3680RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3681RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3682RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3683GEN_VEXT_VF(vfnmsub_vf_h, 2, 2, clearh)
3684GEN_VEXT_VF(vfnmsub_vf_w, 4, 4, clearl)
3685GEN_VEXT_VF(vfnmsub_vf_d, 8, 8, clearq)
3686
3687/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3688static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3689{
3690    return float32_muladd(float16_to_float32(a, true, s),
3691                        float16_to_float32(b, true, s), d, 0, s);
3692}
3693
3694static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3695{
3696    return float64_muladd(float32_to_float64(a, s),
3697                        float32_to_float64(b, s), d, 0, s);
3698}
3699
3700RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3701RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3702GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4, clearl)
3703GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8, clearq)
3704RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3705RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3706GEN_VEXT_VF(vfwmacc_vf_h, 2, 4, clearl)
3707GEN_VEXT_VF(vfwmacc_vf_w, 4, 8, clearq)
3708
3709static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3710{
3711    return float32_muladd(float16_to_float32(a, true, s),
3712                        float16_to_float32(b, true, s), d,
3713                        float_muladd_negate_c | float_muladd_negate_product, s);
3714}
3715
3716static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3717{
3718    return float64_muladd(float32_to_float64(a, s),
3719                        float32_to_float64(b, s), d,
3720                        float_muladd_negate_c | float_muladd_negate_product, s);
3721}
3722
3723RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3724RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3725GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4, clearl)
3726GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8, clearq)
3727RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3728RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3729GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4, clearl)
3730GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8, clearq)
3731
3732static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3733{
3734    return float32_muladd(float16_to_float32(a, true, s),
3735                        float16_to_float32(b, true, s), d,
3736                        float_muladd_negate_c, s);
3737}
3738
3739static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3740{
3741    return float64_muladd(float32_to_float64(a, s),
3742                        float32_to_float64(b, s), d,
3743                        float_muladd_negate_c, s);
3744}
3745
3746RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3747RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3748GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4, clearl)
3749GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8, clearq)
3750RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3751RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3752GEN_VEXT_VF(vfwmsac_vf_h, 2, 4, clearl)
3753GEN_VEXT_VF(vfwmsac_vf_w, 4, 8, clearq)
3754
3755static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3756{
3757    return float32_muladd(float16_to_float32(a, true, s),
3758                        float16_to_float32(b, true, s), d,
3759                        float_muladd_negate_product, s);
3760}
3761
3762static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3763{
3764    return float64_muladd(float32_to_float64(a, s),
3765                        float32_to_float64(b, s), d,
3766                        float_muladd_negate_product, s);
3767}
3768
3769RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3770RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3771GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4, clearl)
3772GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8, clearq)
3773RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3774RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3775GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4, clearl)
3776GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8, clearq)
3777
3778/* Vector Floating-Point Square-Root Instruction */
3779/* (TD, T2, TX2) */
3780#define OP_UU_H uint16_t, uint16_t, uint16_t
3781#define OP_UU_W uint32_t, uint32_t, uint32_t
3782#define OP_UU_D uint64_t, uint64_t, uint64_t
3783
3784#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
3785static void do_##NAME(void *vd, void *vs2, int i,      \
3786        CPURISCVState *env)                            \
3787{                                                      \
3788    TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3789    *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
3790}
3791
3792#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ, CLEAR_FN)       \
3793void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3794        CPURISCVState *env, uint32_t desc)             \
3795{                                                      \
3796    uint32_t vlmax = vext_maxsz(desc) / ESZ;           \
3797    uint32_t mlen = vext_mlen(desc);                   \
3798    uint32_t vm = vext_vm(desc);                       \
3799    uint32_t vl = env->vl;                             \
3800    uint32_t i;                                        \
3801                                                       \
3802    if (vl == 0) {                                     \
3803        return;                                        \
3804    }                                                  \
3805    for (i = 0; i < vl; i++) {                         \
3806        if (!vm && !vext_elem_mask(v0, mlen, i)) {     \
3807            continue;                                  \
3808        }                                              \
3809        do_##NAME(vd, vs2, i, env);                    \
3810    }                                                  \
3811    CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);          \
3812}
3813
3814RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3815RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3816RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3817GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2, clearh)
3818GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4, clearl)
3819GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8, clearq)
3820
3821/* Vector Floating-Point MIN/MAX Instructions */
3822RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum)
3823RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum)
3824RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum)
3825GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2, clearh)
3826GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4, clearl)
3827GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8, clearq)
3828RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum)
3829RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum)
3830RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum)
3831GEN_VEXT_VF(vfmin_vf_h, 2, 2, clearh)
3832GEN_VEXT_VF(vfmin_vf_w, 4, 4, clearl)
3833GEN_VEXT_VF(vfmin_vf_d, 8, 8, clearq)
3834
3835RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum)
3836RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum)
3837RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum)
3838GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2, clearh)
3839GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4, clearl)
3840GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8, clearq)
3841RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum)
3842RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum)
3843RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum)
3844GEN_VEXT_VF(vfmax_vf_h, 2, 2, clearh)
3845GEN_VEXT_VF(vfmax_vf_w, 4, 4, clearl)
3846GEN_VEXT_VF(vfmax_vf_d, 8, 8, clearq)
3847
3848/* Vector Floating-Point Sign-Injection Instructions */
3849static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3850{
3851    return deposit64(b, 0, 15, a);
3852}
3853
3854static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3855{
3856    return deposit64(b, 0, 31, a);
3857}
3858
3859static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3860{
3861    return deposit64(b, 0, 63, a);
3862}
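
/*
 * With the OPFVV2/OPFVF2 argument order (s2, s1), a is the magnitude
 * source (vs2) and b supplies the sign bit (vs1 or rs1).  For single
 * precision:
 *
 *     fsgnj32(0x3f800000, 0x80000000) == 0xbf800000
 *
 * i.e. +1.0f combined with the sign of -0.0f gives -1.0f.
 */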
3863
3864RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3865RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3866RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3867GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2, clearh)
3868GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4, clearl)
3869GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8, clearq)
3870RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3871RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3872RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3873GEN_VEXT_VF(vfsgnj_vf_h, 2, 2, clearh)
3874GEN_VEXT_VF(vfsgnj_vf_w, 4, 4, clearl)
3875GEN_VEXT_VF(vfsgnj_vf_d, 8, 8, clearq)
3876
3877static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3878{
3879    return deposit64(~b, 0, 15, a);
3880}
3881
3882static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3883{
3884    return deposit64(~b, 0, 31, a);
3885}
3886
3887static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3888{
3889    return deposit64(~b, 0, 63, a);
3890}
3891
3892RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3893RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3894RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3895GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2, clearh)
3896GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4, clearl)
3897GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8, clearq)
3898RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3899RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3900RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3901GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2, clearh)
3902GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4, clearl)
3903GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8, clearq)
3904
3905static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3906{
3907    return deposit64(b ^ a, 0, 15, a);
3908}
3909
3910static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3911{
3912    return deposit64(b ^ a, 0, 31, a);
3913}
3914
3915static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3916{
3917    return deposit64(b ^ a, 0, 63, a);
3918}
3919
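/*
 * fsgnjxN(a, b) keeps the magnitude of a and XORs the two sign bits.
 * For example, fsgnjx32(0xbf800000, 0x80000000) == 0x3f800000,
 * i.e. (-1.0, -0.0) -> +1.0.
 */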
3920RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3921RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3922RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
3923GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2, clearh)
3924GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4, clearl)
3925GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8, clearq)
3926RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3927RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3928RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
3929GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2, clearh)
3930GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4, clearl)
3931GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8, clearq)
3932
3933/* Vector Floating-Point Compare Instructions */
3934#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
3935void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
3936                  CPURISCVState *env, uint32_t desc)          \
3937{                                                             \
3938    uint32_t mlen = vext_mlen(desc);                          \
3939    uint32_t vm = vext_vm(desc);                              \
3940    uint32_t vl = env->vl;                                    \
3941    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);        \
3942    uint32_t i;                                               \
3943                                                              \
3944    for (i = 0; i < vl; i++) {                                \
3945        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
3946        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3947        if (!vm && !vext_elem_mask(v0, mlen, i)) {            \
3948            continue;                                         \
3949        }                                                     \
3950        vext_set_elem_mask(vd, mlen, i,                       \
3951                           DO_OP(s2, s1, &env->fp_status));   \
3952    }                                                         \
3953    for (; i < vlmax; i++) {                                  \
3954        vext_set_elem_mask(vd, mlen, i, 0);                   \
3955    }                                                         \
3956}
3957
3958GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
3959GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
3960GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
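/*
 * The compare helpers produce a mask, not a vector of elements: for each
 * active element i < vl, the boolean DO_OP(vs2[i], vs1[i]) is written to
 * mask bit i of vd, inactive body elements are left unchanged, and mask
 * bits from vl up to VLMAX are cleared.  E.g. vmfeq.vv with vl = 2 sets
 * bit 0 to (vs2[0] == vs1[0]) and bit 1 to (vs2[1] == vs1[1]).
 */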
3961
3962#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
3963void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
3964                  CPURISCVState *env, uint32_t desc)                \
3965{                                                                   \
3966    uint32_t mlen = vext_mlen(desc);                                \
3967    uint32_t vm = vext_vm(desc);                                    \
3968    uint32_t vl = env->vl;                                          \
3969    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);              \
3970    uint32_t i;                                                     \
3971                                                                    \
3972    for (i = 0; i < vl; i++) {                                      \
3973        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
3974        if (!vm && !vext_elem_mask(v0, mlen, i)) {                  \
3975            continue;                                               \
3976        }                                                           \
3977        vext_set_elem_mask(vd, mlen, i,                             \
3978                           DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
3979    }                                                               \
3980    for (; i < vlmax; i++) {                                        \
3981        vext_set_elem_mask(vd, mlen, i, 0);                         \
3982    }                                                               \
3983}
3984
3985GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
3986GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
3987GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3988
3989static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3990{
3991    FloatRelation compare = float16_compare_quiet(a, b, s);
3992    return compare != float_relation_equal;
3993}
3994
3995static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3996{
3997    FloatRelation compare = float32_compare_quiet(a, b, s);
3998    return compare != float_relation_equal;
3999}
4000
4001static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4002{
4003    FloatRelation compare = float64_compare_quiet(a, b, s);
4004    return compare != float_relation_equal;
4005}
4006
4007GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4008GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4009GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4010GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4011GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4012GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
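/*
 * vmfne is built from the quiet compare so that an unordered result (a
 * NaN operand) also counts as not-equal: e.g. vmfne32(qNaN, x) is true
 * for every x, as IEEE 754 requires.
 */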
4013
4014GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4015GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4016GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4017GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4018GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4019GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4020
4021GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4022GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4023GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4024GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4025GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4026GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4027
4028static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4029{
4030    FloatRelation compare = float16_compare(a, b, s);
4031    return compare == float_relation_greater;
4032}
4033
4034static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4035{
4036    FloatRelation compare = float32_compare(a, b, s);
4037    return compare == float_relation_greater;
4038}
4039
4040static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4041{
4042    FloatRelation compare = float64_compare(a, b, s);
4043    return compare == float_relation_greater;
4044}
4045
4046GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4047GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4048GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4049
4050static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4051{
4052    FloatRelation compare = float16_compare(a, b, s);
4053    return compare == float_relation_greater ||
4054           compare == float_relation_equal;
4055}
4056
4057static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4058{
4059    FloatRelation compare = float32_compare(a, b, s);
4060    return compare == float_relation_greater ||
4061           compare == float_relation_equal;
4062}
4063
4064static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4065{
4066    FloatRelation compare = float64_compare(a, b, s);
4067    return compare == float_relation_greater ||
4068           compare == float_relation_equal;
4069}
4070
4071GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4072GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4073GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
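/*
 * vmfgt/vmfge (like vmflt/vmfle above) use the signaling compare, so a
 * NaN operand raises the invalid flag and the element result is 0, since
 * an unordered pair is neither greater than nor equal.
 */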
4074
4075GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet)
4076GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet)
4077GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet)
4078GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet)
4079GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet)
4080GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet)
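/*
 * The leading '!' becomes part of the DO_OP(...) expansion in the
 * generator macros, so each element test reads
 * !floatN_unordered_quiet(s2, s1, &env->fp_status), i.e. "ordered".
 */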
4081
4082/* Vector Floating-Point Classify Instruction */
4083#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
4084static void do_##NAME(void *vd, void *vs2, int i)      \
4085{                                                      \
4086    TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
4087    *((TD *)vd + HD(i)) = OP(s2);                      \
4088}
4089
4090#define GEN_VEXT_V(NAME, ESZ, DSZ, CLEAR_FN)           \
4091void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
4092                  CPURISCVState *env, uint32_t desc)   \
4093{                                                      \
4094    uint32_t vlmax = vext_maxsz(desc) / ESZ;           \
4095    uint32_t mlen = vext_mlen(desc);                   \
4096    uint32_t vm = vext_vm(desc);                       \
4097    uint32_t vl = env->vl;                             \
4098    uint32_t i;                                        \
4099                                                       \
4100    for (i = 0; i < vl; i++) {                         \
4101        if (!vm && !vext_elem_mask(v0, mlen, i)) {     \
4102            continue;                                  \
4103        }                                              \
4104        do_##NAME(vd, vs2, i);                         \
4105    }                                                  \
4106    CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);          \
4107}
4108
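/*
 * The fclass helpers return the same ten-bit classification mask as the
 * scalar fclass instructions: bit 0 = -inf, 1 = negative normal,
 * 2 = negative subnormal, 3 = -0, 4 = +0, 5 = positive subnormal,
 * 6 = positive normal, 7 = +inf, 8 = signaling NaN, 9 = quiet NaN.
 * For example, fclass_s(0xff800000) == 1 << 0 and, with the default
 * (snan_bit_is_one == false) NaN encoding, fclass_s(0x7fc00000) == 1 << 9.
 */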
4109target_ulong fclass_h(uint64_t frs1)
4110{
4111    float16 f = frs1;
4112    bool sign = float16_is_neg(f);
4113
4114    if (float16_is_infinity(f)) {
4115        return sign ? 1 << 0 : 1 << 7;
4116    } else if (float16_is_zero(f)) {
4117        return sign ? 1 << 3 : 1 << 4;
4118    } else if (float16_is_zero_or_denormal(f)) {
4119        return sign ? 1 << 2 : 1 << 5;
4120    } else if (float16_is_any_nan(f)) {
4121        float_status s = { }; /* for snan_bit_is_one */
4122        return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4123    } else {
4124        return sign ? 1 << 1 : 1 << 6;
4125    }
4126}
4127
4128target_ulong fclass_s(uint64_t frs1)
4129{
4130    float32 f = frs1;
4131    bool sign = float32_is_neg(f);
4132
4133    if (float32_is_infinity(f)) {
4134        return sign ? 1 << 0 : 1 << 7;
4135    } else if (float32_is_zero(f)) {
4136        return sign ? 1 << 3 : 1 << 4;
4137    } else if (float32_is_zero_or_denormal(f)) {
4138        return sign ? 1 << 2 : 1 << 5;
4139    } else if (float32_is_any_nan(f)) {
4140        float_status s = { }; /* for snan_bit_is_one */
4141        return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4142    } else {
4143        return sign ? 1 << 1 : 1 << 6;
4144    }
4145}
4146
4147target_ulong fclass_d(uint64_t frs1)
4148{
4149    float64 f = frs1;
4150    bool sign = float64_is_neg(f);
4151
4152    if (float64_is_infinity(f)) {
4153        return sign ? 1 << 0 : 1 << 7;
4154    } else if (float64_is_zero(f)) {
4155        return sign ? 1 << 3 : 1 << 4;
4156    } else if (float64_is_zero_or_denormal(f)) {
4157        return sign ? 1 << 2 : 1 << 5;
4158    } else if (float64_is_any_nan(f)) {
4159        float_status s = { }; /* for snan_bit_is_one */
4160        return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4161    } else {
4162        return sign ? 1 << 1 : 1 << 6;
4163    }
4164}
4165
4166RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4167RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4168RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4169GEN_VEXT_V(vfclass_v_h, 2, 2, clearh)
4170GEN_VEXT_V(vfclass_v_w, 4, 4, clearl)
4171GEN_VEXT_V(vfclass_v_d, 8, 8, clearq)
4172
4173/* Vector Floating-Point Merge Instruction */
4174#define GEN_VFMERGE_VF(NAME, ETYPE, H, CLEAR_FN)              \
4175void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4176                  CPURISCVState *env, uint32_t desc)          \
4177{                                                             \
4178    uint32_t mlen = vext_mlen(desc);                          \
4179    uint32_t vm = vext_vm(desc);                              \
4180    uint32_t vl = env->vl;                                    \
4181    uint32_t esz = sizeof(ETYPE);                             \
4182    uint32_t vlmax = vext_maxsz(desc) / esz;                  \
4183    uint32_t i;                                               \
4184                                                              \
4185    for (i = 0; i < vl; i++) {                                \
4186        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
4187        *((ETYPE *)vd + H(i))                                 \
4188          = (!vm && !vext_elem_mask(v0, mlen, i) ? s2 : s1);  \
4189    }                                                         \
4190    CLEAR_FN(vd, vl, vl * esz, vlmax * esz);                  \
4191}
4192
4193GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2, clearh)
4194GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4, clearl)
4195GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8, clearq)
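/*
 * For example, with mask v0 = {1, 0, 1} and vs2 = {a, b, c}, vfmerge.vfm
 * writes vd = {f[rs1], b, f[rs1]}: active elements take the scalar,
 * inactive elements take the corresponding vs2 element.
 */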
4196
4197/* Single-Width Floating-Point/Integer Type-Convert Instructions */
4198/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4199RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4200RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4201RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4202GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2, clearh)
4203GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4, clearl)
4204GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8, clearq)
4205
4206/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4207RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4208RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4209RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4210GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2, clearh)
4211GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4, clearl)
4212GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8, clearq)
4213
4214/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4215RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4216RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4217RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4218GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2, clearh)
4219GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4, clearl)
4220GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8, clearq)
4221
4222/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4223RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4224RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4225RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4226GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2, clearh)
4227GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4, clearl)
4228GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8, clearq)
4229
4230/* Widening Floating-Point/Integer Type-Convert Instructions */
4231/* (TD, T2, TX2) */
4232#define WOP_UU_H uint32_t, uint16_t, uint16_t
4233#define WOP_UU_W uint64_t, uint32_t, uint32_t
4234/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
4235RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4236RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4237GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4, clearl)
4238GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8, clearq)
4239
4240/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4241RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4242RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4243GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4, clearl)
4244GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8, clearq)
4245
4246/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
4247RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4248RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4249GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4, clearl)
4250GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8, clearq)
4251
4252/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4253RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4254RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4255GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4, clearl)
4256GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8, clearq)
4257
4258/*
4259 * vfwcvt.f.f.v vd, vs2, vm #
4260 * Convert single-width float to double-width float.
4261 */
4262static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4263{
4264    return float16_to_float32(a, true, s); /* true: IEEE half-precision */
4265}
4266
4267RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4268RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4269GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4, clearl)
4270GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8, clearq)
4271
4272/* Narrowing Floating-Point/Integer Type-Convert Instructions */
4273/* (TD, T2, TX2) */
4274#define NOP_UU_H uint16_t, uint32_t, uint32_t
4275#define NOP_UU_W uint32_t, uint64_t, uint64_t
4276/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
4277RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16)
4278RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32)
4279GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2, clearh)
4280GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4, clearl)
4281
4282/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
4283RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16)
4284RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32)
4285GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2, clearh)
4286GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4, clearl)
4287
4288/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
4289RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16)
4290RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32)
4291GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2, clearh)
4292GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4, clearl)
4293
4294/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
4295RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16)
4296RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32)
4297GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2, clearh)
4298GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4, clearl)
4299
4300/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4301static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4302{
4303    return float32_to_float16(a, true, s); /* true: IEEE half-precision */
4304}
4305
4306RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16)
4307RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32)
4308GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2, clearh)
4309GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4, clearl)
4310
4311/*
4312 *** Vector Reduction Operations
4313 */
4314/* Vector Single-Width Integer Reduction Instructions */
4315#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\
4316void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4317        void *vs2, CPURISCVState *env, uint32_t desc)     \
4318{                                                         \
4319    uint32_t mlen = vext_mlen(desc);                      \
4320    uint32_t vm = vext_vm(desc);                          \
4321    uint32_t vl = env->vl;                                \
4322    uint32_t i;                                           \
4323    uint32_t tot = env_archcpu(env)->cfg.vlen / 8;        \
4324    TD s1 =  *((TD *)vs1 + HD(0));                        \
4325                                                          \
4326    for (i = 0; i < vl; i++) {                            \
4327        TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
4328        if (!vm && !vext_elem_mask(v0, mlen, i)) {        \
4329            continue;                                     \
4330        }                                                 \
4331        s1 = OP(s1, (TD)s2);                              \
4332    }                                                     \
4333    *((TD *)vd + HD(0)) = s1;                             \
4334    CLEAR_FN(vd, 1, sizeof(TD), tot);                     \
4335}
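/*
 * The reduction is sequential: the accumulator starts at vs1[0], folds in
 * every active element of vs2 in index order, and only vd[0] is written;
 * the rest of the destination register (up to VLEN/8 bytes) is cleared.
 * For example, vredsum.vs with vs1[0] = 10 and active vs2 elements
 * {1, 2, 3} produces vd[0] = 16.
 */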
4336
4337/* vd[0] = sum(vs1[0], vs2[*]) */
4338GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD, clearb)
4339GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD, clearh)
4340GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD, clearl)
4341GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD, clearq)
4342
4343/* vd[0] = maxu(vs1[0], vs2[*]) */
4344GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX, clearb)
4345GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX, clearh)
4346GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX, clearl)
4347GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX, clearq)
4348
4349/* vd[0] = max(vs1[0], vs2[*]) */
4350GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX, clearb)
4351GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX, clearh)
4352GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX, clearl)
4353GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX, clearq)
4354
4355/* vd[0] = minu(vs1[0], vs2[*]) */
4356GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN, clearb)
4357GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN, clearh)
4358GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN, clearl)
4359GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN, clearq)
4360
4361/* vd[0] = min(vs1[0], vs2[*]) */
4362GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN, clearb)
4363GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN, clearh)
4364GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN, clearl)
4365GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN, clearq)
4366
4367/* vd[0] = and(vs1[0], vs2[*]) */
4368GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND, clearb)
4369GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND, clearh)
4370GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND, clearl)
4371GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND, clearq)
4372
4373/* vd[0] = or(vs1[0], vs2[*]) */
4374GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR, clearb)
4375GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR, clearh)
4376GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR, clearl)
4377GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR, clearq)
4378
4379/* vd[0] = xor(vs1[0], vs2[*]) */
4380GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR, clearb)
4381GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR, clearh)
4382GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR, clearl)
4383GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR, clearq)
4384
4385/* Vector Widening Integer Reduction Instructions */
4386/* Signed sum reduction into double-width accumulator */
4387GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD, clearh)
4388GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD, clearl)
4389GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD, clearq)
4390
4391/* Unsigned sum reduction into double-width accumulator */
4392GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD, clearh)
4393GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD, clearl)
4394GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD, clearq)
4395
4396/* Vector Single-Width Floating-Point Reduction Instructions */
4397#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\
4398void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
4399                  void *vs2, CPURISCVState *env,           \
4400                  uint32_t desc)                           \
4401{                                                          \
4402    uint32_t mlen = vext_mlen(desc);                       \
4403    uint32_t vm = vext_vm(desc);                           \
4404    uint32_t vl = env->vl;                                 \
4405    uint32_t i;                                            \
4406    uint32_t tot = env_archcpu(env)->cfg.vlen / 8;         \
4407    TD s1 =  *((TD *)vs1 + HD(0));                         \
4408                                                           \
4409    for (i = 0; i < vl; i++) {                             \
4410        TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
4411        if (!vm && !vext_elem_mask(v0, mlen, i)) {         \
4412            continue;                                      \
4413        }                                                  \
4414        s1 = OP(s1, (TD)s2, &env->fp_status);              \
4415    }                                                      \
4416    *((TD *)vd + HD(0)) = s1;                              \
4417    CLEAR_FN(vd, 1, sizeof(TD), tot);                      \
4418}
4419
4420/* Unordered sum */
4421GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add, clearh)
4422GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add, clearl)
4423GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add, clearq)
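/*
 * The "unordered" sum leaves the association of the additions
 * unspecified; this helper simply accumulates the active elements in
 * increasing index order, which is one of the permitted orderings.
 */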
4424
4425/* Maximum value */
4426GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum, clearh)
4427GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum, clearl)
4428GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum, clearq)
4429
4430/* Minimum value */
4431GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum, clearh)
4432GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum, clearl)
4433GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum, clearq)
4434
4435/* Vector Widening Floating-Point Reduction Instructions */
4436/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4437void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4438                            void *vs2, CPURISCVState *env, uint32_t desc)
4439{
4440    uint32_t mlen = vext_mlen(desc);
4441    uint32_t vm = vext_vm(desc);
4442    uint32_t vl = env->vl;
4443    uint32_t i;
4444    uint32_t tot = env_archcpu(env)->cfg.vlen / 8;
4445    uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
4446
4447    for (i = 0; i < vl; i++) {
4448        uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4449        if (!vm && !vext_elem_mask(v0, mlen, i)) {
4450            continue;
4451        }
4452        s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4453                         &env->fp_status);
4454    }
4455    *((uint32_t *)vd + H4(0)) = s1;
4456    clearl(vd, 1, sizeof(uint32_t), tot);
4457}
4458
4459void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4460                            void *vs2, CPURISCVState *env, uint32_t desc)
4461{
4462    uint32_t mlen = vext_mlen(desc);
4463    uint32_t vm = vext_vm(desc);
4464    uint32_t vl = env->vl;
4465    uint32_t i;
4466    uint32_t tot = env_archcpu(env)->cfg.vlen / 8;
4467    uint64_t s1 =  *((uint64_t *)vs1);
4468
4469    for (i = 0; i < vl; i++) {
4470        uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4471        if (!vm && !vext_elem_mask(v0, mlen, i)) {
4472            continue;
4473        }
4474        s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4475                         &env->fp_status);
4476    }
4477    *((uint64_t *)vd) = s1;
4478    clearq(vd, 1, sizeof(uint64_t), tot);
4479}
4480
4481/*
4482 *** Vector Mask Operations
4483 */
4484/* Vector Mask-Register Logical Instructions */
4485#define GEN_VEXT_MASK_VV(NAME, OP)                        \
4486void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4487                  void *vs2, CPURISCVState *env,          \
4488                  uint32_t desc)                          \
4489{                                                         \
4490    uint32_t mlen = vext_mlen(desc);                      \
4491    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;   \
4492    uint32_t vl = env->vl;                                \
4493    uint32_t i;                                           \
4494    int a, b;                                             \
4495                                                          \
4496    for (i = 0; i < vl; i++) {                            \
4497        a = vext_elem_mask(vs1, mlen, i);                 \
4498        b = vext_elem_mask(vs2, mlen, i);                 \
4499        vext_set_elem_mask(vd, mlen, i, OP(b, a));        \
4500    }                                                     \
4501    for (; i < vlmax; i++) {                              \
4502        vext_set_elem_mask(vd, mlen, i, 0);               \
4503    }                                                     \
4504}
4505
4506#define DO_NAND(N, M)  (!(N & M))
4507#define DO_ANDNOT(N, M)  (N & !M)
4508#define DO_NOR(N, M)  (!(N | M))
4509#define DO_ORNOT(N, M)  (N | !M)
4510#define DO_XNOR(N, M)  (!(N ^ M))
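/*
 * The operands are single mask bits (0 or 1) as returned by
 * vext_elem_mask(), so logical '!' acts as a one-bit NOT here:
 * e.g. DO_NAND(1, 1) == 0 and DO_ANDNOT(1, 0) == 1.
 */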
4511
4512GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4513GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4514GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT)
4515GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4516GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4517GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4518GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT)
4519GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4520
4521/* Vector mask population count vmpopc */
4522target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env,
4523                              uint32_t desc)
4524{
4525    target_ulong cnt = 0;
4526    uint32_t mlen = vext_mlen(desc);
4527    uint32_t vm = vext_vm(desc);
4528    uint32_t vl = env->vl;
4529    int i;
4530
4531    for (i = 0; i < vl; i++) {
4532        if (vm || vext_elem_mask(v0, mlen, i)) {
4533            if (vext_elem_mask(vs2, mlen, i)) {
4534                cnt++;
4535            }
4536        }
4537    }
4538    return cnt;
4539}
4540
4541/* vmfirst find-first-set mask bit */
4542target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4543                               uint32_t desc)
4544{
4545    uint32_t mlen = vext_mlen(desc);
4546    uint32_t vm = vext_vm(desc);
4547    uint32_t vl = env->vl;
4548    int i;
4549
4550    for (i = 0; i < vl; i++) {
4551        if (vm || vext_elem_mask(v0, mlen, i)) {
4552            if (vext_elem_mask(vs2, mlen, i)) {
4553                return i;
4554            }
4555        }
4556    }
4557    return -1LL;
4558}
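/*
 * For example, if the first set bit among the active elements of vs2 is
 * at element index 2, vmfirst.m returns 2; with no set bit it returns -1,
 * which appears as an all-ones value in the destination scalar register.
 */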
4559
4560enum set_mask_type {
4561    ONLY_FIRST = 1,
4562    INCLUDE_FIRST,
4563    BEFORE_FIRST,
4564};
4565
4566static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4567                   uint32_t desc, enum set_mask_type type)
4568{
4569    uint32_t mlen = vext_mlen(desc);
4570    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;
4571    uint32_t vm = vext_vm(desc);
4572    uint32_t vl = env->vl;
4573    int i;
4574    bool first_mask_bit = false;
4575
4576    for (i = 0; i < vl; i++) {
4577        if (!vm && !vext_elem_mask(v0, mlen, i)) {
4578            continue;
4579        }
4580        /* write a zero to all following active elements */
4581        if (first_mask_bit) {
4582            vext_set_elem_mask(vd, mlen, i, 0);
4583            continue;
4584        }
4585        if (vext_elem_mask(vs2, mlen, i)) {
4586            first_mask_bit = true;
4587            if (type == BEFORE_FIRST) {
4588                vext_set_elem_mask(vd, mlen, i, 0);
4589            } else {
4590                vext_set_elem_mask(vd, mlen, i, 1);
4591            }
4592        } else {
4593            if (type == ONLY_FIRST) {
4594                vext_set_elem_mask(vd, mlen, i, 0);
4595            } else {
4596                vext_set_elem_mask(vd, mlen, i, 1);
4597            }
4598        }
4599    }
4600    for (; i < vlmax; i++) {
4601        vext_set_elem_mask(vd, mlen, i, 0);
4602    }
4603}
4604
4605void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4606                     uint32_t desc)
4607{
4608    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4609}
4610
4611void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4612                     uint32_t desc)
4613{
4614    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4615}
4616
4617void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4618                     uint32_t desc)
4619{
4620    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4621}
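/*
 * Worked example, all elements active, source mask vs2 = {0, 0, 1, 0, 1}
 * (element 0 first):
 *   vmsbf.m -> {1, 1, 0, 0, 0}   set-before-first
 *   vmsif.m -> {1, 1, 1, 0, 0}   set-including-first
 *   vmsof.m -> {0, 0, 1, 0, 0}   set-only-first
 */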
4622
4623/* Vector Iota Instruction */
4624#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H, CLEAR_FN)                        \
4625void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
4626                  uint32_t desc)                                          \
4627{                                                                         \
4628    uint32_t mlen = vext_mlen(desc);                                      \
4629    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4630    uint32_t vm = vext_vm(desc);                                          \
4631    uint32_t vl = env->vl;                                                \
4632    uint32_t sum = 0;                                                     \
4633    int i;                                                                \
4634                                                                          \
4635    for (i = 0; i < vl; i++) {                                            \
4636        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
4637            continue;                                                     \
4638        }                                                                 \
4639        *((ETYPE *)vd + H(i)) = sum;                                      \
4640        if (vext_elem_mask(vs2, mlen, i)) {                               \
4641            sum++;                                                        \
4642        }                                                                 \
4643    }                                                                     \
4644    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
4645}
4646
4647GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1, clearb)
4648GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2, clearh)
4649GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4, clearl)
4650GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8, clearq)
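/*
 * For example, with all elements active and source mask vs2 = {1, 1, 0, 1},
 * viota.m writes vd = {0, 1, 2, 2}: each destination element receives the
 * count of set mask bits strictly below its index.
 */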
4651
4652/* Vector Element Index Instruction */
4653#define GEN_VEXT_VID_V(NAME, ETYPE, H, CLEAR_FN)                          \
4654void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
4655{                                                                         \
4656    uint32_t mlen = vext_mlen(desc);                                      \
4657    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4658    uint32_t vm = vext_vm(desc);                                          \
4659    uint32_t vl = env->vl;                                                \
4660    int i;                                                                \
4661                                                                          \
4662    for (i = 0; i < vl; i++) {                                            \
4663        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
4664            continue;                                                     \
4665        }                                                                 \
4666        *((ETYPE *)vd + H(i)) = i;                                        \
4667    }                                                                     \
4668    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
4669}
4670
4671GEN_VEXT_VID_V(vid_v_b, uint8_t, H1, clearb)
4672GEN_VEXT_VID_V(vid_v_h, uint16_t, H2, clearh)
4673GEN_VEXT_VID_V(vid_v_w, uint32_t, H4, clearl)
4674GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq)
4675
4676/*
4677 *** Vector Permutation Instructions
4678 */
4679
4680/* Vector Slide Instructions */
4681#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H, CLEAR_FN)                    \
4682void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4683                  CPURISCVState *env, uint32_t desc)                      \
4684{                                                                         \
4685    uint32_t mlen = vext_mlen(desc);                                      \
4686    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4687    uint32_t vm = vext_vm(desc);                                          \
4688    uint32_t vl = env->vl;                                                \
4689    target_ulong offset = s1, i;                                          \
4690                                                                          \
4691    for (i = offset; i < vl; i++) {                                       \
4692        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
4693            continue;                                                     \
4694        }                                                                 \
4695        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
4696    }                                                                     \
4697    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
4698}
4699
4700/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4701GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1, clearb)
4702GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2, clearh)
4703GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4, clearl)
4704GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8, clearq)
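/*
 * For example, with OFFSET = x[rs1] = 2 and vl = 5, the active elements
 * get vd[2] = vs2[0], vd[3] = vs2[1], vd[4] = vs2[2]; elements 0 and 1
 * are below the offset and keep their previous values (the loop starts
 * at i = offset).
 */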
4705
4706#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H, CLEAR_FN)                  \
4707void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4708                  CPURISCVState *env, uint32_t desc)                      \
4709{                                                                         \
4710    uint32_t mlen = vext_mlen(desc);                                      \
4711    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4712    uint32_t vm = vext_vm(desc);                                          \
4713    uint32_t vl = env->vl;                                                \
4714    target_ulong offset = s1, i;                                          \
4715                                                                          \
4716    for (i = 0; i < vl; ++i) {                                            \
4717        target_ulong j = i + offset;                                      \
4718        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
4719            continue;                                                     \
4720        }                                                                 \
4721        *((ETYPE *)vd + H(i)) = j >= vlmax ? 0 : *((ETYPE *)vs2 + H(j));  \
4722    }                                                                     \
4723    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
4724}
4725
4726/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4727GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1, clearb)
4728GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2, clearh)
4729GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4, clearl)
4730GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8, clearq)
4731
4732#define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H, CLEAR_FN)                   \
4733void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4734                  CPURISCVState *env, uint32_t desc)                      \
4735{                                                                         \
4736    uint32_t mlen = vext_mlen(desc);                                      \
4737    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4738    uint32_t vm = vext_vm(desc);                                          \
4739    uint32_t vl = env->vl;                                                \
4740    uint32_t i;                                                           \
4741                                                                          \
4742    for (i = 0; i < vl; i++) {                                            \
4743        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
4744            continue;                                                     \
4745        }                                                                 \
4746        if (i == 0) {                                                     \
4747            *((ETYPE *)vd + H(i)) = s1;                                   \
4748        } else {                                                          \
4749            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
4750        }                                                                 \
4751    }                                                                     \
4752    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
4753}
4754
4755/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4756GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t, H1, clearb)
4757GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2, clearh)
4758GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4, clearl)
4759GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8, clearq)
4760
4761#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H, CLEAR_FN)                 \
4762void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4763                  CPURISCVState *env, uint32_t desc)                      \
4764{                                                                         \
4765    uint32_t mlen = vext_mlen(desc);                                      \
4766    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4767    uint32_t vm = vext_vm(desc);                                          \
4768    uint32_t vl = env->vl;                                                \
4769    uint32_t i;                                                           \
4770                                                                          \
4771    for (i = 0; i < vl; i++) {                                            \
4772        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
4773            continue;                                                     \
4774        }                                                                 \
4775        if (i == vl - 1) {                                                \
4776            *((ETYPE *)vd + H(i)) = s1;                                   \
4777        } else {                                                          \
4778            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
4779        }                                                                 \
4780    }                                                                     \
4781    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
4782}
4783
4784/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4785GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1, clearb)
4786GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2, clearh)
4787GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4, clearl)
4788GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8, clearq)
4789
4790/* Vector Register Gather Instruction */
4791#define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H, CLEAR_FN)                    \
4792void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4793                  CPURISCVState *env, uint32_t desc)                      \
4794{                                                                         \
4795    uint32_t mlen = vext_mlen(desc);                                      \
4796    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4797    uint32_t vm = vext_vm(desc);                                          \
4798    uint32_t vl = env->vl;                                                \
4799    uint32_t index, i;                                                    \
4800                                                                          \
4801    for (i = 0; i < vl; i++) {                                            \
4802        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
4803            continue;                                                     \
4804        }                                                                 \
4805        index = *((ETYPE *)vs1 + H(i));                                   \
4806        if (index >= vlmax) {                                             \
4807            *((ETYPE *)vd + H(i)) = 0;                                    \
4808        } else {                                                          \
4809            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4810        }                                                                 \
4811    }                                                                     \
4812    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
4813}
4814
4815/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4816GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, H1, clearb)
4817GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2, clearh)
4818GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4, clearl)
4819GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8, clearq)
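/*
 * For example, with vlmax = 4, vs1 = {3, 0, 9, 1} and vs2 = {a, b, c, d},
 * vrgather.vv produces vd = {d, a, 0, b}; the out-of-range index 9 reads
 * as zero.
 */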
4820
4821#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H, CLEAR_FN)                    \
4822void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4823                  CPURISCVState *env, uint32_t desc)                      \
4824{                                                                         \
4825    uint32_t mlen = vext_mlen(desc);                                      \
4826    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4827    uint32_t vm = vext_vm(desc);                                          \
4828    uint32_t vl = env->vl;                                                \
4829    uint32_t index = s1, i;                                               \
4830                                                                          \
4831    for (i = 0; i < vl; i++) {                                            \
4832        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
4833            continue;                                                     \
4834        }                                                                 \
4835        if (index >= vlmax) {                                             \
4836            *((ETYPE *)vd + H(i)) = 0;                                    \
4837        } else {                                                          \
4838            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4839        }                                                                 \
4840    }                                                                     \
4841    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
4842}
4843
4844/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
4845GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1, clearb)
4846GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2, clearh)
4847GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4, clearl)
4848GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8, clearq)
4849
4850/* Vector Compress Instruction */
4851#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H, CLEAR_FN)                   \
4852void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4853                  CPURISCVState *env, uint32_t desc)                      \
4854{                                                                         \
4855    uint32_t mlen = vext_mlen(desc);                                      \
4856    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
4857    uint32_t vl = env->vl;                                                \
4858    uint32_t num = 0, i;                                                  \
4859                                                                          \
4860    for (i = 0; i < vl; i++) {                                            \
4861        if (!vext_elem_mask(vs1, mlen, i)) {                              \
4862            continue;                                                     \
4863        }                                                                 \
4864        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
4865        num++;                                                            \
4866    }                                                                     \
4867    CLEAR_FN(vd, num, num * sizeof(ETYPE), vlmax * sizeof(ETYPE));        \
4868}
4869
4870/* Compress into vd the elements of vs2 whose vs1 mask bit is set */
4871GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1, clearb)
4872GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2, clearh)
4873GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4, clearl)
4874GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8, clearq)
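/*
 * For example, with vl = 4, vs2 = {a, b, c, d} and vs1 mask bits
 * {1, 0, 1, 1}, vcompress.vm packs vd = {a, c, d} into elements 0..2 and
 * clears the remaining elements up to vlmax.
 */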
4875