qemu/target/riscv/vector_helper.c
   1/*
   2 * RISC-V Vector Extension Helpers for QEMU.
   3 *
   4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms and conditions of the GNU General Public License,
   8 * version 2 or later, as published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope it will be useful, but WITHOUT
  11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13 * more details.
  14 *
  15 * You should have received a copy of the GNU General Public License along with
  16 * this program.  If not, see <http://www.gnu.org/licenses/>.
  17 */
  18
  19#include "qemu/osdep.h"
  20#include "qemu/host-utils.h"
  21#include "qemu/bitops.h"
  22#include "cpu.h"
  23#include "exec/memop.h"
  24#include "exec/exec-all.h"
  25#include "exec/helper-proto.h"
  26#include "fpu/softfloat.h"
  27#include "tcg/tcg-gvec-desc.h"
  28#include "internals.h"
  29#include <math.h>
  30
  31target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
  32                            target_ulong s2)
  33{
  34    int vlmax, vl;
  35    RISCVCPU *cpu = env_archcpu(env);
  36    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
  37    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
  38    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
  39    int xlen = riscv_cpu_xlen(env);
  40    bool vill = (s2 >> (xlen - 1)) & 0x1;
  41    target_ulong reserved = s2 &
  42                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
  43                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);
  44
  45    if (lmul & 4) {
  46        /* Fractional LMUL. */
  47        if (lmul == 4 ||
  48            cpu->cfg.elen >> (8 - lmul) < sew) {
  49            vill = true;
  50        }
  51    }
  52
  53    if ((sew > cpu->cfg.elen)
  54        || vill
  55        || (ediv != 0)
  56        || (reserved != 0)) {
  57        /* only set vill bit. */
  58        env->vill = 1;
  59        env->vtype = 0;
  60        env->vl = 0;
  61        env->vstart = 0;
  62        return 0;
  63    }
  64
  65    vlmax = vext_get_vlmax(cpu, s2);
  66    if (s1 <= vlmax) {
  67        vl = s1;
  68    } else {
  69        vl = vlmax;
  70    }
  71    env->vl = vl;
  72    env->vtype = s2;
  73    env->vstart = 0;
  74    env->vill = 0;
  75    return vl;
  76}
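
/*
 * Illustrative example (numbers are assumptions, not taken from the code
 * above): with VLEN = 128, SEW = 32 (vsew = 2) and LMUL = 2 (vlmul = 1),
 * vext_get_vlmax() yields VLMAX = LMUL * VLEN / SEW = 8.  A requested AVL
 * of s1 = 10 is therefore clamped to vl = 8, while s1 = 5 gives vl = 5.
 */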
  77
  78/*
  79 * Note that vector data is stored in host-endian 64-bit chunks,
  80 * so addressing units smaller than that need a host-endian fixup.
  81 */
  82#ifdef HOST_WORDS_BIGENDIAN
  83#define H1(x)   ((x) ^ 7)
  84#define H1_2(x) ((x) ^ 6)
  85#define H1_4(x) ((x) ^ 4)
  86#define H2(x)   ((x) ^ 3)
  87#define H4(x)   ((x) ^ 1)
  88#define H8(x)   ((x))
  89#else
  90#define H1(x)   (x)
  91#define H1_2(x) (x)
  92#define H1_4(x) (x)
  93#define H2(x)   (x)
  94#define H4(x)   (x)
  95#define H8(x)   (x)
  96#endif
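
/*
 * For illustration: on a big-endian host, logical byte element 0 lives in
 * the most significant byte of the first 64-bit chunk, so H1(0) == 7 and
 * H1(7) == 0; for 16-bit elements H2(0) == 3 (counted in uint16_t units).
 * 64-bit elements need no fixup (H8 is the identity), and on little-endian
 * hosts every H macro is the identity.
 */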
  97
  98static inline uint32_t vext_nf(uint32_t desc)
  99{
 100    return FIELD_EX32(simd_data(desc), VDATA, NF);
 101}
 102
 103static inline uint32_t vext_vm(uint32_t desc)
 104{
 105    return FIELD_EX32(simd_data(desc), VDATA, VM);
 106}
 107
 108/*
 109 * Encode LMUL to lmul as follows:
 110 *     LMUL    vlmul    lmul
 111 *      1       000       0
 112 *      2       001       1
 113 *      4       010       2
 114 *      8       011       3
 115 *      -       100       -
 116 *     1/8      101      -3
 117 *     1/4      110      -2
 118 *     1/2      111      -1
 119 */
 120static inline int32_t vext_lmul(uint32_t desc)
 121{
 122    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
 123}
 124
 125/*
 126 * Get the maximum number of elements that can be operated on.
 127 *
 128 * esz: log2 of element size in bytes.
 129 */
 130static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
 131{
 132    /*
 133     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
 134     * so vlen in bytes (vlenb) is encoded as maxsz.
 135     */
 136    uint32_t vlenb = simd_maxsz(desc);
 137
 138    /* Return VLMAX */
 139    int scale = vext_lmul(desc) - esz;
 140    return scale < 0 ? vlenb >> -scale : vlenb << scale;
 141}
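
/*
 * Worked example (illustrative values): vlenb = 16 (VLEN = 128), esz = 2
 * (SEW = 32) and lmul = 1 (LMUL = 2) give scale = -1, so
 * VLMAX = 16 >> 1 = 8 = LMUL * VLEN / SEW.  A fractional LMUL = 1/2
 * (lmul = -1) with SEW = 8 (esz = 0) also gives VLMAX = 16 >> 1 = 8.
 */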
 142
 143static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
 144{
 145    return (addr & env->cur_pmmask) | env->cur_pmbase;
 146}
 147
 148/*
 149 * This function checks watchpoints before the real load operation.
 150 *
 151 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 152 * check. In user mode, there is no watchpoint support for now.
 153 *
 154 * It will trigger an exception if there is no mapping in the TLB
 155 * and the page table walk can't fill the TLB entry. The guest software
 156 * can then return here after processing the exception, or never return.
 157 */
 158static void probe_pages(CPURISCVState *env, target_ulong addr,
 159                        target_ulong len, uintptr_t ra,
 160                        MMUAccessType access_type)
 161{
 162    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
 163    target_ulong curlen = MIN(pagelen, len);
 164
 165    probe_access(env, adjust_addr(env, addr), curlen, access_type,
 166                 cpu_mmu_index(env, false), ra);
 167    if (len > curlen) {
 168        addr += curlen;
 169        curlen = len - curlen;
 170        probe_access(env, adjust_addr(env, addr), curlen, access_type,
 171                     cpu_mmu_index(env, false), ra);
 172    }
 173}
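
/*
 * Example (assuming 4 KiB target pages): addr = 0x1ffc with len = 16 spans
 * a page boundary, so pagelen = 4 and the first probe covers bytes
 * 0x1ffc..0x1fff; the remaining 12 bytes are probed separately starting at
 * 0x2000 on the following page.
 */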
 174
 175static inline void vext_set_elem_mask(void *v0, int index,
 176                                      uint8_t value)
 177{
 178    int idx = index / 64;
 179    int pos = index % 64;
 180    uint64_t old = ((uint64_t *)v0)[idx];
 181    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
 182}
 183
 184/*
 185 * Earlier designs (pre-0.9) had a varying number of bits
 186 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 187 * (Section 4.5)
 188 */
 189static inline int vext_elem_mask(void *v0, int index)
 190{
 191    int idx = index / 64;
 192    int pos = index  % 64;
 193    return (((uint64_t *)v0)[idx] >> pos) & 1;
 194}
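
/*
 * For example, element index 70 maps to idx = 70 / 64 = 1 and
 * pos = 70 % 64 = 6, i.e. bit 6 of the second uint64_t of the mask
 * register; vext_set_elem_mask() deposits a single bit at the same place.
 */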
 195
 196/* element operations for load and store */
 197typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
 198                               uint32_t idx, void *vd, uintptr_t retaddr);
 199
 200#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
 201static void NAME(CPURISCVState *env, abi_ptr addr,         \
 202                 uint32_t idx, void *vd, uintptr_t retaddr)\
 203{                                                          \
 204    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
 205    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
 206}                                                          \
 207
 208GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
 209GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
 210GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
 211GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
 212
 213#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
 214static void NAME(CPURISCVState *env, abi_ptr addr,         \
 215                 uint32_t idx, void *vd, uintptr_t retaddr)\
 216{                                                          \
 217    ETYPE data = *((ETYPE *)vd + H(idx));                  \
 218    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
 219}
 220
 221GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
 222GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
 223GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
 224GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
 225
 226/*
 227 *** stride: access vector element from strided memory
 228 */
 229static void
 230vext_ldst_stride(void *vd, void *v0, target_ulong base,
 231                 target_ulong stride, CPURISCVState *env,
 232                 uint32_t desc, uint32_t vm,
 233                 vext_ldst_elem_fn *ldst_elem,
 234                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
 235{
 236    uint32_t i, k;
 237    uint32_t nf = vext_nf(desc);
 238    uint32_t max_elems = vext_max_elems(desc, esz);
 239
 240    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
 241        if (!vm && !vext_elem_mask(v0, i)) {
 242            continue;
 243        }
 244
 245        k = 0;
 246        while (k < nf) {
 247            target_ulong addr = base + stride * i + (k << esz);
 248            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
 249            k++;
 250        }
 251    }
 252    env->vstart = 0;
 253}
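
/*
 * Addressing sketch (illustrative values): for a segment load with nf = 3
 * fields of 16-bit elements (esz = 1), field k of element i is read from
 * base + stride * i + (k << 1) and written to register element index
 * i + k * max_elems, i.e. each field lands in its own register group.
 */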
 254
 255#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
 256void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
 257                  target_ulong stride, CPURISCVState *env,              \
 258                  uint32_t desc)                                        \
 259{                                                                       \
 260    uint32_t vm = vext_vm(desc);                                        \
 261    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
 262                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
 263}
 264
 265GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
 266GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
 267GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
 268GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
 269
 270#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
 271void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
 272                  target_ulong stride, CPURISCVState *env,              \
 273                  uint32_t desc)                                        \
 274{                                                                       \
 275    uint32_t vm = vext_vm(desc);                                        \
 276    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
 277                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
 278}
 279
 280GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
 281GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
 282GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
 283GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
 284
 285/*
 286 *** unit-stride: access elements stored contiguously in memory
 287 */
 288
 289/* unmasked unit-stride load and store operation */
 290static void
 291vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
 292             vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
 293             uintptr_t ra, MMUAccessType access_type)
 294{
 295    uint32_t i, k;
 296    uint32_t nf = vext_nf(desc);
 297    uint32_t max_elems = vext_max_elems(desc, esz);
 298
 299    /* load or store bytes from guest memory */
 300    for (i = env->vstart; i < evl; i++, env->vstart++) {
 301        k = 0;
 302        while (k < nf) {
 303            target_ulong addr = base + ((i * nf + k) << esz);
 304            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
 305            k++;
 306        }
 307    }
 308    env->vstart = 0;
 309}
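
/*
 * Layout example (illustrative values): with nf = 2 fields of 32-bit
 * elements (esz = 2), segment i = 3 occupies bytes base + 24 .. base + 31,
 * field 0 at base + 24 and field 1 at base + 28, since the byte offset is
 * (i * nf + k) << esz.
 */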
 310
 311/*
 312 * A masked unit-stride load or store operation is a special case of the
 313 * strided operation, with stride = NF * sizeof(ETYPE).
 314 */
 315
 316#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
 317void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
 318                         CPURISCVState *env, uint32_t desc)             \
 319{                                                                       \
 320    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
 321    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
 322                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
 323}                                                                       \
 324                                                                        \
 325void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
 326                  CPURISCVState *env, uint32_t desc)                    \
 327{                                                                       \
 328    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
 329                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
 330}
 331
 332GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
 333GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
 334GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
 335GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
 336
 337#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                            \
 338void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,          \
 339                         CPURISCVState *env, uint32_t desc)              \
 340{                                                                        \
 341    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));              \
 342    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,   \
 343                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);      \
 344}                                                                        \
 345                                                                         \
 346void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
 347                  CPURISCVState *env, uint32_t desc)                     \
 348{                                                                        \
 349    vext_ldst_us(vd, base, env, desc, STORE_FN,                          \
 350                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
 351}
 352
 353GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
 354GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
 355GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
 356GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
 357
 358/*
 359 *** unit-stride mask load and store, EEW = 1
 360 */
 361void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
 362                    CPURISCVState *env, uint32_t desc)
 363{
 364    /* evl = ceil(vl/8) */
 365    uint8_t evl = (env->vl + 7) >> 3;
 366    vext_ldst_us(vd, base, env, desc, lde_b,
 367                 0, evl, GETPC(), MMU_DATA_LOAD);
 368}
 369
 370void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
 371                    CPURISCVState *env, uint32_t desc)
 372{
 373    /* evl = ceil(vl/8) */
 374    uint8_t evl = (env->vl + 7) >> 3;
 375    vext_ldst_us(vd, base, env, desc, ste_b,
 376                 0, evl, GETPC(), MMU_DATA_STORE);
 377}
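
/*
 * The mask register is transferred as bytes (EEW = 8, hence esz = 0); for
 * example vl = 17 gives evl = ceil(17 / 8) = 3, so three bytes covering
 * mask bits 0..23 are loaded or stored.
 */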
 378
 379/*
 380 *** index: access vector element from indexed memory
 381 */
 382typedef target_ulong vext_get_index_addr(target_ulong base,
 383        uint32_t idx, void *vs2);
 384
 385#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
 386static target_ulong NAME(target_ulong base,            \
 387                         uint32_t idx, void *vs2)      \
 388{                                                      \
 389    return (base + *((ETYPE *)vs2 + H(idx)));          \
 390}
 391
 392GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
 393GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
 394GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
 395GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
 396
 397static inline void
 398vext_ldst_index(void *vd, void *v0, target_ulong base,
 399                void *vs2, CPURISCVState *env, uint32_t desc,
 400                vext_get_index_addr get_index_addr,
 401                vext_ldst_elem_fn *ldst_elem,
 402                uint32_t esz, uintptr_t ra, MMUAccessType access_type)
 403{
 404    uint32_t i, k;
 405    uint32_t nf = vext_nf(desc);
 406    uint32_t vm = vext_vm(desc);
 407    uint32_t max_elems = vext_max_elems(desc, esz);
 408
 409    /* load or store bytes from guest memory */
 410    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
 411        if (!vm && !vext_elem_mask(v0, i)) {
 412            continue;
 413        }
 414
 415        k = 0;
 416        while (k < nf) {
 417            abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
 418            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
 419            k++;
 420        }
 421    }
 422    env->vstart = 0;
 423}
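
/*
 * For example (vlxei16_32_v, illustrative): 16-bit indices are read from
 * vs2 and zero-extended, so field k of data element i is accessed at
 * base + vs2[i] + (k << 2), with esz = 2 because the *data* element is
 * 32 bits wide; the index EEW only selects the idx_* function.
 */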
 424
 425#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
 426void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
 427                  void *vs2, CPURISCVState *env, uint32_t desc)            \
 428{                                                                          \
 429    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
 430                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
 431}
 432
 433GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
 434GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
 435GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
 436GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
 437GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
 438GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
 439GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
 440GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
 441GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
 442GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
 443GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
 444GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
 445GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
 446GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
 447GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
 448GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
 449
 450#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
 451void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
 452                  void *vs2, CPURISCVState *env, uint32_t desc)  \
 453{                                                                \
 454    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
 455                    STORE_FN, ctzl(sizeof(ETYPE)),               \
 456                    GETPC(), MMU_DATA_STORE);                    \
 457}
 458
 459GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
 460GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
 461GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
 462GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
 463GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
 464GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
 465GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
 466GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
 467GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
 468GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
 469GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
 470GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
 471GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
 472GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
 473GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
 474GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
 475
 476/*
 477 *** unit-stride fault-only-first load instructions
 478 */
 479static inline void
 480vext_ldff(void *vd, void *v0, target_ulong base,
 481          CPURISCVState *env, uint32_t desc,
 482          vext_ldst_elem_fn *ldst_elem,
 483          uint32_t esz, uintptr_t ra)
 484{
 485    void *host;
 486    uint32_t i, k, vl = 0;
 487    uint32_t nf = vext_nf(desc);
 488    uint32_t vm = vext_vm(desc);
 489    uint32_t max_elems = vext_max_elems(desc, esz);
 490    target_ulong addr, offset, remain;
 491
 492    /* probe every access */
 493    for (i = env->vstart; i < env->vl; i++) {
 494        if (!vm && !vext_elem_mask(v0, i)) {
 495            continue;
 496        }
 497        addr = adjust_addr(env, base + i * (nf << esz));
 498        if (i == 0) {
 499            probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
 500        } else {
 501            /* if it triggers an exception, no need to check watchpoint */
 502            remain = nf << esz;
 503            while (remain > 0) {
 504                offset = -(addr | TARGET_PAGE_MASK);
 505                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
 506                                         cpu_mmu_index(env, false));
 507                if (host) {
 508#ifdef CONFIG_USER_ONLY
 509                    if (page_check_range(addr, offset, PAGE_READ) < 0) {
 510                        vl = i;
 511                        goto ProbeSuccess;
 512                    }
 513#else
 514                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
 515#endif
 516                } else {
 517                    vl = i;
 518                    goto ProbeSuccess;
 519                }
 520                if (remain <= offset) {
 521                    break;
 522                }
 523                remain -= offset;
 524                addr = adjust_addr(env, addr + offset);
 525            }
 526        }
 527    }
 528ProbeSuccess:
 529    /* load bytes from guest memory */
 530    if (vl != 0) {
 531        env->vl = vl;
 532    }
 533    for (i = env->vstart; i < env->vl; i++) {
 534        k = 0;
 535        if (!vm && !vext_elem_mask(v0, i)) {
 536            continue;
 537        }
 538        while (k < nf) {
 539            target_ulong addr = base + ((i * nf + k) << esz);
 540            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
 541            k++;
 542        }
 543    }
 544    env->vstart = 0;
 545}
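
/*
 * Fault-only-first semantics as implemented above: only element 0 may
 * trap.  If, say, vl = 8 and the page backing element 5 is unmapped, the
 * probe loop shrinks vl to 5 and the load completes normally for elements
 * 0..4; no exception is raised for the faulting element.
 */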
 546
 547#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
 548void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
 549                  CPURISCVState *env, uint32_t desc)      \
 550{                                                         \
 551    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
 552              ctzl(sizeof(ETYPE)), GETPC());              \
 553}
 554
 555GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
 556GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
 557GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
 558GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
 559
 560#define DO_SWAP(N, M) (M)
 561#define DO_AND(N, M)  (N & M)
 562#define DO_XOR(N, M)  (N ^ M)
 563#define DO_OR(N, M)   (N | M)
 564#define DO_ADD(N, M)  (N + M)
 565
 566/* Signed min/max */
 567#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
 568#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))
 569
 570/* Unsigned min/max */
 571#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
 572#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
 573
 574/*
 575 *** load and store whole register instructions
 576 */
 577static void
 578vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
 579                vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
 580                MMUAccessType access_type)
 581{
 582    uint32_t i, k, off, pos;
 583    uint32_t nf = vext_nf(desc);
 584    uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
 585    uint32_t max_elems = vlenb >> esz;
 586
 587    k = env->vstart / max_elems;
 588    off = env->vstart % max_elems;
 589
 590    if (off) {
 591        /* load/store rest of elements of the segment pointed to by vstart */
 592        for (pos = off; pos < max_elems; pos++, env->vstart++) {
 593            target_ulong addr = base + ((pos + k * max_elems) << esz);
 594            ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
 595        }
 596        k++;
 597    }
 598
 599    /* load/store elements for the rest of the segments */
 600    for (; k < nf; k++) {
 601        for (i = 0; i < max_elems; i++, env->vstart++) {
 602            target_ulong addr = base + ((i + k * max_elems) << esz);
 603            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
 604        }
 605    }
 606
 607    env->vstart = 0;
 608}
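
/*
 * Resumption example (illustrative values): with vlenb = 16, 16-bit
 * elements (esz = 1) and nf = 4, max_elems = 8; a saved vstart of 10
 * gives k = 1 and off = 2, so the loop resumes at element 2 of the
 * second register of the group.
 */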
 609
 610#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
 611void HELPER(NAME)(void *vd, target_ulong base,       \
 612                  CPURISCVState *env, uint32_t desc) \
 613{                                                    \
 614    vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
 615                    ctzl(sizeof(ETYPE)), GETPC(),    \
 616                    MMU_DATA_LOAD);                  \
 617}
 618
 619GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
 620GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
 621GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
 622GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
 623GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
 624GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
 625GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
 626GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
 627GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
 628GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
 629GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
 630GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
 631GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
 632GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
 633GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
 634GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
 635
 636#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
 637void HELPER(NAME)(void *vd, target_ulong base,       \
 638                  CPURISCVState *env, uint32_t desc) \
 639{                                                    \
 640    vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
 641                    ctzl(sizeof(ETYPE)), GETPC(),    \
 642                    MMU_DATA_STORE);                 \
 643}
 644
 645GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
 646GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
 647GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
 648GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
 649
 650/*
 651 *** Vector Integer Arithmetic Instructions
 652 */
 653
 654/* expand macro args before macro */
 655#define RVVCALL(macro, ...)  macro(__VA_ARGS__)
 656
 657/* (TD, T1, T2, TX1, TX2) */
 658#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
 659#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
 660#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
 661#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
 662#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
 663#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
 664#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
 665#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
 666#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
 667#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
 668#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
 669#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
 670#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
 671#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
 672#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
 673#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
 674#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
 675#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
 676#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
 677#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
 678#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
 679#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
 680#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
 681#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
 682#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
 683#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
 684#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
 685#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
 686#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
 687#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
 688
 689/* operation of two vector elements */
 690typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
 691
 692#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
 693static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
 694{                                                               \
 695    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
 696    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
 697    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
 698}
 699#define DO_SUB(N, M) (N - M)
 700#define DO_RSUB(N, M) (M - N)
 701
 702RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
 703RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
 704RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
 705RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
 706RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
 707RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
 708RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
 709RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
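
/*
 * For illustration, RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
 * expands (roughly) to:
 *
 *   static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
 *   {
 *       int8_t s1 = *((int8_t *)vs1 + H1(i));
 *       int8_t s2 = *((int8_t *)vs2 + H1(i));
 *       *((int8_t *)vd + H1(i)) = DO_ADD(s2, s1);
 *   }
 */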
 710
 711static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
 712                       CPURISCVState *env, uint32_t desc,
 713                       uint32_t esz, uint32_t dsz,
 714                       opivv2_fn *fn)
 715{
 716    uint32_t vm = vext_vm(desc);
 717    uint32_t vl = env->vl;
 718    uint32_t i;
 719
 720    for (i = env->vstart; i < vl; i++) {
 721        if (!vm && !vext_elem_mask(v0, i)) {
 722            continue;
 723        }
 724        fn(vd, vs1, vs2, i);
 725    }
 726    env->vstart = 0;
 727}
 728
 729/* generate the helpers for OPIVV */
 730#define GEN_VEXT_VV(NAME, ESZ, DSZ)                       \
 731void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
 732                  void *vs2, CPURISCVState *env,          \
 733                  uint32_t desc)                          \
 734{                                                         \
 735    do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
 736               do_##NAME);                                \
 737}
 738
 739GEN_VEXT_VV(vadd_vv_b, 1, 1)
 740GEN_VEXT_VV(vadd_vv_h, 2, 2)
 741GEN_VEXT_VV(vadd_vv_w, 4, 4)
 742GEN_VEXT_VV(vadd_vv_d, 8, 8)
 743GEN_VEXT_VV(vsub_vv_b, 1, 1)
 744GEN_VEXT_VV(vsub_vv_h, 2, 2)
 745GEN_VEXT_VV(vsub_vv_w, 4, 4)
 746GEN_VEXT_VV(vsub_vv_d, 8, 8)
 747
 748typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
 749
 750/*
 751 * (T1)s1 gives the real operand type.
 752 * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
 753 */
 754#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
 755static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
 756{                                                                   \
 757    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
 758    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
 759}
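
/*
 * For a widening op such as vwadd_vx_h (WOP_SSS_H), T1 = int16_t and
 * TX1 = int32_t, so (TX1)(T1)s1 first truncates the scalar to the source
 * element width and then sign-extends it to the operand width; for a
 * same-width op like vadd_vx_b both casts are int8_t and the double cast
 * is a no-op.
 */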
 760
 761RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
 762RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
 763RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
 764RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
 765RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
 766RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
 767RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
 768RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
 769RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
 770RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
 771RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
 772RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
 773
 774static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
 775                       CPURISCVState *env, uint32_t desc,
 776                       uint32_t esz, uint32_t dsz,
 777                       opivx2_fn fn)
 778{
 779    uint32_t vm = vext_vm(desc);
 780    uint32_t vl = env->vl;
 781    uint32_t i;
 782
 783    for (i = env->vstart; i < vl; i++) {
 784        if (!vm && !vext_elem_mask(v0, i)) {
 785            continue;
 786        }
 787        fn(vd, s1, vs2, i);
 788    }
 789    env->vstart = 0;
 790}
 791
 792/* generate the helpers for OPIVX */
 793#define GEN_VEXT_VX(NAME, ESZ, DSZ)                       \
 794void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
 795                  void *vs2, CPURISCVState *env,          \
 796                  uint32_t desc)                          \
 797{                                                         \
 798    do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,      \
 799               do_##NAME);                                \
 800}
 801
 802GEN_VEXT_VX(vadd_vx_b, 1, 1)
 803GEN_VEXT_VX(vadd_vx_h, 2, 2)
 804GEN_VEXT_VX(vadd_vx_w, 4, 4)
 805GEN_VEXT_VX(vadd_vx_d, 8, 8)
 806GEN_VEXT_VX(vsub_vx_b, 1, 1)
 807GEN_VEXT_VX(vsub_vx_h, 2, 2)
 808GEN_VEXT_VX(vsub_vx_w, 4, 4)
 809GEN_VEXT_VX(vsub_vx_d, 8, 8)
 810GEN_VEXT_VX(vrsub_vx_b, 1, 1)
 811GEN_VEXT_VX(vrsub_vx_h, 2, 2)
 812GEN_VEXT_VX(vrsub_vx_w, 4, 4)
 813GEN_VEXT_VX(vrsub_vx_d, 8, 8)
 814
 815void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
 816{
 817    intptr_t oprsz = simd_oprsz(desc);
 818    intptr_t i;
 819
 820    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 821        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
 822    }
 823}
 824
 825void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
 826{
 827    intptr_t oprsz = simd_oprsz(desc);
 828    intptr_t i;
 829
 830    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 831        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
 832    }
 833}
 834
 835void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
 836{
 837    intptr_t oprsz = simd_oprsz(desc);
 838    intptr_t i;
 839
 840    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 841        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
 842    }
 843}
 844
 845void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
 846{
 847    intptr_t oprsz = simd_oprsz(desc);
 848    intptr_t i;
 849
 850    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 851        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
 852    }
 853}
 854
 855/* Vector Widening Integer Add/Subtract */
 856#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
 857#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
 858#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
 859#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
 860#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
 861#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
 862#define WOP_WUUU_B  uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
 863#define WOP_WUUU_H  uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
 864#define WOP_WUUU_W  uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
 865#define WOP_WSSS_B  int16_t, int8_t, int16_t, int16_t, int16_t
 866#define WOP_WSSS_H  int32_t, int16_t, int32_t, int32_t, int32_t
 867#define WOP_WSSS_W  int64_t, int32_t, int64_t, int64_t, int64_t
 868RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
 869RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
 870RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
 871RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
 872RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
 873RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
 874RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
 875RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
 876RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
 877RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
 878RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
 879RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
 880RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
 881RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
 882RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
 883RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
 884RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
 885RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
 886RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
 887RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
 888RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
 889RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
 890RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
 891RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
 892GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
 893GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
 894GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
 895GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
 896GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
 897GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
 898GEN_VEXT_VV(vwadd_vv_b, 1, 2)
 899GEN_VEXT_VV(vwadd_vv_h, 2, 4)
 900GEN_VEXT_VV(vwadd_vv_w, 4, 8)
 901GEN_VEXT_VV(vwsub_vv_b, 1, 2)
 902GEN_VEXT_VV(vwsub_vv_h, 2, 4)
 903GEN_VEXT_VV(vwsub_vv_w, 4, 8)
 904GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
 905GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
 906GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
 907GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
 908GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
 909GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
 910GEN_VEXT_VV(vwadd_wv_b, 1, 2)
 911GEN_VEXT_VV(vwadd_wv_h, 2, 4)
 912GEN_VEXT_VV(vwadd_wv_w, 4, 8)
 913GEN_VEXT_VV(vwsub_wv_b, 1, 2)
 914GEN_VEXT_VV(vwsub_wv_h, 2, 4)
 915GEN_VEXT_VV(vwsub_wv_w, 4, 8)
 916
 917RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
 918RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
 919RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
 920RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
 921RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
 922RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
 923RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
 924RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
 925RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
 926RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
 927RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
 928RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
 929RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
 930RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
 931RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
 932RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
 933RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
 934RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
 935RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
 936RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
 937RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
 938RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
 939RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
 940RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
 941GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
 942GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
 943GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
 944GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
 945GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
 946GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
 947GEN_VEXT_VX(vwadd_vx_b, 1, 2)
 948GEN_VEXT_VX(vwadd_vx_h, 2, 4)
 949GEN_VEXT_VX(vwadd_vx_w, 4, 8)
 950GEN_VEXT_VX(vwsub_vx_b, 1, 2)
 951GEN_VEXT_VX(vwsub_vx_h, 2, 4)
 952GEN_VEXT_VX(vwsub_vx_w, 4, 8)
 953GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
 954GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
 955GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
 956GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
 957GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
 958GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
 959GEN_VEXT_VX(vwadd_wx_b, 1, 2)
 960GEN_VEXT_VX(vwadd_wx_h, 2, 4)
 961GEN_VEXT_VX(vwadd_wx_w, 4, 8)
 962GEN_VEXT_VX(vwsub_wx_b, 1, 2)
 963GEN_VEXT_VX(vwsub_wx_h, 2, 4)
 964GEN_VEXT_VX(vwsub_wx_w, 4, 8)
 965
 966/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
 967#define DO_VADC(N, M, C) (N + M + C)
 968#define DO_VSBC(N, M, C) (N - M - C)
 969
 970#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
 971void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
 972                  CPURISCVState *env, uint32_t desc)          \
 973{                                                             \
 974    uint32_t vl = env->vl;                                    \
 975    uint32_t i;                                               \
 976                                                              \
 977    for (i = env->vstart; i < vl; i++) {                      \
 978        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
 979        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
 980        ETYPE carry = vext_elem_mask(v0, i);                  \
 981                                                              \
 982        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
 983    }                                                         \
 984    env->vstart = 0;                                          \
 985}
 986
 987GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
 988GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
 989GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
 990GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
 991
 992GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
 993GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
 994GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
 995GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
 996
 997#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
 998void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
 999                  CPURISCVState *env, uint32_t desc)                     \
1000{                                                                        \
1001    uint32_t vl = env->vl;                                               \
1002    uint32_t i;                                                          \
1003                                                                         \
1004    for (i = env->vstart; i < vl; i++) {                                 \
1005        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
1006        ETYPE carry = vext_elem_mask(v0, i);                             \
1007                                                                         \
1008        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1009    }                                                                    \
1010    env->vstart = 0;                                                     \
1011}
1012
1013GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
1014GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1015GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1016GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
1017
1018GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
1019GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1020GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1021GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1022
1023#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
1024                          (__typeof(N))(N + M) < N)
1025#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
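
/*
 * Worked example with uint8_t: N = 200, M = 100, C = 0 gives
 * (uint8_t)(N + M) = 44 < N, so the carry-out is 1; with M = 55 the sum
 * 255 does not wrap and the carry-out is 0.  DO_MSBC likewise yields the
 * borrow-out of N - M - C, e.g. N = 5, M = 5, C = 1 borrows.
 */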
1026
1027#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
1028void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1029                  CPURISCVState *env, uint32_t desc)          \
1030{                                                             \
1031    uint32_t vl = env->vl;                                    \
1032    uint32_t vm = vext_vm(desc);                              \
1033    uint32_t i;                                               \
1034                                                              \
1035    for (i = env->vstart; i < vl; i++) {                      \
1036        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1037        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1038        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
1039        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
1040    }                                                         \
1041    env->vstart = 0;                                          \
1042}
1043
1044GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
1045GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1046GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1047GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1048
1049GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
1050GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1051GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1052GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1053
1054#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
1055void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
1056                  void *vs2, CPURISCVState *env, uint32_t desc) \
1057{                                                               \
1058    uint32_t vl = env->vl;                                      \
1059    uint32_t vm = vext_vm(desc);                                \
1060    uint32_t i;                                                 \
1061                                                                \
1062    for (i = env->vstart; i < vl; i++) {                        \
1063        ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
1064        ETYPE carry = !vm && vext_elem_mask(v0, i);             \
1065        vext_set_elem_mask(vd, i,                               \
1066                DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
1067    }                                                           \
1068    env->vstart = 0;                                            \
1069}
1070
1071GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
1072GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1073GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1074GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1075
1076GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
1077GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1078GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1079GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1080
1081/* Vector Bitwise Logical Instructions */
1082RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1083RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1084RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1085RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1086RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1087RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1088RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1089RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1090RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1091RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1092RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1093RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1094GEN_VEXT_VV(vand_vv_b, 1, 1)
1095GEN_VEXT_VV(vand_vv_h, 2, 2)
1096GEN_VEXT_VV(vand_vv_w, 4, 4)
1097GEN_VEXT_VV(vand_vv_d, 8, 8)
1098GEN_VEXT_VV(vor_vv_b, 1, 1)
1099GEN_VEXT_VV(vor_vv_h, 2, 2)
1100GEN_VEXT_VV(vor_vv_w, 4, 4)
1101GEN_VEXT_VV(vor_vv_d, 8, 8)
1102GEN_VEXT_VV(vxor_vv_b, 1, 1)
1103GEN_VEXT_VV(vxor_vv_h, 2, 2)
1104GEN_VEXT_VV(vxor_vv_w, 4, 4)
1105GEN_VEXT_VV(vxor_vv_d, 8, 8)
1106
1107RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1108RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1109RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1110RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1111RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1112RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1113RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1114RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1115RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1116RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1117RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1118RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1119GEN_VEXT_VX(vand_vx_b, 1, 1)
1120GEN_VEXT_VX(vand_vx_h, 2, 2)
1121GEN_VEXT_VX(vand_vx_w, 4, 4)
1122GEN_VEXT_VX(vand_vx_d, 8, 8)
1123GEN_VEXT_VX(vor_vx_b, 1, 1)
1124GEN_VEXT_VX(vor_vx_h, 2, 2)
1125GEN_VEXT_VX(vor_vx_w, 4, 4)
1126GEN_VEXT_VX(vor_vx_d, 8, 8)
1127GEN_VEXT_VX(vxor_vx_b, 1, 1)
1128GEN_VEXT_VX(vxor_vx_h, 2, 2)
1129GEN_VEXT_VX(vxor_vx_w, 4, 4)
1130GEN_VEXT_VX(vxor_vx_d, 8, 8)
1131
1132/* Vector Single-Width Bit Shift Instructions */
1133#define DO_SLL(N, M)  (N << (M))
1134#define DO_SRL(N, M)  (N >> (M))
1135
1136/* generate the helpers for shift instructions with two vector operands */
1137#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
1138void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
1139                  void *vs2, CPURISCVState *env, uint32_t desc)           \
1140{                                                                         \
1141    uint32_t vm = vext_vm(desc);                                          \
1142    uint32_t vl = env->vl;                                                \
1143    uint32_t i;                                                           \
1144                                                                          \
1145    for (i = env->vstart; i < vl; i++) {                                  \
1146        if (!vm && !vext_elem_mask(v0, i)) {                              \
1147            continue;                                                     \
1148        }                                                                 \
1149        TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
1150        TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
1151        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
1152    }                                                                     \
1153    env->vstart = 0;                                                      \
1154}
1155
1156GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
1157GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1158GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1159GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1160
1161GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1162GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1163GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1164GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1165
1166GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
1167GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1168GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1169GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
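
/*
 * vsra reuses DO_SRL but instantiates TS2 as a signed type, so the shift
 * is arithmetic for negative sources (GCC/Clang, which QEMU requires,
 * define >> on negative signed values that way).  E.g. for vsra_vv_b,
 * (int8_t)0x80 >> 3 is (int8_t)0xf0.
 */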
1170
1171/* generate the helpers for shift instructions with one vector and one scalar */
1172#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1173void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
1174        void *vs2, CPURISCVState *env, uint32_t desc)       \
1175{                                                           \
1176    uint32_t vm = vext_vm(desc);                            \
1177    uint32_t vl = env->vl;                                  \
1178    uint32_t i;                                             \
1179                                                            \
1180    for (i = env->vstart; i < vl; i++) {                    \
1181        if (!vm && !vext_elem_mask(v0, i)) {                \
1182            continue;                                       \
1183        }                                                   \
1184        TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
1185        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
1186    }                                                       \
1187    env->vstart = 0;                                        \
1188}
1189
1190GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1191GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1192GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1193GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1194
1195GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1196GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1197GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1198GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1199
1200GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1201GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1202GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1203GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1204
1205/* Vector Narrowing Integer Right Shift Instructions */
1206GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
1207GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1208GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1209GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
1210GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1211GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1212GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1213GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1214GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1215GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1216GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1217GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1218
1219/* Vector Integer Comparison Instructions */
1220#define DO_MSEQ(N, M) (N == M)
1221#define DO_MSNE(N, M) (N != M)
1222#define DO_MSLT(N, M) (N < M)
1223#define DO_MSLE(N, M) (N <= M)
1224#define DO_MSGT(N, M) (N > M)
1225
1226#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
1227void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
1228                  CPURISCVState *env, uint32_t desc)          \
1229{                                                             \
1230    uint32_t vm = vext_vm(desc);                              \
1231    uint32_t vl = env->vl;                                    \
1232    uint32_t i;                                               \
1233                                                              \
1234    for (i = env->vstart; i < vl; i++) {                      \
1235        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
1236        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
1237        if (!vm && !vext_elem_mask(v0, i)) {                  \
1238            continue;                                         \
1239        }                                                     \
1240        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
1241    }                                                         \
1242    env->vstart = 0;                                          \
1243}
1244
1245GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
1246GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1247GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1248GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1249
1250GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
1251GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1252GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1253GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1254
1255GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
1256GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1257GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1258GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1259
1260GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
1261GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1262GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1263GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1264
1265GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
1266GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1267GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1268GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1269
1270GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
1271GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1272GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1273GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1274
1275#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
1276void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
1277                  CPURISCVState *env, uint32_t desc)                \
1278{                                                                   \
1279    uint32_t vm = vext_vm(desc);                                    \
1280    uint32_t vl = env->vl;                                          \
1281    uint32_t i;                                                     \
1282                                                                    \
1283    for (i = env->vstart; i < vl; i++) {                            \
1284        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
1285        if (!vm && !vext_elem_mask(v0, i)) {                        \
1286            continue;                                               \
1287        }                                                           \
1288        vext_set_elem_mask(vd, i,                                   \
1289                DO_OP(s2, (ETYPE)(target_long)s1));                 \
1290    }                                                               \
1291    env->vstart = 0;                                                \
1292}
1293
1294GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
1295GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1296GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1297GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1298
1299GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
1300GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1301GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1302GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1303
1304GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
1305GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1306GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1307GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1308
1309GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
1310GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1311GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1312GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1313
1314GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
1315GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1316GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1317GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1318
1319GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
1320GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1321GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1322GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1323
1324GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
1325GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1326GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1327GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1328
1329GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
1330GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1331GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1332GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1333
1334/* Vector Integer Min/Max Instructions */
1335RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1336RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1337RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1338RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1339RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1340RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1341RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1342RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1343RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1344RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1345RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1346RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1347RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1348RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1349RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1350RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1351GEN_VEXT_VV(vminu_vv_b, 1, 1)
1352GEN_VEXT_VV(vminu_vv_h, 2, 2)
1353GEN_VEXT_VV(vminu_vv_w, 4, 4)
1354GEN_VEXT_VV(vminu_vv_d, 8, 8)
1355GEN_VEXT_VV(vmin_vv_b, 1, 1)
1356GEN_VEXT_VV(vmin_vv_h, 2, 2)
1357GEN_VEXT_VV(vmin_vv_w, 4, 4)
1358GEN_VEXT_VV(vmin_vv_d, 8, 8)
1359GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
1360GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
1361GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
1362GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
1363GEN_VEXT_VV(vmax_vv_b, 1, 1)
1364GEN_VEXT_VV(vmax_vv_h, 2, 2)
1365GEN_VEXT_VV(vmax_vv_w, 4, 4)
1366GEN_VEXT_VV(vmax_vv_d, 8, 8)
1367
1368RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1369RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1370RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1371RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1372RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1373RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1374RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1375RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1376RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1377RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1378RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1379RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1380RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1381RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1382RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1383RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1384GEN_VEXT_VX(vminu_vx_b, 1, 1)
1385GEN_VEXT_VX(vminu_vx_h, 2, 2)
1386GEN_VEXT_VX(vminu_vx_w, 4, 4)
1387GEN_VEXT_VX(vminu_vx_d, 8, 8)
1388GEN_VEXT_VX(vmin_vx_b, 1, 1)
1389GEN_VEXT_VX(vmin_vx_h, 2, 2)
1390GEN_VEXT_VX(vmin_vx_w, 4, 4)
1391GEN_VEXT_VX(vmin_vx_d, 8, 8)
1392GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
1393GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
1394GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
1395GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
1396GEN_VEXT_VX(vmax_vx_b, 1, 1)
1397GEN_VEXT_VX(vmax_vx_h, 2, 2)
1398GEN_VEXT_VX(vmax_vx_w, 4, 4)
1399GEN_VEXT_VX(vmax_vx_d, 8, 8)
1400
1401/* Vector Single-Width Integer Multiply Instructions */
1402#define DO_MUL(N, M) (N * M)
1403RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1404RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1405RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1406RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1407GEN_VEXT_VV(vmul_vv_b, 1, 1)
1408GEN_VEXT_VV(vmul_vv_h, 2, 2)
1409GEN_VEXT_VV(vmul_vv_w, 4, 4)
1410GEN_VEXT_VV(vmul_vv_d, 8, 8)
1411
1412static int8_t do_mulh_b(int8_t s2, int8_t s1)
1413{
1414    return (int16_t)s2 * (int16_t)s1 >> 8;
1415}
1416
1417static int16_t do_mulh_h(int16_t s2, int16_t s1)
1418{
1419    return (int32_t)s2 * (int32_t)s1 >> 16;
1420}
1421
1422static int32_t do_mulh_w(int32_t s2, int32_t s1)
1423{
1424    return (int64_t)s2 * (int64_t)s1 >> 32;
1425}
1426
1427static int64_t do_mulh_d(int64_t s2, int64_t s1)
1428{
1429    uint64_t hi_64, lo_64;
1430
1431    muls64(&lo_64, &hi_64, s1, s2);
1432    return hi_64;
1433}
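/*
 * Illustrative sketch (assumes the host compiler provides __int128; the
 * name mulh_d_via_int128 is hypothetical and not part of this file):
 * do_mulh_d() above computes the same value as taking the top 64 bits of
 * the full 128-bit signed product; muls64() is used so the helper does not
 * depend on that extension.
 *
 *     static int64_t mulh_d_via_int128(int64_t s2, int64_t s1)
 *     {
 *         return (int64_t)(((__int128)s2 * (__int128)s1) >> 64);
 *     }
 */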
1434
1435static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1436{
1437    return (uint16_t)s2 * (uint16_t)s1 >> 8;
1438}
1439
1440static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1441{
1442    return (uint32_t)s2 * (uint32_t)s1 >> 16;
1443}
1444
1445static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1446{
1447    return (uint64_t)s2 * (uint64_t)s1 >> 32;
1448}
1449
1450static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1451{
1452    uint64_t hi_64, lo_64;
1453
1454    mulu64(&lo_64, &hi_64, s2, s1);
1455    return hi_64;
1456}
1457
1458static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1459{
1460    return (int16_t)s2 * (uint16_t)s1 >> 8;
1461}
1462
1463static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1464{
1465    return (int32_t)s2 * (uint32_t)s1 >> 16;
1466}
1467
1468static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1469{
1470    return (int64_t)s2 * (uint64_t)s1 >> 32;
1471}
1472
1473/*
1474 * Let  A = signed operand (s2),
1475 *      B = unsigned operand (s1),
1476 *      P = mulu64(A, B), the full unsigned product of A's
1477 *          64-bit pattern and B.
1478 *
1479 * If A >= 0, the bit pattern of A equals A, so the signed
1480 * product SP = A * B = P.
1481 *
1482 * If A < 0, the bit pattern of A is A + 2 ** 64, hence
1483 *      P  = (A + 2 ** 64) * B
1484 *         = A * B + 2 ** 64 * B
1485 * and therefore
1486 *      SP = A * B = P - 2 ** 64 * B.
1487 *
1488 * Subtracting 2 ** 64 * B only changes the high half of P, so
1489 *      HI_P -= (A < 0 ? B : 0)
1490 */
1491
1492static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1493{
1494    uint64_t hi_64, lo_64;
1495
1496    mulu64(&lo_64, &hi_64, s2, s1);
1497
1498    hi_64 -= s2 < 0 ? s1 : 0;
1499    return hi_64;
1500}
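/*
 * Worked example for the correction above (illustrative, not part of the
 * upstream file): s2 = -1, s1 = 2.  mulu64() sees the bit pattern
 * 2**64 - 1 and returns hi_64 = 1, lo_64 = 0xFFFFFFFFFFFFFFFE.  The true
 * signed product is -2, whose high half is -1, and indeed
 * hi_64 - s1 = 1 - 2 = -1.
 */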
1501
1502RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1503RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1504RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1505RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1506RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1507RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1508RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1509RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1510RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1511RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1512RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1513RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1514GEN_VEXT_VV(vmulh_vv_b, 1, 1)
1515GEN_VEXT_VV(vmulh_vv_h, 2, 2)
1516GEN_VEXT_VV(vmulh_vv_w, 4, 4)
1517GEN_VEXT_VV(vmulh_vv_d, 8, 8)
1518GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
1519GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
1520GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
1521GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
1522GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
1523GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
1524GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
1525GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
1526
1527RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1528RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1529RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1530RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1531RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1532RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1533RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1534RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1535RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1536RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1537RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1538RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1539RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1540RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1541RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1542RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1543GEN_VEXT_VX(vmul_vx_b, 1, 1)
1544GEN_VEXT_VX(vmul_vx_h, 2, 2)
1545GEN_VEXT_VX(vmul_vx_w, 4, 4)
1546GEN_VEXT_VX(vmul_vx_d, 8, 8)
1547GEN_VEXT_VX(vmulh_vx_b, 1, 1)
1548GEN_VEXT_VX(vmulh_vx_h, 2, 2)
1549GEN_VEXT_VX(vmulh_vx_w, 4, 4)
1550GEN_VEXT_VX(vmulh_vx_d, 8, 8)
1551GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
1552GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
1553GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
1554GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
1555GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
1556GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
1557GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
1558GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)
1559
1560/* Vector Integer Divide Instructions */
1561#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1562#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1563#define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) :\
1564        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1565#define DO_REM(N, M)  (unlikely(M == 0) ? N :\
1566        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
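/*
 * Worked examples (illustrative, not part of the upstream file) for 64-bit
 * elements, matching the RISC-V divide semantics the macros above are
 * meant to encode:
 *     DO_DIVU(x, 0)         -> UINT64_MAX (all ones)
 *     DO_REMU(x, 0)         -> x
 *     DO_DIV(x, 0)          -> -1
 *     DO_DIV(INT64_MIN, -1) -> INT64_MIN (overflow case, N == -N)
 *     DO_REM(x, 0)          -> x
 *     DO_REM(INT64_MIN, -1) -> 0
 */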
1567
1568RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1569RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1570RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1571RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1572RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1573RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1574RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1575RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1576RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1577RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1578RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1579RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1580RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1581RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1582RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1583RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1584GEN_VEXT_VV(vdivu_vv_b, 1, 1)
1585GEN_VEXT_VV(vdivu_vv_h, 2, 2)
1586GEN_VEXT_VV(vdivu_vv_w, 4, 4)
1587GEN_VEXT_VV(vdivu_vv_d, 8, 8)
1588GEN_VEXT_VV(vdiv_vv_b, 1, 1)
1589GEN_VEXT_VV(vdiv_vv_h, 2, 2)
1590GEN_VEXT_VV(vdiv_vv_w, 4, 4)
1591GEN_VEXT_VV(vdiv_vv_d, 8, 8)
1592GEN_VEXT_VV(vremu_vv_b, 1, 1)
1593GEN_VEXT_VV(vremu_vv_h, 2, 2)
1594GEN_VEXT_VV(vremu_vv_w, 4, 4)
1595GEN_VEXT_VV(vremu_vv_d, 8, 8)
1596GEN_VEXT_VV(vrem_vv_b, 1, 1)
1597GEN_VEXT_VV(vrem_vv_h, 2, 2)
1598GEN_VEXT_VV(vrem_vv_w, 4, 4)
1599GEN_VEXT_VV(vrem_vv_d, 8, 8)
1600
1601RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1602RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1603RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1604RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1605RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1606RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1607RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1608RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1609RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1610RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1611RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1612RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1613RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1614RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1615RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1616RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1617GEN_VEXT_VX(vdivu_vx_b, 1, 1)
1618GEN_VEXT_VX(vdivu_vx_h, 2, 2)
1619GEN_VEXT_VX(vdivu_vx_w, 4, 4)
1620GEN_VEXT_VX(vdivu_vx_d, 8, 8)
1621GEN_VEXT_VX(vdiv_vx_b, 1, 1)
1622GEN_VEXT_VX(vdiv_vx_h, 2, 2)
1623GEN_VEXT_VX(vdiv_vx_w, 4, 4)
1624GEN_VEXT_VX(vdiv_vx_d, 8, 8)
1625GEN_VEXT_VX(vremu_vx_b, 1, 1)
1626GEN_VEXT_VX(vremu_vx_h, 2, 2)
1627GEN_VEXT_VX(vremu_vx_w, 4, 4)
1628GEN_VEXT_VX(vremu_vx_d, 8, 8)
1629GEN_VEXT_VX(vrem_vx_b, 1, 1)
1630GEN_VEXT_VX(vrem_vx_h, 2, 2)
1631GEN_VEXT_VX(vrem_vx_w, 4, 4)
1632GEN_VEXT_VX(vrem_vx_d, 8, 8)
1633
1634/* Vector Widening Integer Multiply Instructions */
1635RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1636RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1637RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1638RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1639RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1640RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1641RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1642RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1643RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1644GEN_VEXT_VV(vwmul_vv_b, 1, 2)
1645GEN_VEXT_VV(vwmul_vv_h, 2, 4)
1646GEN_VEXT_VV(vwmul_vv_w, 4, 8)
1647GEN_VEXT_VV(vwmulu_vv_b, 1, 2)
1648GEN_VEXT_VV(vwmulu_vv_h, 2, 4)
1649GEN_VEXT_VV(vwmulu_vv_w, 4, 8)
1650GEN_VEXT_VV(vwmulsu_vv_b, 1, 2)
1651GEN_VEXT_VV(vwmulsu_vv_h, 2, 4)
1652GEN_VEXT_VV(vwmulsu_vv_w, 4, 8)
1653
1654RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1655RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1656RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1657RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1658RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1659RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1660RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1661RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1662RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1663GEN_VEXT_VX(vwmul_vx_b, 1, 2)
1664GEN_VEXT_VX(vwmul_vx_h, 2, 4)
1665GEN_VEXT_VX(vwmul_vx_w, 4, 8)
1666GEN_VEXT_VX(vwmulu_vx_b, 1, 2)
1667GEN_VEXT_VX(vwmulu_vx_h, 2, 4)
1668GEN_VEXT_VX(vwmulu_vx_w, 4, 8)
1669GEN_VEXT_VX(vwmulsu_vx_b, 1, 2)
1670GEN_VEXT_VX(vwmulsu_vx_h, 2, 4)
1671GEN_VEXT_VX(vwmulsu_vx_w, 4, 8)
1672
1673/* Vector Single-Width Integer Multiply-Add Instructions */
1674#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
1675static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
1676{                                                                  \
1677    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
1678    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
1679    TD d = *((TD *)vd + HD(i));                                    \
1680    *((TD *)vd + HD(i)) = OP(s2, s1, d);                           \
1681}
1682
1683#define DO_MACC(N, M, D) (M * N + D)
1684#define DO_NMSAC(N, M, D) (-(M * N) + D)
1685#define DO_MADD(N, M, D) (M * D + N)
1686#define DO_NMSUB(N, M, D) (-(M * D) + N)
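/*
 * Operand mapping (illustrative note, not part of the upstream file):
 * do_##NAME calls OP(s2, s1, d), so with N = vs2[i], M = vs1[i] and
 * D = vd[i] the macros above expand to
 *     vmacc:  vd[i] =  (vs1[i] * vs2[i]) + vd[i]
 *     vnmsac: vd[i] = -(vs1[i] * vs2[i]) + vd[i]
 *     vmadd:  vd[i] =  (vs1[i] * vd[i])  + vs2[i]
 *     vnmsub: vd[i] = -(vs1[i] * vd[i])  + vs2[i]
 */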
1687RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1688RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1689RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1690RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1691RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1692RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1693RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1694RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1695RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1696RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1697RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1698RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1699RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1700RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1701RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1702RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1703GEN_VEXT_VV(vmacc_vv_b, 1, 1)
1704GEN_VEXT_VV(vmacc_vv_h, 2, 2)
1705GEN_VEXT_VV(vmacc_vv_w, 4, 4)
1706GEN_VEXT_VV(vmacc_vv_d, 8, 8)
1707GEN_VEXT_VV(vnmsac_vv_b, 1, 1)
1708GEN_VEXT_VV(vnmsac_vv_h, 2, 2)
1709GEN_VEXT_VV(vnmsac_vv_w, 4, 4)
1710GEN_VEXT_VV(vnmsac_vv_d, 8, 8)
1711GEN_VEXT_VV(vmadd_vv_b, 1, 1)
1712GEN_VEXT_VV(vmadd_vv_h, 2, 2)
1713GEN_VEXT_VV(vmadd_vv_w, 4, 4)
1714GEN_VEXT_VV(vmadd_vv_d, 8, 8)
1715GEN_VEXT_VV(vnmsub_vv_b, 1, 1)
1716GEN_VEXT_VV(vnmsub_vv_h, 2, 2)
1717GEN_VEXT_VV(vnmsub_vv_w, 4, 4)
1718GEN_VEXT_VV(vnmsub_vv_d, 8, 8)
1719
1720#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
1721static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
1722{                                                                   \
1723    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1724    TD d = *((TD *)vd + HD(i));                                     \
1725    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
1726}
1727
1728RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1729RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1730RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1731RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1732RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1733RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1734RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1735RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1736RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1737RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1738RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1739RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1740RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1741RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1742RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1743RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1744GEN_VEXT_VX(vmacc_vx_b, 1, 1)
1745GEN_VEXT_VX(vmacc_vx_h, 2, 2)
1746GEN_VEXT_VX(vmacc_vx_w, 4, 4)
1747GEN_VEXT_VX(vmacc_vx_d, 8, 8)
1748GEN_VEXT_VX(vnmsac_vx_b, 1, 1)
1749GEN_VEXT_VX(vnmsac_vx_h, 2, 2)
1750GEN_VEXT_VX(vnmsac_vx_w, 4, 4)
1751GEN_VEXT_VX(vnmsac_vx_d, 8, 8)
1752GEN_VEXT_VX(vmadd_vx_b, 1, 1)
1753GEN_VEXT_VX(vmadd_vx_h, 2, 2)
1754GEN_VEXT_VX(vmadd_vx_w, 4, 4)
1755GEN_VEXT_VX(vmadd_vx_d, 8, 8)
1756GEN_VEXT_VX(vnmsub_vx_b, 1, 1)
1757GEN_VEXT_VX(vnmsub_vx_h, 2, 2)
1758GEN_VEXT_VX(vnmsub_vx_w, 4, 4)
1759GEN_VEXT_VX(vnmsub_vx_d, 8, 8)
1760
1761/* Vector Widening Integer Multiply-Add Instructions */
1762RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1763RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1764RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1765RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1766RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1767RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1768RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1769RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1770RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1771GEN_VEXT_VV(vwmaccu_vv_b, 1, 2)
1772GEN_VEXT_VV(vwmaccu_vv_h, 2, 4)
1773GEN_VEXT_VV(vwmaccu_vv_w, 4, 8)
1774GEN_VEXT_VV(vwmacc_vv_b, 1, 2)
1775GEN_VEXT_VV(vwmacc_vv_h, 2, 4)
1776GEN_VEXT_VV(vwmacc_vv_w, 4, 8)
1777GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2)
1778GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4)
1779GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8)
1780
1781RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1782RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1783RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1784RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1785RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1786RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1787RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1788RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1789RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1790RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1791RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1792RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1793GEN_VEXT_VX(vwmaccu_vx_b, 1, 2)
1794GEN_VEXT_VX(vwmaccu_vx_h, 2, 4)
1795GEN_VEXT_VX(vwmaccu_vx_w, 4, 8)
1796GEN_VEXT_VX(vwmacc_vx_b, 1, 2)
1797GEN_VEXT_VX(vwmacc_vx_h, 2, 4)
1798GEN_VEXT_VX(vwmacc_vx_w, 4, 8)
1799GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2)
1800GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4)
1801GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8)
1802GEN_VEXT_VX(vwmaccus_vx_b, 1, 2)
1803GEN_VEXT_VX(vwmaccus_vx_h, 2, 4)
1804GEN_VEXT_VX(vwmaccus_vx_w, 4, 8)
1805
1806/* Vector Integer Merge and Move Instructions */
1807#define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
1808void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
1809                  uint32_t desc)                                     \
1810{                                                                    \
1811    uint32_t vl = env->vl;                                           \
1812    uint32_t i;                                                      \
1813                                                                     \
1814    for (i = env->vstart; i < vl; i++) {                             \
1815        ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
1816        *((ETYPE *)vd + H(i)) = s1;                                  \
1817    }                                                                \
1818    env->vstart = 0;                                                 \
1819}
1820
1821GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t,  H1)
1822GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1823GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1824GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1825
1826#define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
1827void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
1828                  uint32_t desc)                                     \
1829{                                                                    \
1830    uint32_t vl = env->vl;                                           \
1831    uint32_t i;                                                      \
1832                                                                     \
1833    for (i = env->vstart; i < vl; i++) {                             \
1834        *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
1835    }                                                                \
1836    env->vstart = 0;                                                 \
1837}
1838
1839GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t,  H1)
1840GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1841GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1842GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1843
1844#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
1845void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
1846                  CPURISCVState *env, uint32_t desc)                 \
1847{                                                                    \
1848    uint32_t vl = env->vl;                                           \
1849    uint32_t i;                                                      \
1850                                                                     \
1851    for (i = env->vstart; i < vl; i++) {                             \
1852        ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
1853        *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
1854    }                                                                \
1855    env->vstart = 0;                                                 \
1856}
1857
1858GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t,  H1)
1859GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1860GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1861GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1862
1863#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
1864void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
1865                  void *vs2, CPURISCVState *env, uint32_t desc)      \
1866{                                                                    \
1867    uint32_t vl = env->vl;                                           \
1868    uint32_t i;                                                      \
1869                                                                     \
1870    for (i = env->vstart; i < vl; i++) {                             \
1871        ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
1872        ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
1873                   (ETYPE)(target_long)s1);                          \
1874        *((ETYPE *)vd + H(i)) = d;                                   \
1875    }                                                                \
1876    env->vstart = 0;                                                 \
1877}
1878
1879GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t,  H1)
1880GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1881GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1882GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1883
1884/*
1885 *** Vector Fixed-Point Arithmetic Instructions
1886 */
1887
1888/* Vector Single-Width Saturating Add and Subtract */
1889
1890/*
1891 * Fixed-point instructions have a rounding mode and may saturate,
1892 * so define the common fixed-point macros here.
1893 */
1894typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1895                          CPURISCVState *env, int vxrm);
1896
1897#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
1898static inline void                                                  \
1899do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
1900          CPURISCVState *env, int vxrm)                             \
1901{                                                                   \
1902    TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
1903    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
1904    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
1905}
1906
1907static inline void
1908vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1909             CPURISCVState *env,
1910             uint32_t vl, uint32_t vm, int vxrm,
1911             opivv2_rm_fn *fn)
1912{
1913    for (uint32_t i = env->vstart; i < vl; i++) {
1914        if (!vm && !vext_elem_mask(v0, i)) {
1915            continue;
1916        }
1917        fn(vd, vs1, vs2, i, env, vxrm);
1918    }
1919    env->vstart = 0;
1920}
1921
1922static inline void
1923vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1924             CPURISCVState *env,
1925             uint32_t desc, uint32_t esz, uint32_t dsz,
1926             opivv2_rm_fn *fn)
1927{
1928    uint32_t vm = vext_vm(desc);
1929    uint32_t vl = env->vl;
1930
1931    switch (env->vxrm) {
1932    case 0: /* rnu */
1933        vext_vv_rm_1(vd, v0, vs1, vs2,
1934                     env, vl, vm, 0, fn);
1935        break;
1936    case 1: /* rne */
1937        vext_vv_rm_1(vd, v0, vs1, vs2,
1938                     env, vl, vm, 1, fn);
1939        break;
1940    case 2: /* rdn */
1941        vext_vv_rm_1(vd, v0, vs1, vs2,
1942                     env, vl, vm, 2, fn);
1943        break;
1944    default: /* rod */
1945        vext_vv_rm_1(vd, v0, vs1, vs2,
1946                     env, vl, vm, 3, fn);
1947        break;
1948    }
1949}
1950
1951/* generate helpers for fixed point instructions with OPIVV format */
1952#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ)                          \
1953void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
1954                  CPURISCVState *env, uint32_t desc)            \
1955{                                                               \
1956    vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,         \
1957                 do_##NAME);                                    \
1958}
1959
1960static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
1961{
1962    uint8_t res = a + b;
1963    if (res < a) {
1964        res = UINT8_MAX;
1965        env->vxsat = 0x1;
1966    }
1967    return res;
1968}
1969
1970static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
1971                               uint16_t b)
1972{
1973    uint16_t res = a + b;
1974    if (res < a) {
1975        res = UINT16_MAX;
1976        env->vxsat = 0x1;
1977    }
1978    return res;
1979}
1980
1981static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
1982                               uint32_t b)
1983{
1984    uint32_t res = a + b;
1985    if (res < a) {
1986        res = UINT32_MAX;
1987        env->vxsat = 0x1;
1988    }
1989    return res;
1990}
1991
1992static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
1993                               uint64_t b)
1994{
1995    uint64_t res = a + b;
1996    if (res < a) {
1997        res = UINT64_MAX;
1998        env->vxsat = 0x1;
1999    }
2000    return res;
2001}
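/*
 * Worked example (illustrative, not part of the upstream file): the
 * "res < a" test detects unsigned wrap-around.  For saddu8 with a = 200
 * and b = 100 the 8-bit sum wraps to 44, which is less than a, so the
 * result saturates to UINT8_MAX and vxsat is set.
 */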
2002
2003RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2004RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2005RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2006RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2007GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1)
2008GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2)
2009GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4)
2010GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8)
2011
2012typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2013                          CPURISCVState *env, int vxrm);
2014
2015#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
2016static inline void                                                  \
2017do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
2018          CPURISCVState *env, int vxrm)                             \
2019{                                                                   \
2020    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
2021    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
2022}
2023
2024static inline void
2025vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2026             CPURISCVState *env,
2027             uint32_t vl, uint32_t vm, int vxrm,
2028             opivx2_rm_fn *fn)
2029{
2030    for (uint32_t i = env->vstart; i < vl; i++) {
2031        if (!vm && !vext_elem_mask(v0, i)) {
2032            continue;
2033        }
2034        fn(vd, s1, vs2, i, env, vxrm);
2035    }
2036    env->vstart = 0;
2037}
2038
2039static inline void
2040vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2041             CPURISCVState *env,
2042             uint32_t desc, uint32_t esz, uint32_t dsz,
2043             opivx2_rm_fn *fn)
2044{
2045    uint32_t vm = vext_vm(desc);
2046    uint32_t vl = env->vl;
2047
2048    switch (env->vxrm) {
2049    case 0: /* rnu */
2050        vext_vx_rm_1(vd, v0, s1, vs2,
2051                     env, vl, vm, 0, fn);
2052        break;
2053    case 1: /* rne */
2054        vext_vx_rm_1(vd, v0, s1, vs2,
2055                     env, vl, vm, 1, fn);
2056        break;
2057    case 2: /* rdn */
2058        vext_vx_rm_1(vd, v0, s1, vs2,
2059                     env, vl, vm, 2, fn);
2060        break;
2061    default: /* rod */
2062        vext_vx_rm_1(vd, v0, s1, vs2,
2063                     env, vl, vm, 3, fn);
2064        break;
2065    }
2066}
2067
2068/* generate helpers for fixed point instructions with OPIVX format */
2069#define GEN_VEXT_VX_RM(NAME, ESZ, DSZ)                    \
2070void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
2071        void *vs2, CPURISCVState *env, uint32_t desc)     \
2072{                                                         \
2073    vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ,    \
2074                 do_##NAME);                              \
2075}
2076
2077RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2078RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2079RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2080RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2081GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1)
2082GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2)
2083GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4)
2084GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8)
2085
2086static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2087{
2088    int8_t res = a + b;
2089    if ((res ^ a) & (res ^ b) & INT8_MIN) {
2090        res = a > 0 ? INT8_MAX : INT8_MIN;
2091        env->vxsat = 0x1;
2092    }
2093    return res;
2094}
2095
2096static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2097{
2098    int16_t res = a + b;
2099    if ((res ^ a) & (res ^ b) & INT16_MIN) {
2100        res = a > 0 ? INT16_MAX : INT16_MIN;
2101        env->vxsat = 0x1;
2102    }
2103    return res;
2104}
2105
2106static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2107{
2108    int32_t res = a + b;
2109    if ((res ^ a) & (res ^ b) & INT32_MIN) {
2110        res = a > 0 ? INT32_MAX : INT32_MIN;
2111        env->vxsat = 0x1;
2112    }
2113    return res;
2114}
2115
2116static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2117{
2118    int64_t res = a + b;
2119    if ((res ^ a) & (res ^ b) & INT64_MIN) {
2120        res = a > 0 ? INT64_MAX : INT64_MIN;
2121        env->vxsat = 0x1;
2122    }
2123    return res;
2124}
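/*
 * Worked example (illustrative, not part of the upstream file): the test
 * (res ^ a) & (res ^ b) & <signed minimum of the element type> is non-zero
 * exactly when a and b share a sign and res has the opposite one.  For
 * sadd8 with a = b = 100 the 8-bit sum wraps to -56; both XOR terms have
 * the sign bit set, so the result saturates to INT8_MAX (a > 0) and vxsat
 * is set.
 */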
2125
2126RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2127RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2128RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2129RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2130GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1)
2131GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2)
2132GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4)
2133GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8)
2134
2135RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2136RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2137RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2138RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2139GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1)
2140GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2)
2141GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4)
2142GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8)
2143
2144static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2145{
2146    uint8_t res = a - b;
2147    if (res > a) {
2148        res = 0;
2149        env->vxsat = 0x1;
2150    }
2151    return res;
2152}
2153
2154static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2155                               uint16_t b)
2156{
2157    uint16_t res = a - b;
2158    if (res > a) {
2159        res = 0;
2160        env->vxsat = 0x1;
2161    }
2162    return res;
2163}
2164
2165static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2166                               uint32_t b)
2167{
2168    uint32_t res = a - b;
2169    if (res > a) {
2170        res = 0;
2171        env->vxsat = 0x1;
2172    }
2173    return res;
2174}
2175
2176static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2177                               uint64_t b)
2178{
2179    uint64_t res = a - b;
2180    if (res > a) {
2181        res = 0;
2182        env->vxsat = 0x1;
2183    }
2184    return res;
2185}
2186
2187RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2188RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2189RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2190RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2191GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1)
2192GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2)
2193GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4)
2194GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8)
2195
2196RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2197RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2198RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2199RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2200GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1)
2201GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2)
2202GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4)
2203GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8)
2204
2205static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2206{
2207    int8_t res = a - b;
2208    if ((res ^ a) & (a ^ b) & INT8_MIN) {
2209        res = a >= 0 ? INT8_MAX : INT8_MIN;
2210        env->vxsat = 0x1;
2211    }
2212    return res;
2213}
2214
2215static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2216{
2217    int16_t res = a - b;
2218    if ((res ^ a) & (a ^ b) & INT16_MIN) {
2219        res = a >= 0 ? INT16_MAX : INT16_MIN;
2220        env->vxsat = 0x1;
2221    }
2222    return res;
2223}
2224
2225static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2226{
2227    int32_t res = a - b;
2228    if ((res ^ a) & (a ^ b) & INT32_MIN) {
2229        res = a >= 0 ? INT32_MAX : INT32_MIN;
2230        env->vxsat = 0x1;
2231    }
2232    return res;
2233}
2234
2235static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2236{
2237    int64_t res = a - b;
2238    if ((res ^ a) & (a ^ b) & INT64_MIN) {
2239        res = a >= 0 ? INT64_MAX : INT64_MIN;
2240        env->vxsat = 0x1;
2241    }
2242    return res;
2243}
2244
2245RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2246RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2247RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2248RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2249GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
2250GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
2251GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
2252GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)
2253
2254RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2255RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2256RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2257RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2258GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
2259GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
2260GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
2261GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)
2262
2263/* Vector Single-Width Averaging Add and Subtract */
2264static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2265{
2266    uint8_t d = extract64(v, shift, 1);
2267    uint8_t d1;
2268    uint64_t D1, D2;
2269
2270    if (shift == 0 || shift > 64) {
2271        return 0;
2272    }
2273
2274    d1 = extract64(v, shift - 1, 1);
2275    D1 = extract64(v, 0, shift);
2276    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2277        return d1;
2278    } else if (vxrm == 1) { /* round-to-nearest-even */
2279        if (shift > 1) {
2280            D2 = extract64(v, 0, shift - 1);
2281            return d1 & ((D2 != 0) | d);
2282        } else {
2283            return d1 & d;
2284        }
2285    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2286        return !d & (D1 != 0);
2287    }
2288    return 0; /* round-down (truncate) */
2289}
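/*
 * Worked example (illustrative, not part of the upstream file): rounding
 * v = 0b1011 (11) right by shift = 2, i.e. 11 / 4 = 2.75.  Then d = bit 2
 * of v = 0, d1 = bit 1 = 1, D1 = low two bits = 3, D2 = bit 0 = 1:
 *     rnu (0): returns d1 = 1                   -> 2 + 1 = 3
 *     rne (1): returns d1 & ((D2 != 0) | d) = 1 -> 3
 *     rdn (2): returns 0                        -> 2
 *     rod (3): returns !d & (D1 != 0) = 1       -> 3
 * The caller adds the returned increment to (v >> shift).
 */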
2290
2291static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2292{
2293    int64_t res = (int64_t)a + b;
2294    uint8_t round = get_round(vxrm, res, 1);
2295
2296    return (res >> 1) + round;
2297}
2298
2299static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2300{
2301    int64_t res = a + b;
2302    uint8_t round = get_round(vxrm, res, 1);
2303    int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2304
2305    /* With signed overflow, bit 64 is inverse of bit 63. */
2306    return ((res >> 1) ^ over) + round;
2307}
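/*
 * Worked example (illustrative, not part of the upstream file): for
 * a = b = INT64_MAX the 64-bit sum wraps to -2 while the true sum is
 * 2**64 - 2.  "over" is then INT64_MIN, and (res >> 1) ^ over corrects the
 * sign bit of the halved result, giving 2**63 - 1 = (2**64 - 2) / 2.
 */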
2308
2309RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2310RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2311RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2312RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2313GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1)
2314GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2)
2315GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4)
2316GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8)
2317
2318RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2319RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2320RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2321RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2322GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1)
2323GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2)
2324GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4)
2325GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8)
2326
2327static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2328                               uint32_t a, uint32_t b)
2329{
2330    uint64_t res = (uint64_t)a + b;
2331    uint8_t round = get_round(vxrm, res, 1);
2332
2333    return (res >> 1) + round;
2334}
2335
2336static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2337                               uint64_t a, uint64_t b)
2338{
2339    uint64_t res = a + b;
2340    uint8_t round = get_round(vxrm, res, 1);
2341    uint64_t over = (uint64_t)(res < a) << 63;
2342
2343    return ((res >> 1) | over) + round;
2344}
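/*
 * Worked example (illustrative, not part of the upstream file): "over"
 * re-inserts the carry lost by the wrapping 64-bit add as bit 63 of the
 * halved sum.  For a = b = UINT64_MAX the sum wraps to 2**64 - 2 and
 * res < a, so over = 2**63 and (res >> 1) | over = UINT64_MAX, which is
 * exactly (2**65 - 2) / 2.
 */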
2345
2346RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2347RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2348RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2349RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2350GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1)
2351GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2)
2352GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4)
2353GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8)
2354
2355RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2356RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2357RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2358RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2359GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1)
2360GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2)
2361GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4)
2362GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8)
2363
2364static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2365{
2366    int64_t res = (int64_t)a - b;
2367    uint8_t round = get_round(vxrm, res, 1);
2368
2369    return (res >> 1) + round;
2370}
2371
2372static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2373{
2374    int64_t res = (int64_t)a - b;
2375    uint8_t round = get_round(vxrm, res, 1);
2376    int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2377
2378    /* With signed overflow, bit 64 is inverse of bit 63. */
2379    return ((res >> 1) ^ over) + round;
2380}
2381
2382RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2383RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2384RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2385RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2386GEN_VEXT_VV_RM(vasub_vv_b, 1, 1)
2387GEN_VEXT_VV_RM(vasub_vv_h, 2, 2)
2388GEN_VEXT_VV_RM(vasub_vv_w, 4, 4)
2389GEN_VEXT_VV_RM(vasub_vv_d, 8, 8)
2390
2391RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2392RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2393RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2394RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2395GEN_VEXT_VX_RM(vasub_vx_b, 1, 1)
2396GEN_VEXT_VX_RM(vasub_vx_h, 2, 2)
2397GEN_VEXT_VX_RM(vasub_vx_w, 4, 4)
2398GEN_VEXT_VX_RM(vasub_vx_d, 8, 8)
2399
2400static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2401                               uint32_t a, uint32_t b)
2402{
2403    int64_t res = (int64_t)a - b;
2404    uint8_t round = get_round(vxrm, res, 1);
2405
2406    return (res >> 1) + round;
2407}
2408
2409static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2410                               uint64_t a, uint64_t b)
2411{
2412    uint64_t res = (uint64_t)a - b;
2413    uint8_t round = get_round(vxrm, res, 1);
2414    uint64_t over = (uint64_t)(res > a) << 63;
2415
2416    return ((res >> 1) | over) + round;
2417}
2418
2419RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2420RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2421RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2422RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2423GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1)
2424GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2)
2425GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4)
2426GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8)
2427
2428RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2429RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2430RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2431RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2432GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1)
2433GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2)
2434GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4)
2435GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8)
2436
2437/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2438static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2439{
2440    uint8_t round;
2441    int16_t res;
2442
2443    res = (int16_t)a * (int16_t)b;
2444    round = get_round(vxrm, res, 7);
2445    res   = (res >> 7) + round;
2446
2447    if (res > INT8_MAX) {
2448        env->vxsat = 0x1;
2449        return INT8_MAX;
2450    } else if (res < INT8_MIN) {
2451        env->vxsat = 0x1;
2452        return INT8_MIN;
2453    } else {
2454        return res;
2455    }
2456}
2457
2458static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2459{
2460    uint8_t round;
2461    int32_t res;
2462
2463    res = (int32_t)a * (int32_t)b;
2464    round = get_round(vxrm, res, 15);
2465    res   = (res >> 15) + round;
2466
2467    if (res > INT16_MAX) {
2468        env->vxsat = 0x1;
2469        return INT16_MAX;
2470    } else if (res < INT16_MIN) {
2471        env->vxsat = 0x1;
2472        return INT16_MIN;
2473    } else {
2474        return res;
2475    }
2476}
2477
2478static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2479{
2480    uint8_t round;
2481    int64_t res;
2482
2483    res = (int64_t)a * (int64_t)b;
2484    round = get_round(vxrm, res, 31);
2485    res   = (res >> 31) + round;
2486
2487    if (res > INT32_MAX) {
2488        env->vxsat = 0x1;
2489        return INT32_MAX;
2490    } else if (res < INT32_MIN) {
2491        env->vxsat = 0x1;
2492        return INT32_MIN;
2493    } else {
2494        return res;
2495    }
2496}
2497
2498static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2499{
2500    uint8_t round;
2501    uint64_t hi_64, lo_64;
2502    int64_t res;
2503
2504    if (a == INT64_MIN && b == INT64_MIN) {
2505        env->vxsat = 1;
2506        return INT64_MAX;
2507    }
2508
2509    muls64(&lo_64, &hi_64, a, b);
2510    round = get_round(vxrm, lo_64, 63);
2511    /*
2512     * Cannot overflow, as there are always
2513     * 2 sign bits after multiply.
2514     */
2515    res = (hi_64 << 1) | (lo_64 >> 63);
2516    if (round) {
2517        if (res == INT64_MAX) {
2518            env->vxsat = 1;
2519        } else {
2520            res += 1;
2521        }
2522    }
2523    return res;
2524}
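/*
 * Worked example (illustrative, not part of the upstream file): vsmul
 * computes (a * b) >> 63 with rounding and saturation.  For a = b = 1 << 62
 * the 128-bit product is 2**124, so muls64() yields hi_64 = 2**60 and
 * lo_64 = 0, giving res = (hi_64 << 1) | (lo_64 >> 63) = 2**61 with no
 * rounding increment.  Only a = b = INT64_MIN makes the unrounded shifted
 * product exceed INT64_MAX, which is why it is special-cased above; a
 * rounding carry out of INT64_MAX is caught by the res == INT64_MAX check.
 */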
2525
2526RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2527RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2528RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2529RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2530GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1)
2531GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2)
2532GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4)
2533GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8)
2534
2535RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2536RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2537RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2538RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2539GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1)
2540GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2)
2541GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4)
2542GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8)
2543
2544/* Vector Single-Width Scaling Shift Instructions */
2545static inline uint8_t
2546vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2547{
2548    uint8_t round, shift = b & 0x7;
2549    uint8_t res;
2550
2551    round = get_round(vxrm, a, shift);
2552    res   = (a >> shift)  + round;
2553    return res;
2554}
2555static inline uint16_t
2556vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2557{
2558    uint8_t round, shift = b & 0xf;
2559    uint16_t res;
2560
2561    round = get_round(vxrm, a, shift);
2562    res   = (a >> shift)  + round;
2563    return res;
2564}
2565static inline uint32_t
2566vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2567{
2568    uint8_t round, shift = b & 0x1f;
2569    uint32_t res;
2570
2571    round = get_round(vxrm, a, shift);
2572    res   = (a >> shift)  + round;
2573    return res;
2574}
2575static inline uint64_t
2576vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2577{
2578    uint8_t round, shift = b & 0x3f;
2579    uint64_t res;
2580
2581    round = get_round(vxrm, a, shift);
2582    res   = (a >> shift)  + round;
2583    return res;
2584}
2585RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2586RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2587RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2588RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2589GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1)
2590GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2)
2591GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4)
2592GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8)
2593
2594RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2595RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2596RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2597RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2598GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1)
2599GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2)
2600GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4)
2601GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8)
2602
2603static inline int8_t
2604vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2605{
2606    uint8_t round, shift = b & 0x7;
2607    int8_t res;
2608
2609    round = get_round(vxrm, a, shift);
2610    res   = (a >> shift)  + round;
2611    return res;
2612}
2613static inline int16_t
2614vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2615{
2616    uint8_t round, shift = b & 0xf;
2617    int16_t res;
2618
2619    round = get_round(vxrm, a, shift);
2620    res   = (a >> shift)  + round;
2621    return res;
2622}
2623static inline int32_t
2624vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2625{
2626    uint8_t round, shift = b & 0x1f;
2627    int32_t res;
2628
2629    round = get_round(vxrm, a, shift);
2630    res   = (a >> shift)  + round;
2631    return res;
2632}
2633static inline int64_t
2634vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2635{
2636    uint8_t round, shift = b & 0x3f;
2637    int64_t res;
2638
2639    round = get_round(vxrm, a, shift);
2640    res   = (a >> shift)  + round;
2641    return res;
2642}
2643
2644RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2645RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2646RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2647RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2648GEN_VEXT_VV_RM(vssra_vv_b, 1, 1)
2649GEN_VEXT_VV_RM(vssra_vv_h, 2, 2)
2650GEN_VEXT_VV_RM(vssra_vv_w, 4, 4)
2651GEN_VEXT_VV_RM(vssra_vv_d, 8, 8)
2652
2653RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2654RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2655RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2656RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2657GEN_VEXT_VX_RM(vssra_vx_b, 1, 1)
2658GEN_VEXT_VX_RM(vssra_vx_h, 2, 2)
2659GEN_VEXT_VX_RM(vssra_vx_w, 4, 4)
2660GEN_VEXT_VX_RM(vssra_vx_d, 8, 8)
2661
2662/* Vector Narrowing Fixed-Point Clip Instructions */
2663static inline int8_t
2664vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2665{
2666    uint8_t round, shift = b & 0xf;
2667    int16_t res;
2668
2669    round = get_round(vxrm, a, shift);
2670    res   = (a >> shift)  + round;
2671    if (res > INT8_MAX) {
2672        env->vxsat = 0x1;
2673        return INT8_MAX;
2674    } else if (res < INT8_MIN) {
2675        env->vxsat = 0x1;
2676        return INT8_MIN;
2677    } else {
2678        return res;
2679    }
2680}
2681
2682static inline int16_t
2683vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2684{
2685    uint8_t round, shift = b & 0x1f;
2686    int32_t res;
2687
2688    round = get_round(vxrm, a, shift);
2689    res   = (a >> shift)  + round;
2690    if (res > INT16_MAX) {
2691        env->vxsat = 0x1;
2692        return INT16_MAX;
2693    } else if (res < INT16_MIN) {
2694        env->vxsat = 0x1;
2695        return INT16_MIN;
2696    } else {
2697        return res;
2698    }
2699}
2700
2701static inline int32_t
2702vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2703{
2704    uint8_t round, shift = b & 0x3f;
2705    int64_t res;
2706
2707    round = get_round(vxrm, a, shift);
2708    res   = (a >> shift)  + round;
2709    if (res > INT32_MAX) {
2710        env->vxsat = 0x1;
2711        return INT32_MAX;
2712    } else if (res < INT32_MIN) {
2713        env->vxsat = 0x1;
2714        return INT32_MIN;
2715    } else {
2716        return res;
2717    }
2718}
2719
2720RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2721RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2722RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2723GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1)
2724GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2)
2725GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4)
2726
2727RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2728RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2729RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2730GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
2731GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
2732GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)
2733
2734static inline uint8_t
2735vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2736{
2737    uint8_t round, shift = b & 0xf;
2738    uint16_t res;
2739
2740    round = get_round(vxrm, a, shift);
2741    res   = (a >> shift)  + round;
2742    if (res > UINT8_MAX) {
2743        env->vxsat = 0x1;
2744        return UINT8_MAX;
2745    } else {
2746        return res;
2747    }
2748}
2749
2750static inline uint16_t
2751vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2752{
2753    uint8_t round, shift = b & 0x1f;
2754    uint32_t res;
2755
2756    round = get_round(vxrm, a, shift);
2757    res   = (a >> shift)  + round;
2758    if (res > UINT16_MAX) {
2759        env->vxsat = 0x1;
2760        return UINT16_MAX;
2761    } else {
2762        return res;
2763    }
2764}
2765
2766static inline uint32_t
2767vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2768{
2769    uint8_t round, shift = b & 0x3f;
2770    uint64_t res;
2771
2772    round = get_round(vxrm, a, shift);
2773    res   = (a >> shift)  + round;
2774    if (res > UINT32_MAX) {
2775        env->vxsat = 0x1;
2776        return UINT32_MAX;
2777    } else {
2778        return res;
2779    }
2780}
2781
2782RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2783RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2784RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2785GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
2786GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
2787GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)
2788
2789RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2790RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2791RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
2792GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
2793GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
2794GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)
2795
2796/*
2797 *** Vector Floating-Point Arithmetic Instructions
2798 */
2799/* Vector Single-Width Floating-Point Add/Subtract Instructions */
2800#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
2801static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
2802                      CPURISCVState *env)                      \
2803{                                                              \
2804    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
2805    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2806    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
2807}
2808
2809#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
2810void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
2811                  void *vs2, CPURISCVState *env,          \
2812                  uint32_t desc)                          \
2813{                                                         \
2814    uint32_t vm = vext_vm(desc);                          \
2815    uint32_t vl = env->vl;                                \
2816    uint32_t i;                                           \
2817                                                          \
2818    for (i = env->vstart; i < vl; i++) {                  \
2819        if (!vm && !vext_elem_mask(v0, i)) {              \
2820            continue;                                     \
2821        }                                                 \
2822        do_##NAME(vd, vs1, vs2, i, env);                  \
2823    }                                                     \
2824    env->vstart = 0;                                      \
2825}
2826
2827RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2828RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2829RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
2830GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
2831GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
2832GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)
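
/*
 * As a reference for how the RVVCALL/GEN_VEXT_VV_ENV pairs above fit
 * together, a hand-expanded sketch (not compiled code, assuming OP_UUU_H
 * names uint16_t throughout): RVVCALL(OPFVV2, vfadd_vv_h, ...) produces
 *
 *     static void do_vfadd_vv_h(void *vd, void *vs1, void *vs2, int i,
 *                               CPURISCVState *env)
 *     {
 *         uint16_t s1 = *((uint16_t *)vs1 + H2(i));
 *         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *         *((uint16_t *)vd + H2(i)) = float16_add(s2, s1, &env->fp_status);
 *     }
 *
 * and GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) wraps it in helper_vfadd_vv_h(),
 * which loops from env->vstart to env->vl, skips masked-off elements when
 * vm == 0, and finally clears env->vstart.
 */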
2833
2834#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
2835static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
2836                      CPURISCVState *env)                      \
2837{                                                              \
2838    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
2839    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2840}
2841
2842#define GEN_VEXT_VF(NAME, ESZ, DSZ)                       \
2843void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
2844                  void *vs2, CPURISCVState *env,          \
2845                  uint32_t desc)                          \
2846{                                                         \
2847    uint32_t vm = vext_vm(desc);                          \
2848    uint32_t vl = env->vl;                                \
2849    uint32_t i;                                           \
2850                                                          \
2851    for (i = env->vstart; i < vl; i++) {                  \
2852        if (!vm && !vext_elem_mask(v0, i)) {              \
2853            continue;                                     \
2854        }                                                 \
2855        do_##NAME(vd, s1, vs2, i, env);                   \
2856    }                                                     \
2857    env->vstart = 0;                                      \
2858}
2859
2860RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2861RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2862RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
2863GEN_VEXT_VF(vfadd_vf_h, 2, 2)
2864GEN_VEXT_VF(vfadd_vf_w, 4, 4)
2865GEN_VEXT_VF(vfadd_vf_d, 8, 8)
2866
2867RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2868RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2869RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
2870GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2)
2871GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4)
2872GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8)
2873RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2874RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2875RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
2876GEN_VEXT_VF(vfsub_vf_h, 2, 2)
2877GEN_VEXT_VF(vfsub_vf_w, 4, 4)
2878GEN_VEXT_VF(vfsub_vf_d, 8, 8)
2879
2880static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2881{
2882    return float16_sub(b, a, s);
2883}
2884
2885static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2886{
2887    return float32_sub(b, a, s);
2888}
2889
2890static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2891{
2892    return float64_sub(b, a, s);
2893}
2894
2895RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2896RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2897RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
2898GEN_VEXT_VF(vfrsub_vf_h, 2, 2)
2899GEN_VEXT_VF(vfrsub_vf_w, 4, 4)
2900GEN_VEXT_VF(vfrsub_vf_d, 8, 8)
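
/*
 * float16_rsub() and friends just swap the operands before subtracting,
 * so vfrsub.vf computes f[rs1] - vs2[i] rather than vs2[i] - f[rs1].
 */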
2901
2902/* Vector Widening Floating-Point Add/Subtract Instructions */
2903static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2904{
2905    return float32_add(float16_to_float32(a, true, s),
2906            float16_to_float32(b, true, s), s);
2907}
2908
2909static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2910{
2911    return float64_add(float32_to_float64(a, s),
2912            float32_to_float64(b, s), s);
2913
2914}
2915
2916RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
2917RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
2918GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4)
2919GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8)
2920RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
2921RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
2922GEN_VEXT_VF(vfwadd_vf_h, 2, 4)
2923GEN_VEXT_VF(vfwadd_vf_w, 4, 8)
2924
2925static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
2926{
2927    return float32_sub(float16_to_float32(a, true, s),
2928            float16_to_float32(b, true, s), s);
2929}
2930
2931static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
2932{
2933    return float64_sub(float32_to_float64(a, s),
2934            float32_to_float64(b, s), s);
2935
2936}
2937
2938RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
2939RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
2940GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4)
2941GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8)
2942RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
2943RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
2944GEN_VEXT_VF(vfwsub_vf_h, 2, 4)
2945GEN_VEXT_VF(vfwsub_vf_w, 4, 8)
2946
2947static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
2948{
2949    return float32_add(a, float16_to_float32(b, true, s), s);
2950}
2951
2952static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
2953{
2954    return float64_add(a, float32_to_float64(b, s), s);
2955}
2956
2957RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
2958RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
2959GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4)
2960GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8)
2961RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
2962RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
2963GEN_VEXT_VF(vfwadd_wf_h, 2, 4)
2964GEN_VEXT_VF(vfwadd_wf_w, 4, 8)
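
/*
 * In the vfwadd.wv/.wf forms (and the vfwsub.w forms further down) the
 * vs2 operand is already 2*SEW wide, so only the vs1/rs1 operand is
 * converted up before the wide-format add or subtract, as the
 * vfwaddw16/vfwaddw32 signatures above show.
 */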
2965
2966static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
2967{
2968    return float32_sub(a, float16_to_float32(b, true, s), s);
2969}
2970
2971static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
2972{
2973    return float64_sub(a, float32_to_float64(b, s), s);
2974}
2975
2976RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
2977RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
2978GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4)
2979GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8)
2980RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
2981RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
2982GEN_VEXT_VF(vfwsub_wf_h, 2, 4)
2983GEN_VEXT_VF(vfwsub_wf_w, 4, 8)
2984
2985/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
2986RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
2987RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
2988RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
2989GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2)
2990GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4)
2991GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8)
2992RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
2993RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
2994RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
2995GEN_VEXT_VF(vfmul_vf_h, 2, 2)
2996GEN_VEXT_VF(vfmul_vf_w, 4, 4)
2997GEN_VEXT_VF(vfmul_vf_d, 8, 8)
2998
2999RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3000RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3001RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3002GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2)
3003GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4)
3004GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8)
3005RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3006RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3007RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3008GEN_VEXT_VF(vfdiv_vf_h, 2, 2)
3009GEN_VEXT_VF(vfdiv_vf_w, 4, 4)
3010GEN_VEXT_VF(vfdiv_vf_d, 8, 8)
3011
3012static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3013{
3014    return float16_div(b, a, s);
3015}
3016
3017static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3018{
3019    return float32_div(b, a, s);
3020}
3021
3022static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3023{
3024    return float64_div(b, a, s);
3025}
3026
3027RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3028RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3029RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3030GEN_VEXT_VF(vfrdiv_vf_h, 2, 2)
3031GEN_VEXT_VF(vfrdiv_vf_w, 4, 4)
3032GEN_VEXT_VF(vfrdiv_vf_d, 8, 8)
3033
3034/* Vector Widening Floating-Point Multiply */
3035static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3036{
3037    return float32_mul(float16_to_float32(a, true, s),
3038            float16_to_float32(b, true, s), s);
3039}
3040
3041static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3042{
3043    return float64_mul(float32_to_float64(a, s),
3044            float32_to_float64(b, s), s);
3045
3046}
3047RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3048RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3049GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4)
3050GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8)
3051RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3052RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3053GEN_VEXT_VF(vfwmul_vf_h, 2, 4)
3054GEN_VEXT_VF(vfwmul_vf_w, 4, 8)
3055
3056/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3057#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
3058static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
3059        CPURISCVState *env)                                        \
3060{                                                                  \
3061    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
3062    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
3063    TD d = *((TD *)vd + HD(i));                                    \
3064    *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);          \
3065}
3066
3067static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3068{
3069    return float16_muladd(a, b, d, 0, s);
3070}
3071
3072static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3073{
3074    return float32_muladd(a, b, d, 0, s);
3075}
3076
3077static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3078{
3079    return float64_muladd(a, b, d, 0, s);
3080}
3081
3082RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3083RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3084RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3085GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
3086GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4)
3087GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8)
3088
3089#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
3090static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,    \
3091        CPURISCVState *env)                                       \
3092{                                                                 \
3093    TX2 s2 = *((T2 *)vs2 + HS2(i));                               \
3094    TD d = *((TD *)vd + HD(i));                                   \
3095    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3096}
3097
3098RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3099RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3100RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3101GEN_VEXT_VF(vfmacc_vf_h, 2, 2)
3102GEN_VEXT_VF(vfmacc_vf_w, 4, 4)
3103GEN_VEXT_VF(vfmacc_vf_d, 8, 8)
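
/*
 * OPFVV3/OPFVF3 pass (s2, s1, d) to the scalar helper, and fmacc*(a, b, d)
 * computes a * b + d, so vfmacc produces vd[i] = vs1[i] * vs2[i] + vd[i].
 * The vfnmacc/vfmsac/vfnmsac variants below differ only in which of the
 * product and the addend are negated via the float_muladd_negate_* flags.
 */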
3104
3105static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3106{
3107    return float16_muladd(a, b, d,
3108            float_muladd_negate_c | float_muladd_negate_product, s);
3109}
3110
3111static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3112{
3113    return float32_muladd(a, b, d,
3114            float_muladd_negate_c | float_muladd_negate_product, s);
3115}
3116
3117static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3118{
3119    return float64_muladd(a, b, d,
3120            float_muladd_negate_c | float_muladd_negate_product, s);
3121}
3122
3123RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3124RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3125RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3126GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2)
3127GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4)
3128GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8)
3129RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3130RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3131RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3132GEN_VEXT_VF(vfnmacc_vf_h, 2, 2)
3133GEN_VEXT_VF(vfnmacc_vf_w, 4, 4)
3134GEN_VEXT_VF(vfnmacc_vf_d, 8, 8)
3135
3136static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3137{
3138    return float16_muladd(a, b, d, float_muladd_negate_c, s);
3139}
3140
3141static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3142{
3143    return float32_muladd(a, b, d, float_muladd_negate_c, s);
3144}
3145
3146static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3147{
3148    return float64_muladd(a, b, d, float_muladd_negate_c, s);
3149}
3150
3151RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3152RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3153RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3154GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2)
3155GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4)
3156GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8)
3157RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3158RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3159RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3160GEN_VEXT_VF(vfmsac_vf_h, 2, 2)
3161GEN_VEXT_VF(vfmsac_vf_w, 4, 4)
3162GEN_VEXT_VF(vfmsac_vf_d, 8, 8)
3163
3164static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3165{
3166    return float16_muladd(a, b, d, float_muladd_negate_product, s);
3167}
3168
3169static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3170{
3171    return float32_muladd(a, b, d, float_muladd_negate_product, s);
3172}
3173
3174static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3175{
3176    return float64_muladd(a, b, d, float_muladd_negate_product, s);
3177}
3178
3179RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3180RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3181RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3182GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2)
3183GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4)
3184GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8)
3185RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3186RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3187RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3188GEN_VEXT_VF(vfnmsac_vf_h, 2, 2)
3189GEN_VEXT_VF(vfnmsac_vf_w, 4, 4)
3190GEN_VEXT_VF(vfnmsac_vf_d, 8, 8)
3191
3192static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3193{
3194    return float16_muladd(d, b, a, 0, s);
3195}
3196
3197static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3198{
3199    return float32_muladd(d, b, a, 0, s);
3200}
3201
3202static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3203{
3204    return float64_muladd(d, b, a, 0, s);
3205}
3206
3207RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3208RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3209RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3210GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2)
3211GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4)
3212GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8)
3213RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3214RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3215RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3216GEN_VEXT_VF(vfmadd_vf_h, 2, 2)
3217GEN_VEXT_VF(vfmadd_vf_w, 4, 4)
3218GEN_VEXT_VF(vfmadd_vf_d, 8, 8)
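
/*
 * fmadd*(a, b, d) calls float*_muladd(d, b, a, ...), i.e. the destination
 * element is one of the multiplicands: vfmadd computes
 * vd[i] = vd[i] * vs1[i] + vs2[i].  The vfnmadd/vfmsub/vfnmsub variants
 * that follow again select the appropriate negate flags.
 */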
3219
3220static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3221{
3222    return float16_muladd(d, b, a,
3223            float_muladd_negate_c | float_muladd_negate_product, s);
3224}
3225
3226static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3227{
3228    return float32_muladd(d, b, a,
3229            float_muladd_negate_c | float_muladd_negate_product, s);
3230}
3231
3232static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3233{
3234    return float64_muladd(d, b, a,
3235            float_muladd_negate_c | float_muladd_negate_product, s);
3236}
3237
3238RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3239RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3240RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3241GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2)
3242GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4)
3243GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8)
3244RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3245RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3246RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3247GEN_VEXT_VF(vfnmadd_vf_h, 2, 2)
3248GEN_VEXT_VF(vfnmadd_vf_w, 4, 4)
3249GEN_VEXT_VF(vfnmadd_vf_d, 8, 8)
3250
3251static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3252{
3253    return float16_muladd(d, b, a, float_muladd_negate_c, s);
3254}
3255
3256static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3257{
3258    return float32_muladd(d, b, a, float_muladd_negate_c, s);
3259}
3260
3261static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3262{
3263    return float64_muladd(d, b, a, float_muladd_negate_c, s);
3264}
3265
3266RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3267RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3268RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3269GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2)
3270GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4)
3271GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8)
3272RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3273RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3274RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3275GEN_VEXT_VF(vfmsub_vf_h, 2, 2)
3276GEN_VEXT_VF(vfmsub_vf_w, 4, 4)
3277GEN_VEXT_VF(vfmsub_vf_d, 8, 8)
3278
3279static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3280{
3281    return float16_muladd(d, b, a, float_muladd_negate_product, s);
3282}
3283
3284static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3285{
3286    return float32_muladd(d, b, a, float_muladd_negate_product, s);
3287}
3288
3289static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3290{
3291    return float64_muladd(d, b, a, float_muladd_negate_product, s);
3292}
3293
3294RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3295RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3296RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3297GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2)
3298GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4)
3299GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8)
3300RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3301RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3302RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3303GEN_VEXT_VF(vfnmsub_vf_h, 2, 2)
3304GEN_VEXT_VF(vfnmsub_vf_w, 4, 4)
3305GEN_VEXT_VF(vfnmsub_vf_d, 8, 8)
3306
3307/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3308static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3309{
3310    return float32_muladd(float16_to_float32(a, true, s),
3311                        float16_to_float32(b, true, s), d, 0, s);
3312}
3313
3314static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3315{
3316    return float64_muladd(float32_to_float64(a, s),
3317                        float32_to_float64(b, s), d, 0, s);
3318}
3319
3320RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3321RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3322GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4)
3323GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8)
3324RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3325RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3326GEN_VEXT_VF(vfwmacc_vf_h, 2, 4)
3327GEN_VEXT_VF(vfwmacc_vf_w, 4, 8)
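
/*
 * The widening FMA helpers convert both SEW-wide multiplicands up to
 * 2*SEW before the fused multiply-add; the accumulator d is already a
 * 2*SEW destination element.
 */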
3328
3329static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3330{
3331    return float32_muladd(float16_to_float32(a, true, s),
3332                        float16_to_float32(b, true, s), d,
3333                        float_muladd_negate_c | float_muladd_negate_product, s);
3334}
3335
3336static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3337{
3338    return float64_muladd(float32_to_float64(a, s),
3339                        float32_to_float64(b, s), d,
3340                        float_muladd_negate_c | float_muladd_negate_product, s);
3341}
3342
3343RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3344RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3345GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4)
3346GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8)
3347RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3348RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3349GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4)
3350GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8)
3351
3352static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3353{
3354    return float32_muladd(float16_to_float32(a, true, s),
3355                        float16_to_float32(b, true, s), d,
3356                        float_muladd_negate_c, s);
3357}
3358
3359static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3360{
3361    return float64_muladd(float32_to_float64(a, s),
3362                        float32_to_float64(b, s), d,
3363                        float_muladd_negate_c, s);
3364}
3365
3366RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3367RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3368GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4)
3369GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8)
3370RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3371RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3372GEN_VEXT_VF(vfwmsac_vf_h, 2, 4)
3373GEN_VEXT_VF(vfwmsac_vf_w, 4, 8)
3374
3375static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3376{
3377    return float32_muladd(float16_to_float32(a, true, s),
3378                        float16_to_float32(b, true, s), d,
3379                        float_muladd_negate_product, s);
3380}
3381
3382static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3383{
3384    return float64_muladd(float32_to_float64(a, s),
3385                        float32_to_float64(b, s), d,
3386                        float_muladd_negate_product, s);
3387}
3388
3389RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3390RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3391GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4)
3392GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8)
3393RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3394RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3395GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4)
3396GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8)
3397
3398/* Vector Floating-Point Square-Root Instruction */
3399/* (TD, T2, TX2) */
3400#define OP_UU_H uint16_t, uint16_t, uint16_t
3401#define OP_UU_W uint32_t, uint32_t, uint32_t
3402#define OP_UU_D uint64_t, uint64_t, uint64_t
3403
3404#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)        \
3405static void do_##NAME(void *vd, void *vs2, int i,      \
3406        CPURISCVState *env)                            \
3407{                                                      \
3408    TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
3409    *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
3410}
3411
3412#define GEN_VEXT_V_ENV(NAME, ESZ, DSZ)                 \
3413void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
3414        CPURISCVState *env, uint32_t desc)             \
3415{                                                      \
3416    uint32_t vm = vext_vm(desc);                       \
3417    uint32_t vl = env->vl;                             \
3418    uint32_t i;                                        \
3419                                                       \
3420    if (vl == 0) {                                     \
3421        return;                                        \
3422    }                                                  \
3423    for (i = env->vstart; i < vl; i++) {               \
3424        if (!vm && !vext_elem_mask(v0, i)) {           \
3425            continue;                                  \
3426        }                                              \
3427        do_##NAME(vd, vs2, i, env);                    \
3428    }                                                  \
3429    env->vstart = 0;                                   \
3430}
3431
3432RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3433RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3434RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3435GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2)
3436GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4)
3437GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8)
3438
3439/*
3440 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3441 *
3442 * Adapted from riscv-v-spec recip.c:
3443 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3444 */
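/*
 * The 7-bit estimate is looked up with an index built from the low bit of
 * the (normalized) exponent and the top 6 fraction bits.  The result
 * exponent works out to (3 * bias - 1 - exp) / 2: MAKE_64BIT_MASK(0,
 * exp_size - 1) is the exponent bias, and ~exp equals -exp - 1 under
 * two's-complement wraparound.
 */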
3445static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3446{
3447    uint64_t sign = extract64(f, frac_size + exp_size, 1);
3448    uint64_t exp = extract64(f, frac_size, exp_size);
3449    uint64_t frac = extract64(f, 0, frac_size);
3450
3451    const uint8_t lookup_table[] = {
3452        52, 51, 50, 48, 47, 46, 44, 43,
3453        42, 41, 40, 39, 38, 36, 35, 34,
3454        33, 32, 31, 30, 30, 29, 28, 27,
3455        26, 25, 24, 23, 23, 22, 21, 20,
3456        19, 19, 18, 17, 16, 16, 15, 14,
3457        14, 13, 12, 12, 11, 10, 10, 9,
3458        9, 8, 7, 7, 6, 6, 5, 4,
3459        4, 3, 3, 2, 2, 1, 1, 0,
3460        127, 125, 123, 121, 119, 118, 116, 114,
3461        113, 111, 109, 108, 106, 105, 103, 102,
3462        100, 99, 97, 96, 95, 93, 92, 91,
3463        90, 88, 87, 86, 85, 84, 83, 82,
3464        80, 79, 78, 77, 76, 75, 74, 73,
3465        72, 71, 70, 70, 69, 68, 67, 66,
3466        65, 64, 63, 63, 62, 61, 60, 59,
3467        59, 58, 57, 56, 56, 55, 54, 53
3468    };
3469    const int precision = 7;
3470
3471    if (exp == 0 && frac != 0) { /* subnormal */
3472        /* Normalize the subnormal. */
3473        while (extract64(frac, frac_size - 1, 1) == 0) {
3474            exp--;
3475            frac <<= 1;
3476        }
3477
3478        frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3479    }
3480
3481    int idx = ((exp & 1) << (precision - 1)) |
3482                (frac >> (frac_size - precision + 1));
3483    uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3484                            (frac_size - precision);
3485    uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3486
3487    uint64_t val = 0;
3488    val = deposit64(val, 0, frac_size, out_frac);
3489    val = deposit64(val, frac_size, exp_size, out_exp);
3490    val = deposit64(val, frac_size + exp_size, 1, sign);
3491    return val;
3492}
3493
3494static float16 frsqrt7_h(float16 f, float_status *s)
3495{
3496    int exp_size = 5, frac_size = 10;
3497    bool sign = float16_is_neg(f);
3498
3499    /*
3500     * frsqrt7(sNaN) = canonical NaN
3501     * frsqrt7(-inf) = canonical NaN
3502     * frsqrt7(-normal) = canonical NaN
3503     * frsqrt7(-subnormal) = canonical NaN
3504     */
3505    if (float16_is_signaling_nan(f, s) ||
3506            (float16_is_infinity(f) && sign) ||
3507            (float16_is_normal(f) && sign) ||
3508            (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3509        s->float_exception_flags |= float_flag_invalid;
3510        return float16_default_nan(s);
3511    }
3512
3513    /* frsqrt7(qNaN) = canonical NaN */
3514    if (float16_is_quiet_nan(f, s)) {
3515        return float16_default_nan(s);
3516    }
3517
3518    /* frsqrt7(+-0) = +-inf */
3519    if (float16_is_zero(f)) {
3520        s->float_exception_flags |= float_flag_divbyzero;
3521        return float16_set_sign(float16_infinity, sign);
3522    }
3523
3524    /* frsqrt7(+inf) = +0 */
3525    if (float16_is_infinity(f) && !sign) {
3526        return float16_set_sign(float16_zero, sign);
3527    }
3528
3529    /* +normal, +subnormal */
3530    uint64_t val = frsqrt7(f, exp_size, frac_size);
3531    return make_float16(val);
3532}
3533
3534static float32 frsqrt7_s(float32 f, float_status *s)
3535{
3536    int exp_size = 8, frac_size = 23;
3537    bool sign = float32_is_neg(f);
3538
3539    /*
3540     * frsqrt7(sNaN) = canonical NaN
3541     * frsqrt7(-inf) = canonical NaN
3542     * frsqrt7(-normal) = canonical NaN
3543     * frsqrt7(-subnormal) = canonical NaN
3544     */
3545    if (float32_is_signaling_nan(f, s) ||
3546            (float32_is_infinity(f) && sign) ||
3547            (float32_is_normal(f) && sign) ||
3548            (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3549        s->float_exception_flags |= float_flag_invalid;
3550        return float32_default_nan(s);
3551    }
3552
3553    /* frsqrt7(qNaN) = canonical NaN */
3554    if (float32_is_quiet_nan(f, s)) {
3555        return float32_default_nan(s);
3556    }
3557
3558    /* frsqrt7(+-0) = +-inf */
3559    if (float32_is_zero(f)) {
3560        s->float_exception_flags |= float_flag_divbyzero;
3561        return float32_set_sign(float32_infinity, sign);
3562    }
3563
3564    /* frsqrt7(+inf) = +0 */
3565    if (float32_is_infinity(f) && !sign) {
3566        return float32_set_sign(float32_zero, sign);
3567    }
3568
3569    /* +normal, +subnormal */
3570    uint64_t val = frsqrt7(f, exp_size, frac_size);
3571    return make_float32(val);
3572}
3573
3574static float64 frsqrt7_d(float64 f, float_status *s)
3575{
3576    int exp_size = 11, frac_size = 52;
3577    bool sign = float64_is_neg(f);
3578
3579    /*
3580     * frsqrt7(sNaN) = canonical NaN
3581     * frsqrt7(-inf) = canonical NaN
3582     * frsqrt7(-normal) = canonical NaN
3583     * frsqrt7(-subnormal) = canonical NaN
3584     */
3585    if (float64_is_signaling_nan(f, s) ||
3586            (float64_is_infinity(f) && sign) ||
3587            (float64_is_normal(f) && sign) ||
3588            (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3589        s->float_exception_flags |= float_flag_invalid;
3590        return float64_default_nan(s);
3591    }
3592
3593    /* frsqrt7(qNaN) = canonical NaN */
3594    if (float64_is_quiet_nan(f, s)) {
3595        return float64_default_nan(s);
3596    }
3597
3598    /* frsqrt7(+-0) = +-inf */
3599    if (float64_is_zero(f)) {
3600        s->float_exception_flags |= float_flag_divbyzero;
3601        return float64_set_sign(float64_infinity, sign);
3602    }
3603
3604    /* frsqrt7(+inf) = +0 */
3605    if (float64_is_infinity(f) && !sign) {
3606        return float64_set_sign(float64_zero, sign);
3607    }
3608
3609    /* +normal, +subnormal */
3610    uint64_t val = frsqrt7(f, exp_size, frac_size);
3611    return make_float64(val);
3612}
3613
3614RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3615RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3616RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3617GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2)
3618GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4)
3619GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8)
3620
3621/*
3622 * Vector Floating-Point Reciprocal Estimate Instruction
3623 *
3624 * Adapted from riscv-v-spec recip.c:
3625 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3626 */
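/*
 * Here the table index is simply the top 7 fraction bits, and the result
 * exponent is 2 * bias - 1 - exp (again using ~exp == -exp - 1).  Inputs
 * whose reciprocal would overflow, or would land in the subnormal range,
 * are handled by the special cases below.
 */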
3627static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3628                      float_status *s)
3629{
3630    uint64_t sign = extract64(f, frac_size + exp_size, 1);
3631    uint64_t exp = extract64(f, frac_size, exp_size);
3632    uint64_t frac = extract64(f, 0, frac_size);
3633
3634    const uint8_t lookup_table[] = {
3635        127, 125, 123, 121, 119, 117, 116, 114,
3636        112, 110, 109, 107, 105, 104, 102, 100,
3637        99, 97, 96, 94, 93, 91, 90, 88,
3638        87, 85, 84, 83, 81, 80, 79, 77,
3639        76, 75, 74, 72, 71, 70, 69, 68,
3640        66, 65, 64, 63, 62, 61, 60, 59,
3641        58, 57, 56, 55, 54, 53, 52, 51,
3642        50, 49, 48, 47, 46, 45, 44, 43,
3643        42, 41, 40, 40, 39, 38, 37, 36,
3644        35, 35, 34, 33, 32, 31, 31, 30,
3645        29, 28, 28, 27, 26, 25, 25, 24,
3646        23, 23, 22, 21, 21, 20, 19, 19,
3647        18, 17, 17, 16, 15, 15, 14, 14,
3648        13, 12, 12, 11, 11, 10, 9, 9,
3649        8, 8, 7, 7, 6, 5, 5, 4,
3650        4, 3, 3, 2, 2, 1, 1, 0
3651    };
3652    const int precision = 7;
3653
3654    if (exp == 0 && frac != 0) { /* subnormal */
3655        /* Normalize the subnormal. */
3656        while (extract64(frac, frac_size - 1, 1) == 0) {
3657            exp--;
3658            frac <<= 1;
3659        }
3660
3661        frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3662
3663        if (exp != 0 && exp != UINT64_MAX) {
3664            /*
3665             * Overflow to inf or max value of same sign,
3666             * depending on sign and rounding mode.
3667             */
3668            s->float_exception_flags |= (float_flag_inexact |
3669                                         float_flag_overflow);
3670
3671            if ((s->float_rounding_mode == float_round_to_zero) ||
3672                ((s->float_rounding_mode == float_round_down) && !sign) ||
3673                ((s->float_rounding_mode == float_round_up) && sign)) {
3674                /* Return the largest-magnitude finite value of the same sign. */
3675                return (sign << (exp_size + frac_size)) |
3676                    (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3677            } else {
3678                /* Return +-inf. */
3679                return (sign << (exp_size + frac_size)) |
3680                    MAKE_64BIT_MASK(frac_size, exp_size);
3681            }
3682        }
3683    }
3684
3685    int idx = frac >> (frac_size - precision);
3686    uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3687                            (frac_size - precision);
3688    uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3689
3690    if (out_exp == 0 || out_exp == UINT64_MAX) {
3691        /*
3692         * The result is subnormal, but don't raise the underflow exception,
3693         * because there's no additional loss of precision.
3694         */
3695        out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3696        if (out_exp == UINT64_MAX) {
3697            out_frac >>= 1;
3698            out_exp = 0;
3699        }
3700    }
3701
3702    uint64_t val = 0;
3703    val = deposit64(val, 0, frac_size, out_frac);
3704    val = deposit64(val, frac_size, exp_size, out_exp);
3705    val = deposit64(val, frac_size + exp_size, 1, sign);
3706    return val;
3707}
3708
3709static float16 frec7_h(float16 f, float_status *s)
3710{
3711    int exp_size = 5, frac_size = 10;
3712    bool sign = float16_is_neg(f);
3713
3714    /* frec7(+-inf) = +-0 */
3715    if (float16_is_infinity(f)) {
3716        return float16_set_sign(float16_zero, sign);
3717    }
3718
3719    /* frec7(+-0) = +-inf */
3720    if (float16_is_zero(f)) {
3721        s->float_exception_flags |= float_flag_divbyzero;
3722        return float16_set_sign(float16_infinity, sign);
3723    }
3724
3725    /* frec7(sNaN) = canonical NaN */
3726    if (float16_is_signaling_nan(f, s)) {
3727        s->float_exception_flags |= float_flag_invalid;
3728        return float16_default_nan(s);
3729    }
3730
3731    /* frec7(qNaN) = canonical NaN */
3732    if (float16_is_quiet_nan(f, s)) {
3733        return float16_default_nan(s);
3734    }
3735
3736    /* +-normal, +-subnormal */
3737    uint64_t val = frec7(f, exp_size, frac_size, s);
3738    return make_float16(val);
3739}
3740
3741static float32 frec7_s(float32 f, float_status *s)
3742{
3743    int exp_size = 8, frac_size = 23;
3744    bool sign = float32_is_neg(f);
3745
3746    /* frec7(+-inf) = +-0 */
3747    if (float32_is_infinity(f)) {
3748        return float32_set_sign(float32_zero, sign);
3749    }
3750
3751    /* frec7(+-0) = +-inf */
3752    if (float32_is_zero(f)) {
3753        s->float_exception_flags |= float_flag_divbyzero;
3754        return float32_set_sign(float32_infinity, sign);
3755    }
3756
3757    /* frec7(sNaN) = canonical NaN */
3758    if (float32_is_signaling_nan(f, s)) {
3759        s->float_exception_flags |= float_flag_invalid;
3760        return float32_default_nan(s);
3761    }
3762
3763    /* frec7(qNaN) = canonical NaN */
3764    if (float32_is_quiet_nan(f, s)) {
3765        return float32_default_nan(s);
3766    }
3767
3768    /* +-normal, +-subnormal */
3769    uint64_t val = frec7(f, exp_size, frac_size, s);
3770    return make_float32(val);
3771}
3772
3773static float64 frec7_d(float64 f, float_status *s)
3774{
3775    int exp_size = 11, frac_size = 52;
3776    bool sign = float64_is_neg(f);
3777
3778    /* frec7(+-inf) = +-0 */
3779    if (float64_is_infinity(f)) {
3780        return float64_set_sign(float64_zero, sign);
3781    }
3782
3783    /* frec7(+-0) = +-inf */
3784    if (float64_is_zero(f)) {
3785        s->float_exception_flags |= float_flag_divbyzero;
3786        return float64_set_sign(float64_infinity, sign);
3787    }
3788
3789    /* frec7(sNaN) = canonical NaN */
3790    if (float64_is_signaling_nan(f, s)) {
3791        s->float_exception_flags |= float_flag_invalid;
3792        return float64_default_nan(s);
3793    }
3794
3795    /* frec7(qNaN) = canonical NaN */
3796    if (float64_is_quiet_nan(f, s)) {
3797        return float64_default_nan(s);
3798    }
3799
3800    /* +-normal, +-subnormal */
3801    uint64_t val = frec7(f, exp_size, frac_size, s);
3802    return make_float64(val);
3803}
3804
3805RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
3806RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
3807RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
3808GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2)
3809GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4)
3810GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8)
3811
3812/* Vector Floating-Point MIN/MAX Instructions */
3813RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3814RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3815RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
3816GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2)
3817GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4)
3818GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8)
3819RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3820RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3821RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
3822GEN_VEXT_VF(vfmin_vf_h, 2, 2)
3823GEN_VEXT_VF(vfmin_vf_w, 4, 4)
3824GEN_VEXT_VF(vfmin_vf_d, 8, 8)
3825
3826RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3827RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3828RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
3829GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2)
3830GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4)
3831GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8)
3832RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3833RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3834RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
3835GEN_VEXT_VF(vfmax_vf_h, 2, 2)
3836GEN_VEXT_VF(vfmax_vf_w, 4, 4)
3837GEN_VEXT_VF(vfmax_vf_d, 8, 8)
3838
3839/* Vector Floating-Point Sign-Injection Instructions */
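/*
 * The helpers below keep the exponent and fraction of a (the vs2 element)
 * and take the sign from b, ~b or a ^ b for vfsgnj, vfsgnjn and vfsgnjx
 * respectively: deposit64(x, 0, SEW - 1, a) overwrites everything below
 * the sign bit of x with the corresponding bits of a.
 */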
3840static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3841{
3842    return deposit64(b, 0, 15, a);
3843}
3844
3845static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3846{
3847    return deposit64(b, 0, 31, a);
3848}
3849
3850static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3851{
3852    return deposit64(b, 0, 63, a);
3853}
3854
3855RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3856RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3857RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3858GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2)
3859GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4)
3860GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8)
3861RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3862RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3863RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3864GEN_VEXT_VF(vfsgnj_vf_h, 2, 2)
3865GEN_VEXT_VF(vfsgnj_vf_w, 4, 4)
3866GEN_VEXT_VF(vfsgnj_vf_d, 8, 8)
3867
3868static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3869{
3870    return deposit64(~b, 0, 15, a);
3871}
3872
3873static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3874{
3875    return deposit64(~b, 0, 31, a);
3876}
3877
3878static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3879{
3880    return deposit64(~b, 0, 63, a);
3881}
3882
3883RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3884RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3885RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3886GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2)
3887GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4)
3888GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8)
3889RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3890RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3891RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3892GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2)
3893GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4)
3894GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8)
3895
3896static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
3897{
3898    return deposit64(b ^ a, 0, 15, a);
3899}
3900
3901static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
3902{
3903    return deposit64(b ^ a, 0, 31, a);
3904}
3905
3906static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
3907{
3908    return deposit64(b ^ a, 0, 63, a);
3909}
3910
3911RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
3912RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
3913RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
3914GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2)
3915GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4)
3916GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8)
3917RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
3918RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
3919RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
3920GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2)
3921GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4)
3922GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8)
3923
3924/* Vector Floating-Point Compare Instructions */
3925#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
3926void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
3927                  CPURISCVState *env, uint32_t desc)          \
3928{                                                             \
3929    uint32_t vm = vext_vm(desc);                              \
3930    uint32_t vl = env->vl;                                    \
3931    uint32_t i;                                               \
3932                                                              \
3933    for (i = env->vstart; i < vl; i++) {                      \
3934        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
3935        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
3936        if (!vm && !vext_elem_mask(v0, i)) {                  \
3937            continue;                                         \
3938        }                                                     \
3939        vext_set_elem_mask(vd, i,                             \
3940                           DO_OP(s2, s1, &env->fp_status));   \
3941    }                                                         \
3942    env->vstart = 0;                                          \
3943}
3944
3945GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
3946GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
3947GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
3948
3949#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
3950void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
3951                  CPURISCVState *env, uint32_t desc)                \
3952{                                                                   \
3953    uint32_t vm = vext_vm(desc);                                    \
3954    uint32_t vl = env->vl;                                          \
3955    uint32_t i;                                                     \
3956                                                                    \
3957    for (i = env->vstart; i < vl; i++) {                            \
3958        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
3959        if (!vm && !vext_elem_mask(v0, i)) {                        \
3960            continue;                                               \
3961        }                                                           \
3962        vext_set_elem_mask(vd, i,                                   \
3963                           DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
3964    }                                                               \
3965    env->vstart = 0;                                                \
3966}
3967
3968GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
3969GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
3970GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
3971
3972static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
3973{
3974    FloatRelation compare = float16_compare_quiet(a, b, s);
3975    return compare != float_relation_equal;
3976}
3977
3978static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
3979{
3980    FloatRelation compare = float32_compare_quiet(a, b, s);
3981    return compare != float_relation_equal;
3982}
3983
3984static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
3985{
3986    FloatRelation compare = float64_compare_quiet(a, b, s);
3987    return compare != float_relation_equal;
3988}
3989
3990GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
3991GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
3992GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
3993GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
3994GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
3995GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
3996
3997GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
3998GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
3999GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4000GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4001GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4002GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4003
4004GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4005GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4006GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4007GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4008GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4009GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4010
4011static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4012{
4013    FloatRelation compare = float16_compare(a, b, s);
4014    return compare == float_relation_greater;
4015}
4016
4017static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4018{
4019    FloatRelation compare = float32_compare(a, b, s);
4020    return compare == float_relation_greater;
4021}
4022
4023static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4024{
4025    FloatRelation compare = float64_compare(a, b, s);
4026    return compare == float_relation_greater;
4027}
4028
4029GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4030GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4031GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4032
4033static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4034{
4035    FloatRelation compare = float16_compare(a, b, s);
4036    return compare == float_relation_greater ||
4037           compare == float_relation_equal;
4038}
4039
4040static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4041{
4042    FloatRelation compare = float32_compare(a, b, s);
4043    return compare == float_relation_greater ||
4044           compare == float_relation_equal;
4045}
4046
4047static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4048{
4049    FloatRelation compare = float64_compare(a, b, s);
4050    return compare == float_relation_greater ||
4051           compare == float_relation_equal;
4052}
4053
4054GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4055GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4056GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
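
/*
 * Note the asymmetry above: vmfeq/vmfne are built on the quiet compare
 * primitives, while vmflt/vmfle/vmfgt/vmfge use float*_lt, float*_le and
 * float*_compare, which also raise the invalid flag when a quiet NaN is
 * an operand.
 */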
4057
4058/* Vector Floating-Point Classify Instruction */
4059#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
4060static void do_##NAME(void *vd, void *vs2, int i)      \
4061{                                                      \
4062    TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
4063    *((TD *)vd + HD(i)) = OP(s2);                      \
4064}
4065
4066#define GEN_VEXT_V(NAME, ESZ, DSZ)                     \
4067void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
4068                  CPURISCVState *env, uint32_t desc)   \
4069{                                                      \
4070    uint32_t vm = vext_vm(desc);                       \
4071    uint32_t vl = env->vl;                             \
4072    uint32_t i;                                        \
4073                                                       \
4074    for (i = env->vstart; i < vl; i++) {               \
4075        if (!vm && !vext_elem_mask(v0, i)) {           \
4076            continue;                                  \
4077        }                                              \
4078        do_##NAME(vd, vs2, i);                         \
4079    }                                                  \
4080    env->vstart = 0;                                   \
4081}
4082
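/*
 * fclass_h/s/d return the standard RISC-V classification mask:
 *   bit 0: -inf         bit 5: +subnormal
 *   bit 1: -normal      bit 6: +normal
 *   bit 2: -subnormal   bit 7: +inf
 *   bit 3: -0           bit 8: signaling NaN
 *   bit 4: +0           bit 9: quiet NaN
 */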
4083target_ulong fclass_h(uint64_t frs1)
4084{
4085    float16 f = frs1;
4086    bool sign = float16_is_neg(f);
4087
4088    if (float16_is_infinity(f)) {
4089        return sign ? 1 << 0 : 1 << 7;
4090    } else if (float16_is_zero(f)) {
4091        return sign ? 1 << 3 : 1 << 4;
4092    } else if (float16_is_zero_or_denormal(f)) {
4093        return sign ? 1 << 2 : 1 << 5;
4094    } else if (float16_is_any_nan(f)) {
4095        float_status s = { }; /* for snan_bit_is_one */
4096        return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4097    } else {
4098        return sign ? 1 << 1 : 1 << 6;
4099    }
4100}
4101
4102target_ulong fclass_s(uint64_t frs1)
4103{
4104    float32 f = frs1;
4105    bool sign = float32_is_neg(f);
4106
4107    if (float32_is_infinity(f)) {
4108        return sign ? 1 << 0 : 1 << 7;
4109    } else if (float32_is_zero(f)) {
4110        return sign ? 1 << 3 : 1 << 4;
4111    } else if (float32_is_zero_or_denormal(f)) {
4112        return sign ? 1 << 2 : 1 << 5;
4113    } else if (float32_is_any_nan(f)) {
4114        float_status s = { }; /* for snan_bit_is_one */
4115        return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4116    } else {
4117        return sign ? 1 << 1 : 1 << 6;
4118    }
4119}
4120
4121target_ulong fclass_d(uint64_t frs1)
4122{
4123    float64 f = frs1;
4124    bool sign = float64_is_neg(f);
4125
4126    if (float64_is_infinity(f)) {
4127        return sign ? 1 << 0 : 1 << 7;
4128    } else if (float64_is_zero(f)) {
4129        return sign ? 1 << 3 : 1 << 4;
4130    } else if (float64_is_zero_or_denormal(f)) {
4131        return sign ? 1 << 2 : 1 << 5;
4132    } else if (float64_is_any_nan(f)) {
4133        float_status s = { }; /* for snan_bit_is_one */
4134        return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4135    } else {
4136        return sign ? 1 << 1 : 1 << 6;
4137    }
4138}
4139
4140RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4141RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4142RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4143GEN_VEXT_V(vfclass_v_h, 2, 2)
4144GEN_VEXT_V(vfclass_v_w, 4, 4)
4145GEN_VEXT_V(vfclass_v_d, 8, 8)
4146
4147/* Vector Floating-Point Merge Instruction */
4148#define GEN_VFMERGE_VF(NAME, ETYPE, H)                        \
4149void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4150                  CPURISCVState *env, uint32_t desc)          \
4151{                                                             \
4152    uint32_t vm = vext_vm(desc);                              \
4153    uint32_t vl = env->vl;                                    \
4154    uint32_t i;                                               \
4155                                                              \
4156    for (i = env->vstart; i < vl; i++) {                      \
4157        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
4158        *((ETYPE *)vd + H(i))                                 \
4159          = (!vm && !vext_elem_mask(v0, i) ? s2 : s1);        \
4160    }                                                         \
4161    env->vstart = 0;                                          \
4162}
4163
4164GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4165GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4166GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4167
4168/* Single-Width Floating-Point/Integer Type-Convert Instructions */
4169/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4170RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4171RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4172RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4173GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2)
4174GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4)
4175GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8)
4176
4177/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4178RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4179RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4180RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4181GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2)
4182GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4)
4183GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8)
4184
4185/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4186RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4187RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4188RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4189GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2)
4190GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4)
4191GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8)
4192
4193/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4194RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4195RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4196RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4197GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
4198GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
4199GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)
4200
4201/* Widening Floating-Point/Integer Type-Convert Instructions */
4202/* (TD, T2, TX2) */
4203#define WOP_UU_B uint16_t, uint8_t,  uint8_t
4204#define WOP_UU_H uint32_t, uint16_t, uint16_t
4205#define WOP_UU_W uint64_t, uint32_t, uint32_t
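/* TD is twice the width of T2/TX2: results are written as double-width elements. */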
4206/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
4207RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4208RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4209GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
4210GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)
4211
4212/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4213RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4214RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4215GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
4216GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)
4217
4218/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
4219RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4220RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4221RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4222GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2)
4223GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
4224GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)
4225
4226/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4227RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4228RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4229RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4230GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2)
4231GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
4232GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)
4233
4234/*
4235 * vfwcvt.f.f.v vd, vs2, vm
4236 * Convert single-width float to double-width float.
4237 */
4238static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4239{
4240    return float16_to_float32(a, true, s);
4241}
4242
4243RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4244RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4245GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
4246GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
4247
4248/* Narrowing Floating-Point/Integer Type-Convert Instructions */
4249/* (TD, T2, TX2) */
4250#define NOP_UU_B uint8_t,  uint16_t, uint32_t
4251#define NOP_UU_H uint16_t, uint32_t, uint32_t
4252#define NOP_UU_W uint32_t, uint64_t, uint64_t
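/* TD is the narrowed destination type; the source elements (T2) are double-width. */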
4253/* vfncvt.xu.f.w vd, vs2, vm # Convert double-width float to unsigned integer. */
4254RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4255RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4256RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
4257GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1)
4258GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2)
4259GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4)
4260
4261/* vfncvt.x.f.w vd, vs2, vm # Convert double-width float to signed integer. */
4262RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4263RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4264RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
4265GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1)
4266GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2)
4267GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4)
4268
4269/* vfncvt.f.xu.w vd, vs2, vm # Convert double-width unsigned integer to float. */
4270RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4271RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
4272GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2)
4273GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4)
4274
4275/* vfncvt.f.x.w vd, vs2, vm # Convert double-width integer to float. */
4276RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4277RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
4278GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2)
4279GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4)
4280
4281/* vfncvt.f.f.w vd, vs2, vm # Convert double-width float to single-width float. */
4282static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4283{
4284    return float32_to_float16(a, true, s);
4285}
4286
4287RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4288RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4289GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2)
4290GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4)
4291
4292/*
4293 *** Vector Reduction Operations
4294 */
4295/* Vector Single-Width Integer Reduction Instructions */
4296#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
4297void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4298        void *vs2, CPURISCVState *env, uint32_t desc)     \
4299{                                                         \
4300    uint32_t vm = vext_vm(desc);                          \
4301    uint32_t vl = env->vl;                                \
4302    uint32_t i;                                           \
4303    TD s1 =  *((TD *)vs1 + HD(0));                        \
4304                                                          \
4305    for (i = env->vstart; i < vl; i++) {                  \
4306        TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
4307        if (!vm && !vext_elem_mask(v0, i)) {              \
4308            continue;                                     \
4309        }                                                 \
4310        s1 = OP(s1, (TD)s2);                              \
4311    }                                                     \
4312    *((TD *)vd + HD(0)) = s1;                             \
4313    env->vstart = 0;                                      \
4314}
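/*
 * Example: with vl = 4 and all elements active, vredsum.vs computes
 *   vd[0] = vs1[0] + vs2[0] + vs2[1] + vs2[2] + vs2[3]
 * Inactive (masked-off) elements of vs2 are simply skipped.
 */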
4315
4316/* vd[0] = sum(vs1[0], vs2[*]) */
4317GEN_VEXT_RED(vredsum_vs_b, int8_t,  int8_t,  H1, H1, DO_ADD)
4318GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4319GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4320GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4321
4322/* vd[0] = maxu(vs1[0], vs2[*]) */
4323GEN_VEXT_RED(vredmaxu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MAX)
4324GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4325GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4326GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4327
4328/* vd[0] = max(vs1[0], vs2[*]) */
4329GEN_VEXT_RED(vredmax_vs_b, int8_t,  int8_t,  H1, H1, DO_MAX)
4330GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4331GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4332GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4333
4334/* vd[0] = minu(vs1[0], vs2[*]) */
4335GEN_VEXT_RED(vredminu_vs_b, uint8_t,  uint8_t,  H1, H1, DO_MIN)
4336GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4337GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4338GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4339
4340/* vd[0] = min(vs1[0], vs2[*]) */
4341GEN_VEXT_RED(vredmin_vs_b, int8_t,  int8_t,  H1, H1, DO_MIN)
4342GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4343GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4344GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4345
4346/* vd[0] = and(vs1[0], vs2[*]) */
4347GEN_VEXT_RED(vredand_vs_b, int8_t,  int8_t,  H1, H1, DO_AND)
4348GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4349GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4350GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4351
4352/* vd[0] = or(vs1[0], vs2[*]) */
4353GEN_VEXT_RED(vredor_vs_b, int8_t,  int8_t,  H1, H1, DO_OR)
4354GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4355GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4356GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4357
4358/* vd[0] = xor(vs1[0], vs2[*]) */
4359GEN_VEXT_RED(vredxor_vs_b, int8_t,  int8_t,  H1, H1, DO_XOR)
4360GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4361GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4362GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4363
4364/* Vector Widening Integer Reduction Instructions */
4365/* signed sum reduction into double-width accumulator */
4366GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t,  H2, H1, DO_ADD)
4367GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4368GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4369
4370/* Unsigned sum reduction into double-width accumulator */
4371GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t,  H2, H1, DO_ADD)
4372GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4373GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4374
4375/* Vector Single-Width Floating-Point Reduction Instructions */
4376#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)          \
4377void HELPER(NAME)(void *vd, void *v0, void *vs1,           \
4378                  void *vs2, CPURISCVState *env,           \
4379                  uint32_t desc)                           \
4380{                                                          \
4381    uint32_t vm = vext_vm(desc);                           \
4382    uint32_t vl = env->vl;                                 \
4383    uint32_t i;                                            \
4384    TD s1 =  *((TD *)vs1 + HD(0));                         \
4385                                                           \
4386    for (i = env->vstart; i < vl; i++) {                   \
4387        TS2 s2 = *((TS2 *)vs2 + HS2(i));                   \
4388        if (!vm && !vext_elem_mask(v0, i)) {               \
4389            continue;                                      \
4390        }                                                  \
4391        s1 = OP(s1, (TD)s2, &env->fp_status);              \
4392    }                                                      \
4393    *((TD *)vd + HD(0)) = s1;                              \
4394    env->vstart = 0;                                       \
4395}
4396
4397/* Unordered sum */
4398GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4399GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4400GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
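/*
 * The accumulation above proceeds in element order, which is one allowed
 * ordering for the unordered floating-point sum reduction.
 */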
4401
4402/* Maximum value */
4403GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
4404GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
4405GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
4406
4407/* Minimum value */
4408GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
4409GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
4410GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
4411
4412/* Vector Widening Floating-Point Reduction Instructions */
4413/* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4414void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4415                            void *vs2, CPURISCVState *env, uint32_t desc)
4416{
4417    uint32_t vm = vext_vm(desc);
4418    uint32_t vl = env->vl;
4419    uint32_t i;
4420    uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
4421
4422    for (i = env->vstart; i < vl; i++) {
4423        uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4424        if (!vm && !vext_elem_mask(v0, i)) {
4425            continue;
4426        }
4427        s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4428                         &env->fp_status);
4429    }
4430    *((uint32_t *)vd + H4(0)) = s1;
4431    env->vstart = 0;
4432}
4433
4434void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4435                            void *vs2, CPURISCVState *env, uint32_t desc)
4436{
4437    uint32_t vm = vext_vm(desc);
4438    uint32_t vl = env->vl;
4439    uint32_t i;
4440    uint64_t s1 =  *((uint64_t *)vs1);
4441
4442    for (i = env->vstart; i < vl; i++) {
4443        uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4444        if (!vm && !vext_elem_mask(v0, i)) {
4445            continue;
4446        }
4447        s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4448                         &env->fp_status);
4449    }
4450    *((uint64_t *)vd) = s1;
4451    env->vstart = 0;
4452}
4453
4454/*
4455 *** Vector Mask Operations
4456 */
4457/* Vector Mask-Register Logical Instructions */
4458#define GEN_VEXT_MASK_VV(NAME, OP)                        \
4459void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
4460                  void *vs2, CPURISCVState *env,          \
4461                  uint32_t desc)                          \
4462{                                                         \
4463    uint32_t vl = env->vl;                                \
4464    uint32_t i;                                           \
4465    int a, b;                                             \
4466                                                          \
4467    for (i = env->vstart; i < vl; i++) {                  \
4468        a = vext_elem_mask(vs1, i);                       \
4469        b = vext_elem_mask(vs2, i);                       \
4470        vext_set_elem_mask(vd, i, OP(b, a));              \
4471    }                                                     \
4472    env->vstart = 0;                                      \
4473}
4474
4475#define DO_NAND(N, M)  (!(N & M))
4476#define DO_ANDNOT(N, M)  (N & !M)
4477#define DO_NOR(N, M)  (!(N | M))
4478#define DO_ORNOT(N, M)  (N | !M)
4479#define DO_XNOR(N, M)  (!(N ^ M))
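/*
 * The operands here are single mask bits (0 or 1), so logical negation (!)
 * acts as a one-bit complement in DO_NAND, DO_ANDNOT, DO_NOR, etc.
 */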
4480
4481GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4482GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4483GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
4484GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4485GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4486GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4487GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
4488GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4489
4490/* Vector count population in mask vcpop */
4491target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4492                             uint32_t desc)
4493{
4494    target_ulong cnt = 0;
4495    uint32_t vm = vext_vm(desc);
4496    uint32_t vl = env->vl;
4497    int i;
4498
4499    for (i = env->vstart; i < vl; i++) {
4500        if (vm || vext_elem_mask(v0, i)) {
4501            if (vext_elem_mask(vs2, i)) {
4502                cnt++;
4503            }
4504        }
4505    }
4506    env->vstart = 0;
4507    return cnt;
4508}
4509
4510/* vfirst find-first-set mask bit */
4511target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4512                              uint32_t desc)
4513{
4514    uint32_t vm = vext_vm(desc);
4515    uint32_t vl = env->vl;
4516    int i;
4517
4518    for (i = env->vstart; i < vl; i++) {
4519        if (vm || vext_elem_mask(v0, i)) {
4520            if (vext_elem_mask(vs2, i)) {
4521                return i;
4522            }
4523        }
4524    }
4525    env->vstart = 0;
4526    return -1LL;
4527}
4528
4529enum set_mask_type {
4530    ONLY_FIRST = 1,
4531    INCLUDE_FIRST,
4532    BEFORE_FIRST,
4533};
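/*
 * Example (all elements active), with vs2 = 0b00101000, i.e. the first set
 * bit at index 3:
 *   BEFORE_FIRST  (vmsbf) -> 0b00000111
 *   INCLUDE_FIRST (vmsif) -> 0b00001111
 *   ONLY_FIRST    (vmsof) -> 0b00001000
 */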
4534
4535static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4536                   uint32_t desc, enum set_mask_type type)
4537{
4538    uint32_t vm = vext_vm(desc);
4539    uint32_t vl = env->vl;
4540    int i;
4541    bool first_mask_bit = false;
4542
4543    for (i = env->vstart; i < vl; i++) {
4544        if (!vm && !vext_elem_mask(v0, i)) {
4545            continue;
4546        }
4547        /* write a zero to all following active elements */
4548        if (first_mask_bit) {
4549            vext_set_elem_mask(vd, i, 0);
4550            continue;
4551        }
4552        if (vext_elem_mask(vs2, i)) {
4553            first_mask_bit = true;
4554            if (type == BEFORE_FIRST) {
4555                vext_set_elem_mask(vd, i, 0);
4556            } else {
4557                vext_set_elem_mask(vd, i, 1);
4558            }
4559        } else {
4560            if (type == ONLY_FIRST) {
4561                vext_set_elem_mask(vd, i, 0);
4562            } else {
4563                vext_set_elem_mask(vd, i, 1);
4564            }
4565        }
4566    }
4567    env->vstart = 0;
4568}
4569
4570void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4571                     uint32_t desc)
4572{
4573    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4574}
4575
4576void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4577                     uint32_t desc)
4578{
4579    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4580}
4581
4582void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4583                     uint32_t desc)
4584{
4585    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4586}
4587
4588/* Vector Iota Instruction */
4589#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
4590void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
4591                  uint32_t desc)                                          \
4592{                                                                         \
4593    uint32_t vm = vext_vm(desc);                                          \
4594    uint32_t vl = env->vl;                                                \
4595    uint32_t sum = 0;                                                     \
4596    int i;                                                                \
4597                                                                          \
4598    for (i = env->vstart; i < vl; i++) {                                  \
4599        if (!vm && !vext_elem_mask(v0, i)) {                              \
4600            continue;                                                     \
4601        }                                                                 \
4602        *((ETYPE *)vd + H(i)) = sum;                                      \
4603        if (vext_elem_mask(vs2, i)) {                                     \
4604            sum++;                                                        \
4605        }                                                                 \
4606    }                                                                     \
4607    env->vstart = 0;                                                      \
4608}
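/*
 * Example: vs2 mask = 1,0,0,1,0,1 (indices 0..5, all active) gives
 * vd = {0, 1, 1, 1, 2, 2}: each active element receives the count of
 * mask bits set in vs2 below its index.
 */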
4609
4610GEN_VEXT_VIOTA_M(viota_m_b, uint8_t,  H1)
4611GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4612GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4613GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4614
4615/* Vector Element Index Instruction */
4616#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
4617void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
4618{                                                                         \
4619    uint32_t vm = vext_vm(desc);                                          \
4620    uint32_t vl = env->vl;                                                \
4621    int i;                                                                \
4622                                                                          \
4623    for (i = env->vstart; i < vl; i++) {                                  \
4624        if (!vm && !vext_elem_mask(v0, i)) {                              \
4625            continue;                                                     \
4626        }                                                                 \
4627        *((ETYPE *)vd + H(i)) = i;                                        \
4628    }                                                                     \
4629    env->vstart = 0;                                                      \
4630}
4631
4632GEN_VEXT_VID_V(vid_v_b, uint8_t,  H1)
4633GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4634GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4635GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4636
4637/*
4638 *** Vector Permutation Instructions
4639 */
4640
4641/* Vector Slide Instructions */
4642#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
4643void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4644                  CPURISCVState *env, uint32_t desc)                      \
4645{                                                                         \
4646    uint32_t vm = vext_vm(desc);                                          \
4647    uint32_t vl = env->vl;                                                \
4648    target_ulong offset = s1, i_min, i;                                   \
4649                                                                          \
4650    i_min = MAX(env->vstart, offset);                                     \
4651    for (i = i_min; i < vl; i++) {                                        \
4652        if (!vm && !vext_elem_mask(v0, i)) {                              \
4653            continue;                                                     \
4654        }                                                                 \
4655        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
4656    }                                                                     \
    env->vstart = 0;                                                      \
4657}
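/*
 * The loop starts at MAX(vstart, offset), so destination elements below the
 * slide offset (and masked-off elements) are left unchanged.
 */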
4658
4659/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4660GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
4661GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4662GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4663GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4664
4665#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
4666void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4667                  CPURISCVState *env, uint32_t desc)                      \
4668{                                                                         \
4669    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4670    uint32_t vm = vext_vm(desc);                                          \
4671    uint32_t vl = env->vl;                                                \
4672    target_ulong i_max, i;                                                \
4673                                                                          \
4674    i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
4675    for (i = env->vstart; i < i_max; ++i) {                               \
4676        if (vm || vext_elem_mask(v0, i)) {                                \
4677            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
4678        }                                                                 \
4679    }                                                                     \
4680                                                                          \
4681    for (i = i_max; i < vl; ++i) {                                        \
4682        if (vm || vext_elem_mask(v0, i)) {                                \
4683            *((ETYPE *)vd + H(i)) = 0;                                    \
4684        }                                                                 \
4685    }                                                                     \
4686                                                                          \
4687    env->vstart = 0;                                                      \
4688}
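/*
 * Active destination elements whose source index i + rs1 would reach or
 * exceed VLMAX are written with zero by the second loop above.
 */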
4689
4690/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4691GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
4692GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4693GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4694GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
4695
4696#define GEN_VEXT_VSLIDE1UP(ESZ, H)                                          \
4697static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4698                     CPURISCVState *env, uint32_t desc)                     \
4699{                                                                           \
4700    typedef uint##ESZ##_t ETYPE;                                            \
4701    uint32_t vm = vext_vm(desc);                                            \
4702    uint32_t vl = env->vl;                                                  \
4703    uint32_t i;                                                             \
4704                                                                            \
4705    for (i = env->vstart; i < vl; i++) {                                    \
4706        if (!vm && !vext_elem_mask(v0, i)) {                                \
4707            continue;                                                       \
4708        }                                                                   \
4709        if (i == 0) {                                                       \
4710            *((ETYPE *)vd + H(i)) = s1;                                     \
4711        } else {                                                            \
4712            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));             \
4713        }                                                                   \
4714    }                                                                       \
4715    env->vstart = 0;                                                        \
4716}
4717
4718GEN_VEXT_VSLIDE1UP(8,  H1)
4719GEN_VEXT_VSLIDE1UP(16, H2)
4720GEN_VEXT_VSLIDE1UP(32, H4)
4721GEN_VEXT_VSLIDE1UP(64, H8)
4722
4723#define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ)                          \
4724void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4725                  CPURISCVState *env, uint32_t desc)              \
4726{                                                                 \
4727    vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);                  \
4728}
4729
4730/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4731GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
4732GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
4733GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
4734GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4735
4736#define GEN_VEXT_VSLIDE1DOWN(ESZ, H)                                          \
4737static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \
4738                       CPURISCVState *env, uint32_t desc)                     \
4739{                                                                             \
4740    typedef uint##ESZ##_t ETYPE;                                              \
4741    uint32_t vm = vext_vm(desc);                                              \
4742    uint32_t vl = env->vl;                                                    \
4743    uint32_t i;                                                               \
4744                                                                              \
4745    for (i = env->vstart; i < vl; i++) {                                      \
4746        if (!vm && !vext_elem_mask(v0, i)) {                                  \
4747            continue;                                                         \
4748        }                                                                     \
4749        if (i == vl - 1) {                                                    \
4750            *((ETYPE *)vd + H(i)) = s1;                                       \
4751        } else {                                                              \
4752            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));               \
4753        }                                                                     \
4754    }                                                                         \
4755    env->vstart = 0;                                                          \
4756}
4757
4758GEN_VEXT_VSLIDE1DOWN(8,  H1)
4759GEN_VEXT_VSLIDE1DOWN(16, H2)
4760GEN_VEXT_VSLIDE1DOWN(32, H4)
4761GEN_VEXT_VSLIDE1DOWN(64, H8)
4762
4763#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ)                        \
4764void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4765                  CPURISCVState *env, uint32_t desc)              \
4766{                                                                 \
4767    vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);                \
4768}
4769
4770/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4771GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
4772GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
4773GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
4774GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
4775
4776/* Vector Floating-Point Slide Instructions */
4777#define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ)                     \
4778void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4779                  CPURISCVState *env, uint32_t desc)          \
4780{                                                             \
4781    vslide1up_##ESZ(vd, v0, s1, vs2, env, desc);              \
4782}
4783
4784/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
4785GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
4786GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
4787GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
4788
4789#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ)                   \
4790void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4791                  CPURISCVState *env, uint32_t desc)          \
4792{                                                             \
4793    vslide1down_##ESZ(vd, v0, s1, vs2, env, desc);            \
4794}
4795
4796/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
4797GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
4798GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
4799GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
4800
4801/* Vector Register Gather Instruction */
4802#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
4803void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4804                  CPURISCVState *env, uint32_t desc)                      \
4805{                                                                         \
4806    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
4807    uint32_t vm = vext_vm(desc);                                          \
4808    uint32_t vl = env->vl;                                                \
4809    uint64_t index;                                                       \
4810    uint32_t i;                                                           \
4811                                                                          \
4812    for (i = env->vstart; i < vl; i++) {                                  \
4813        if (!vm && !vext_elem_mask(v0, i)) {                              \
4814            continue;                                                     \
4815        }                                                                 \
4816        index = *((TS1 *)vs1 + HS1(i));                                   \
4817        if (index >= vlmax) {                                             \
4818            *((TS2 *)vd + HS2(i)) = 0;                                    \
4819        } else {                                                          \
4820            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
4821        }                                                                 \
4822    }                                                                     \
4823    env->vstart = 0;                                                      \
4824}
4825
4826/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
4827GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  uint8_t,  H1, H1)
4828GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
4829GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
4830GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
4831
4832GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t,  H2, H1)
4833GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
4834GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
4835GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
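/*
 * vrgatherei16 always takes its indices (TS1) from 16-bit elements of vs1,
 * regardless of the data element width of vd/vs2.
 */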
4836
4837#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
4838void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4839                  CPURISCVState *env, uint32_t desc)                      \
4840{                                                                         \
4841    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4842    uint32_t vm = vext_vm(desc);                                          \
4843    uint32_t vl = env->vl;                                                \
4844    uint64_t index = s1;                                                  \
4845    uint32_t i;                                                           \
4846                                                                          \
4847    for (i = env->vstart; i < vl; i++) {                                  \
4848        if (!vm && !vext_elem_mask(v0, i)) {                              \
4849            continue;                                                     \
4850        }                                                                 \
4851        if (index >= vlmax) {                                             \
4852            *((ETYPE *)vd + H(i)) = 0;                                    \
4853        } else {                                                          \
4854            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
4855        }                                                                 \
4856    }                                                                     \
4857    env->vstart = 0;                                                      \
4858}
4859
4860/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
4861GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
4862GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
4863GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
4864GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
4865
4866/* Vector Compress Instruction */
4867#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
4868void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
4869                  CPURISCVState *env, uint32_t desc)                      \
4870{                                                                         \
4871    uint32_t vl = env->vl;                                                \
4872    uint32_t num = 0, i;                                                  \
4873                                                                          \
4874    for (i = env->vstart; i < vl; i++) {                                  \
4875        if (!vext_elem_mask(vs1, i)) {                                    \
4876            continue;                                                     \
4877        }                                                                 \
4878        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
4879        num++;                                                            \
4880    }                                                                     \
4881    env->vstart = 0;                                                      \
4882}
4883
4884/* Compress into vd elements of vs2 where vs1 is enabled */
4885GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
4886GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
4887GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
4888GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
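/*
 * Example: vs1 mask = 1,0,1,1 and vs2 = {a, b, c, d} packs the selected
 * elements to the front: vd[0] = a, vd[1] = c, vd[2] = d; the remaining
 * elements of vd are not modified here.
 */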
4889
4890/* Vector Whole Register Move */
4891#define GEN_VEXT_VMV_WHOLE(NAME, LEN)                      \
4892void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \
4893                  uint32_t desc)                           \
4894{                                                          \
4895    /* EEW = 8 */                                          \
4896    uint32_t maxsz = simd_maxsz(desc);                     \
4897    uint32_t i = env->vstart;                              \
4898                                                           \
4899    memcpy((uint8_t *)vd + H1(i),                          \
4900           (uint8_t *)vs2 + H1(i),                         \
4901           maxsz - env->vstart);                           \
4902                                                           \
4903    env->vstart = 0;                                       \
4904}
4905
4906GEN_VEXT_VMV_WHOLE(vmv1r_v, 1)
4907GEN_VEXT_VMV_WHOLE(vmv2r_v, 2)
4908GEN_VEXT_VMV_WHOLE(vmv4r_v, 4)
4909GEN_VEXT_VMV_WHOLE(vmv8r_v, 8)
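/*
 * Note that the LEN argument is not used in the body: the number of bytes
 * to copy comes from simd_maxsz(desc), and copying resumes at the byte
 * index held in env->vstart (EEW = 8).
 */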
4910
4911/* Vector Integer Extension */
4912#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)            \
4913void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
4914                  CPURISCVState *env, uint32_t desc)             \
4915{                                                                \
4916    uint32_t vl = env->vl;                                       \
4917    uint32_t vm = vext_vm(desc);                                 \
4918    uint32_t i;                                                  \
4919                                                                 \
4920    for (i = env->vstart; i < vl; i++) {                         \
4921        if (!vm && !vext_elem_mask(v0, i)) {                     \
4922            continue;                                            \
4923        }                                                        \
4924        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
4925    }                                                            \
4926    env->vstart = 0;                                             \
4927}
4928
4929GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
4930GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
4931GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
4932GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
4933GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
4934GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)
4935
4936GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t,  H2, H1)
4937GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
4938GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
4939GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
4940GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
4941GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
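/*
 * vzext.vf2/vf4/vf8 and vsext.vf2/vf4/vf8 widen each source element by a
 * factor of 2, 4 or 8, zero- or sign-extending it into the wider
 * destination element.
 */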
4942