qemu/target/arm/translate-mve.c
/*
 *  ARM translation: M-profile MVE instructions
 *
 *  Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}
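
/*
 * The MVE Q registers share storage with the FP/Neon register file:
 * Qn starts at vfp.zregs[n].d[0] in CPUARMState. mve_qreg_ptr() just
 * materializes a host pointer to that storage, which the helper
 * functions then process beat by beat.
 */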

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
                           default_exception_el(s));
        return false;
    }
}
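
/*
 * A note on the ECI values: "A" is this insn and "B" the following
 * insn in the pair, so e.g. ECI_A0A1 records that beats 0 and 1 of
 * this insn completed before an exception was taken, and on return
 * only the remaining beats may be executed.
 */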

static void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}
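
/*
 * Worked example for mve_update_eci(): if we entered with
 * ECI_A0A1A2B0 then beat 0 of the next insn has also already
 * executed, so once the remaining beats of this insn are done the
 * state becomes ECI_A0; for any other non-NONE value nothing of the
 * next insn has run yet, so the state becomes ECI_NONE.
 */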

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);
    tcg_temp_free_ptr(qreg);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    } else {
        tcg_temp_free_i32(addr);
    }
    mve_update_eci(s);
    return true;
}
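
/*
 * The P, W and A bits of VLDR/VSTR select the usual Arm addressing
 * modes, with the immediate scaled by the memory access size: P=1 W=0
 * is offset addressing, P=1 W=1 is pre-indexed and P=0 W=1 is
 * post-indexed, adding or subtracting (a->imm << msize) per the A bit.
 */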

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
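
/*
 * The widening loads sign- or zero-extend each memory element into a
 * wider vector element, and the narrowing stores write out the low
 * part of each vector element: e.g. VLDSTB_H uses vldrb_sh/vldrb_uh
 * (bytes widened to halfword elements) and vstrb_h (low byte of each
 * halfword element stored). MSIZE is the memory access size, so the
 * immediate offset is scaled by the memory element size, not the
 * vector element size.
 */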

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rt = load_reg(s, a->rt);
    tcg_gen_dup_i32(a->size, rt, rt);
    gen_helper_mve_vdup(cpu_env, qd, rt);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_i32(rt);
    mve_update_eci(s);
    return true;
}
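
/*
 * Note that for MO_8 and MO_16 the tcg_gen_dup_i32() call above has
 * already replicated the low 8 or 16 bits of Rt across the 32-bit
 * value (it is a simple copy for MO_32), so the vdup helper only has
 * to write that same word to each beat of Qd.
 */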

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_1OP(INSN, FN)                                        \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)
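
/*
 * In the fns[] tables throughout this file the index is a->size:
 * 0 for byte, 1 for halfword, 2 for word elements. A NULL entry
 * (e.g. for the reserved size 3) makes the do_* function return
 * false, i.e. the insn UNDEFs.
 */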

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op(s, a, gen_helper_mve_vmvn);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qn, qm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_LOGIC(INSN, HELPER)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op(s, a, HELPER);                            \
    }

DO_LOGIC(VAND, gen_helper_mve_vand)
DO_LOGIC(VBIC, gen_helper_mve_vbic)
DO_LOGIC(VORR, gen_helper_mve_vorr)
DO_LOGIC(VORN, gen_helper_mve_vorn)
DO_LOGIC(VEOR, gen_helper_mve_veor)

#define DO_2OP(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP(VADD, vadd)
DO_2OP(VSUB, vsub)
DO_2OP(VMUL, vmul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP(VMAX_S, vmaxs)
DO_2OP(VMAX_U, vmaxu)
DO_2OP(VMIN_S, vmins)
DO_2OP(VMIN_U, vminu)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next.  The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution.  Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}
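
/*
 * The mve_skip_first_beat() checks above implement the rule from the
 * comment before trans_VADC(): if beat 0 already executed then the
 * fixed initial carry of VADCI/VSBCI was already consumed, so the
 * resumed beats must take their carry-in from FPSCR.C, which is what
 * the plain VADC/VSBC helpers do.
 */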

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    tcg_temp_free_i32(rm);
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN) \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        rda = tcg_const_i64(0);
    }

    fn(rda, cpu_env, qn, qm, rda);
    tcg_temp_free_ptr(qn);
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}
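
/*
 * For example, with RdaLo = r0 and RdaHi = r1 the 64-bit accumulator
 * is r1:r0: it is assembled with tcg_gen_concat_i32_i64() before the
 * helper call and split back into the two registers afterwards with
 * the extrl/extrh pair.
 */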

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    TCGv_i32 vpr;

    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * This insn is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(a->mask | (a->mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(a->mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
    mve_update_and_store_eci(s);
    return true;
}
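
/*
 * Example of the ECI handling above: with ECI_A0A1, beat 1 (the first
 * odd-numbered beat) already ran before the exception and so already
 * updated MASK01; on resumption only MASK23 may be written.
 */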

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i32(0);
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda = tcg_temp_new_i64();
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda, rdalo, rdahi);
        tcg_temp_free_i32(rdalo);
        tcg_temp_free_i32(rdahi);
    } else {
        /* Accumulate starting at zero */
        rda = tcg_const_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda);
    } else {
        gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda);
    }
    tcg_temp_free_ptr(qm);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda);
    tcg_gen_extrh_i64_i32(rdahi, rda);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    tcg_temp_free_i64(rda);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    qd = mve_qreg_ptr(a->qd);
    fn(cpu_env, qd, tcg_constant_i64(imm));
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
        } else {
            fn = gen_helper_mve_vorri;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
    }
    return do_1imm(s, a, fn);
}
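
/*
 * This cmode/op space is the usual Neon "modified immediate"
 * encoding: the odd cmode values below 12 are the shifted per-word
 * and per-halfword forms used by VORR/VBIC, and the rest expand via
 * asimd_imm_const() into the 64-bit constant for VMOV/VMVN (for
 * example cmode 14 with op 0 replicates imm8 into every byte).
 */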

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    tcg_temp_free_ptr(qd);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}
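
/*
 * So, for example, a VSHR.U16 with an immediate of 3 is emitted as a
 * call to the vshli_u halfword helper with a shift count of -3.
 */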

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)                         \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2shift(s, a, fns[a->size], NEGATESHIFT);      \
    }

DO_2SHIFT(VSHLI, vshli_u, false)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT(VSHRI_S, vshli_s, true)
DO_2SHIFT(VSHRI_U, vshli_u, true)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT(VSRI, vsri, false)
DO_2SHIFT(VSLI, vsli, false)

#define DO_VSHLL(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)
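
/*
 * VSHLL widens as it shifts, producing elements twice the input size,
 * so only byte and halfword inputs exist and these fns[] tables have
 * just two entries.
 */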

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)
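
/*
 * These are the narrowing shifts: each wide element is shifted right
 * and the narrowed result written to either the bottom (B) or top (T)
 * half of the corresponding destination element, the other half being
 * left unchanged. The helpers take the positive right-shift count
 * directly, hence negateshift is false.
 */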

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}