   1/*
   2 * AArch64 SVE translation
   3 *
   4 * Copyright (c) 2018 Linaro, Ltd
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "cpu.h"
  22#include "exec/exec-all.h"
  23#include "tcg/tcg-op.h"
  24#include "tcg/tcg-op-gvec.h"
  25#include "tcg/tcg-gvec-desc.h"
  26#include "qemu/log.h"
  27#include "arm_ldst.h"
  28#include "translate.h"
  29#include "internals.h"
  30#include "exec/helper-proto.h"
  31#include "exec/helper-gen.h"
  32#include "exec/log.h"
  33#include "translate-a64.h"
  34#include "fpu/softfloat.h"
  35
  36
/*
 * Function-pointer shapes used by the translators below; they mirror
 * the signatures of the out-of-line helpers generated for SVE.
 */
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

/* Predicate helpers that also return a condition-flags word. */
typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Contiguous and scatter/gather memory access helpers. */
typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
  48
  49/*
  50 * Helpers for extracting complex instruction fields.
  51 */
  52
  53/* See e.g. ASR (immediate, predicated).
  54 * Returns -1 for unallocated encoding; diagnose later.
  55 */
  56static int tszimm_esz(DisasContext *s, int x)
  57{
  58    x >>= 3;  /* discard imm3 */
  59    return 31 - clz32(x);
  60}
  61
  62static int tszimm_shr(DisasContext *s, int x)
  63{
  64    return (16 << tszimm_esz(s, x)) - x;
  65}
  66
  67/* See e.g. LSL (immediate, predicated).  */
  68static int tszimm_shl(DisasContext *s, int x)
  69{
  70    return x - (8 << tszimm_esz(s, x));
  71}
  72
  73/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  74static inline int expand_imm_sh8s(DisasContext *s, int x)
  75{
  76    return (int8_t)x << (x & 0x100 ? 8 : 0);
  77}
  78
  79static inline int expand_imm_sh8u(DisasContext *s, int x)
  80{
  81    return (uint8_t)x << (x & 0x100 ? 8 : 0);
  82}
  83
  84/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  85 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  86 */
  87static inline int msz_dtype(DisasContext *s, int msz)
  88{
  89    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  90    return dtype[msz];
  91}
  92
  93/*
  94 * Include the generated decoder.
  95 */
  96
  97#include "decode-sve.c.inc"
  98
  99/*
 100 * Implement all of the translator functions referenced by the decoder.
 101 */
 102
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}
 110
/* Return the byte size of the whole predicate register, VL / 64.  */
static inline int pred_full_reg_size(DisasContext *s)
{
    /* Predicates hold one bit per vector byte: sve_len / 8.  */
    return s->sve_len >> 3;
}
 116
 117/* Round up the size of a register to a size allowed by
 118 * the tcg vector infrastructure.  Any operation which uses this
 119 * size may assume that the bits above pred_full_reg_size are zero,
 120 * and must leave them the same way.
 121 *
 122 * Note that this is not needed for the vector registers as they
 123 * are always properly sized for tcg vectors.
 124 */
 125static int size_for_gvec(int size)
 126{
 127    if (size <= 8) {
 128        return 8;
 129    } else {
 130        return QEMU_ALIGN_UP(size, 16);
 131    }
 132}
 133
/* Predicate register size, rounded up for use with tcg gvec ops. */
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
 138
 139/* Invoke an out-of-line helper on 2 Zregs. */
 140static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
 141                            int rd, int rn, int data)
 142{
 143    unsigned vsz = vec_full_reg_size(s);
 144    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 145                       vec_full_reg_offset(s, rn),
 146                       vsz, vsz, data, fn);
 147}
 148
 149/* Invoke an out-of-line helper on 3 Zregs. */
 150static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
 151                             int rd, int rn, int rm, int data)
 152{
 153    unsigned vsz = vec_full_reg_size(s);
 154    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 155                       vec_full_reg_offset(s, rn),
 156                       vec_full_reg_offset(s, rm),
 157                       vsz, vsz, data, fn);
 158}
 159
 160/* Invoke an out-of-line helper on 4 Zregs. */
 161static void gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
 162                              int rd, int rn, int rm, int ra, int data)
 163{
 164    unsigned vsz = vec_full_reg_size(s);
 165    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
 166                       vec_full_reg_offset(s, rn),
 167                       vec_full_reg_offset(s, rm),
 168                       vec_full_reg_offset(s, ra),
 169                       vsz, vsz, data, fn);
 170}
 171
 172/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
 173static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
 174                             int rd, int rn, int pg, int data)
 175{
 176    unsigned vsz = vec_full_reg_size(s);
 177    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 178                       vec_full_reg_offset(s, rn),
 179                       pred_full_reg_offset(s, pg),
 180                       vsz, vsz, data, fn);
 181}
 182
 183/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
 184static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
 185                              int rd, int rn, int rm, int pg, int data)
 186{
 187    unsigned vsz = vec_full_reg_size(s);
 188    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
 189                       vec_full_reg_offset(s, rn),
 190                       vec_full_reg_offset(s, rm),
 191                       pred_full_reg_offset(s, pg),
 192                       vsz, vsz, data, fn);
 193}
 194
 195/* Invoke a vector expander on two Zregs.  */
 196static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
 197                           int esz, int rd, int rn)
 198{
 199    unsigned vsz = vec_full_reg_size(s);
 200    gvec_fn(esz, vec_full_reg_offset(s, rd),
 201            vec_full_reg_offset(s, rn), vsz, vsz);
 202}
 203
 204/* Invoke a vector expander on three Zregs.  */
 205static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
 206                            int esz, int rd, int rn, int rm)
 207{
 208    unsigned vsz = vec_full_reg_size(s);
 209    gvec_fn(esz, vec_full_reg_offset(s, rd),
 210            vec_full_reg_offset(s, rn),
 211            vec_full_reg_offset(s, rm), vsz, vsz);
 212}
 213
 214/* Invoke a vector expander on four Zregs.  */
 215static void gen_gvec_fn_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
 216                             int esz, int rd, int rn, int rm, int ra)
 217{
 218    unsigned vsz = vec_full_reg_size(s);
 219    gvec_fn(esz, vec_full_reg_offset(s, rd),
 220            vec_full_reg_offset(s, rn),
 221            vec_full_reg_offset(s, rm),
 222            vec_full_reg_offset(s, ra), vsz, vsz);
 223}
 224
 225/* Invoke a vector move on two Zregs.  */
 226static bool do_mov_z(DisasContext *s, int rd, int rn)
 227{
 228    if (sve_access_check(s)) {
 229        gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
 230    }
 231    return true;
 232}
 233
/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    /* Smaller element widths are handled by the caller replicating
     * the value within the 64-bit word before calling here.
     */
    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
}
 240
/* Invoke a vector expander on three Pregs.  */
static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
                            int rd, int rn, int rm)
{
    /* Operate at MO_64 over the gvec-rounded predicate size;
     * see size_for_gvec for the rounding contract.
     */
    unsigned psz = pred_gvec_reg_size(s);
    gvec_fn(MO_64, pred_full_reg_offset(s, rd),
            pred_full_reg_offset(s, rn),
            pred_full_reg_offset(s, rm), psz, psz);
}
 250
 251/* Invoke a vector move on two Pregs.  */
 252static bool do_mov_p(DisasContext *s, int rd, int rn)
 253{
 254    if (sve_access_check(s)) {
 255        unsigned psz = pred_gvec_reg_size(s);
 256        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
 257                         pred_full_reg_offset(s, rn), psz, psz);
 258    }
 259    return true;
 260}
 261
/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);      /* N from the full returned value */
    tcg_gen_andi_i32(cpu_ZF, t, 2);  /* Z from bit 1 */
    tcg_gen_andi_i32(cpu_CF, t, 1);  /* C from bit 0 */
    tcg_gen_movi_i32(cpu_VF, 0);     /* V always clear */
}
 270
/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    /* Single predicate word: the helper returns the flags word
     * consumed by do_pred_flags.
     */
    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
 280
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    /* t carries the word count in and the flags result out.  */
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
 298
/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,  /* MO_8, MO_16 */
    0x1111111111111111ull, 0x0101010101010101ull   /* MO_32, MO_64 */
};
 304
 305/*
 306 *** SVE Logical - Unpredicated Group
 307 */
 308
 309static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
 310{
 311    if (sve_access_check(s)) {
 312        gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
 313    }
 314    return true;
 315}
 316
/* AND (vectors, unpredicated): Zd = Zn & Zm.  */
static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_and);
}

/* ORR (vectors, unpredicated): Zd = Zn | Zm.  */
static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_or);
}

/* EOR (vectors, unpredicated): Zd = Zn ^ Zm.  */
static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_xor);
}

/* BIC (vectors, unpredicated): Zd = Zn & ~Zm.  */
static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_andc);
}
 336
 337static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
 338{
 339    TCGv_i64 t = tcg_temp_new_i64();
 340    uint64_t mask = dup_const(MO_8, 0xff >> sh);
 341
 342    tcg_gen_xor_i64(t, n, m);
 343    tcg_gen_shri_i64(d, t, sh);
 344    tcg_gen_shli_i64(t, t, 8 - sh);
 345    tcg_gen_andi_i64(d, d, mask);
 346    tcg_gen_andi_i64(t, t, ~mask);
 347    tcg_gen_or_i64(d, d, t);
 348    tcg_temp_free_i64(t);
 349}
 350
 351static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
 352{
 353    TCGv_i64 t = tcg_temp_new_i64();
 354    uint64_t mask = dup_const(MO_16, 0xffff >> sh);
 355
 356    tcg_gen_xor_i64(t, n, m);
 357    tcg_gen_shri_i64(d, t, sh);
 358    tcg_gen_shli_i64(t, t, 16 - sh);
 359    tcg_gen_andi_i64(d, d, mask);
 360    tcg_gen_andi_i64(t, t, ~mask);
 361    tcg_gen_or_i64(d, d, t);
 362    tcg_temp_free_i64(t);
 363}
 364
/* XAR at 32-bit elements: full-width rotate maps directly to rotri.  */
static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

/* XAR at 64-bit elements.  */
static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

/* XAR with host vector ops, any element size.  */
static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}
 383
/* Expand XAR (exclusive-or and rotate right) for both SVE2 and AdvSIMD.
 * rd = ror(rn ^ rm, shift) per element of size 8 << vece.
 */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          /* NOTE: the 64-bit case uses the generic gvec helper,
           * not an sve2-specific one -- shared with AdvSIMD XAR.
           */
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    /* Normalize a full-element rotate (SVE2's esize) to zero.  */
    shift &= esize - 1;

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}
 426
 427static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
 428{
 429    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
 430        return false;
 431    }
 432    if (sve_access_check(s)) {
 433        unsigned vsz = vec_full_reg_size(s);
 434        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
 435                     vec_full_reg_offset(s, a->rn),
 436                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
 437    }
 438    return true;
 439}
 440
 441static bool do_sve2_zzzz_fn(DisasContext *s, arg_rrrr_esz *a, GVecGen4Fn *fn)
 442{
 443    if (!dc_isar_feature(aa64_sve2, s)) {
 444        return false;
 445    }
 446    if (sve_access_check(s)) {
 447        gen_gvec_fn_zzzz(s, fn, a->esz, a->rd, a->rn, a->rm, a->ra);
 448    }
 449    return true;
 450}
 451
/* EOR3: d = n ^ m ^ k, 64-bit scalar form.  */
static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_xor_i64(d, d, k);
}

/* EOR3, host-vector form.  */
static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_xor_vec(vece, d, d, k);
}

/* Expand EOR3 over full vectors; bitwise, so MO_64 covers all sizes.  */
static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor3_i64,
        .fniv = gen_eor3_vec,
        .fno = gen_helper_sve2_eor3,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_EOR3(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_eor3);
}
 482
/* BCAX: d = n ^ (m & ~k), 64-bit scalar form.  */
static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(d, m, k);
    tcg_gen_xor_i64(d, d, n);
}

/* BCAX, host-vector form.  */
static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                         TCGv_vec m, TCGv_vec k)
{
    tcg_gen_andc_vec(vece, d, m, k);
    tcg_gen_xor_vec(vece, d, d, n);
}

/* Expand BCAX over full vectors; bitwise, so MO_64 covers all sizes.  */
static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bcax_i64,
        .fniv = gen_bcax_vec,
        .fno = gen_helper_sve2_bcax,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BCAX(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bcax);
}
 513
/* BSL: bitwise select, d = (n & k) | (m & ~k) with k the select mask.  */
static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    /* BSL differs from the generic bitsel in argument ordering. */
    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
}

static bool trans_BSL(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl);
}
 525
/* BSL1N: d = (~n & k) | (m & ~k).  NB: scratches its n and m inputs.  */
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        /* Invert n first, then a plain bitwise select on k.  */
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

/* Expand BSL1N over full vectors; bitwise, so MO_64 covers all sizes.  */
static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL1N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl1n);
}
 563
static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       = (n & k) | ~(m | k)      [De Morgan]
     * NB: scratches its n and m inputs.
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        /* Invert m first, then a plain bitwise select on k.  */
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

/* Expand BSL2N over full vectors; bitwise, so MO_64 covers all sizes.  */
static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL2N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl2n);
}
 610
/* NBSL: d = ~((n & k) | (m & ~k)).  NB: scratches its n and m inputs.  */
static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);
    tcg_gen_andc_i64(m, m, k);
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    /* Bitwise select then invert the result.  */
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

/* Expand NBSL over full vectors; bitwise, so MO_64 covers all sizes.  */
static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_NBSL(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_nbsl);
}
 642
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

/* ADD (vectors, unpredicated).  */
static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_add);
}

/* SUB (vectors, unpredicated).  */
static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sub);
}

/* SQADD: signed saturating add.  */
static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
}

/* SQSUB: signed saturating subtract.  */
static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
}

/* UQADD: unsigned saturating add.  */
static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
}

/* UQSUB: unsigned saturating subtract.  */
static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
}
 676
 677/*
 678 *** SVE Integer Arithmetic - Binary Predicated Group
 679 */
 680
 681static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 682{
 683    if (fn == NULL) {
 684        return false;
 685    }
 686    if (sve_access_check(s)) {
 687        gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
 688    }
 689    return true;
 690}
 691
/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    /* No access check here; callers have already performed it.  */
    gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}
 703
/* Emit a trans function for a predicated zpzz op with helpers at all
 * four element sizes.
 */
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

/* Predicated logical operations.  */
DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

/* Predicated add/subtract.  */
DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

/* Predicated min/max and absolute difference.  */
DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

/* Predicated multiply and multiply-high.  */
DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

/* Predicated shifts by vector.  */
DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)
 736
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    /* Division is provided only at 32- and 64-bit element sizes.  */
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}
 744
static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    /* Division is provided only at 32- and 64-bit element sizes.  */
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}
 752
 753static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
 754{
 755    if (sve_access_check(s)) {
 756        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
 757    }
 758    return true;
 759}
 760
 761#undef DO_ZPZZ
 762
 763/*
 764 *** SVE Integer Arithmetic - Unary Predicated Group
 765 */
 766
 767static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
 768{
 769    if (fn == NULL) {
 770        return false;
 771    }
 772    if (sve_access_check(s)) {
 773        gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
 774    }
 775    return true;
 776}
 777
/* Emit a trans function for a predicated unary op with helpers at all
 * four element sizes.
 */
#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)
 795
/* FABS: no 8-bit floating-point element size, hence the NULL.  */
static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* FNEG: no 8-bit floating-point element size.  */
static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* SXTB: sign-extend bytes; destination must be wider than a byte.  */
static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* UXTB: zero-extend bytes; destination must be wider than a byte.  */
static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* SXTH: sign-extend halfwords; destination must be 32- or 64-bit.  */
static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* UXTH: zero-extend halfwords; destination must be 32- or 64-bit.  */
static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* SXTW: sign-extend words; only valid with 64-bit elements.  */
static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

/* UXTW: zero-extend words; only valid with 64-bit elements.  */
static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ
 871
/*
 *** SVE Integer Reduction Group
 */

/* A reduction helper: takes Zn and Pg, returns the scalar result.  */
typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Expand a predicated reduction of Zn into scalar register Vd.
 * A NULL fn indicates an unsupported element size.
 */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    /* Write the scalar result, zeroing the rest of the FP register.  */
    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
 908
/* Emit a trans function for a predicated reduction with helpers at all
 * four element sizes.
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
{
    /* SADDV is not provided for 64-bit elements, hence the NULL.  */
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ
 939
 940/*
 941 *** SVE Shift by Immediate - Predicated Group
 942 */
 943
 944/*
 945 * Copy Zn into Zd, storing zeros into inactive elements.
 946 * If invert, store zeros into the active elements.
 947 */
 948static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
 949                        int esz, bool invert)
 950{
 951    static gen_helper_gvec_3 * const fns[4] = {
 952        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
 953        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
 954    };
 955
 956    if (sve_access_check(s)) {
 957        gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
 958    }
 959    return true;
 960}
 961
 962static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
 963                        gen_helper_gvec_3 *fn)
 964{
 965    if (sve_access_check(s)) {
 966        gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
 967    }
 968    return true;
 969}
 970
static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}
 986
static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        /* Zero the active elements of Zd in place.  */
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
1004
1005static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
1006{
1007    static gen_helper_gvec_3 * const fns[4] = {
1008        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
1009        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
1010    };
1011    if (a->esz < 0) {
1012        return false;
1013    }
1014    /* Shift by element size is architecturally valid.
1015       For logical shifts, it is a zeroing operation.  */
1016    if (a->imm >= (8 << a->esz)) {
1017        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
1018    } else {
1019        return do_zpzi_ool(s, a, fns[a->esz]);
1020    }
1021}
1022
1023static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
1024{
1025    static gen_helper_gvec_3 * const fns[4] = {
1026        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
1027        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
1028    };
1029    if (a->esz < 0) {
1030        return false;
1031    }
1032    /* Shift by element size is architecturally valid.  For arithmetic
1033       right shift for division, it is a zeroing operation.  */
1034    if (a->imm >= (8 << a->esz)) {
1035        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
1036    } else {
1037        return do_zpzi_ool(s, a, fns[a->esz]);
1038    }
1039}
1040
1041static bool trans_SQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
1042{
1043    static gen_helper_gvec_3 * const fns[4] = {
1044        gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
1045        gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
1046    };
1047    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1048        return false;
1049    }
1050    return do_zpzi_ool(s, a, fns[a->esz]);
1051}
1052
1053static bool trans_UQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
1054{
1055    static gen_helper_gvec_3 * const fns[4] = {
1056        gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
1057        gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
1058    };
1059    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1060        return false;
1061    }
1062    return do_zpzi_ool(s, a, fns[a->esz]);
1063}
1064
1065static bool trans_SRSHR(DisasContext *s, arg_rpri_esz *a)
1066{
1067    static gen_helper_gvec_3 * const fns[4] = {
1068        gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
1069        gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
1070    };
1071    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1072        return false;
1073    }
1074    return do_zpzi_ool(s, a, fns[a->esz]);
1075}
1076
1077static bool trans_URSHR(DisasContext *s, arg_rpri_esz *a)
1078{
1079    static gen_helper_gvec_3 * const fns[4] = {
1080        gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
1081        gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
1082    };
1083    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1084        return false;
1085    }
1086    return do_zpzi_ool(s, a, fns[a->esz]);
1087}
1088
1089static bool trans_SQSHLU(DisasContext *s, arg_rpri_esz *a)
1090{
1091    static gen_helper_gvec_3 * const fns[4] = {
1092        gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
1093        gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
1094    };
1095    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
1096        return false;
1097    }
1098    return do_zpzi_ool(s, a, fns[a->esz]);
1099}
1100
1101/*
1102 *** SVE Bitwise Shift - Predicated Group
1103 */
1104
/*
 * Predicated shifts by a vector of wide (64-bit) shift counts.
 * Only the b/h/s element sizes exist (the function table has three
 * entries), so esz == 3 and the negative invalid-encoding esz are
 * rejected before indexing.
 */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
1123
1124/*
1125 *** SVE Bitwise Shift - Unpredicated Group
1126 */
1127
1128static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
1129                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
1130                                         int64_t, uint32_t, uint32_t))
1131{
1132    if (a->esz < 0) {
1133        /* Invalid tsz encoding -- see tszimm_esz. */
1134        return false;
1135    }
1136    if (sve_access_check(s)) {
1137        unsigned vsz = vec_full_reg_size(s);
1138        /* Shift by element size is architecturally valid.  For
1139           arithmetic right-shift, it's the same as by one less.
1140           Otherwise it is a zeroing operation.  */
1141        if (a->imm >= 8 << a->esz) {
1142            if (asr) {
1143                a->imm = (8 << a->esz) - 1;
1144            } else {
1145                do_dupi_z(s, a->rd, 0);
1146                return true;
1147            }
1148        }
1149        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
1150                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
1151    }
1152    return true;
1153}
1154
/* ASR (immediate): over-wide shifts clamp to element size - 1.  */
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

/* LSR (immediate): a shift >= element size zeroes the vector.  */
static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

/* LSL (immediate): a shift >= element size zeroes the vector.  */
static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
1169
1170static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
1171{
1172    if (fn == NULL) {
1173        return false;
1174    }
1175    if (sve_access_check(s)) {
1176        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
1177    }
1178    return true;
1179}
1180
/*
 * Unpredicated shifts by a vector of wide (64-bit) shift counts.
 * The table holds NULL for esz == 3; do_zzw_ool rejects that case.
 */
#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)           \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW
1196
1197/*
1198 *** SVE Integer Multiply-Add Group
1199 */
1200
1201static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
1202                         gen_helper_gvec_5 *fn)
1203{
1204    if (sve_access_check(s)) {
1205        unsigned vsz = vec_full_reg_size(s);
1206        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
1207                           vec_full_reg_offset(s, a->ra),
1208                           vec_full_reg_offset(s, a->rn),
1209                           vec_full_reg_offset(s, a->rm),
1210                           pred_full_reg_offset(s, a->pg),
1211                           vsz, vsz, 0, fn);
1212    }
1213    return true;
1214}
1215
/* Predicated integer multiply-add / multiply-subtract, all four sizes. */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
1230
1231/*
1232 *** SVE Index Generation Group
1233 */
1234
/*
 * Expand INDEX: fill Zd with the series start + i * incr.
 * The 64-bit form passes the operands to the helper as i64 directly;
 * the b/h/s forms truncate both operands to i32 first.
 */
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        /* Narrow both operands; the b/h/s helpers take i32.  */
        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}
1265
1266static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
1267{
1268    if (sve_access_check(s)) {
1269        TCGv_i64 start = tcg_const_i64(a->imm1);
1270        TCGv_i64 incr = tcg_const_i64(a->imm2);
1271        do_index(s, a->esz, a->rd, start, incr);
1272        tcg_temp_free_i64(start);
1273        tcg_temp_free_i64(incr);
1274    }
1275    return true;
1276}
1277
1278static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
1279{
1280    if (sve_access_check(s)) {
1281        TCGv_i64 start = tcg_const_i64(a->imm);
1282        TCGv_i64 incr = cpu_reg(s, a->rm);
1283        do_index(s, a->esz, a->rd, start, incr);
1284        tcg_temp_free_i64(start);
1285    }
1286    return true;
1287}
1288
1289static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
1290{
1291    if (sve_access_check(s)) {
1292        TCGv_i64 start = cpu_reg(s, a->rn);
1293        TCGv_i64 incr = tcg_const_i64(a->imm);
1294        do_index(s, a->esz, a->rd, start, incr);
1295        tcg_temp_free_i64(incr);
1296    }
1297    return true;
1298}
1299
1300static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
1301{
1302    if (sve_access_check(s)) {
1303        TCGv_i64 start = cpu_reg(s, a->rn);
1304        TCGv_i64 incr = cpu_reg(s, a->rm);
1305        do_index(s, a->esz, a->rd, start, incr);
1306    }
1307    return true;
1308}
1309
1310/*
1311 *** SVE Stack Allocation Group
1312 */
1313
1314static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
1315{
1316    if (sve_access_check(s)) {
1317        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1318        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1319        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1320    }
1321    return true;
1322}
1323
1324static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
1325{
1326    if (sve_access_check(s)) {
1327        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1328        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1329        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1330    }
1331    return true;
1332}
1333
1334static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
1335{
1336    if (sve_access_check(s)) {
1337        TCGv_i64 reg = cpu_reg(s, a->rd);
1338        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1339    }
1340    return true;
1341}
1342
1343/*
1344 *** SVE Compute Vector Address Group
1345 */
1346
1347static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
1348{
1349    if (sve_access_check(s)) {
1350        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
1351    }
1352    return true;
1353}
1354
/* The four ADR variants differ only in the out-of-line helper used.  */
static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
1374
1375/*
1376 *** SVE Integer Misc - Unpredicated Group
1377 */
1378
1379static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
1380{
1381    static gen_helper_gvec_2 * const fns[4] = {
1382        NULL,
1383        gen_helper_sve_fexpa_h,
1384        gen_helper_sve_fexpa_s,
1385        gen_helper_sve_fexpa_d,
1386    };
1387    if (a->esz == 0) {
1388        return false;
1389    }
1390    if (sve_access_check(s)) {
1391        gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
1392    }
1393    return true;
1394}
1395
1396static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
1397{
1398    static gen_helper_gvec_3 * const fns[4] = {
1399        NULL,
1400        gen_helper_sve_ftssel_h,
1401        gen_helper_sve_ftssel_s,
1402        gen_helper_sve_ftssel_d,
1403    };
1404    if (a->esz == 0) {
1405        return false;
1406    }
1407    if (sve_access_check(s)) {
1408        gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
1409    }
1410    return true;
1411}
1412
1413/*
1414 *** SVE Predicate Logical Operations Group
1415 */
1416
/*
 * Expand a predicate logical operation, optionally (a->s) setting
 * the flags from the result under the governing predicate.
 * gvec_op supplies i64, host-vector and out-of-line expansions of
 * the underlying bitwise operation.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        /* No flags to set: just expand the operation.  */
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            /* Preserve the guard in the scratch predicate slot.  */
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1472
/* Pd = Pn & Pm & Pg, one 64-bit predicate word at a time.  */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* As gen_and_pg_i64, but in host vector registers.  */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s) {
        /* Without flag setting, simplify common special cases.  */
        if (!sve_access_check(s)) {
            return true;
        }
        if (a->rn == a->rm) {
            /* Pn & Pn & Pg == Pn & Pg; and if Pg == Pn, just a move.  */
            if (a->pg == a->rn) {
                do_mov_p(s, a->rd, a->rn);
            } else {
                gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
            }
            return true;
        } else if (a->pg == a->rn || a->pg == a->rm) {
            /* The guard repeats one operand: 3-way AND becomes 2-way.  */
            gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
            return true;
        }
    }
    return do_pppp_flags(s, a, &op);
}
1513
/* Pd = (Pn & ~Pm) & Pg, one 64-bit predicate word at a time.  */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* As gen_bic_pg_i64, but in host vector registers.  */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    /* When !a->s and Pg == Pn, (Pn & ~Pm) & Pn == Pn & ~Pm: plain ANDC. */
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    if (!a->s && a->pg == a->rn) {
        if (sve_access_check(s)) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
        }
        return true;
    }
    return do_pppp_flags(s, a, &op);
}
1544
/* Pd = (Pn ^ Pm) & Pg, one 64-bit predicate word at a time.  */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* As gen_eor_pg_i64, but in host vector registers.  */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1568
1569static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1570{
1571    if (a->s) {
1572        return false;
1573    }
1574    if (sve_access_check(s)) {
1575        unsigned psz = pred_gvec_reg_size(s);
1576        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
1577                            pred_full_reg_offset(s, a->pg),
1578                            pred_full_reg_offset(s, a->rn),
1579                            pred_full_reg_offset(s, a->rm), psz, psz);
1580    }
1581    return true;
1582}
1583
/* Pd = (Pn | Pm) & Pg, one 64-bit predicate word at a time.  */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* As gen_orr_pg_i64, but in host vector registers.  */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* When all three sources coincide, (Pn | Pn) & Pn == Pn: a move.  */
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
1611
/* Pd = (Pn | ~Pm) & Pg, one 64-bit predicate word at a time.  */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* As gen_orn_pg_i64, but in host vector registers.  */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1635
/* Pd = ~(Pn | Pm) & Pg, one 64-bit predicate word at a time.  */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* As gen_nor_pg_i64, but in host vector registers.  */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1659
/* Pd = ~(Pn & Pm) & Pg, one 64-bit predicate word at a time.  */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* As gen_nand_pg_i64, but in host vector registers.  */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1683
1684/*
1685 *** SVE Predicate Misc Group
1686 */
1687
/* PTEST: set the flags from predicate Pn under governing predicate Pg. */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            /* The whole predicate fits in one 64-bit word: test inline.  */
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1711
/* See the ARM pseudocode DecodePredCount.
 * Returns the number of active elements implied by the pattern,
 * for a vector of fullsz bytes at element size esz; 0 if the
 * pattern's constraint cannot be met.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    /* Fixed VLn patterns apply only when the vector is long enough.  */
    return elements >= bound ? bound : 0;
}
1749
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* Partial final word: keep only the bits below setsz.  */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate fits in a single 64-bit word.  */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* All stored words are identical: try a single vector dup.  */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    /* Otherwise store word by word: full words, partial word, zeros.  */
    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        /* N is set iff any element was set; C is set iff none were.  */
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1829
/* PTRUE (and PTRUES when a->s) with an explicit pattern.  */
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}
1846
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND:
     * Pd = FFR & FFR & Pg, with the S bit carried through for the
     * flag-setting form.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}
1858
/* RDFFR (unpredicated): a plain predicate move out of FFR.  */
static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

/* WRFFR: a plain predicate move into FFR.  */
static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
1868
/*
 * Common expansion for PFIRST and PNEXT: call the helper with a
 * predicate descriptor, then update the flags from its return value.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    /* t serves both as the descriptor input and the flags output.  */
    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}
1897
/* PFIRST and PNEXT share one expansion, differing only in helper.  */
static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
1907
1908/*
1909 *** SVE Element Count Group
1910 */
1911
/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        /* Underflow bound: 0 (unsigned) or INT32_MIN (signed).  */
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        /* Overflow bound: UINT32_MAX (unsigned) or INT32_MAX (signed).  */
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    /* If the 64-bit result passed the bound, clamp to the bound.  */
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
1941
/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: clamp to 0 when val > reg.  */
            tcg_gen_sub_i64(t0, reg, val);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t2, t0);
        } else {
            /* Unsigned add: clamp to all-ones when the sum wraps.  */
            tcg_gen_add_i64(t0, reg, val);
            t2 = tcg_constant_i64(-1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t2, t0);
        }
    } else {
        TCGv_i64 t1 = tcg_temp_new_i64();
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result: INT64_MIN on overflow.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result: INT64_MAX on overflow.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_constant_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t1);
    }
    tcg_temp_free_i64(t0);
}
1987
/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        /* The byte/half helpers take a 32-bit operand; decrement is
         * implemented by negating the addend.
         */
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        /* Word helpers take a 64-bit operand, again negated to decrement. */
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        /* For 64-bit unsigned decrement there is a dedicated subtract
         * helper, since negating the operand would not survive the
         * unsigned saturation logic.
         */
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
2072
2073static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
2074{
2075    if (sve_access_check(s)) {
2076        unsigned fullsz = vec_full_reg_size(s);
2077        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2078        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
2079    }
2080    return true;
2081}
2082
2083static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
2084{
2085    if (sve_access_check(s)) {
2086        unsigned fullsz = vec_full_reg_size(s);
2087        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2088        int inc = numelem * a->imm * (a->d ? -1 : 1);
2089        TCGv_i64 reg = cpu_reg(s, a->rd);
2090
2091        tcg_gen_addi_i64(reg, reg, inc);
2092    }
2093    return true;
2094}
2095
2096static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
2097{
2098    if (!sve_access_check(s)) {
2099        return true;
2100    }
2101
2102    unsigned fullsz = vec_full_reg_size(s);
2103    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2104    int inc = numelem * a->imm;
2105    TCGv_i64 reg = cpu_reg(s, a->rd);
2106
2107    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
2108    if (inc == 0) {
2109        if (a->u) {
2110            tcg_gen_ext32u_i64(reg, reg);
2111        } else {
2112            tcg_gen_ext32s_i64(reg, reg);
2113        }
2114    } else {
2115        TCGv_i64 t = tcg_const_i64(inc);
2116        do_sat_addsub_32(reg, t, a->u, a->d);
2117        tcg_temp_free_i64(t);
2118    }
2119    return true;
2120}
2121
2122static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
2123{
2124    if (!sve_access_check(s)) {
2125        return true;
2126    }
2127
2128    unsigned fullsz = vec_full_reg_size(s);
2129    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2130    int inc = numelem * a->imm;
2131    TCGv_i64 reg = cpu_reg(s, a->rd);
2132
2133    if (inc != 0) {
2134        TCGv_i64 t = tcg_const_i64(inc);
2135        do_sat_addsub_64(reg, t, a->u, a->d);
2136        tcg_temp_free_i64(t);
2137    }
2138    return true;
2139}
2140
2141static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
2142{
2143    if (a->esz == 0) {
2144        return false;
2145    }
2146
2147    unsigned fullsz = vec_full_reg_size(s);
2148    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2149    int inc = numelem * a->imm;
2150
2151    if (inc != 0) {
2152        if (sve_access_check(s)) {
2153            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
2154            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
2155                              vec_full_reg_offset(s, a->rn),
2156                              t, fullsz, fullsz);
2157            tcg_temp_free_i64(t);
2158        }
2159    } else {
2160        do_mov_z(s, a->rd, a->rn);
2161    }
2162    return true;
2163}
2164
2165static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
2166{
2167    if (a->esz == 0) {
2168        return false;
2169    }
2170
2171    unsigned fullsz = vec_full_reg_size(s);
2172    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2173    int inc = numelem * a->imm;
2174
2175    if (inc != 0) {
2176        if (sve_access_check(s)) {
2177            TCGv_i64 t = tcg_const_i64(inc);
2178            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
2179            tcg_temp_free_i64(t);
2180        }
2181    } else {
2182        do_mov_z(s, a->rd, a->rn);
2183    }
2184    return true;
2185}
2186
2187/*
2188 *** SVE Bitwise Immediate Group
2189 */
2190
2191static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
2192{
2193    uint64_t imm;
2194    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2195                                extract32(a->dbm, 0, 6),
2196                                extract32(a->dbm, 6, 6))) {
2197        return false;
2198    }
2199    if (sve_access_check(s)) {
2200        unsigned vsz = vec_full_reg_size(s);
2201        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
2202                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
2203    }
2204    return true;
2205}
2206
/* AND (vector, bitmask immediate).  */
static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}
2211
/* ORR (vector, bitmask immediate).  */
static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}
2216
/* EOR (vector, bitmask immediate).  */
static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}
2221
/* DUPM: broadcast a decoded 64-bit logical immediate to every
 * doubleword of Zd.  Returns false if the immediate does not decode.
 */
static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}
2235
2236/*
2237 *** SVE Integer Wide Immediate - Predicated Group
2238 */
2239
2240/* Implement all merging copies.  This is used for CPY (immediate),
2241 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
2242 */
2243static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
2244                     TCGv_i64 val)
2245{
2246    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2247    static gen_cpy * const fns[4] = {
2248        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
2249        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
2250    };
2251    unsigned vsz = vec_full_reg_size(s);
2252    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2253    TCGv_ptr t_zd = tcg_temp_new_ptr();
2254    TCGv_ptr t_zn = tcg_temp_new_ptr();
2255    TCGv_ptr t_pg = tcg_temp_new_ptr();
2256
2257    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
2258    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
2259    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
2260
2261    fns[esz](t_zd, t_zn, t_pg, val, desc);
2262
2263    tcg_temp_free_ptr(t_zd);
2264    tcg_temp_free_ptr(t_zn);
2265    tcg_temp_free_ptr(t_pg);
2266    tcg_temp_free_i32(desc);
2267}
2268
2269static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
2270{
2271    if (a->esz == 0) {
2272        return false;
2273    }
2274    if (sve_access_check(s)) {
2275        /* Decode the VFP immediate.  */
2276        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
2277        TCGv_i64 t_imm = tcg_const_i64(imm);
2278        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
2279        tcg_temp_free_i64(t_imm);
2280    }
2281    return true;
2282}
2283
2284static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
2285{
2286    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
2287        return false;
2288    }
2289    if (sve_access_check(s)) {
2290        TCGv_i64 t_imm = tcg_const_i64(a->imm);
2291        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
2292        tcg_temp_free_i64(t_imm);
2293    }
2294    return true;
2295}
2296
2297static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
2298{
2299    static gen_helper_gvec_2i * const fns[4] = {
2300        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
2301        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
2302    };
2303
2304    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
2305        return false;
2306    }
2307    if (sve_access_check(s)) {
2308        unsigned vsz = vec_full_reg_size(s);
2309        TCGv_i64 t_imm = tcg_const_i64(a->imm);
2310        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
2311                            pred_full_reg_offset(s, a->pg),
2312                            t_imm, vsz, vsz, 0, fns[a->esz]);
2313        tcg_temp_free_i64(t_imm);
2314    }
2315    return true;
2316}
2317
2318/*
2319 *** SVE Permute Extract Group
2320 */
2321
/* Expand EXT: Zd = concat(Zn, Zm) >> (imm bytes), i.e. the top
 * (vsz - imm) bytes of Zn followed by the bottom imm bytes of Zm.
 * An out-of-range imm selects all of Zn (n_ofs forced to 0).
 */
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = imm >= vsz ? 0 : imm;   /* byte offset into Zn */
    unsigned n_siz = vsz - n_ofs;            /* bytes taken from Zn */
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        /* Copy the high part of Zn down, then the low part of Zm up.  */
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fall back to the out-of-line helper for the general case.  */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
2351
/* EXT (SVE1 form, distinct Zn/Zm operands).  */
static bool trans_EXT(DisasContext *s, arg_EXT *a)
{
    return do_EXT(s, a->rd, a->rn, a->rm, a->imm);
}
2356
/* EXT (SVE2 constructive form): the second source is the register
 * pair partner, (Zn + 1) mod 32.
 */
static bool trans_EXT_sve2(DisasContext *s, arg_rri *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_EXT(s, a->rd, a->rn, (a->rn + 1) % 32, a->imm);
}
2364
2365/*
2366 *** SVE Permute - Unpredicated Group
2367 */
2368
2369static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
2370{
2371    if (sve_access_check(s)) {
2372        unsigned vsz = vec_full_reg_size(s);
2373        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2374                             vsz, vsz, cpu_reg_sp(s, a->rn));
2375    }
2376    return true;
2377}
2378
/* DUP (indexed): broadcast element [index] of Zn to Zd.  The imm
 * field jointly encodes the element size (position of the lowest set
 * bit) and the index (the bits above it); imm with no low bits set
 * is unallocated.
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        /* Lowest set bit gives log2 of the element size in bytes.  */
        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2405
2406static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2407{
2408    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2409    static gen_insr * const fns[4] = {
2410        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2411        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2412    };
2413    unsigned vsz = vec_full_reg_size(s);
2414    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2415    TCGv_ptr t_zd = tcg_temp_new_ptr();
2416    TCGv_ptr t_zn = tcg_temp_new_ptr();
2417
2418    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2419    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2420
2421    fns[a->esz](t_zd, t_zn, val, desc);
2422
2423    tcg_temp_free_ptr(t_zd);
2424    tcg_temp_free_ptr(t_zn);
2425    tcg_temp_free_i32(desc);
2426}
2427
2428static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2429{
2430    if (sve_access_check(s)) {
2431        TCGv_i64 t = tcg_temp_new_i64();
2432        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2433        do_insr_i64(s, a, t);
2434        tcg_temp_free_i64(t);
2435    }
2436    return true;
2437}
2438
/* INSR (scalar): the inserted value comes from Xm.  */
static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}
2446
/* REV (vector): reverse the order of elements within Zn.  */
static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
        gen_helper_sve_rev_s, gen_helper_sve_rev_d
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
    }
    return true;
}
2459
/* TBL: table lookup, Zd[i] = Zn[Zm[i]] (zero if index out of range —
 * per the sve_tbl helpers).
 */
static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
    }
    return true;
}
2472
/* TBL (SVE2, two-source): the table is the register pair
 * { Zn, (Zn + 1) mod 32 }.
 */
static bool trans_TBL_sve2(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
        gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
    };

    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn,
                          (a->rn + 1) % 32, a->rm, 0);
    }
    return true;
}
2489
/* TBX (SVE2): table lookup with merging — out-of-range indices keep
 * the existing Zd element (per the sve2_tbx helpers).
 */
static bool trans_TBX(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
        gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
    };

    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
    }
    return true;
}
2505
/* SUNPKLO/SUNPKHI/UUNPKLO/UUNPKHI: widen the low (h=0) or high (h=1)
 * half of Zn into Zd.  esz is the destination size; byte destinations
 * are unallocated.  a->u selects unsigned vs signed extension.
 */
static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* The high half is selected by offsetting the source by vsz/2. */
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
2527
2528/*
2529 *** SVE Permute - Predicates Group
2530 */
2531
2532static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2533                          gen_helper_gvec_3 *fn)
2534{
2535    if (!sve_access_check(s)) {
2536        return true;
2537    }
2538
2539    unsigned vsz = pred_full_reg_size(s);
2540
2541    TCGv_ptr t_d = tcg_temp_new_ptr();
2542    TCGv_ptr t_n = tcg_temp_new_ptr();
2543    TCGv_ptr t_m = tcg_temp_new_ptr();
2544    TCGv_i32 t_desc;
2545    uint32_t desc = 0;
2546
2547    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2548    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2549    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
2550
2551    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2552    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2553    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2554    t_desc = tcg_const_i32(desc);
2555
2556    fn(t_d, t_n, t_m, t_desc);
2557
2558    tcg_temp_free_ptr(t_d);
2559    tcg_temp_free_ptr(t_n);
2560    tcg_temp_free_ptr(t_m);
2561    tcg_temp_free_i32(t_desc);
2562    return true;
2563}
2564
2565static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2566                          gen_helper_gvec_2 *fn)
2567{
2568    if (!sve_access_check(s)) {
2569        return true;
2570    }
2571
2572    unsigned vsz = pred_full_reg_size(s);
2573    TCGv_ptr t_d = tcg_temp_new_ptr();
2574    TCGv_ptr t_n = tcg_temp_new_ptr();
2575    TCGv_i32 t_desc;
2576    uint32_t desc = 0;
2577
2578    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2579    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2580
2581    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
2582    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
2583    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
2584    t_desc = tcg_const_i32(desc);
2585
2586    fn(t_d, t_n, t_desc);
2587
2588    tcg_temp_free_i32(t_desc);
2589    tcg_temp_free_ptr(t_d);
2590    tcg_temp_free_ptr(t_n);
2591    return true;
2592}
2593
/* ZIP1 (predicates): high_odd = 0 variant of the zip helper.  */
static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}
2598
/* ZIP2 (predicates): high_odd = 1 variant of the zip helper.  */
static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}
2603
/* UZP1 (predicates): high_odd = 0 variant of the uzp helper.  */
static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}
2608
/* UZP2 (predicates): high_odd = 1 variant of the uzp helper.  */
static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}
2613
/* TRN1 (predicates): high_odd = 0 variant of the trn helper.  */
static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}
2618
/* TRN2 (predicates): high_odd = 1 variant of the trn helper.  */
static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}
2623
/* REV (predicate): reverse the order of predicate elements.  */
static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}
2628
/* PUNPKLO: unpack the low half of Pn (high_odd = 0).  */
static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}
2633
/* PUNPKHI: unpack the high half of Pn (high_odd = 1).  */
static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}
2638
2639/*
2640 *** SVE Permute - Interleaving Group
2641 */
2642
2643static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2644{
2645    static gen_helper_gvec_3 * const fns[4] = {
2646        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2647        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2648    };
2649
2650    if (sve_access_check(s)) {
2651        unsigned vsz = vec_full_reg_size(s);
2652        unsigned high_ofs = high ? vsz / 2 : 0;
2653        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2654                           vec_full_reg_offset(s, a->rn) + high_ofs,
2655                           vec_full_reg_offset(s, a->rm) + high_ofs,
2656                           vsz, vsz, 0, fns[a->esz]);
2657    }
2658    return true;
2659}
2660
2661static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2662                            gen_helper_gvec_3 *fn)
2663{
2664    if (sve_access_check(s)) {
2665        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
2666    }
2667    return true;
2668}
2669
/* ZIP1 (vectors): interleave from the low halves.  */
static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, false);
}
2674
/* ZIP2 (vectors): interleave from the high halves.  */
static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, true);
}
2679
2680static bool do_zip_q(DisasContext *s, arg_rrr_esz *a, bool high)
2681{
2682    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
2683        return false;
2684    }
2685    if (sve_access_check(s)) {
2686        unsigned vsz = vec_full_reg_size(s);
2687        unsigned high_ofs = high ? QEMU_ALIGN_DOWN(vsz, 32) / 2 : 0;
2688        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2689                           vec_full_reg_offset(s, a->rn) + high_ofs,
2690                           vec_full_reg_offset(s, a->rm) + high_ofs,
2691                           vsz, vsz, 0, gen_helper_sve2_zip_q);
2692    }
2693    return true;
2694}
2695
/* ZIP1 (quadwords).  */
static bool trans_ZIP1_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, false);
}
2700
/* ZIP2 (quadwords).  */
static bool trans_ZIP2_q(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip_q(s, a, true);
}
2705
/* Per-element-size helpers shared by UZP1 and UZP2.  */
static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};
2710
/* UZP1 (vectors): even elements; DATA = 0 is the starting byte offset. */
static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
}
2715
/* UZP2 (vectors): odd elements; DATA = element size in bytes.  */
static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
}
2720
/* UZP1 (quadwords, F64MM).  */
static bool trans_UZP1_q(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    return do_zzz_data_ool(s, a, 0, gen_helper_sve2_uzp_q);
}
2728
/* UZP2 (quadwords, F64MM): DATA = 16, the quadword size in bytes.  */
static bool trans_UZP2_q(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    return do_zzz_data_ool(s, a, 16, gen_helper_sve2_uzp_q);
}
2736
/* Per-element-size helpers shared by TRN1 and TRN2.  */
static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};
2741
/* TRN1 (vectors): even-position transpose; DATA = 0.  */
static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
}
2746
/* TRN2 (vectors): odd-position transpose; DATA = element size.  */
static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
}
2751
/* TRN1 (quadwords, F64MM).  */
static bool trans_TRN1_q(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    return do_zzz_data_ool(s, a, 0, gen_helper_sve2_trn_q);
}
2759
/* TRN2 (quadwords, F64MM): DATA = 16, the quadword size in bytes.  */
static bool trans_TRN2_q(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
        return false;
    }
    return do_zzz_data_ool(s, a, 16, gen_helper_sve2_trn_q);
}
2767
2768/*
2769 *** SVE Permute Vector - Predicated Group
2770 */
2771
/* COMPACT: pack active elements toward the low end of Zd.  Only
 * word/doubleword element sizes exist; byte/half map to NULL here —
 * presumably do_zpz_ool rejects a NULL fn as unallocated (confirm).
 */
static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2779
2780/* Call the helper that computes the ARM LastActiveElement pseudocode
2781 * function, scaled by the element size.  This includes the not found
2782 * indication; e.g. not found for esz=3 is -8.
2783 */
2784static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2785{
2786    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
2787     * round up, as we do elsewhere, because we need the exact size.
2788     */
2789    TCGv_ptr t_p = tcg_temp_new_ptr();
2790    TCGv_i32 t_desc;
2791    unsigned desc = 0;
2792
2793    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
2794    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);
2795
2796    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2797    t_desc = tcg_const_i32(desc);
2798
2799    gen_helper_sve_last_active_element(ret, t_p, t_desc);
2800
2801    tcg_temp_free_i32(t_desc);
2802    tcg_temp_free_ptr(t_p);
2803}
2804
2805/* Increment LAST to the offset of the next element in the vector,
2806 * wrapping around to 0.
2807 */
2808static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2809{
2810    unsigned vsz = vec_full_reg_size(s);
2811
2812    tcg_gen_addi_i32(last, last, 1 << esz);
2813    if (is_power_of_2(vsz)) {
2814        tcg_gen_andi_i32(last, last, vsz - 1);
2815    } else {
2816        TCGv_i32 max = tcg_const_i32(vsz);
2817        TCGv_i32 zero = tcg_const_i32(0);
2818        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2819        tcg_temp_free_i32(max);
2820        tcg_temp_free_i32(zero);
2821    }
2822}
2823
2824/* If LAST < 0, set LAST to the offset of the last element in the vector.  */
2825static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2826{
2827    unsigned vsz = vec_full_reg_size(s);
2828
2829    if (is_power_of_2(vsz)) {
2830        tcg_gen_andi_i32(last, last, vsz - 1);
2831    } else {
2832        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2833        TCGv_i32 zero = tcg_const_i32(0);
2834        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2835        tcg_temp_free_i32(max);
2836        tcg_temp_free_i32(zero);
2837    }
2838}
2839
/* Load an unsigned element of ESZ from BASE+OFS.
 * Returns a new i64 temp that the caller must free.
 */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}
2863
/* Load an unsigned element of ESZ from RM[LAST].
 * Returns a new i64 temp that the caller must free.  Note that on
 * big-endian hosts LAST is modified in place by the byte-order
 * adjustment below.
 */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering.  See vec_reg_offset.  */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2889
/* Compute CLAST for a Zreg.  */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* A local temp: it must survive the brcond below.  */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    /* last < 0 means no active elements: skip the broadcast.  */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        /* CLASTA takes the element after the last active one.  */
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    /* Broadcast the selected element to all of Zd.  */
    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        /* No active elements: Zd takes the value of Zn.  */
        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
2937
/* CLASTA (vectors): element after the last active.  */
static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, false);
}
2942
/* CLASTB (vectors): the last active element itself.  */
static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, true);
}
2947
2948/* Compute CLAST for a scalar.  */
2949static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2950                            bool before, TCGv_i64 reg_val)
2951{
2952    TCGv_i32 last = tcg_temp_new_i32();
2953    TCGv_i64 ele, cmp, zero;
2954
2955    find_last_active(s, last, esz, pg);
2956
2957    /* Extend the original value of last prior to incrementing.  */
2958    cmp = tcg_temp_new_i64();
2959    tcg_gen_ext_i32_i64(cmp, last);
2960
2961    if (!before) {
2962        incr_last_active(s, last, esz);
2963    }
2964
2965    /* The conceit here is that while last < 0 indicates not found, after
2966     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2967     * from which we can load garbage.  We then discard the garbage with
2968     * a conditional move.
2969     */
2970    ele = load_last_active(s, last, rm, esz);
2971    tcg_temp_free_i32(last);
2972
2973    zero = tcg_const_i64(0);
2974    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2975
2976    tcg_temp_free_i64(zero);
2977    tcg_temp_free_i64(cmp);
2978    tcg_temp_free_i64(ele);
2979}
2980
2981/* Compute CLAST for a Vreg.  */
2982static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2983{
2984    if (sve_access_check(s)) {
2985        int esz = a->esz;
2986        int ofs = vec_reg_offset(s, a->rd, 0, esz);
2987        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2988
2989        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2990        write_fp_dreg(s, a->rd, reg);
2991        tcg_temp_free_i64(reg);
2992    }
2993    return true;
2994}
2995
/* CLASTA (SIMD&FP scalar).  */
static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, false);
}
3000
/* CLASTB (SIMD&FP scalar).  */
static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, true);
}
3005
/* Compute CLAST for a Xreg.  */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Narrow the existing Xd to the element width first, so the
     * "no active element" fallback value is correctly extended.
     */
    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        /* Full 64-bit element: no extension needed.  */
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}
3035
3036static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
3037{
3038    return do_clast_general(s, a, false);
3039}
3040
3041static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
3042{
3043    return do_clast_general(s, a, true);
3044}
3045
/* Compute LAST for a scalar.  */
/*
 * Returns a freshly allocated i64 temp holding the selected element of
 * zregs[rm]; the caller must free it.  before selects LASTB (last active
 * element, with wrap_last_active handling the no-active case) vs LASTA
 * (the element after the last active one).
 */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}
3064
3065/* Compute LAST for a Vreg.  */
3066static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
3067{
3068    if (sve_access_check(s)) {
3069        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
3070        write_fp_dreg(s, a->rd, val);
3071        tcg_temp_free_i64(val);
3072    }
3073    return true;
3074}
3075
static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    /* LASTA (SIMD&FP): element after the last active one.  */
    return do_last_fp(s, a, false);
}

static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    /* LASTB (SIMD&FP): the last active element itself.  */
    return do_last_fp(s, a, true);
}
3085
3086/* Compute LAST for a Xreg.  */
3087static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
3088{
3089    if (sve_access_check(s)) {
3090        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
3091        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
3092        tcg_temp_free_i64(val);
3093    }
3094    return true;
3095}
3096
static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    /* LASTA (scalar): element after the last active one.  */
    return do_last_general(s, a, false);
}

static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    /* LASTB (scalar): the last active element itself.  */
    return do_last_general(s, a, true);
}
3106
static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
{
    /* CPY (scalar), merging: copy Xn/SP into the active elements of Zd.  */
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}
3114
3115static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
3116{
3117    if (sve_access_check(s)) {
3118        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
3119        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
3120        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
3121        tcg_temp_free_i64(t);
3122    }
3123    return true;
3124}
3125
static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
{
    /* REVB: reverse bytes within each element.  Byte elements are
     * invalid, hence the NULL table entry for esz == 0.
     */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
3136
static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
{
    /* REVH: reverse halfwords within each element; only valid for
     * word and doubleword elements (NULL entries reject the rest).
     */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
3147
3148static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
3149{
3150    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
3151}
3152
static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
{
    /* RBIT: reverse the bits within each element; valid for all sizes.  */
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
3163
3164static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
3165{
3166    if (sve_access_check(s)) {
3167        gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
3168                          a->rd, a->rn, a->rm, a->pg, a->esz);
3169    }
3170    return true;
3171}
3172
static bool trans_SPLICE_sve2(DisasContext *s, arg_rpr_esz *a)
{
    /* SVE2 constructive SPLICE: the second source is the register
     * consecutive to Zn, wrapping modulo 32.
     */
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
                          a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz);
    }
    return true;
}
3184
3185/*
3186 *** SVE Integer Compare - Vectors Group
3187 */
3188
/*
 * Expand a predicated two-vector comparison that also sets NZCV.
 * The helper receives pointers to Pd, Zn, Zm and Pg plus a simd_desc,
 * and returns the flag word that do_pred_flags() moves into NZCV.
 * A NULL gen_fn rejects the encoding (unsupported element size).
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    /* t doubles as descriptor input and flags output.  */
    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
3227
/* Expand the vector-vs-vector integer compares; all element sizes valid.  */
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ
3246
/* Expand the vector-vs-wide-element compares; the doubleword form does
 * not exist (the wide operand is already 64-bit), hence the NULL entry.
 */
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
        gen_helper_sve_##name##_ppzw_s, NULL                              \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
3269
3270/*
3271 *** SVE Integer Compare - Immediate Groups
3272 */
3273
/*
 * Expand a predicated vector-vs-immediate comparison that sets NZCV.
 * The immediate travels in the simd_desc data field; otherwise this
 * mirrors do_ppzz_flags above.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    /* t doubles as descriptor input and flags output.  */
    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
3309
/* Expand the vector-vs-immediate compares; all element sizes valid.  */
#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
    };                                                                    \
    return do_ppzi_flags(s, a, fns[a->esz]);                              \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
3332
3333/*
3334 *** SVE Partition Break Group
3335 */
3336
/*
 * Expand a three-predicate break operation (BRKPA/BRKPB).
 * fn is the plain form, fn_s the flag-setting form selected by a->s;
 * only the flag-setting helper returns a value for do_pred_flags.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        fn_s(t, d, n, m, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, m, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
3371
/*
 * Expand a two-predicate break operation (BRKA/BRKB/BRKN).
 * As do_brk3 but without the Pm operand.
 */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        fn_s(t, d, n, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
3403
/* Break instruction wrappers: each pairs a plain helper with its
 * flag-setting variant; _m are merging, _z zeroing forms.
 */
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}

static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}

static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}

static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}

static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}

static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}

static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
3438
3439/*
3440 *** SVE Predicate Count Group
3441 */
3442
/*
 * Count the active elements of Pn governed by Pg, depositing the
 * result into val.  Small predicates (<= 8 bytes) are handled inline
 * with a masked population count; larger ones call the cntp helper.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            /* Combine with the governing predicate; skipped when they
             * are the same register.
             */
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;
        TCGv_i32 t_desc;

        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
        t_desc = tcg_const_i32(desc);

        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
        tcg_temp_free_i32(t_desc);
    }
}
3484
static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
{
    /* CNTP: write the active-element count of Pn (under Pg) to Xd.  */
    if (sve_access_check(s)) {
        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
    }
    return true;
}
3492
3493static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
3494{
3495    if (sve_access_check(s)) {
3496        TCGv_i64 reg = cpu_reg(s, a->rd);
3497        TCGv_i64 val = tcg_temp_new_i64();
3498
3499        do_cntp(s, val, a->esz, a->pg, a->pg);
3500        if (a->d) {
3501            tcg_gen_sub_i64(reg, reg, val);
3502        } else {
3503            tcg_gen_add_i64(reg, reg, val);
3504        }
3505        tcg_temp_free_i64(val);
3506    }
3507    return true;
3508}
3509
static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    /* INCP/DECP (vector): add or subtract the active-element count of
     * Pg to each element of Zn, writing Zd.  Byte elements are invalid.
     */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 val = tcg_temp_new_i64();
        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;

        do_cntp(s, val, a->esz, a->pg, a->pg);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
    }
    return true;
}
3526
3527static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3528{
3529    if (sve_access_check(s)) {
3530        TCGv_i64 reg = cpu_reg(s, a->rd);
3531        TCGv_i64 val = tcg_temp_new_i64();
3532
3533        do_cntp(s, val, a->esz, a->pg, a->pg);
3534        do_sat_addsub_32(reg, val, a->u, a->d);
3535    }
3536    return true;
3537}
3538
static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
{
    /* Saturating 64-bit inc/dec of Xd by the active-element count;
     * a->u selects unsigned saturation, a->d selects decrement.
     */
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_64(reg, val, a->u, a->d);
    }
    return true;
}
3550
static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    /* Saturating vector inc/dec by the active-element count of Pg.
     * Byte elements are invalid, as for INCDECP_z above.
     */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_temp_new_i64();
        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
    }
    return true;
}
3563
3564/*
3565 *** SVE Integer Compare Scalars Group
3566 */
3567
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    /* CTERMEQ/CTERMNE: compare two general registers and set NZCV for
     * the loop-termination test.  CF is assumed already set by a prior
     * flag-setting insn and is left untouched here.
     */
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    /* NF = (rn cond rm), as a 0/1 value for the moment.  */
    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF.  */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31.  */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3592
/*
 * WHILE<cond>: construct a predicate from a pair of scalar bounds.
 * The condition is folded into a count of true iterations, bounded by
 * the vector length, which the whilel/whileg helpers expand into the
 * destination predicate; flags are then set from that predicate.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        /* 32-bit operands: extend according to signedness.  */
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    /* tmax = number of elements in one vector at this element size.  */
    tmax = tcg_const_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration.  */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length.  This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum.  */
    tcg_gen_umin_i64(t0, t0, tmax);
    tcg_temp_free_i64(tmax);

    /* Set the count to zero if the condition is false.  */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type.  */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits.  */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    t3 = tcg_const_i32(desc);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, t3);
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, t3);
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    tcg_temp_free_i32(t3);
    return true;
}
3706
3707static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3708{
3709    TCGv_i64 op0, op1, diff, t1, tmax;
3710    TCGv_i32 t2, t3;
3711    TCGv_ptr ptr;
3712    unsigned vsz = vec_full_reg_size(s);
3713    unsigned desc = 0;
3714
3715    if (!dc_isar_feature(aa64_sve2, s)) {
3716        return false;
3717    }
3718    if (!sve_access_check(s)) {
3719        return true;
3720    }
3721
3722    op0 = read_cpu_reg(s, a->rn, 1);
3723    op1 = read_cpu_reg(s, a->rm, 1);
3724
3725    tmax = tcg_const_i64(vsz);
3726    diff = tcg_temp_new_i64();
3727
3728    if (a->rw) {
3729        /* WHILERW */
3730        /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3731        t1 = tcg_temp_new_i64();
3732        tcg_gen_sub_i64(diff, op0, op1);
3733        tcg_gen_sub_i64(t1, op1, op0);
3734        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3735        tcg_temp_free_i64(t1);
3736        /* Round down to a multiple of ESIZE.  */
3737        tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3738        /* If op1 == op0, diff == 0, and the condition is always true. */
3739        tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3740    } else {
3741        /* WHILEWR */
3742        tcg_gen_sub_i64(diff, op1, op0);
3743        /* Round down to a multiple of ESIZE.  */
3744        tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3745        /* If op0 >= op1, diff <= 0, the condition is always true. */
3746        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3747    }
3748
3749    /* Bound to the maximum.  */
3750    tcg_gen_umin_i64(diff, diff, tmax);
3751    tcg_temp_free_i64(tmax);
3752
3753    /* Since we're bounded, pass as a 32-bit type.  */
3754    t2 = tcg_temp_new_i32();
3755    tcg_gen_extrl_i64_i32(t2, diff);
3756    tcg_temp_free_i64(diff);
3757
3758    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3759    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
3760    t3 = tcg_const_i32(desc);
3761
3762    ptr = tcg_temp_new_ptr();
3763    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3764
3765    gen_helper_sve_whilel(t2, ptr, t2, t3);
3766    do_pred_flags(t2);
3767
3768    tcg_temp_free_ptr(ptr);
3769    tcg_temp_free_i32(t2);
3770    tcg_temp_free_i32(t3);
3771    return true;
3772}
3773
3774/*
3775 *** SVE Integer Wide Immediate - Unpredicated Group
3776 */
3777
static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
{
    /* FDUP: broadcast a VFP-encoded FP immediate to every element.
     * There is no byte-element form.
     */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);
        uint64_t imm;

        /* Decode the VFP immediate.  */
        imm = vfp_expand_imm(a->esz, a->imm);
        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
    }
    return true;
}
3794
static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
{
    /* DUP (immediate): broadcast an integer immediate to every element.
     * Insn bit 13 (presumably the sh shift field -- confirm against the
     * decode) combined with byte elements is a reserved encoding.
     */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);

        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
    }
    return true;
}
3808
static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    /* ADD (immediate), unpredicated.  As with DUP_i, a shifted
     * immediate with byte elements is rejected.
     */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
3821
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    /* SUB (immediate) is ADD of the negated immediate.  */
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3827
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    /* SUBR (immediate): reversed subtract, imm - Zn.  scalar_first
     * makes the immediate the first operand of the subtraction.
     */
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    /* A shifted immediate with byte elements is a reserved encoding.  */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, c, &op[a->esz]);
        tcg_temp_free_i64(c);
    }
    return true;
}
3872
3873static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
3874{
3875    if (sve_access_check(s)) {
3876        unsigned vsz = vec_full_reg_size(s);
3877        tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3878                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3879    }
3880    return true;
3881}
3882
/*
 * Expand a saturating add/sub of an immediate: u selects unsigned
 * saturation, d selects subtraction.  A shifted immediate with byte
 * elements is a reserved encoding.
 */
static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_const_i64(a->imm);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
        tcg_temp_free_i64(val);
    }
    return true;
}
3895
/* Saturating immediate arithmetic: (u, d) = (unsigned, subtract).  */
static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, false);
}

static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, false);
}

static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, true);
}

static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, true);
}
3915
/* Expand an out-of-line vector-with-immediate operation via fn.  */
static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);

        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            c, vsz, vsz, 0, fn);
        tcg_temp_free_i64(c);
    }
    return true;
}
3929
/* Expand the min/max-with-immediate insns; all element sizes valid.  */
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3946
static bool trans_DOT_zzzz(DisasContext *s, arg_DOT_zzzz *a)
{
    /* SDOT/UDOT (vectors): helper table indexed [a->u][a->sz].  */
    static gen_helper_gvec_4 * const fns[2][2] = {
        { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
        { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0);
    }
    return true;
}
3959
3960/*
3961 * SVE Multiply - Indexed
3962 */
3963
3964static bool do_zzxz_ool(DisasContext *s, arg_rrxr_esz *a,
3965                        gen_helper_gvec_4 *fn)
3966{
3967    if (fn == NULL) {
3968        return false;
3969    }
3970    if (sve_access_check(s)) {
3971        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
3972    }
3973    return true;
3974}
3975
/* Indexed dot products: the element suffix names the accumulator size,
 * the helper suffix the multiplicand size (e.g. _s accumulates bytes).
 */
#define DO_RRXR(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a)  \
    { return do_zzxz_ool(s, a, FUNC); }

DO_RRXR(trans_SDOT_zzxw_s, gen_helper_gvec_sdot_idx_b)
DO_RRXR(trans_SDOT_zzxw_d, gen_helper_gvec_sdot_idx_h)
DO_RRXR(trans_UDOT_zzxw_s, gen_helper_gvec_udot_idx_b)
DO_RRXR(trans_UDOT_zzxw_d, gen_helper_gvec_udot_idx_h)
3984
static bool trans_SUDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
{
    /* SUDOT (indexed) requires the I8MM extension.  */
    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
        return false;
    }
    return do_zzxz_ool(s, a, gen_helper_gvec_sudot_idx_b);
}

static bool trans_USDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
{
    /* USDOT (indexed) requires the I8MM extension.  */
    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
        return false;
    }
    return do_zzxz_ool(s, a, gen_helper_gvec_usdot_idx_b);
}
4000
4001#undef DO_RRXR
4002
/*
 * Expand an SVE2 three-vector out-of-line operation, passing data
 * through to the helper's simd_data field.  Rejects the encoding when
 * SVE2 is absent or fn is NULL (unsupported element size).
 */
static bool do_sve2_zzz_data(DisasContext *s, int rd, int rn, int rm, int data,
                             gen_helper_gvec_3 *fn)
{
    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                           vec_full_reg_offset(s, rn),
                           vec_full_reg_offset(s, rm),
                           vsz, vsz, data, fn);
    }
    return true;
}
4018
/* SVE2 indexed multiplies; the element index is the helper data.  */
#define DO_SVE2_RRX(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrx_esz *a)  \
    { return do_sve2_zzz_data(s, a->rd, a->rn, a->rm, a->index, FUNC); }

DO_SVE2_RRX(trans_MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(trans_MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(trans_MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(trans_SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(trans_SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(trans_SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(trans_SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(trans_SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(trans_SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX
4036
/*
 * SVE2 widening multiply (indexed): the data field packs the element
 * index with the TOP flag in bit 0, selecting bottom/top source halves.
 */
#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    static bool NAME(DisasContext *s, arg_rrx_esz *a)           \
    {                                                           \
        return do_sve2_zzz_data(s, a->rd, a->rn, a->rm,         \
                                (a->index << 1) | TOP, FUNC);   \
    }

DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(trans_SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(trans_SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(trans_SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(trans_SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(trans_UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(trans_UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(trans_UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(trans_UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB
4060
4061static bool do_sve2_zzzz_data(DisasContext *s, int rd, int rn, int rm, int ra,
4062                              int data, gen_helper_gvec_4 *fn)
4063{
4064    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
4065        return false;
4066    }
4067    if (sve_access_check(s)) {
4068        unsigned vsz = vec_full_reg_size(s);
4069        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
4070                           vec_full_reg_offset(s, rn),
4071                           vec_full_reg_offset(s, rm),
4072                           vec_full_reg_offset(s, ra),
4073                           vsz, vsz, data, fn);
4074    }
4075    return true;
4076}
4077
/*
 * SVE2 integer multiply-add (indexed): the element index is passed
 * unmodified as the helper's data field.
 */
#define DO_SVE2_RRXR(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a)  \
    { return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra, a->index, FUNC); }

DO_SVE2_RRXR(trans_MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(trans_MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(trans_MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(trans_MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(trans_MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(trans_MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR
4099
/*
 * SVE2 widening multiply-add (indexed): the data field packs the
 * element index with the TOP flag in bit 0.
 *
 * NOTE(review): a->rd is passed in the accumulator (ra) slot rather
 * than a->ra.  Presumably the decode ties ra to rd for these
 * destructive encodings (movprfx), making the two equivalent —
 * confirm against sve.decode before relying on it.
 */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a)          \
    {                                                           \
        return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->rd, \
                                 (a->index << 1) | TOP, FUNC);  \
    }

DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB
4138
/*
 * SVE2 complex multiply-add / complex dot-product (indexed): the data
 * field packs the element index with the 2-bit rotation in bits [1:0].
 */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    static bool trans_##NAME(DisasContext *s, arg_##NAME *a)       \
    {                                                              \
        return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra,    \
                                 (a->index << 2) | a->rot, FUNC);  \
    }

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT
4156
4157/*
4158 *** SVE Floating Point Multiply-Add Indexed Group
4159 */
4160
4161static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
4162{
4163    static gen_helper_gvec_4_ptr * const fns[3] = {
4164        gen_helper_gvec_fmla_idx_h,
4165        gen_helper_gvec_fmla_idx_s,
4166        gen_helper_gvec_fmla_idx_d,
4167    };
4168
4169    if (sve_access_check(s)) {
4170        unsigned vsz = vec_full_reg_size(s);
4171        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4172        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4173                           vec_full_reg_offset(s, a->rn),
4174                           vec_full_reg_offset(s, a->rm),
4175                           vec_full_reg_offset(s, a->ra),
4176                           status, vsz, vsz, (a->index << 1) | sub,
4177                           fns[a->esz - 1]);
4178        tcg_temp_free_ptr(status);
4179    }
4180    return true;
4181}
4182
/* FMLA (indexed): expand with the subtract flag clear.  */
static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, false);
}

/* FMLS (indexed): same expansion with the subtract flag set.  */
static bool trans_FMLS_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    return do_FMLA_zzxz(s, a, true);
}
4192
4193/*
4194 *** SVE Floating Point Multiply Indexed Group
4195 */
4196
4197static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
4198{
4199    static gen_helper_gvec_3_ptr * const fns[3] = {
4200        gen_helper_gvec_fmul_idx_h,
4201        gen_helper_gvec_fmul_idx_s,
4202        gen_helper_gvec_fmul_idx_d,
4203    };
4204
4205    if (sve_access_check(s)) {
4206        unsigned vsz = vec_full_reg_size(s);
4207        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4208        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4209                           vec_full_reg_offset(s, a->rn),
4210                           vec_full_reg_offset(s, a->rm),
4211                           status, vsz, vsz, a->index, fns[a->esz - 1]);
4212        tcg_temp_free_ptr(status);
4213    }
4214    return true;
4215}
4216
4217/*
4218 *** SVE Floating Point Fast Reduction Group
4219 */
4220
4221typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
4222                                  TCGv_ptr, TCGv_i32);
4223
4224static void do_reduce(DisasContext *s, arg_rpr_esz *a,
4225                      gen_helper_fp_reduce *fn)
4226{
4227    unsigned vsz = vec_full_reg_size(s);
4228    unsigned p2vsz = pow2ceil(vsz);
4229    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, p2vsz));
4230    TCGv_ptr t_zn, t_pg, status;
4231    TCGv_i64 temp;
4232
4233    temp = tcg_temp_new_i64();
4234    t_zn = tcg_temp_new_ptr();
4235    t_pg = tcg_temp_new_ptr();
4236
4237    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
4238    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
4239    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4240
4241    fn(temp, t_zn, t_pg, status, t_desc);
4242    tcg_temp_free_ptr(t_zn);
4243    tcg_temp_free_ptr(t_pg);
4244    tcg_temp_free_ptr(status);
4245    tcg_temp_free_i32(t_desc);
4246
4247    write_fp_dreg(s, a->rd, temp);
4248    tcg_temp_free_i64(temp);
4249}
4250
/*
 * FP fast reductions: byte elements (esz == 0) are invalid; the
 * helper table is indexed by esz - 1 (h, s, d).
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
{                                                                        \
    static gen_helper_fp_reduce * const fns[3] = {                       \
        gen_helper_sve_##name##_h,                                       \
        gen_helper_sve_##name##_s,                                       \
        gen_helper_sve_##name##_d,                                       \
    };                                                                   \
    if (a->esz == 0) {                                                   \
        return false;                                                    \
    }                                                                    \
    if (sve_access_check(s)) {                                           \
        do_reduce(s, a, fns[a->esz - 1]);                                \
    }                                                                    \
    return true;                                                         \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
4273
4274/*
4275 *** SVE Floating Point Unary Operations - Unpredicated Group
4276 */
4277
4278static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
4279{
4280    unsigned vsz = vec_full_reg_size(s);
4281    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4282
4283    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4284                       vec_full_reg_offset(s, a->rn),
4285                       status, vsz, vsz, 0, fn);
4286    tcg_temp_free_ptr(status);
4287}
4288
4289static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
4290{
4291    static gen_helper_gvec_2_ptr * const fns[3] = {
4292        gen_helper_gvec_frecpe_h,
4293        gen_helper_gvec_frecpe_s,
4294        gen_helper_gvec_frecpe_d,
4295    };
4296    if (a->esz == 0) {
4297        return false;
4298    }
4299    if (sve_access_check(s)) {
4300        do_zz_fp(s, a, fns[a->esz - 1]);
4301    }
4302    return true;
4303}
4304
4305static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
4306{
4307    static gen_helper_gvec_2_ptr * const fns[3] = {
4308        gen_helper_gvec_frsqrte_h,
4309        gen_helper_gvec_frsqrte_s,
4310        gen_helper_gvec_frsqrte_d,
4311    };
4312    if (a->esz == 0) {
4313        return false;
4314    }
4315    if (sve_access_check(s)) {
4316        do_zz_fp(s, a, fns[a->esz - 1]);
4317    }
4318    return true;
4319}
4320
4321/*
4322 *** SVE Floating Point Compare with Zero Group
4323 */
4324
4325static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
4326                      gen_helper_gvec_3_ptr *fn)
4327{
4328    unsigned vsz = vec_full_reg_size(s);
4329    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4330
4331    tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
4332                       vec_full_reg_offset(s, a->rn),
4333                       pred_full_reg_offset(s, a->pg),
4334                       status, vsz, vsz, 0, fn);
4335    tcg_temp_free_ptr(status);
4336}
4337
/*
 * FP compare with zero: byte elements (esz == 0) are invalid; the
 * helper table is indexed by esz - 1 (h, s, d).
 */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
{                                                                 \
    static gen_helper_gvec_3_ptr * const fns[3] = {               \
        gen_helper_sve_##name##_h,                                \
        gen_helper_sve_##name##_s,                                \
        gen_helper_sve_##name##_d,                                \
    };                                                            \
    if (a->esz == 0) {                                            \
        return false;                                             \
    }                                                             \
    if (sve_access_check(s)) {                                    \
        do_ppz_fp(s, a, fns[a->esz - 1]);                         \
    }                                                             \
    return true;                                                  \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
4363
4364/*
4365 *** SVE floating-point trig multiply-add coefficient
4366 */
4367
4368static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
4369{
4370    static gen_helper_gvec_3_ptr * const fns[3] = {
4371        gen_helper_sve_ftmad_h,
4372        gen_helper_sve_ftmad_s,
4373        gen_helper_sve_ftmad_d,
4374    };
4375
4376    if (a->esz == 0) {
4377        return false;
4378    }
4379    if (sve_access_check(s)) {
4380        unsigned vsz = vec_full_reg_size(s);
4381        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4382        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4383                           vec_full_reg_offset(s, a->rn),
4384                           vec_full_reg_offset(s, a->rm),
4385                           status, vsz, vsz, a->imm, fns[a->esz - 1]);
4386        tcg_temp_free_ptr(status);
4387    }
4388    return true;
4389}
4390
4391/*
4392 *** SVE Floating Point Accumulating Reduction Group
4393 */
4394
4395static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
4396{
4397    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
4398                          TCGv_ptr, TCGv_ptr, TCGv_i32);
4399    static fadda_fn * const fns[3] = {
4400        gen_helper_sve_fadda_h,
4401        gen_helper_sve_fadda_s,
4402        gen_helper_sve_fadda_d,
4403    };
4404    unsigned vsz = vec_full_reg_size(s);
4405    TCGv_ptr t_rm, t_pg, t_fpst;
4406    TCGv_i64 t_val;
4407    TCGv_i32 t_desc;
4408
4409    if (a->esz == 0) {
4410        return false;
4411    }
4412    if (!sve_access_check(s)) {
4413        return true;
4414    }
4415
4416    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
4417    t_rm = tcg_temp_new_ptr();
4418    t_pg = tcg_temp_new_ptr();
4419    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
4420    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
4421    t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4422    t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
4423
4424    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
4425
4426    tcg_temp_free_i32(t_desc);
4427    tcg_temp_free_ptr(t_fpst);
4428    tcg_temp_free_ptr(t_pg);
4429    tcg_temp_free_ptr(t_rm);
4430
4431    write_fp_dreg(s, a->rd, t_val);
4432    tcg_temp_free_i64(t_val);
4433    return true;
4434}
4435
4436/*
4437 *** SVE Floating Point Arithmetic - Unpredicated Group
4438 */
4439
4440static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
4441                      gen_helper_gvec_3_ptr *fn)
4442{
4443    if (fn == NULL) {
4444        return false;
4445    }
4446    if (sve_access_check(s)) {
4447        unsigned vsz = vec_full_reg_size(s);
4448        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4449        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4450                           vec_full_reg_offset(s, a->rn),
4451                           vec_full_reg_offset(s, a->rm),
4452                           status, vsz, vsz, 0, fn);
4453        tcg_temp_free_ptr(status);
4454    }
4455    return true;
4456}
4457
4458
/*
 * Unpredicated FP arithmetic: the helper table is indexed directly
 * by esz, with a NULL entry rejecting byte elements.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3_ptr * const fns[4] = {                 \
        NULL, gen_helper_gvec_##name##_h,                           \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
    };                                                              \
    return do_zzz_fp(s, a, fns[a->esz]);                            \
}

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
4477
4478/*
4479 *** SVE Floating Point Arithmetic - Predicated Group
4480 */
4481
4482static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
4483                       gen_helper_gvec_4_ptr *fn)
4484{
4485    if (fn == NULL) {
4486        return false;
4487    }
4488    if (sve_access_check(s)) {
4489        unsigned vsz = vec_full_reg_size(s);
4490        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4491        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4492                           vec_full_reg_offset(s, a->rn),
4493                           vec_full_reg_offset(s, a->rm),
4494                           pred_full_reg_offset(s, a->pg),
4495                           status, vsz, vsz, 0, fn);
4496        tcg_temp_free_ptr(status);
4497    }
4498    return true;
4499}
4500
/*
 * Predicated FP arithmetic: the helper table is indexed directly
 * by esz, with a NULL entry rejecting byte elements.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)          \
{                                                                   \
    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_zpzz_fp(s, a, fns[a->esz]);                           \
}

DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3
4524
4525typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
4526                                      TCGv_i64, TCGv_ptr, TCGv_i32);
4527
4528static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
4529                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
4530{
4531    unsigned vsz = vec_full_reg_size(s);
4532    TCGv_ptr t_zd, t_zn, t_pg, status;
4533    TCGv_i32 desc;
4534
4535    t_zd = tcg_temp_new_ptr();
4536    t_zn = tcg_temp_new_ptr();
4537    t_pg = tcg_temp_new_ptr();
4538    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
4539    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
4540    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4541
4542    status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
4543    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
4544    fn(t_zd, t_zn, t_pg, scalar, status, desc);
4545
4546    tcg_temp_free_i32(desc);
4547    tcg_temp_free_ptr(status);
4548    tcg_temp_free_ptr(t_pg);
4549    tcg_temp_free_ptr(t_zn);
4550    tcg_temp_free_ptr(t_zd);
4551}
4552
4553static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
4554                      gen_helper_sve_fp2scalar *fn)
4555{
4556    TCGv_i64 temp = tcg_const_i64(imm);
4557    do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
4558    tcg_temp_free_i64(temp);
4559}
4560
/*
 * FP arithmetic with a one-bit immediate selecting between two fixed
 * constants (e.g. 0.5/1.0), in the width matching the element size.
 * Byte elements are invalid.
 */
#define DO_FP_IMM(NAME, name, const0, const1) \
static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_sve_fp2scalar * const fns[3] = {                    \
        gen_helper_sve_##name##_h,                                        \
        gen_helper_sve_##name##_s,                                        \
        gen_helper_sve_##name##_d                                         \
    };                                                                    \
    static uint64_t const val[3][2] = {                                   \
        { float16_##const0, float16_##const1 },                           \
        { float32_##const0, float32_##const1 },                           \
        { float64_##const0, float64_##const1 },                           \
    };                                                                    \
    if (a->esz == 0) {                                                    \
        return false;                                                     \
    }                                                                     \
    if (sve_access_check(s)) {                                            \
        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
    }                                                                     \
    return true;                                                          \
}

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
4593
4594static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
4595                      gen_helper_gvec_4_ptr *fn)
4596{
4597    if (fn == NULL) {
4598        return false;
4599    }
4600    if (sve_access_check(s)) {
4601        unsigned vsz = vec_full_reg_size(s);
4602        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4603        tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
4604                           vec_full_reg_offset(s, a->rn),
4605                           vec_full_reg_offset(s, a->rm),
4606                           pred_full_reg_offset(s, a->pg),
4607                           status, vsz, vsz, 0, fn);
4608        tcg_temp_free_ptr(status);
4609    }
4610    return true;
4611}
4612
/*
 * FP compares: the helper table is indexed directly by esz, with a
 * NULL entry rejecting byte elements.
 */
#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)     \
{                                                                     \
    static gen_helper_gvec_4_ptr * const fns[4] = {                   \
        NULL, gen_helper_sve_##name##_h,                              \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
    };                                                                \
    return do_fp_cmp(s, a, fns[a->esz]);                              \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
4632
4633static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
4634{
4635    static gen_helper_gvec_4_ptr * const fns[3] = {
4636        gen_helper_sve_fcadd_h,
4637        gen_helper_sve_fcadd_s,
4638        gen_helper_sve_fcadd_d
4639    };
4640
4641    if (a->esz == 0) {
4642        return false;
4643    }
4644    if (sve_access_check(s)) {
4645        unsigned vsz = vec_full_reg_size(s);
4646        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4647        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4648                           vec_full_reg_offset(s, a->rn),
4649                           vec_full_reg_offset(s, a->rm),
4650                           pred_full_reg_offset(s, a->pg),
4651                           status, vsz, vsz, a->rot, fns[a->esz - 1]);
4652        tcg_temp_free_ptr(status);
4653    }
4654    return true;
4655}
4656
4657static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
4658                    gen_helper_gvec_5_ptr *fn)
4659{
4660    if (a->esz == 0) {
4661        return false;
4662    }
4663    if (sve_access_check(s)) {
4664        unsigned vsz = vec_full_reg_size(s);
4665        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4666        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4667                           vec_full_reg_offset(s, a->rn),
4668                           vec_full_reg_offset(s, a->rm),
4669                           vec_full_reg_offset(s, a->ra),
4670                           pred_full_reg_offset(s, a->pg),
4671                           status, vsz, vsz, 0, fn);
4672        tcg_temp_free_ptr(status);
4673    }
4674    return true;
4675}
4676
/*
 * FP fused multiply-add family: the helper table is indexed directly
 * by esz, with a NULL entry rejecting byte elements (do_fmla also
 * checks esz == 0 itself).
 */
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
{                                                                    \
    static gen_helper_gvec_5_ptr * const fns[4] = {                  \
        NULL, gen_helper_sve_##name##_h,                             \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
    };                                                               \
    return do_fmla(s, a, fns[a->esz]);                               \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
4693
4694static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
4695{
4696    static gen_helper_gvec_5_ptr * const fns[4] = {
4697        NULL,
4698        gen_helper_sve_fcmla_zpzzz_h,
4699        gen_helper_sve_fcmla_zpzzz_s,
4700        gen_helper_sve_fcmla_zpzzz_d,
4701    };
4702
4703    if (a->esz == 0) {
4704        return false;
4705    }
4706    if (sve_access_check(s)) {
4707        unsigned vsz = vec_full_reg_size(s);
4708        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4709        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4710                           vec_full_reg_offset(s, a->rn),
4711                           vec_full_reg_offset(s, a->rm),
4712                           vec_full_reg_offset(s, a->ra),
4713                           pred_full_reg_offset(s, a->pg),
4714                           status, vsz, vsz, a->rot, fns[a->esz]);
4715        tcg_temp_free_ptr(status);
4716    }
4717    return true;
4718}
4719
4720static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
4721{
4722    static gen_helper_gvec_4_ptr * const fns[2] = {
4723        gen_helper_gvec_fcmlah_idx,
4724        gen_helper_gvec_fcmlas_idx,
4725    };
4726
4727    tcg_debug_assert(a->esz == 1 || a->esz == 2);
4728    tcg_debug_assert(a->rd == a->ra);
4729    if (sve_access_check(s)) {
4730        unsigned vsz = vec_full_reg_size(s);
4731        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4732        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4733                           vec_full_reg_offset(s, a->rn),
4734                           vec_full_reg_offset(s, a->rm),
4735                           vec_full_reg_offset(s, a->ra),
4736                           status, vsz, vsz,
4737                           a->index * 4 + a->rot,
4738                           fns[a->esz - 1]);
4739        tcg_temp_free_ptr(status);
4740    }
4741    return true;
4742}
4743
4744/*
4745 *** SVE Floating Point Unary Operations Predicated Group
4746 */
4747
4748static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4749                       bool is_fp16, gen_helper_gvec_3_ptr *fn)
4750{
4751    if (sve_access_check(s)) {
4752        unsigned vsz = vec_full_reg_size(s);
4753        TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
4754        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4755                           vec_full_reg_offset(s, rn),
4756                           pred_full_reg_offset(s, pg),
4757                           status, vsz, vsz, 0, fn);
4758        tcg_temp_free_ptr(status);
4759    }
4760    return true;
4761}
4762
/*
 * FP precision conversions (predicated).  All delegate to do_zpz_ptr
 * with is_fp16 == false; the source/destination widths are encoded in
 * the helper-name suffixes (presumably src then dst — confirm against
 * the helper definitions).
 */
static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
}

static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
}

/* BFCVT requires FEAT_BF16 in addition to SVE.  */
static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt);
}

static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
}

static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
}

static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
}
4800
/*
 * FP -> integer conversions (predicated).  Only the h* variants pass
 * is_fp16 == true, selecting the FPCR.FZ16 status; presumably those
 * take fp16 inputs (per the helper-name suffixes) — confirm against
 * the helper definitions.
 */
static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
}

static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
}

static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
}

static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
}

static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
}

static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
}

static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
}

static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
}

static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
}

static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
}

static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
}

static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
}

static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
}

static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
}
4870
/* FRINT helpers shared by several rounding insns, indexed by esz - 1. */
static gen_helper_gvec_3_ptr * const frint_fns[3] = {
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};
4876
4877static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
4878{
4879    if (a->esz == 0) {
4880        return false;
4881    }
4882    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4883                      frint_fns[a->esz - 1]);
4884}
4885
4886static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
4887{
4888    static gen_helper_gvec_3_ptr * const fns[3] = {
4889        gen_helper_sve_frintx_h,
4890        gen_helper_sve_frintx_s,
4891        gen_helper_sve_frintx_d
4892    };
4893    if (a->esz == 0) {
4894        return false;
4895    }
4896    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4897}
4898
/*
 * Expand a predicated round-to-integral with an explicit rounding mode,
 * temporarily overriding the mode held in the fpstatus.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
                          int mode, gen_helper_gvec_3_ptr *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i32 tmode = tcg_const_i32(mode);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

        /* set_rmode installs the new mode and returns the old one in tmode. */
        gen_helper_set_rmode(tmode, tmode, status);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);

        /* Restore the previous rounding mode saved above. */
        gen_helper_set_rmode(tmode, tmode, status);
        tcg_temp_free_i32(tmode);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4920
4921static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
4922{
4923    if (a->esz == 0) {
4924        return false;
4925    }
4926    return do_frint_mode(s, a, float_round_nearest_even, frint_fns[a->esz - 1]);
4927}
4928
4929static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
4930{
4931    if (a->esz == 0) {
4932        return false;
4933    }
4934    return do_frint_mode(s, a, float_round_up, frint_fns[a->esz - 1]);
4935}
4936
4937static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
4938{
4939    if (a->esz == 0) {
4940        return false;
4941    }
4942    return do_frint_mode(s, a, float_round_down, frint_fns[a->esz - 1]);
4943}
4944
4945static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
4946{
4947    if (a->esz == 0) {
4948        return false;
4949    }
4950    return do_frint_mode(s, a, float_round_to_zero, frint_fns[a->esz - 1]);
4951}
4952
4953static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
4954{
4955    if (a->esz == 0) {
4956        return false;
4957    }
4958    return do_frint_mode(s, a, float_round_ties_away, frint_fns[a->esz - 1]);
4959}
4960
4961static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
4962{
4963    static gen_helper_gvec_3_ptr * const fns[3] = {
4964        gen_helper_sve_frecpx_h,
4965        gen_helper_sve_frecpx_s,
4966        gen_helper_sve_frecpx_d
4967    };
4968    if (a->esz == 0) {
4969        return false;
4970    }
4971    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4972}
4973
4974static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
4975{
4976    static gen_helper_gvec_3_ptr * const fns[3] = {
4977        gen_helper_sve_fsqrt_h,
4978        gen_helper_sve_fsqrt_s,
4979        gen_helper_sve_fsqrt_d
4980    };
4981    if (a->esz == 0) {
4982        return false;
4983    }
4984    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4985}
4986
4987static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4988{
4989    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4990}
4991
4992static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4993{
4994    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4995}
4996
4997static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4998{
4999    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
5000}
5001
5002static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
5003{
5004    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
5005}
5006
5007static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
5008{
5009    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
5010}
5011
5012static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
5013{
5014    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
5015}
5016
5017static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
5018{
5019    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
5020}
5021
5022static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
5023{
5024    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
5025}
5026
5027static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
5028{
5029    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
5030}
5031
5032static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
5033{
5034    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
5035}
5036
5037static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
5038{
5039    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
5040}
5041
5042static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
5043{
5044    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
5045}
5046
5047static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
5048{
5049    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
5050}
5051
5052static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
5053{
5054    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
5055}
5056
5057/*
5058 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
5059 */
5060
5061/* Subroutine loading a vector register at VOFS of LEN bytes.
5062 * The load should begin at the address Rn + IMM.
5063 */
5064
static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* One part per whole doubleword, plus one per set bit of the tail. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;

    /* Compute Rn + IMM and apply the MTE check for the whole LEN bytes. */
    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /*
     * Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        /* Few parts: unroll the 8-byte loads inline. */
        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);

        /* Store the loaded doubleword at cpu_env + i + vofs. */
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /*
     * Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* Power-of-two tail: one little-endian load of that size. */
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6-byte tail: a 4-byte load merged with a 2-byte load. */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
        tcg_temp_free_i64(t0);
    }
}
5151
5152/* Similarly for stores.  */
/* Similarly for stores.  */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* One part per whole doubleword, plus one per set bit of the tail. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0;

    /* Compute Rn + IMM and apply the MTE check for the whole LEN bytes. */
    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        /* Few parts: unroll the 8-byte stores inline. */
        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        /* Fetch the doubleword at cpu_env + i + vofs, then store it. */
        t0 = tcg_temp_new_i64();
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_ld_i64(t0, tp, vofs);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_temp_free_ptr(tp);

        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2.  */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* Power-of-two tail: one little-endian store of that size. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6-byte tail: a 4-byte store followed by a 2-byte store. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i64(t0);
    }
}
5236
5237static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
5238{
5239    if (sve_access_check(s)) {
5240        int size = vec_full_reg_size(s);
5241        int off = vec_full_reg_offset(s, a->rd);
5242        do_ldr(s, off, size, a->rn, a->imm * size);
5243    }
5244    return true;
5245}
5246
5247static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
5248{
5249    if (sve_access_check(s)) {
5250        int size = pred_full_reg_size(s);
5251        int off = pred_full_reg_offset(s, a->rd);
5252        do_ldr(s, off, size, a->rn, a->imm * size);
5253    }
5254    return true;
5255}
5256
5257static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5258{
5259    if (sve_access_check(s)) {
5260        int size = vec_full_reg_size(s);
5261        int off = vec_full_reg_offset(s, a->rd);
5262        do_str(s, off, size, a->rn, a->imm * size);
5263    }
5264    return true;
5265}
5266
5267static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5268{
5269    if (sve_access_check(s)) {
5270        int size = pred_full_reg_size(s);
5271        int off = pred_full_reg_offset(s, a->rd);
5272        do_str(s, off, size, a->rn, a->imm * size);
5273    }
5274    return true;
5275}
5276
5277/*
5278 *** SVE Memory - Contiguous Load Group
5279 */
5280
/* The memory mode of the dtype.  */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_UQ
};

/* Extract just the log2 size of the memory access, dropping the sign. */
#define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
5290
/* The vector element size of dtype, as log2 of the size in bytes. */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
5298
/*
 * Emit a call to a contiguous predicated load/store helper, packaging
 * the predicate pointer and a descriptor for the helper ABI.
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, uint32_t mte_n, bool is_write,
                       gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 t_desc;
    int desc = 0;

    /*
     * For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    if (s->mte_active[0]) {
        int msz = dtype_msz(dtype);

        /* Pack the MTE check parameters into the high descriptor bits. */
        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    } else {
        /* No MTE: strip the tag bits from the address at translate time. */
        addr = clean_data_tbi(s, addr);
    }

    desc = simd_desc(vsz, vsz, zt | desc);
    t_desc = tcg_const_i32(desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, t_desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}
5336
/*
 * Indexed by [mte][be][dtype][nreg], where nreg is the number of
 * registers minus one (0 => LD1 ... 3 => LD4).  NULL entries are
 * dtype/nreg combinations with no instruction encoding.
 */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
    { /* mte inactive, little-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
          gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
        { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
          gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
        { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
          gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

      /* mte inactive, big-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
          gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
        { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
          gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
        { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
          gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },

    { /* mte active, little-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r_mte,
          gen_helper_sve_ld2hh_le_r_mte,
          gen_helper_sve_ld3hh_le_r_mte,
          gen_helper_sve_ld4hh_le_r_mte },
        { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r_mte,
          gen_helper_sve_ld2ss_le_r_mte,
          gen_helper_sve_ld3ss_le_r_mte,
          gen_helper_sve_ld4ss_le_r_mte },
        { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r_mte,
          gen_helper_sve_ld2dd_le_r_mte,
          gen_helper_sve_ld3dd_le_r_mte,
          gen_helper_sve_ld4dd_le_r_mte } },

      /* mte active, big-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r_mte,
          gen_helper_sve_ld2hh_be_r_mte,
          gen_helper_sve_ld3hh_be_r_mte,
          gen_helper_sve_ld4hh_be_r_mte },
        { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r_mte,
          gen_helper_sve_ld2ss_be_r_mte,
          gen_helper_sve_ld3ss_be_r_mte,
          gen_helper_sve_ld4ss_be_r_mte },
        { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r_mte,
          gen_helper_sve_ld2dd_be_r_mte,
          gen_helper_sve_ld3dd_be_r_mte,
          gen_helper_sve_ld4dd_be_r_mte } } },
};
5455
5456static void do_ld_zpa(DisasContext *s, int zt, int pg,
5457                      TCGv_i64 addr, int dtype, int nreg)
5458{
5459    gen_helper_gvec_mem *fn
5460        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
5461
5462    /*
5463     * While there are holes in the table, they are not
5464     * accessible via the instruction encoding.
5465     */
5466    assert(fn != NULL);
5467    do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
5468}
5469
5470static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
5471{
5472    if (a->rm == 31) {
5473        return false;
5474    }
5475    if (sve_access_check(s)) {
5476        TCGv_i64 addr = new_tmp_a64(s);
5477        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5478        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5479        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5480    }
5481    return true;
5482}
5483
5484static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
5485{
5486    if (sve_access_check(s)) {
5487        int vsz = vec_full_reg_size(s);
5488        int elements = vsz >> dtype_esz[a->dtype];
5489        TCGv_i64 addr = new_tmp_a64(s);
5490
5491        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5492                         (a->imm * elements * (a->nreg + 1))
5493                         << dtype_msz(a->dtype));
5494        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5495    }
5496    return true;
5497}
5498
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    /* First-fault contiguous load; helpers indexed by [mte][be][dtype]. */
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        /* addr = Rn + (Rm << msz) */
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
5596
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    /* Non-fault contiguous load; helpers indexed by [mte][be][dtype]. */
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
5697
5698static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
5699{
5700    unsigned vsz = vec_full_reg_size(s);
5701    TCGv_ptr t_pg;
5702    int poff;
5703
5704    /* Load the first quadword using the normal predicated load helpers.  */
5705    poff = pred_full_reg_offset(s, pg);
5706    if (vsz > 16) {
5707        /*
5708         * Zero-extend the first 16 bits of the predicate into a temporary.
5709         * This avoids triggering an assert making sure we don't have bits
5710         * set within a predicate beyond VQ, but we have lowered VQ to 1
5711         * for this load operation.
5712         */
5713        TCGv_i64 tmp = tcg_temp_new_i64();
5714#ifdef HOST_WORDS_BIGENDIAN
5715        poff += 6;
5716#endif
5717        tcg_gen_ld16u_i64(tmp, cpu_env, poff);
5718
5719        poff = offsetof(CPUARMState, vfp.preg_tmp);
5720        tcg_gen_st_i64(tmp, cpu_env, poff);
5721        tcg_temp_free_i64(tmp);
5722    }
5723
5724    t_pg = tcg_temp_new_ptr();
5725    tcg_gen_addi_ptr(t_pg, cpu_env, poff);
5726
5727    gen_helper_gvec_mem *fn
5728        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
5729    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));
5730
5731    tcg_temp_free_ptr(t_pg);
5732
5733    /* Replicate that first quadword.  */
5734    if (vsz > 16) {
5735        int doff = vec_full_reg_offset(s, zt);
5736        tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
5737    }
5738}
5739
5740static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
5741{
5742    if (a->rm == 31) {
5743        return false;
5744    }
5745    if (sve_access_check(s)) {
5746        int msz = dtype_msz(a->dtype);
5747        TCGv_i64 addr = new_tmp_a64(s);
5748        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5749        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5750        do_ldrq(s, a->rd, a->pg, addr, a->dtype);
5751    }
5752    return true;
5753}
5754
5755static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
5756{
5757    if (sve_access_check(s)) {
5758        TCGv_i64 addr = new_tmp_a64(s);
5759        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
5760        do_ldrq(s, a->rd, a->pg, addr, a->dtype);
5761    }
5762    return true;
5763}
5764
/*
 * LD1RO: load one octaword via the ordinary predicated load helper,
 * then replicate it across the vector in 32-byte units; any tail
 * beyond a multiple of 32 bytes is zeroed.
 */
static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned vsz_r32;
    TCGv_ptr t_pg;
    int poff, doff;

    if (vsz < 32) {
        /*
         * Note that this UNDEFINED check comes after CheckSVEEnabled()
         * in the ARM pseudocode, which is the sve_access_check() done
         * in our caller.  We should not now return false from the caller.
         */
        unallocated_encoding(s);
        return;
    }

    /* Load the first octaword using the normal predicated load helpers.  */

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 32) {
        /*
         * Zero-extend the first 32 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 2
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#ifdef HOST_WORDS_BIGENDIAN
        /* Predicate bits for the low lanes live in the high half here.  */
        poff += 4;
#endif
        tcg_gen_ld32u_i64(tmp, cpu_env, poff);

        /* Stage the truncated predicate in the scratch slot.  */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    /* Non-fault variant index 0, with the descriptor pinned to 32 bytes.  */
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));

    tcg_temp_free_ptr(t_pg);

    /*
     * Replicate that first octaword.
     * The replication happens in units of 32; if the full vector size
     * is not a multiple of 32, the final bits are zeroed.
     */
    doff = vec_full_reg_offset(s, zt);
    vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
    if (vsz >= 64) {
        tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
    }
    /* vsz is now the length of the unreplicated tail, possibly zero.  */
    vsz -= vsz_r32;
    if (vsz) {
        tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
    }
}
5827
5828static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
5829{
5830    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5831        return false;
5832    }
5833    if (a->rm == 31) {
5834        return false;
5835    }
5836    if (sve_access_check(s)) {
5837        TCGv_i64 addr = new_tmp_a64(s);
5838        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5839        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5840        do_ldro(s, a->rd, a->pg, addr, a->dtype);
5841    }
5842    return true;
5843}
5844
5845static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5846{
5847    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5848        return false;
5849    }
5850    if (sve_access_check(s)) {
5851        TCGv_i64 addr = new_tmp_a64(s);
5852        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5853        do_ldro(s, a->rd, a->pg, addr, a->dtype);
5854    }
5855    return true;
5856}
5857
/*
 * Load and broadcast element (LD1R).  Loads a single element and
 * duplicates it into every active element of zd; if no predicate bit
 * is set, the load is skipped entirely and zd's inactive elements
 * are still zeroed via do_movz_zpz.
 */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over;
    TCGv_i64 temp, clean_addr;

    if (!sve_access_check(s)) {
        return true;
    }

    over = gen_new_label();

    /* If the guarding predicate has no bits set, no load occurs.  */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Predicate wider than 64 bits: scan for any active element.  */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        /* find_last_active yields a negative value when none are active.  */
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data.  */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    /* Perform the MTE check on the single-element access.  */
    clean_addr = gen_mte_check1(s, temp, false, true, msz);

    tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
                        finalize_memop(s, dtype_mop[a->dtype]));

    /* Broadcast to *all* elements.  */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements.  */
    gen_set_label(over);
    return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}
5909
/*
 * Emit a contiguous predicated store.  nreg == 0 selects the ST1
 * form (any msz <= esz); otherwise the multi-register ST2/ST3/ST4
 * forms are selected via fn_multiple[nreg - 1] -- presumably nreg
 * here is the decode field (count - 1); see the (a->nreg + 1)
 * scaling in trans_ST_zpri.
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    /* Indexed by [mte][be][msz][esz]; NULL entries have msz > esz.  */
    static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
        { { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_le_r,
              gen_helper_sve_st1hs_le_r,
              gen_helper_sve_st1hd_le_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r,
              gen_helper_sve_st1sd_le_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r } },
          { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_be_r,
              gen_helper_sve_st1hs_be_r,
              gen_helper_sve_st1hd_be_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r,
              gen_helper_sve_st1sd_be_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r } } },

        { { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_le_r_mte,
              gen_helper_sve_st1hs_le_r_mte,
              gen_helper_sve_st1hd_le_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r_mte,
              gen_helper_sve_st1sd_le_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r_mte } },
          { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_be_r_mte,
              gen_helper_sve_st1hs_be_r_mte,
              gen_helper_sve_st1hd_be_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r_mte,
              gen_helper_sve_st1sd_be_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r_mte } } },
    };
    /* Indexed by [mte][be][nreg - 1][msz]; msz == esz for these forms.  */
    static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
        { { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_le_r,
              gen_helper_sve_st2ss_le_r,
              gen_helper_sve_st2dd_le_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_le_r,
              gen_helper_sve_st3ss_le_r,
              gen_helper_sve_st3dd_le_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_le_r,
              gen_helper_sve_st4ss_le_r,
              gen_helper_sve_st4dd_le_r } },
          { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_be_r,
              gen_helper_sve_st2ss_be_r,
              gen_helper_sve_st2dd_be_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_be_r,
              gen_helper_sve_st3ss_be_r,
              gen_helper_sve_st3dd_be_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_be_r,
              gen_helper_sve_st4ss_be_r,
              gen_helper_sve_st4dd_be_r } } },
        { { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_le_r_mte,
              gen_helper_sve_st2ss_le_r_mte,
              gen_helper_sve_st2dd_le_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_le_r_mte,
              gen_helper_sve_st3ss_le_r_mte,
              gen_helper_sve_st3dd_le_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_le_r_mte,
              gen_helper_sve_st4ss_le_r_mte,
              gen_helper_sve_st4dd_le_r_mte } },
          { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_be_r_mte,
              gen_helper_sve_st2ss_be_r_mte,
              gen_helper_sve_st2dd_be_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_be_r_mte,
              gen_helper_sve_st3ss_be_r_mte,
              gen_helper_sve_st3dd_be_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_be_r_mte,
              gen_helper_sve_st4ss_be_r_mte,
              gen_helper_sve_st4dd_be_r_mte } } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[s->mte_active[0]][be][msz][esz];
        nreg = 1;
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
}
6033
6034static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
6035{
6036    if (a->rm == 31 || a->msz > a->esz) {
6037        return false;
6038    }
6039    if (sve_access_check(s)) {
6040        TCGv_i64 addr = new_tmp_a64(s);
6041        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
6042        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
6043        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
6044    }
6045    return true;
6046}
6047
6048static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
6049{
6050    if (a->msz > a->esz) {
6051        return false;
6052    }
6053    if (sve_access_check(s)) {
6054        int vsz = vec_full_reg_size(s);
6055        int elements = vsz >> a->esz;
6056        TCGv_i64 addr = new_tmp_a64(s);
6057
6058        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
6059                         (a->imm * elements * (a->nreg + 1)) << a->msz);
6060        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
6061    }
6062    return true;
6063}
6064
6065/*
6066 *** SVE gather loads / scatter stores
6067 */
6068
/*
 * Emit a call to a gather/scatter helper: per-element addresses are
 * formed in the helper from the scalar base plus the offsets in
 * vector register zm, shifted left by scale.
 */
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz, bool is_write,
                       gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc = 0;

    if (s->mte_active[0]) {
        /* Pack the MTE check parameters above SVE_MTEDESC_SHIFT,
         * leaving the low bits for the simd_desc/scale data below.
         */
        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        /* Each element access covers (1 << msz) bytes.  */
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
        desc <<= SVE_MTEDESC_SHIFT;
    }
    desc = simd_desc(vsz, vsz, desc | scale);
    t_desc = tcg_const_i32(desc);

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}
6101
/*
 * Gather-load helpers for 32-bit elements.
 * Indexed by [mte][be][ff][xs][u][msz]:
 *   mte: MTE active; be: big-endian data; ff: first-fault form;
 *   xs: sign-extended (zss) vs zero-extended (zsu) 32-bit offsets;
 *   u: zero-extend (..su) vs sign-extend (..ss) the loaded value;
 *   msz: log2 of the memory access size.
 * NULL marks invalid combinations (sign-extending a full-size load);
 * callers assert the selected entry is non-NULL.
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_le_zsu,
                  gen_helper_sve_ldss_le_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_le_zss,
                  gen_helper_sve_ldss_le_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_le_zsu,
                  gen_helper_sve_ldffss_le_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_le_zss,
                  gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_be_zsu,
                  gen_helper_sve_ldss_be_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_be_zss,
                  gen_helper_sve_ldss_be_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_be_zsu,
                  gen_helper_sve_ldffss_be_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_be_zss,
                  gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_le_zsu_mte,
                  gen_helper_sve_ldss_le_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_le_zss_mte,
                  gen_helper_sve_ldss_le_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_le_zsu_mte,
                  gen_helper_sve_ldffss_le_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_le_zss_mte,
                  gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_be_zsu_mte,
                  gen_helper_sve_ldss_be_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_be_zss_mte,
                  gen_helper_sve_ldss_be_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_be_zsu_mte,
                  gen_helper_sve_ldffss_be_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_be_zss_mte,
                  gen_helper_sve_ldffss_be_zss_mte, } } } } },
};
6218
/*
 * Gather-load helpers for 64-bit elements, indexed like
 * gather_load_fn32 but with a third xs value and four msz values.
 * Note that we overload xs=2 to indicate 64-bit offset.
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn64[2][2][2][3][2][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_le_zsu,
                  gen_helper_sve_ldsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_le_zsu,
                  gen_helper_sve_ldsdu_le_zsu,
                  gen_helper_sve_lddd_le_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_le_zss,
                  gen_helper_sve_ldsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_le_zss,
                  gen_helper_sve_ldsdu_le_zss,
                  gen_helper_sve_lddd_le_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_le_zd,
                  gen_helper_sve_ldsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_le_zd,
                  gen_helper_sve_ldsdu_le_zd,
                  gen_helper_sve_lddd_le_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_le_zsu,
                  gen_helper_sve_ldffsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_le_zsu,
                  gen_helper_sve_ldffsdu_le_zsu,
                  gen_helper_sve_ldffdd_le_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_le_zss,
                  gen_helper_sve_ldffsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_le_zss,
                  gen_helper_sve_ldffsdu_le_zss,
                  gen_helper_sve_ldffdd_le_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_le_zd,
                  gen_helper_sve_ldffsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_le_zd,
                  gen_helper_sve_ldffsdu_le_zd,
                  gen_helper_sve_ldffdd_le_zd, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_be_zsu,
                  gen_helper_sve_ldsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_be_zsu,
                  gen_helper_sve_ldsdu_be_zsu,
                  gen_helper_sve_lddd_be_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_be_zss,
                  gen_helper_sve_ldsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_be_zss,
                  gen_helper_sve_ldsdu_be_zss,
                  gen_helper_sve_lddd_be_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_be_zd,
                  gen_helper_sve_ldsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_be_zd,
                  gen_helper_sve_ldsdu_be_zd,
                  gen_helper_sve_lddd_be_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_be_zsu,
                  gen_helper_sve_ldffsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_be_zsu,
                  gen_helper_sve_ldffsdu_be_zsu,
                  gen_helper_sve_ldffdd_be_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_be_zss,
                  gen_helper_sve_ldffsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_be_zss,
                  gen_helper_sve_ldffsdu_be_zss,
                  gen_helper_sve_ldffdd_be_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_be_zd,
                  gen_helper_sve_ldffsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_be_zd,
                  gen_helper_sve_ldffsdu_be_zd,
                  gen_helper_sve_ldffdd_be_zd, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_le_zsu_mte,
                  gen_helper_sve_ldsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_le_zsu_mte,
                  gen_helper_sve_ldsdu_le_zsu_mte,
                  gen_helper_sve_lddd_le_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_le_zss_mte,
                  gen_helper_sve_ldsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_le_zss_mte,
                  gen_helper_sve_ldsdu_le_zss_mte,
                  gen_helper_sve_lddd_le_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_le_zd_mte,
                  gen_helper_sve_ldsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_le_zd_mte,
                  gen_helper_sve_ldsdu_le_zd_mte,
                  gen_helper_sve_lddd_le_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_le_zsu_mte,
                  gen_helper_sve_ldffsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_le_zsu_mte,
                  gen_helper_sve_ldffsdu_le_zsu_mte,
                  gen_helper_sve_ldffdd_le_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_le_zss_mte,
                  gen_helper_sve_ldffsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_le_zss_mte,
                  gen_helper_sve_ldffsdu_le_zss_mte,
                  gen_helper_sve_ldffdd_le_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_le_zd_mte,
                  gen_helper_sve_ldffsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_le_zd_mte,
                  gen_helper_sve_ldffsdu_le_zd_mte,
                  gen_helper_sve_ldffdd_le_zd_mte, } } } },
        { /* Big-endian */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_be_zsu_mte,
                  gen_helper_sve_ldsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_be_zsu_mte,
                  gen_helper_sve_ldsdu_be_zsu_mte,
                  gen_helper_sve_lddd_be_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_be_zss_mte,
                  gen_helper_sve_ldsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_be_zss_mte,
                  gen_helper_sve_ldsdu_be_zss_mte,
                  gen_helper_sve_lddd_be_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_be_zd_mte,
                  gen_helper_sve_ldsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_be_zd_mte,
                  gen_helper_sve_ldsdu_be_zd_mte,
                  gen_helper_sve_lddd_be_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_be_zsu_mte,
                  gen_helper_sve_ldffsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_be_zsu_mte,
                  gen_helper_sve_ldffsdu_be_zsu_mte,
                  gen_helper_sve_ldffdd_be_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_be_zss_mte,
                  gen_helper_sve_ldffsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_be_zss_mte,
                  gen_helper_sve_ldffsdu_be_zss_mte,
                  gen_helper_sve_ldffdd_be_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_be_zd_mte,
                  gen_helper_sve_ldffsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_be_zd_mte,
                  gen_helper_sve_ldffsdu_be_zd_mte,
                  gen_helper_sve_ldffdd_be_zd_mte, } } } } },
};
6429
6430static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
6431{
6432    gen_helper_gvec_mem_scatter *fn = NULL;
6433    bool be = s->be_data == MO_BE;
6434    bool mte = s->mte_active[0];
6435
6436    if (!sve_access_check(s)) {
6437        return true;
6438    }
6439
6440    switch (a->esz) {
6441    case MO_32:
6442        fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
6443        break;
6444    case MO_64:
6445        fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
6446        break;
6447    }
6448    assert(fn != NULL);
6449
6450    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
6451               cpu_reg_sp(s, a->rn), a->msz, false, fn);
6452    return true;
6453}
6454
6455static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
6456{
6457    gen_helper_gvec_mem_scatter *fn = NULL;
6458    bool be = s->be_data == MO_BE;
6459    bool mte = s->mte_active[0];
6460    TCGv_i64 imm;
6461
6462    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
6463        return false;
6464    }
6465    if (!sve_access_check(s)) {
6466        return true;
6467    }
6468
6469    switch (a->esz) {
6470    case MO_32:
6471        fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
6472        break;
6473    case MO_64:
6474        fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
6475        break;
6476    }
6477    assert(fn != NULL);
6478
6479    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
6480     * by loading the immediate into the scalar parameter.
6481     */
6482    imm = tcg_const_i64(a->imm << a->msz);
6483    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn);
6484    tcg_temp_free_i64(imm);
6485    return true;
6486}
6487
6488static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
6489{
6490    gen_helper_gvec_mem_scatter *fn = NULL;
6491    bool be = s->be_data == MO_BE;
6492    bool mte = s->mte_active[0];
6493
6494    if (a->esz < a->msz + !a->u) {
6495        return false;
6496    }
6497    if (!dc_isar_feature(aa64_sve2, s)) {
6498        return false;
6499    }
6500    if (!sve_access_check(s)) {
6501        return true;
6502    }
6503
6504    switch (a->esz) {
6505    case MO_32:
6506        fn = gather_load_fn32[mte][be][0][0][a->u][a->msz];
6507        break;
6508    case MO_64:
6509        fn = gather_load_fn64[mte][be][0][2][a->u][a->msz];
6510        break;
6511    }
6512    assert(fn != NULL);
6513
6514    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6515               cpu_reg(s, a->rm), a->msz, false, fn);
6516    return true;
6517}
6518
/* Indexed by [mte][be][xs][msz].  */
/*
 * Byte stores (msz == MO_8) have no endian distinction, so the same
 * helper appears in both the little- and big-endian rows.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};
6552
/* Note that we overload xs=2 to indicate 64-bit offset.  */
/* Indexed by [mte][be][xs][msz].  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
         { /* Little-endian */
             { gen_helper_sve_stbd_zsu,
               gen_helper_sve_sthd_le_zsu,
               gen_helper_sve_stsd_le_zsu,
               gen_helper_sve_stdd_le_zsu, },
             { gen_helper_sve_stbd_zss,
               gen_helper_sve_sthd_le_zss,
               gen_helper_sve_stsd_le_zss,
               gen_helper_sve_stdd_le_zss, },
             { gen_helper_sve_stbd_zd,
               gen_helper_sve_sthd_le_zd,
               gen_helper_sve_stsd_le_zd,
               gen_helper_sve_stdd_le_zd, } },
         { /* Big-endian */
             { gen_helper_sve_stbd_zsu,
               gen_helper_sve_sthd_be_zsu,
               gen_helper_sve_stsd_be_zsu,
               gen_helper_sve_stdd_be_zsu, },
             { gen_helper_sve_stbd_zss,
               gen_helper_sve_sthd_be_zss,
               gen_helper_sve_stsd_be_zss,
               gen_helper_sve_stdd_be_zss, },
             { gen_helper_sve_stbd_zd,
               gen_helper_sve_sthd_be_zd,
               gen_helper_sve_stsd_be_zd,
               gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
         { /* Little-endian */
             { gen_helper_sve_stbd_zsu_mte,
               gen_helper_sve_sthd_le_zsu_mte,
               gen_helper_sve_stsd_le_zsu_mte,
               gen_helper_sve_stdd_le_zsu_mte, },
             { gen_helper_sve_stbd_zss_mte,
               gen_helper_sve_sthd_le_zss_mte,
               gen_helper_sve_stsd_le_zss_mte,
               gen_helper_sve_stdd_le_zss_mte, },
             { gen_helper_sve_stbd_zd_mte,
               gen_helper_sve_sthd_le_zd_mte,
               gen_helper_sve_stsd_le_zd_mte,
               gen_helper_sve_stdd_le_zd_mte, } },
         { /* Big-endian */
             { gen_helper_sve_stbd_zsu_mte,
               gen_helper_sve_sthd_be_zsu_mte,
               gen_helper_sve_stsd_be_zsu_mte,
               gen_helper_sve_stdd_be_zsu_mte, },
             { gen_helper_sve_stbd_zss_mte,
               gen_helper_sve_sthd_be_zss_mte,
               gen_helper_sve_stsd_be_zss_mte,
               gen_helper_sve_stdd_be_zss_mte, },
             { gen_helper_sve_stbd_zd_mte,
               gen_helper_sve_sthd_be_zd_mte,
               gen_helper_sve_stsd_be_zd_mte,
               gen_helper_sve_stdd_be_zd_mte, } } },
};
6610
6611static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
6612{
6613    gen_helper_gvec_mem_scatter *fn;
6614    bool be = s->be_data == MO_BE;
6615    bool mte = s->mte_active[0];
6616
6617    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
6618        return false;
6619    }
6620    if (!sve_access_check(s)) {
6621        return true;
6622    }
6623    switch (a->esz) {
6624    case MO_32:
6625        fn = scatter_store_fn32[mte][be][a->xs][a->msz];
6626        break;
6627    case MO_64:
6628        fn = scatter_store_fn64[mte][be][a->xs][a->msz];
6629        break;
6630    default:
6631        g_assert_not_reached();
6632    }
6633    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
6634               cpu_reg_sp(s, a->rn), a->msz, true, fn);
6635    return true;
6636}
6637
6638static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
6639{
6640    gen_helper_gvec_mem_scatter *fn = NULL;
6641    bool be = s->be_data == MO_BE;
6642    bool mte = s->mte_active[0];
6643    TCGv_i64 imm;
6644
6645    if (a->esz < a->msz) {
6646        return false;
6647    }
6648    if (!sve_access_check(s)) {
6649        return true;
6650    }
6651
6652    switch (a->esz) {
6653    case MO_32:
6654        fn = scatter_store_fn32[mte][be][0][a->msz];
6655        break;
6656    case MO_64:
6657        fn = scatter_store_fn64[mte][be][2][a->msz];
6658        break;
6659    }
6660    assert(fn != NULL);
6661
6662    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
6663     * by loading the immediate into the scalar parameter.
6664     */
6665    imm = tcg_const_i64(a->imm << a->msz);
6666    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn);
6667    tcg_temp_free_i64(imm);
6668    return true;
6669}
6670
6671static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
6672{
6673    gen_helper_gvec_mem_scatter *fn;
6674    bool be = s->be_data == MO_BE;
6675    bool mte = s->mte_active[0];
6676
6677    if (a->esz < a->msz) {
6678        return false;
6679    }
6680    if (!dc_isar_feature(aa64_sve2, s)) {
6681        return false;
6682    }
6683    if (!sve_access_check(s)) {
6684        return true;
6685    }
6686
6687    switch (a->esz) {
6688    case MO_32:
6689        fn = scatter_store_fn32[mte][be][0][a->msz];
6690        break;
6691    case MO_64:
6692        fn = scatter_store_fn64[mte][be][2][a->msz];
6693        break;
6694    default:
6695        g_assert_not_reached();
6696    }
6697
6698    do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
6699               cpu_reg(s, a->rm), a->msz, true, fn);
6700    return true;
6701}
6702
6703/*
6704 * Prefetches
6705 */
6706
/* PRF (vector/immediate forms): the data prefetch itself is a no-op,
 * but the SVE access check is still performed for its side effects.  */
static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}
6713
/* PRF (scalar plus scalar): as trans_PRF, but rm == 31 is rejected
 * as an invalid encoding.  */
static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    if (a->rm == 31) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}
6723
6724/*
6725 * Move Prefix
6726 *
6727 * TODO: The implementation so far could handle predicated merging movprfx.
6728 * The helper functions as written take an extra source register to
6729 * use in the operation, but the result is only written when predication
6730 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
6731 * to allow the final write back to the destination to be unconditional.
6732 * For predicated zeroing movprfx, we need to rearrange the helpers to
6733 * allow the final write back to zero inactives.
6734 *
6735 * In the meantime, just emit the moves.
6736 */
6737
/* MOVPRFX (unpredicated): emitted as a plain vector move.  */
static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    return do_mov_z(s, a->rd, a->rn);
}

/* MOVPRFX (predicated, merging): select rn where pg is set, else rd.  */
static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
    }
    return true;
}

/* MOVPRFX (predicated, zeroing): expanded via do_movz_zpz.  */
static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{
    return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
}
6755
6756/*
6757 * SVE2 Integer Multiply - Unpredicated
6758 */
6759
/* MUL (vectors, unpredicated), SVE2 only; expanded inline via gvec.  */
static bool trans_MUL_zzz(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, tcg_gen_gvec_mul, a->esz, a->rd, a->rn, a->rm);
    }
    return true;
}
6770
6771static bool do_sve2_zzz_ool(DisasContext *s, arg_rrr_esz *a,
6772                            gen_helper_gvec_3 *fn)
6773{
6774    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
6775        return false;
6776    }
6777    if (sve_access_check(s)) {
6778        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
6779    }
6780    return true;
6781}
6782
/*
 * SVE2 unpredicated multiply-high and polynomial multiply: each
 * dispatches by element size to its out-of-line helper.
 */
static bool trans_SMULH_zzz(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
        gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
    };
    return do_sve2_zzz_ool(s, a, fns[a->esz]);
}

static bool trans_UMULH_zzz(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
        gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
    };
    return do_sve2_zzz_ool(s, a, fns[a->esz]);
}

/* PMUL only exists for byte elements.  */
static bool trans_PMUL_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_zzz_ool(s, a, gen_helper_gvec_pmul_b);
}

static bool trans_SQDMULH_zzz(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
        gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
    };
    return do_sve2_zzz_ool(s, a, fns[a->esz]);
}

static bool trans_SQRDMULH_zzz(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
        gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
    };
    return do_sve2_zzz_ool(s, a, fns[a->esz]);
}
6823
6824/*
6825 * SVE2 Integer - Predicated
6826 */
6827
/* As do_zpzz_ool, but additionally gated on the SVE2 feature.  */
static bool do_sve2_zpzz_ool(DisasContext *s, arg_rprr_esz *a,
                             gen_helper_gvec_4 *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzz_ool(s, a, fn);
}
6836
/* SADALP/UADALP: pairwise add-accumulate long.  The byte element size
 * is invalid; the fns[] tables start at the halfword helpers, hence
 * the esz - 1 index.  */
static bool trans_SADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[3] = {
        gen_helper_sve2_sadalp_zpzz_h,
        gen_helper_sve2_sadalp_zpzz_s,
        gen_helper_sve2_sadalp_zpzz_d,
    };
    if (a->esz == 0) {
        return false;
    }
    return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
}

static bool trans_UADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[3] = {
        gen_helper_sve2_uadalp_zpzz_h,
        gen_helper_sve2_uadalp_zpzz_s,
        gen_helper_sve2_uadalp_zpzz_d,
    };
    if (a->esz == 0) {
        return false;
    }
    return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
}
6862
6863/*
6864 * SVE2 integer unary operations (predicated)
6865 */
6866
/* As do_zpz_ool, but additionally gated on the SVE2 feature.  */
static bool do_sve2_zpz_ool(DisasContext *s, arg_rpr_esz *a,
                            gen_helper_gvec_3 *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpz_ool(s, a, fn);
}
6875
/* URECPE/URSQRTE exist only for 32-bit elements.  */
static bool trans_URECPE(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz != 2) {
        return false;
    }
    return do_sve2_zpz_ool(s, a, gen_helper_sve2_urecpe_s);
}

static bool trans_URSQRTE(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz != 2) {
        return false;
    }
    return do_sve2_zpz_ool(s, a, gen_helper_sve2_ursqrte_s);
}

/* SQABS/SQNEG: saturating absolute value / negate, all element sizes.  */
static bool trans_SQABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
        gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
    };
    return do_sve2_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SQNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
        gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
    };
    return do_sve2_zpz_ool(s, a, fns[a->esz]);
}
6909
/*
 * Expand a predicated SVE2 two-source operation, dispatching by element
 * size to out-of-line helpers named gen_helper_sve2_<name>_zpzz_{b,h,s,d}.
 */
#define DO_SVE2_ZPZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve2_##name##_zpzz_b, gen_helper_sve2_##name##_zpzz_h, \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d, \
    };                                                                    \
    return do_sve2_zpzz_ool(s, a, fns[a->esz]);                           \
}

DO_SVE2_ZPZZ(SQSHL, sqshl)
DO_SVE2_ZPZZ(SQRSHL, sqrshl)
DO_SVE2_ZPZZ(SRSHL, srshl)

DO_SVE2_ZPZZ(UQSHL, uqshl)
DO_SVE2_ZPZZ(UQRSHL, uqrshl)
DO_SVE2_ZPZZ(URSHL, urshl)

DO_SVE2_ZPZZ(SHADD, shadd)
DO_SVE2_ZPZZ(SRHADD, srhadd)
DO_SVE2_ZPZZ(SHSUB, shsub)

DO_SVE2_ZPZZ(UHADD, uhadd)
DO_SVE2_ZPZZ(URHADD, urhadd)
DO_SVE2_ZPZZ(UHSUB, uhsub)

DO_SVE2_ZPZZ(ADDP, addp)
DO_SVE2_ZPZZ(SMAXP, smaxp)
DO_SVE2_ZPZZ(UMAXP, umaxp)
DO_SVE2_ZPZZ(SMINP, sminp)
DO_SVE2_ZPZZ(UMINP, uminp)

DO_SVE2_ZPZZ(SQADD_zpzz, sqadd)
DO_SVE2_ZPZZ(UQADD_zpzz, uqadd)
DO_SVE2_ZPZZ(SQSUB_zpzz, sqsub)
DO_SVE2_ZPZZ(UQSUB_zpzz, uqsub)
DO_SVE2_ZPZZ(SUQADD, suqadd)
DO_SVE2_ZPZZ(USQADD, usqadd)
6948
6949/*
6950 * SVE2 Widening Integer Arithmetic
6951 */
6952
6953static bool do_sve2_zzw_ool(DisasContext *s, arg_rrr_esz *a,
6954                            gen_helper_gvec_3 *fn, int data)
6955{
6956    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
6957        return false;
6958    }
6959    if (sve_access_check(s)) {
6960        unsigned vsz = vec_full_reg_size(s);
6961        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
6962                           vec_full_reg_offset(s, a->rn),
6963                           vec_full_reg_offset(s, a->rm),
6964                           vsz, vsz, data, fn);
6965    }
6966    return true;
6967}
6968
/*
 * Expand a widening two-source operation.  SEL1/SEL2 choose the bottom
 * (false) or top (true) half of each input; they are packed into the
 * helper's data word as (SEL2 << 1) | SEL1.  Byte elements are invalid
 * (NULL entry, rejected by do_sve2_zzw_ool).
 */
#define DO_SVE2_ZZZ_TB(NAME, name, SEL1, SEL2) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)               \
{                                                                       \
    static gen_helper_gvec_3 * const fns[4] = {                         \
        NULL,                       gen_helper_sve2_##name##_h,         \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,         \
    };                                                                  \
    return do_sve2_zzw_ool(s, a, fns[a->esz], (SEL2 << 1) | SEL1);      \
}

DO_SVE2_ZZZ_TB(SADDLB, saddl, false, false)
DO_SVE2_ZZZ_TB(SSUBLB, ssubl, false, false)
DO_SVE2_ZZZ_TB(SABDLB, sabdl, false, false)

DO_SVE2_ZZZ_TB(UADDLB, uaddl, false, false)
DO_SVE2_ZZZ_TB(USUBLB, usubl, false, false)
DO_SVE2_ZZZ_TB(UABDLB, uabdl, false, false)

DO_SVE2_ZZZ_TB(SADDLT, saddl, true, true)
DO_SVE2_ZZZ_TB(SSUBLT, ssubl, true, true)
DO_SVE2_ZZZ_TB(SABDLT, sabdl, true, true)

DO_SVE2_ZZZ_TB(UADDLT, uaddl, true, true)
DO_SVE2_ZZZ_TB(USUBLT, usubl, true, true)
DO_SVE2_ZZZ_TB(UABDLT, uabdl, true, true)

DO_SVE2_ZZZ_TB(SADDLBT, saddl, false, true)
DO_SVE2_ZZZ_TB(SSUBLBT, ssubl, false, true)
DO_SVE2_ZZZ_TB(SSUBLTB, ssubl, true, false)

DO_SVE2_ZZZ_TB(SQDMULLB_zzz, sqdmull_zzz, false, false)
DO_SVE2_ZZZ_TB(SQDMULLT_zzz, sqdmull_zzz, true, true)

DO_SVE2_ZZZ_TB(SMULLB_zzz, smull_zzz, false, false)
DO_SVE2_ZZZ_TB(SMULLT_zzz, smull_zzz, true, true)

DO_SVE2_ZZZ_TB(UMULLB_zzz, umull_zzz, false, false)
DO_SVE2_ZZZ_TB(UMULLT_zzz, umull_zzz, true, true)
7007
7008static bool do_eor_tb(DisasContext *s, arg_rrr_esz *a, bool sel1)
7009{
7010    static gen_helper_gvec_3 * const fns[4] = {
7011        gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
7012        gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
7013    };
7014    return do_sve2_zzw_ool(s, a, fns[a->esz], (!sel1 << 1) | sel1);
7015}
7016
/* EORBT: sel1 = bottom; EORTB: sel1 = top.  */
static bool trans_EORBT(DisasContext *s, arg_rrr_esz *a)
{
    return do_eor_tb(s, a, false);
}

static bool trans_EORTB(DisasContext *s, arg_rrr_esz *a)
{
    return do_eor_tb(s, a, true);
}
7026
/*
 * PMULLB/PMULLT: polynomial multiply long.  esz == 0 uses the 64x64
 * helper and additionally requires the pmull128 feature; esz == 2 has
 * no helper (NULL, rejected by do_sve2_zzw_ool).
 */
static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
        NULL,                    gen_helper_sve2_pmull_d,
    };
    if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], sel);
}
7038
/* PMULLB selects the bottom halves, PMULLT the top halves.  */
static bool trans_PMULLB(DisasContext *s, arg_rrr_esz *a)
{
    return do_trans_pmull(s, a, false);
}

static bool trans_PMULLT(DisasContext *s, arg_rrr_esz *a)
{
    return do_trans_pmull(s, a, true);
}
7048
/*
 * Expand a widening (wide + narrow) two-source operation; SEL2 chooses
 * the bottom (false) or top (true) half of the narrow operand.  Byte
 * elements are invalid (NULL entry).
 */
#define DO_SVE2_ZZZ_WTB(NAME, name, SEL2) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)       \
{                                                               \
    static gen_helper_gvec_3 * const fns[4] = {                 \
        NULL,                       gen_helper_sve2_##name##_h, \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
    };                                                          \
    return do_sve2_zzw_ool(s, a, fns[a->esz], SEL2);            \
}

DO_SVE2_ZZZ_WTB(SADDWB, saddw, false)
DO_SVE2_ZZZ_WTB(SADDWT, saddw, true)
DO_SVE2_ZZZ_WTB(SSUBWB, ssubw, false)
DO_SVE2_ZZZ_WTB(SSUBWT, ssubw, true)

DO_SVE2_ZZZ_WTB(UADDWB, uaddw, false)
DO_SVE2_ZZZ_WTB(UADDWT, uaddw, true)
DO_SVE2_ZZZ_WTB(USUBWB, usubw, false)
DO_SVE2_ZZZ_WTB(USUBWT, usubw, true)
7068
/*
 * Vector expansion for SSHLL[BT]: sign-extend the bottom (imm bit 0
 * clear) or top (set) half of each element and shift left by imm >> 1.
 */
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            /* Shifting the top half fully left reduces to masking
             * off the bottom half.  */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Arithmetic shift right sign-extends the top half into
             * the element, then shift back left into position.  */
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        /* Move the bottom half to the top, then arithmetic shift right
         * to sign-extend while leaving it shifted left by shl.  */
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}
7090
/*
 * Scalar (64-bit lane) expansion for USHLL[BT]: zero-extend the bottom
 * or top half of each element and shift left by imm >> 1.  Both packed
 * elements are handled at once via a replicated mask.
 */
static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = (imm >> 1);
    int shift;
    uint64_t mask;

    /* Mask selecting the shifted source half within each element.  */
    mask = MAKE_64BIT_MASK(0, halfbits);
    mask <<= shl;
    mask = dup_const(vece, mask);

    /* Net shift: left by shl, minus halfbits when taking the top half.  */
    shift = shl - top * halfbits;
    if (shift < 0) {
        tcg_gen_shri_i64(d, n, -shift);
    } else {
        tcg_gen_shli_i64(d, n, shift);
    }
    tcg_gen_andi_i64(d, d, mask);
}
7111
/* Size-specific adapters binding gen_ushll_i64 for use as .fni8.  */
static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}
7126
/*
 * Vector expansion for USHLL[BT]: zero-extend the bottom (imm bit 0
 * clear) or top (set) half of each element and shift left by imm >> 1.
 */
static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            /* Fully shifted top half == mask off the bottom half.  */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            /* No shift: just mask to the bottom half.  */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Shift bottom half to the top, then back right so it ends
             * up zero-extended and shifted left by shl.  */
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}
7155
/*
 * Expand [US]SHLL[BT].  The immediate passed to the expanders packs the
 * shift amount in bits [n:1] and top/bottom selection in bit 0.  The
 * ops table is indexed [uns][a->esz] with esz 0..2 mapping to H/S/D
 * destination elements (hence the range check below).
 */
static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
                            bool sel, bool uns)
{
    static const TCGOpcode sshll_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, 0
    };
    static const TCGOpcode ushll_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, 0
    };
    static const GVecGen2i ops[2][3] = {
        { { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_h,
            .vece = MO_16 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_s,
            .vece = MO_32 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_d,
            .vece = MO_64 } },
        { { .fni8 = gen_ushll16_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_h,
            .vece = MO_16 },
          { .fni8 = gen_ushll32_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_s,
            .vece = MO_32 },
          { .fni8 = gen_ushll64_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_d,
            .vece = MO_64 } },
    };

    if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[uns][a->esz]);
    }
    return true;
}
7207
/* [US]SHLL[BT] wrappers: (sel = top?, uns = unsigned?).  */
static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, false);
}

static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, false);
}

static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, true);
}

static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, true);
}
7227
/* SVE2 bit-permute insns, gated on the BitPerm feature.  */
static bool trans_BEXT(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
        gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
    };
    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
}

static bool trans_BDEP(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
        gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
    };
    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
}

static bool trans_BGRP(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
        gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
    };
    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
}
7263
/*
 * CADD/SQCADD: complex integer add.  sq selects the saturating form;
 * rot is passed to the helper (false for #90, true for #270 per the
 * callers below).
 */
static bool do_cadd(DisasContext *s, arg_rrr_esz *a, bool sq, bool rot)
{
    static gen_helper_gvec_3 * const fns[2][4] = {
        { gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
          gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d },
        { gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
          gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d },
    };
    return do_sve2_zzw_ool(s, a, fns[sq][a->esz], rot);
}
7274
/* CADD/SQCADD rotation wrappers.  */
static bool trans_CADD_rot90(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, false, false);
}

static bool trans_CADD_rot270(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, false, true);
}

static bool trans_SQCADD_rot90(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, true, false);
}

static bool trans_SQCADD_rot270(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, true, true);
}
7294
/*
 * Expand a 4-operand (three sources + accumulator) SVE2 operation out
 * of line, passing DATA through to the helper.  A NULL fn marks an
 * unsupported element size.
 */
static bool do_sve2_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
                             gen_helper_gvec_4 *fn, int data)
{
    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
    }
    return true;
}
7306
/*
 * [SU]ABAL[BT]: absolute-difference accumulate long.  uns selects the
 * unsigned helper; sel is the top/bottom selection passed as data.
 * Byte elements are invalid (NULL entry).
 */
static bool do_abal(DisasContext *s, arg_rrrr_esz *a, bool uns, bool sel)
{
    static gen_helper_gvec_4 * const fns[2][4] = {
        { NULL,                    gen_helper_sve2_sabal_h,
          gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d },
        { NULL,                    gen_helper_sve2_uabal_h,
          gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d },
    };
    return do_sve2_zzzz_ool(s, a, fns[uns][a->esz], sel);
}
7317
/* [SU]ABAL[BT] wrappers: (uns = unsigned?, sel = top?).  */
static bool trans_SABALB(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, false, false);
}

static bool trans_SABALT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, false, true);
}

static bool trans_UABALB(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, true, false);
}

static bool trans_UABALT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, true, true);
}
7337
/* ADCLB/ADCLT: add/subtract with carry long.  */
static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return do_sve2_zzzz_ool(s, a, fns[a->esz & 1], (a->esz & 2) | sel);
}
7350
/* ADCLB selects the bottom halves, ADCLT the top halves.  */
static bool trans_ADCLB(DisasContext *s, arg_rrrr_esz *a)
{
    return do_adcl(s, a, false);
}

static bool trans_ADCLT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_adcl(s, a, true);
}
7360
/*
 * Expand a two-operand + immediate SVE2 operation via a shared gvec
 * expander (used by the shift-and-accumulate/insert group below).
 * A negative esz from decode marks an invalid size.
 */
static bool do_sve2_fn2i(DisasContext *s, arg_rri_esz *a, GVecGen2iFn *fn)
{
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned rd_ofs = vec_full_reg_offset(s, a->rd);
        unsigned rn_ofs = vec_full_reg_offset(s, a->rn);
        fn(a->esz, rd_ofs, rn_ofs, a->imm, vsz, vsz);
    }
    return true;
}
7374
/* Shift-right accumulate and shift-insert, sharing the Neon expanders.  */
static bool trans_SSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ssra);
}

static bool trans_USRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_usra);
}

static bool trans_SRSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_srsra);
}

static bool trans_URSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ursra);
}

static bool trans_SRI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sri);
}

static bool trans_SLI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sli);
}
7404
/* Expand a 3-operand SVE2 operation via a shared gvec expander.  */
static bool do_sve2_fn_zzz(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
    }
    return true;
}
7415
/* SABA/UABA: absolute-difference accumulate, via the Neon expanders.  */
static bool trans_SABA(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_fn_zzz(s, a, gen_gvec_saba);
}

static bool trans_UABA(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_fn_zzz(s, a, gen_gvec_uaba);
}
7425
/*
 * Expand a narrowing extract operation using the given per-size ops
 * table (indexed by a->esz, the narrow element size, MO_8..MO_32).
 * Only a zero immediate (no shift) is accepted here.
 */
static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
                                   const GVecGen2 ops[3])
{
    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
        !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, &ops[a->esz]);
    }
    return true;
}
7441
/* Vector opcodes required by the gen_sqxtn[bt]_vec expansions below. */
static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTNB inline expansion: clamp each wide element of N to the signed
 * range of the half-width element, then mask so only the low half of
 * each wide lane remains set in D.
 */
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;  /* bits in the narrow element */
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

/*
 * Note: the ops table is indexed by the narrow element size (a->esz),
 * while each entry operates at the wide element size (.vece).
 */
static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7481
/*
 * SQXTNT inline expansion: clamp N to the signed half-width range,
 * shift the result into the high half of each wide lane, then merge:
 * bitsel with T = low-half mask keeps D's low halves and takes the
 * shifted N for the high halves.  Note that N is clobbered.
 */
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;  /* bits in the narrow element */
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

/* .load_dest is set because the low halves of the destination survive. */
static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnt_vec,
          .opt_opc = sqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7521
/* Vector opcodes required by the gen_uqxtn[bt]_vec expansions below. */
static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};

/*
 * UQXTNB inline expansion: unsigned saturation is a single umin
 * against the half-width maximum; the clamped value already fits in
 * the low half of each wide lane.
 */
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;  /* bits in the narrow element */
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnb_vec,
          .opt_opc = uqxtn_list,
          .fno = gen_helper_sve2_uqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7555
/*
 * UQXTNT inline expansion: clamp N with umin, shift into the high
 * half of each wide lane, then merge with D's preserved low halves
 * via bitsel (T = low-half mask).  N is clobbered.
 */
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;  /* bits in the narrow element */
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

/* .load_dest is set because the low halves of the destination survive. */
static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqxtnt_vec,
          .opt_opc = uqxtn_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqxtnt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7590
/* Vector opcodes required by the gen_sqxtun[bt]_vec expansions below. */
static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTUNB inline expansion: signed input saturated to the unsigned
 * half-width range [0, 2^halfbits - 1] (smax with 0, then umin with
 * the maximum); the result fits in the low half of each wide lane.
 */
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;  /* bits in the narrow element */
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunb_vec,
          .opt_opc = sqxtun_list,
          .fno = gen_helper_sve2_sqxtunb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7626
/*
 * SQXTUNT inline expansion: as SQXTUNB, but the clamped value is
 * shifted into the high half of each wide lane and merged with D's
 * preserved low halves via bitsel.  N is clobbered.
 */
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;  /* bits in the narrow element */
    int64_t max = (1ull << halfbits) - 1;

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

/* .load_dest is set because the low halves of the destination survive. */
static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtunt_vec,
          .opt_opc = sqxtun_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqxtunt_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7663
7664static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
7665                               const GVecGen2i ops[3])
7666{
7667    if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
7668        return false;
7669    }
7670    assert(a->imm > 0 && a->imm <= (8 << a->esz));
7671    if (sve_access_check(s)) {
7672        unsigned vsz = vec_full_reg_size(s);
7673        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
7674                        vec_full_reg_offset(s, a->rn),
7675                        vsz, vsz, a->imm, &ops[a->esz]);
7676    }
7677    return true;
7678}
7679
/*
 * SHRNB expansion on a 64-bit chunk: shift the whole chunk right,
 * then keep only the low half of each wide element (mask replicated
 * at the wide element size via dup_const).
 */
static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;  /* bits in the narrow element */
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}

/* Per-width bindings of gen_shrnb_i64 for the GVecGen2i .fni8 hook. */
static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}

/* Vector form of the same: shift N in place, then mask into D. */
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

/*
 * SHRNB: three expansion strategies per element size — 64-bit scalar
 * (.fni8), host vector (.fniv, requiring shri_vec), out-of-line
 * helper (.fno) as fallback.
 */
static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnb16_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnb32_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnb64_i64,
          .fniv = gen_shrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_shrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7738
/*
 * SHRNT expansion on a 64-bit chunk: shifting N left by
 * (halfbits - shr) places the narrowed bits in the high half of each
 * wide element; mask N to the high halves, D to the low halves, and
 * combine.  Clobbers N.
 */
static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;  /* bits in the narrow element */
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);
    tcg_gen_andi_i64(d, d, mask);
    tcg_gen_or_i64(d, d, n);
}

/* Per-width bindings for the GVecGen2i .fni8 hook. */
static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

/* MO_64: one wide lane per chunk — deposit into the upper 32 bits. */
static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}

/* Vector form: shift left into the high half, merge with bitsel. */
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

/* .load_dest is set because the low halves of the destination survive. */
static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
    static const GVecGen2i ops[3] = {
        { .fni8 = gen_shrnt16_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_h,
          .vece = MO_16 },
        { .fni8 = gen_shrnt32_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_s,
          .vece = MO_32 },
        { .fni8 = gen_shrnt64_i64,
          .fniv = gen_shrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_shrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7803
/*
 * Rounding narrowing shifts: no inline expansion, out-of-line
 * helpers only (.fno).
 */
static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnb_h },
        { .fno = gen_helper_sve2_rshrnb_s },
        { .fno = gen_helper_sve2_rshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_rshrnt_h },
        { .fno = gen_helper_sve2_rshrnt_s },
        { .fno = gen_helper_sve2_rshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7823
/*
 * SQSHRUNB inline expansion: arithmetic shift right, then saturate
 * the signed result to the unsigned half-width range (smax with 0,
 * umin with the half-width maximum).  Clobbers N.
 */
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;  /* bits in the narrow element */

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrunb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrunb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7859
/*
 * SQSHRUNT inline expansion: as SQSHRUNB, but the saturated value is
 * shifted into the high half of each wide lane and merged with D's
 * preserved low halves via bitsel.  Clobbers N.
 */
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;  /* bits in the narrow element */

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

/* .load_dest is set because the low halves of the destination survive. */
static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_smax_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrunt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrunt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7901
/* Rounding variants: out-of-line helpers only, no inline expansion. */
static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrunb_h },
        { .fno = gen_helper_sve2_sqrshrunb_s },
        { .fno = gen_helper_sve2_sqrshrunb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrunt_h },
        { .fno = gen_helper_sve2_sqrshrunt_s },
        { .fno = gen_helper_sve2_sqrshrunt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7921
/*
 * SQSHRNB inline expansion: arithmetic shift right, clamp to the
 * signed half-width range [min, max], then mask to the low half of
 * each wide lane.  Clobbers N.
 */
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;  /* bits in the narrow element */
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_sqshrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
7961
/*
 * SQSHRNT inline expansion: as SQSHRNB, but the clamped value is
 * shifted into the high half of each wide lane and merged with D's
 * preserved low halves via bitsel.  Clobbers N.
 */
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;  /* bits in the narrow element */
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);
    int64_t min = -max - 1;

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

/* .load_dest is set because the low halves of the destination survive. */
static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec,
        INDEX_op_smax_vec, INDEX_op_smin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_h,
          .vece = MO_16 },
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_s,
          .vece = MO_32 },
        { .fniv = gen_sqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_sqshrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
8006
/* Rounding variants: out-of-line helpers only, no inline expansion. */
static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrnb_h },
        { .fno = gen_helper_sve2_sqrshrnb_s },
        { .fno = gen_helper_sve2_sqrshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_sqrshrnt_h },
        { .fno = gen_helper_sve2_sqrshrnt_s },
        { .fno = gen_helper_sve2_sqrshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
8026
/*
 * UQSHRNB inline expansion: logical shift right, then unsigned
 * saturation is a single umin against the half-width maximum.
 * Clobbers N.
 */
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;  /* bits in the narrow element */

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}

static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnb_vec,
          .opt_opc = vec_list,
          .fno = gen_helper_sve2_uqshrnb_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
8060
/*
 * UQSHRNT inline expansion: as UQSHRNB, but the clamped value is
 * shifted into the high half of each wide lane and merged with D's
 * preserved low halves via bitsel.  Clobbers N.
 */
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;  /* bits in the narrow element */

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);
    tcg_gen_bitsel_vec(vece, d, t, d, n);
    tcg_temp_free_vec(t);
}

/* .load_dest is set because the low halves of the destination survive. */
static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const TCGOpcode vec_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
    };
    static const GVecGen2i ops[3] = {
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_h,
          .vece = MO_16 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_s,
          .vece = MO_32 },
        { .fniv = gen_uqshrnt_vec,
          .opt_opc = vec_list,
          .load_dest = true,
          .fno = gen_helper_sve2_uqshrnt_d,
          .vece = MO_64 },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
8099
/* Rounding variants: out-of-line helpers only, no inline expansion. */
static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnb_h },
        { .fno = gen_helper_sve2_uqrshrnb_s },
        { .fno = gen_helper_sve2_uqrshrnb_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}

static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2i ops[3] = {
        { .fno = gen_helper_sve2_uqrshrnt_h },
        { .fno = gen_helper_sve2_uqrshrnt_s },
        { .fno = gen_helper_sve2_uqrshrnt_d },
    };
    return do_sve2_shr_narrow(s, a, ops);
}
8119
/*
 * Build a trans routine for an SVE2 narrowing three-vector insn,
 * selecting the out-of-line helper by element size.  The first entry
 * is NULL: the byte element size is not valid for this group.
 */
#define DO_SVE2_ZZZ_NARROW(NAME, name)                                    \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)                 \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        NULL,                       gen_helper_sve2_##name##_h,           \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,           \
    };                                                                    \
    return do_sve2_zzz_ool(s, a, fns[a->esz]);                            \
}

DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
8139
8140static bool do_sve2_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
8141                               gen_helper_gvec_flags_4 *fn)
8142{
8143    if (!dc_isar_feature(aa64_sve2, s)) {
8144        return false;
8145    }
8146    return do_ppzz_flags(s, a, fn);
8147}
8148
/*
 * Build a trans routine for MATCH/NMATCH.  Only byte and halfword
 * element sizes are valid; the s/d entries are NULL.
 */
#define DO_SVE2_PPZZ_MATCH(NAME, name)                                      \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
{                                                                           \
    static gen_helper_gvec_flags_4 * const fns[4] = {                       \
        gen_helper_sve2_##name##_ppzz_b, gen_helper_sve2_##name##_ppzz_h,   \
        NULL,                            NULL                               \
    };                                                                      \
    return do_sve2_ppzz_flags(s, a, fns[a->esz]);                           \
}

DO_SVE2_PPZZ_MATCH(MATCH, match)
DO_SVE2_PPZZ_MATCH(NMATCH, nmatch)
8161
8162static bool trans_HISTCNT(DisasContext *s, arg_rprr_esz *a)
8163{
8164    static gen_helper_gvec_4 * const fns[2] = {
8165        gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
8166    };
8167    if (a->esz < 2) {
8168        return false;
8169    }
8170    return do_sve2_zpzz_ool(s, a, fns[a->esz - 2]);
8171}
8172
8173static bool trans_HISTSEG(DisasContext *s, arg_rrr_esz *a)
8174{
8175    if (a->esz != 0) {
8176        return false;
8177    }
8178    return do_sve2_zzz_ool(s, a, gen_helper_sve2_histseg);
8179}
8180
8181static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
8182                            gen_helper_gvec_4_ptr *fn)
8183{
8184    if (!dc_isar_feature(aa64_sve2, s)) {
8185        return false;
8186    }
8187    return do_zpzz_fp(s, a, fn);
8188}
8189
/*
 * Build a trans routine for an SVE2 predicated FP pairwise operation.
 * Byte elements are not valid (NULL entry).
 */
#define DO_SVE2_ZPZZ_FP(NAME, name)                                         \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
{                                                                           \
    static gen_helper_gvec_4_ptr * const fns[4] = {                         \
        NULL,                            gen_helper_sve2_##name##_zpzz_h,   \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d    \
    };                                                                      \
    return do_sve2_zpzz_fp(s, a, fns[a->esz]);                              \
}

DO_SVE2_ZPZZ_FP(FADDP, faddp)
DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
DO_SVE2_ZPZZ_FP(FMINP, fminp)
8205
8206/*
8207 * SVE Integer Multiply-Add (unpredicated)
8208 */
8209
/*
 * FMMLA: floating-point matrix multiply-accumulate.
 * MO_32 requires the F32MM feature and MO_64 requires F64MM;
 * all other element sizes are rejected as unallocated.
 */
static bool trans_FMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    gen_helper_gvec_4_ptr *fn;

    switch (a->esz) {
    case MO_32:
        if (!dc_isar_feature(aa64_sve_f32mm, s)) {
            return false;
        }
        fn = gen_helper_fmmla_s;
        break;
    case MO_64:
        if (!dc_isar_feature(aa64_sve_f64mm, s)) {
            return false;
        }
        fn = gen_helper_fmmla_d;
        break;
    default:
        return false;
    }

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* FP status pointer for the helper; freed after expansion. */
        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
8243
/*
 * SQDML[AS]L group.  The simd_data argument packs the two selector
 * bits as (sel2 << 1) | sel1; judging from the B/T/BT callers below
 * these choose bottom/top input halves — confirm against the helper
 * implementations.  Byte elements are not valid (NULL entry).
 */
static bool do_sqdmlal_zzzw(DisasContext *s, arg_rrrr_esz *a,
                            bool sel1, bool sel2)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                           gen_helper_sve2_sqdmlal_zzzw_h,
        gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
}

static bool do_sqdmlsl_zzzw(DisasContext *s, arg_rrrr_esz *a,
                            bool sel1, bool sel2)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                           gen_helper_sve2_sqdmlsl_zzzw_h,
        gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
}

/* B = both bottom, T = both top, BT = mixed selectors. */
static bool trans_SQDMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, false, false);
}

static bool trans_SQDMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, true, true);
}

static bool trans_SQDMLALBT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, false, true);
}

static bool trans_SQDMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, false, false);
}

static bool trans_SQDMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, true, true);
}

static bool trans_SQDMLSLBT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, false, true);
}
8293
/* SQRDML[AS]H: all four element sizes are valid; simd_data unused (0). */
static bool trans_SQRDMLAH_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
        gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
}

static bool trans_SQRDMLSH_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    static gen_helper_gvec_4 * const fns[] = {
        gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
        gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
}
8311
/*
 * (Un)signed multiply-add long.  simd_data carries sel, which from
 * the B/T callers selects bottom (0) or top (1) input halves —
 * confirm against the helper implementations.  Byte elements are
 * not valid (NULL entry).
 */
static bool do_smlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_smlal_zzzw_h,
        gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_SMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlal_zzzw(s, a, false);
}

static bool trans_SMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlal_zzzw(s, a, true);
}

static bool do_umlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[] = {
        NULL,                         gen_helper_sve2_umlal_zzzw_h,
        gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
    };
    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
}

static bool trans_UMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlal_zzzw(s, a, false);
}

static bool trans_UMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlal_zzzw(s, a, true);
}
8349
8350static bool do_smlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8351{
8352    static gen_helper_gvec_4 * const fns[] = {
8353        NULL,                         gen_helper_sve2_smlsl_zzzw_h,
8354        gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
8355    };
8356    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
8357}
8358
8359static bool trans_SMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8360{
8361    return do_smlsl_zzzw(s, a, false);
8362}
8363
8364static bool trans_SMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8365{
8366    return do_smlsl_zzzw(s, a, true);
8367}
8368
8369static bool do_umlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8370{
8371    static gen_helper_gvec_4 * const fns[] = {
8372        NULL,                         gen_helper_sve2_umlsl_zzzw_h,
8373        gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
8374    };
8375    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
8376}
8377
static bool trans_UMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    /* UMLSLB: sel=false selects the even (bottom) source elements.  */
    return do_umlsl_zzzw(s, a, false);
}
8382
static bool trans_UMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    /* UMLSLT: sel=true selects the odd (top) source elements.  */
    return do_umlsl_zzzw(s, a, true);
}
8387
8388static bool trans_CMLA_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
8389{
8390    static gen_helper_gvec_4 * const fns[] = {
8391        gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
8392        gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
8393    };
8394
8395    if (!dc_isar_feature(aa64_sve2, s)) {
8396        return false;
8397    }
8398    if (sve_access_check(s)) {
8399        gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
8400    }
8401    return true;
8402}
8403
8404static bool trans_CDOT_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
8405{
8406    if (!dc_isar_feature(aa64_sve2, s) || a->esz < MO_32) {
8407        return false;
8408    }
8409    if (sve_access_check(s)) {
8410        gen_helper_gvec_4 *fn = (a->esz == MO_32
8411                                 ? gen_helper_sve2_cdot_zzzz_s
8412                                 : gen_helper_sve2_cdot_zzzz_d);
8413        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->rot);
8414    }
8415    return true;
8416}
8417
8418static bool trans_SQRDCMLAH_zzzz(DisasContext *s, arg_SQRDCMLAH_zzzz *a)
8419{
8420    static gen_helper_gvec_4 * const fns[] = {
8421        gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
8422        gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
8423    };
8424
8425    if (!dc_isar_feature(aa64_sve2, s)) {
8426        return false;
8427    }
8428    if (sve_access_check(s)) {
8429        gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
8430    }
8431    return true;
8432}
8433
8434static bool trans_USDOT_zzzz(DisasContext *s, arg_USDOT_zzzz *a)
8435{
8436    if (a->esz != 2 || !dc_isar_feature(aa64_sve_i8mm, s)) {
8437        return false;
8438    }
8439    if (sve_access_check(s)) {
8440        unsigned vsz = vec_full_reg_size(s);
8441        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
8442                           vec_full_reg_offset(s, a->rn),
8443                           vec_full_reg_offset(s, a->rm),
8444                           vec_full_reg_offset(s, a->ra),
8445                           vsz, vsz, 0, gen_helper_gvec_usdot_b);
8446    }
8447    return true;
8448}
8449
static bool trans_AESMC(DisasContext *s, arg_AESMC *a)
{
    /*
     * AESMC/AESIMC: AES MixColumns, operating in place on Zd
     * (a->rd is passed as both destination and source);
     * a->decrypt selects the inverse transform.
     */
    if (!dc_isar_feature(aa64_sve2_aes, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zz(s, gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt);
    }
    return true;
}
8460
8461static bool do_aese(DisasContext *s, arg_rrr_esz *a, bool decrypt)
8462{
8463    if (!dc_isar_feature(aa64_sve2_aes, s)) {
8464        return false;
8465    }
8466    if (sve_access_check(s)) {
8467        gen_gvec_ool_zzz(s, gen_helper_crypto_aese,
8468                         a->rd, a->rn, a->rm, decrypt);
8469    }
8470    return true;
8471}
8472
static bool trans_AESE(DisasContext *s, arg_rrr_esz *a)
{
    /* AESE: forward AES single-round encrypt.  */
    return do_aese(s, a, false);
}
8477
static bool trans_AESD(DisasContext *s, arg_rrr_esz *a)
{
    /* AESD: AES single-round decrypt (decrypt=true).  */
    return do_aese(s, a, true);
}
8482
8483static bool do_sm4(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
8484{
8485    if (!dc_isar_feature(aa64_sve2_sm4, s)) {
8486        return false;
8487    }
8488    if (sve_access_check(s)) {
8489        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
8490    }
8491    return true;
8492}
8493
static bool trans_SM4E(DisasContext *s, arg_rrr_esz *a)
{
    /* SM4E: SM4 round encryption.  */
    return do_sm4(s, a, gen_helper_crypto_sm4e);
}
8498
static bool trans_SM4EKEY(DisasContext *s, arg_rrr_esz *a)
{
    /* SM4EKEY: SM4 key-schedule update.  */
    return do_sm4(s, a, gen_helper_crypto_sm4ekey);
}
8503
static bool trans_RAX1(DisasContext *s, arg_rrr_esz *a)
{
    /* RAX1 (SHA3): rotate-and-xor, always on 64-bit elements.  */
    if (!dc_isar_feature(aa64_sve2_sha3, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, gen_gvec_rax1, MO_64, a->rd, a->rn, a->rm);
    }
    return true;
}
8514
8515static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a)
8516{
8517    if (!dc_isar_feature(aa64_sve2, s)) {
8518        return false;
8519    }
8520    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh);
8521}
8522
8523static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a)
8524{
8525    if (!dc_isar_feature(aa64_sve_bf16, s)) {
8526        return false;
8527    }
8528    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt);
8529}
8530
8531static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a)
8532{
8533    if (!dc_isar_feature(aa64_sve2, s)) {
8534        return false;
8535    }
8536    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_ds);
8537}
8538
8539static bool trans_FCVTLT_hs(DisasContext *s, arg_rpr_esz *a)
8540{
8541    if (!dc_isar_feature(aa64_sve2, s)) {
8542        return false;
8543    }
8544    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_hs);
8545}
8546
8547static bool trans_FCVTLT_sd(DisasContext *s, arg_rpr_esz *a)
8548{
8549    if (!dc_isar_feature(aa64_sve2, s)) {
8550        return false;
8551    }
8552    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_sd);
8553}
8554
8555static bool trans_FCVTX_ds(DisasContext *s, arg_rpr_esz *a)
8556{
8557    if (!dc_isar_feature(aa64_sve2, s)) {
8558        return false;
8559    }
8560    return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve_fcvt_ds);
8561}
8562
8563static bool trans_FCVTXNT_ds(DisasContext *s, arg_rpr_esz *a)
8564{
8565    if (!dc_isar_feature(aa64_sve2, s)) {
8566        return false;
8567    }
8568    return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve2_fcvtnt_ds);
8569}
8570
8571static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a)
8572{
8573    static gen_helper_gvec_3_ptr * const fns[] = {
8574        NULL,               gen_helper_flogb_h,
8575        gen_helper_flogb_s, gen_helper_flogb_d
8576    };
8577
8578    if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) {
8579        return false;
8580    }
8581    if (sve_access_check(s)) {
8582        TCGv_ptr status =
8583            fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8584        unsigned vsz = vec_full_reg_size(s);
8585
8586        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
8587                           vec_full_reg_offset(s, a->rn),
8588                           pred_full_reg_offset(s, a->pg),
8589                           status, vsz, vsz, 0, fns[a->esz]);
8590        tcg_temp_free_ptr(status);
8591    }
8592    return true;
8593}
8594
8595static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
8596{
8597    if (!dc_isar_feature(aa64_sve2, s)) {
8598        return false;
8599    }
8600    if (sve_access_check(s)) {
8601        unsigned vsz = vec_full_reg_size(s);
8602        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8603                           vec_full_reg_offset(s, a->rn),
8604                           vec_full_reg_offset(s, a->rm),
8605                           vec_full_reg_offset(s, a->ra),
8606                           cpu_env, vsz, vsz, (sel << 1) | sub,
8607                           gen_helper_sve2_fmlal_zzzw_s);
8608    }
8609    return true;
8610}
8611
static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    /* FMLALB: add, bottom elements (sub=false, sel=false).  */
    return do_FMLAL_zzzw(s, a, false, false);
}
8616
static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    /* FMLALT: add, top elements (sub=false, sel=true).  */
    return do_FMLAL_zzzw(s, a, false, true);
}
8621
static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    /* FMLSLB: subtract, bottom elements (sub=true, sel=false).  */
    return do_FMLAL_zzzw(s, a, true, false);
}
8626
static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    /* FMLSLT: subtract, top elements (sub=true, sel=true).  */
    return do_FMLAL_zzzw(s, a, true, true);
}
8631
8632static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
8633{
8634    if (!dc_isar_feature(aa64_sve2, s)) {
8635        return false;
8636    }
8637    if (sve_access_check(s)) {
8638        unsigned vsz = vec_full_reg_size(s);
8639        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8640                           vec_full_reg_offset(s, a->rn),
8641                           vec_full_reg_offset(s, a->rm),
8642                           vec_full_reg_offset(s, a->ra),
8643                           cpu_env, vsz, vsz,
8644                           (a->index << 2) | (sel << 1) | sub,
8645                           gen_helper_sve2_fmlal_zzxw_s);
8646    }
8647    return true;
8648}
8649
static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    /* FMLALB (indexed): add, bottom elements.  */
    return do_FMLAL_zzxw(s, a, false, false);
}
8654
static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    /* FMLALT (indexed): add, top elements.  */
    return do_FMLAL_zzxw(s, a, false, true);
}
8659
static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    /* FMLSLB (indexed): subtract, bottom elements.  */
    return do_FMLAL_zzxw(s, a, true, false);
}
8664
static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    /* FMLSLT (indexed): subtract, top elements.  */
    return do_FMLAL_zzxw(s, a, true, true);
}
8669
/*
 * Expand a four-register out-of-line operation gated on the
 * aa64_sve_i8mm (Int8 matrix multiply) feature; 'data' is passed
 * through as the helper's simd data word.
 */
static bool do_i8mm_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
                             gen_helper_gvec_4 *fn, int data)
{
    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
    }
    return true;
}
8681
static bool trans_SMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    /* SMMLA: signed 8-bit matrix multiply-accumulate.  */
    return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_smmla_b, 0);
}
8686
static bool trans_USMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    /* USMMLA: mixed-sign (unsigned x signed) 8-bit matrix multiply.  */
    return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_usmmla_b, 0);
}
8691
static bool trans_UMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    /* UMMLA: unsigned 8-bit matrix multiply-accumulate.  */
    return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_ummla_b, 0);
}
8696
static bool trans_BFDOT_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    /* BFDOT (vectors): BFloat16 dot product; needs FEAT_BF16.  */
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot,
                          a->rd, a->rn, a->rm, a->ra, 0);
    }
    return true;
}
8708
static bool trans_BFDOT_zzxz(DisasContext *s, arg_rrxr_esz *a)
{
    /*
     * BFDOT (indexed): BFloat16 dot product by indexed element;
     * a->index is passed as the helper's data word.
     */
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot_idx,
                          a->rd, a->rn, a->rm, a->ra, a->index);
    }
    return true;
}
8720
static bool trans_BFMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    /* BFMMLA: BFloat16 matrix multiply-accumulate; needs FEAT_BF16.  */
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfmmla,
                          a->rd, a->rn, a->rm, a->ra, 0);
    }
    return true;
}
8732
8733static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8734{
8735    if (!dc_isar_feature(aa64_sve_bf16, s)) {
8736        return false;
8737    }
8738    if (sve_access_check(s)) {
8739        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8740        unsigned vsz = vec_full_reg_size(s);
8741
8742        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8743                           vec_full_reg_offset(s, a->rn),
8744                           vec_full_reg_offset(s, a->rm),
8745                           vec_full_reg_offset(s, a->ra),
8746                           status, vsz, vsz, sel,
8747                           gen_helper_gvec_bfmlal);
8748        tcg_temp_free_ptr(status);
8749    }
8750    return true;
8751}
8752
static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    /* BFMLALB: sel=false selects the even (bottom) source elements.  */
    return do_BFMLAL_zzzw(s, a, false);
}
8757
static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    /* BFMLALT: sel=true selects the odd (top) source elements.  */
    return do_BFMLAL_zzzw(s, a, true);
}
8762
8763static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
8764{
8765    if (!dc_isar_feature(aa64_sve_bf16, s)) {
8766        return false;
8767    }
8768    if (sve_access_check(s)) {
8769        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8770        unsigned vsz = vec_full_reg_size(s);
8771
8772        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8773                           vec_full_reg_offset(s, a->rn),
8774                           vec_full_reg_offset(s, a->rm),
8775                           vec_full_reg_offset(s, a->ra),
8776                           status, vsz, vsz, (a->index << 1) | sel,
8777                           gen_helper_gvec_bfmlal_idx);
8778        tcg_temp_free_ptr(status);
8779    }
8780    return true;
8781}
8782
static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    /* BFMLALB (indexed): sel=false, bottom source elements.  */
    return do_BFMLAL_zzxw(s, a, false);
}
8787
static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    /* BFMLALT (indexed): sel=true, top source elements.  */
    return do_BFMLAL_zzxw(s, a, true);
}
8792