/* qemu/target/arm/translate-sve.c */
   1/*
   2 * AArch64 SVE translation
   3 *
   4 * Copyright (c) 2018 Linaro, Ltd
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "cpu.h"
  22#include "exec/exec-all.h"
  23#include "tcg/tcg-op.h"
  24#include "tcg/tcg-op-gvec.h"
  25#include "tcg/tcg-gvec-desc.h"
  26#include "qemu/log.h"
  27#include "arm_ldst.h"
  28#include "translate.h"
  29#include "internals.h"
  30#include "exec/helper-proto.h"
  31#include "exec/helper-gen.h"
  32#include "exec/log.h"
  33#include "translate-a64.h"
  34#include "fpu/softfloat.h"
  35
  36
  37typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
  38                         TCGv_i64, uint32_t, uint32_t);
  39
  40typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
  41                                     TCGv_ptr, TCGv_i32);
  42typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
  43                                     TCGv_ptr, TCGv_ptr, TCGv_i32);
  44
  45typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
  46typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
  47                                         TCGv_ptr, TCGv_i64, TCGv_i32);
  48
  49/*
  50 * Helpers for extracting complex instruction fields.
  51 */
  52
  53/* See e.g. ASR (immediate, predicated).
  54 * Returns -1 for unallocated encoding; diagnose later.
  55 */
  56static int tszimm_esz(DisasContext *s, int x)
  57{
  58    x >>= 3;  /* discard imm3 */
  59    return 31 - clz32(x);
  60}
  61
  62static int tszimm_shr(DisasContext *s, int x)
  63{
  64    return (16 << tszimm_esz(s, x)) - x;
  65}
  66
  67/* See e.g. LSL (immediate, predicated).  */
  68static int tszimm_shl(DisasContext *s, int x)
  69{
  70    return x - (8 << tszimm_esz(s, x));
  71}
  72
  73/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  74static inline int expand_imm_sh8s(DisasContext *s, int x)
  75{
  76    return (int8_t)x << (x & 0x100 ? 8 : 0);
  77}
  78
  79static inline int expand_imm_sh8u(DisasContext *s, int x)
  80{
  81    return (uint8_t)x << (x & 0x100 ? 8 : 0);
  82}
  83
  84/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  85 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  86 */
  87static inline int msz_dtype(DisasContext *s, int msz)
  88{
  89    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  90    return dtype[msz];
  91}
  92
  93/*
  94 * Include the generated decoder.
  95 */
  96
  97#include "decode-sve.c.inc"
  98
  99/*
 100 * Implement all of the translator functions referenced by the decoder.
 101 */
 102
 103/* Return the offset info CPUARMState of the predicate vector register Pn.
 104 * Note for this purpose, FFR is P16.
 105 */
 106static inline int pred_full_reg_offset(DisasContext *s, int regno)
 107{
 108    return offsetof(CPUARMState, vfp.pregs[regno]);
 109}
 110
 111/* Return the byte size of the whole predicate register, VL / 64.  */
 112static inline int pred_full_reg_size(DisasContext *s)
 113{
 114    return s->sve_len >> 3;
 115}
 116
 117/* Round up the size of a register to a size allowed by
 118 * the tcg vector infrastructure.  Any operation which uses this
 119 * size may assume that the bits above pred_full_reg_size are zero,
 120 * and must leave them the same way.
 121 *
 122 * Note that this is not needed for the vector registers as they
 123 * are always properly sized for tcg vectors.
 124 */
 125static int size_for_gvec(int size)
 126{
 127    if (size <= 8) {
 128        return 8;
 129    } else {
 130        return QEMU_ALIGN_UP(size, 16);
 131    }
 132}
 133
 134static int pred_gvec_reg_size(DisasContext *s)
 135{
 136    return size_for_gvec(pred_full_reg_size(s));
 137}
 138
 139/* Invoke an out-of-line helper on 2 Zregs. */
 140static void gen_gvec_ool_zz(DisasContext *s, gen_helper_gvec_2 *fn,
 141                            int rd, int rn, int data)
 142{
 143    unsigned vsz = vec_full_reg_size(s);
 144    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 145                       vec_full_reg_offset(s, rn),
 146                       vsz, vsz, data, fn);
 147}
 148
 149/* Invoke an out-of-line helper on 3 Zregs. */
 150static void gen_gvec_ool_zzz(DisasContext *s, gen_helper_gvec_3 *fn,
 151                             int rd, int rn, int rm, int data)
 152{
 153    unsigned vsz = vec_full_reg_size(s);
 154    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 155                       vec_full_reg_offset(s, rn),
 156                       vec_full_reg_offset(s, rm),
 157                       vsz, vsz, data, fn);
 158}
 159
 160/* Invoke an out-of-line helper on 4 Zregs. */
 161static void gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
 162                              int rd, int rn, int rm, int ra, int data)
 163{
 164    unsigned vsz = vec_full_reg_size(s);
 165    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
 166                       vec_full_reg_offset(s, rn),
 167                       vec_full_reg_offset(s, rm),
 168                       vec_full_reg_offset(s, ra),
 169                       vsz, vsz, data, fn);
 170}
 171
 172/* Invoke an out-of-line helper on 2 Zregs and a predicate. */
 173static void gen_gvec_ool_zzp(DisasContext *s, gen_helper_gvec_3 *fn,
 174                             int rd, int rn, int pg, int data)
 175{
 176    unsigned vsz = vec_full_reg_size(s);
 177    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 178                       vec_full_reg_offset(s, rn),
 179                       pred_full_reg_offset(s, pg),
 180                       vsz, vsz, data, fn);
 181}
 182
 183/* Invoke an out-of-line helper on 3 Zregs and a predicate. */
 184static void gen_gvec_ool_zzzp(DisasContext *s, gen_helper_gvec_4 *fn,
 185                              int rd, int rn, int rm, int pg, int data)
 186{
 187    unsigned vsz = vec_full_reg_size(s);
 188    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
 189                       vec_full_reg_offset(s, rn),
 190                       vec_full_reg_offset(s, rm),
 191                       pred_full_reg_offset(s, pg),
 192                       vsz, vsz, data, fn);
 193}
 194
 195/* Invoke a vector expander on two Zregs.  */
 196static void gen_gvec_fn_zz(DisasContext *s, GVecGen2Fn *gvec_fn,
 197                           int esz, int rd, int rn)
 198{
 199    unsigned vsz = vec_full_reg_size(s);
 200    gvec_fn(esz, vec_full_reg_offset(s, rd),
 201            vec_full_reg_offset(s, rn), vsz, vsz);
 202}
 203
 204/* Invoke a vector expander on three Zregs.  */
 205static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
 206                            int esz, int rd, int rn, int rm)
 207{
 208    unsigned vsz = vec_full_reg_size(s);
 209    gvec_fn(esz, vec_full_reg_offset(s, rd),
 210            vec_full_reg_offset(s, rn),
 211            vec_full_reg_offset(s, rm), vsz, vsz);
 212}
 213
 214/* Invoke a vector expander on four Zregs.  */
 215static void gen_gvec_fn_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
 216                             int esz, int rd, int rn, int rm, int ra)
 217{
 218    unsigned vsz = vec_full_reg_size(s);
 219    gvec_fn(esz, vec_full_reg_offset(s, rd),
 220            vec_full_reg_offset(s, rn),
 221            vec_full_reg_offset(s, rm),
 222            vec_full_reg_offset(s, ra), vsz, vsz);
 223}
 224
 225/* Invoke a vector move on two Zregs.  */
 226static bool do_mov_z(DisasContext *s, int rd, int rn)
 227{
 228    if (sve_access_check(s)) {
 229        gen_gvec_fn_zz(s, tcg_gen_gvec_mov, MO_8, rd, rn);
 230    }
 231    return true;
 232}
 233
 234/* Initialize a Zreg with replications of a 64-bit immediate.  */
 235static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
 236{
 237    unsigned vsz = vec_full_reg_size(s);
 238    tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), vsz, vsz, word);
 239}
 240
 241/* Invoke a vector expander on three Pregs.  */
 242static void gen_gvec_fn_ppp(DisasContext *s, GVecGen3Fn *gvec_fn,
 243                            int rd, int rn, int rm)
 244{
 245    unsigned psz = pred_gvec_reg_size(s);
 246    gvec_fn(MO_64, pred_full_reg_offset(s, rd),
 247            pred_full_reg_offset(s, rn),
 248            pred_full_reg_offset(s, rm), psz, psz);
 249}
 250
 251/* Invoke a vector move on two Pregs.  */
 252static bool do_mov_p(DisasContext *s, int rd, int rn)
 253{
 254    if (sve_access_check(s)) {
 255        unsigned psz = pred_gvec_reg_size(s);
 256        tcg_gen_gvec_mov(MO_8, pred_full_reg_offset(s, rd),
 257                         pred_full_reg_offset(s, rn), psz, psz);
 258    }
 259    return true;
 260}
 261
/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    /*
     * The predicate-test helpers pack their result so that:
     * the value as a whole supplies N (sign bit of cpu_NF),
     * bit 1 supplies Z (cpu_ZF is zero exactly when Z is set),
     * bit 0 supplies C, and V is always cleared.
     */
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction.  */

/* PredTest over a single 64-bit predicate word.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* PredTest over a full predicate register of 'words' 64-bit words,
 * at offsets dofs (value) and gofs (governing predicate) in CPUARMState.
 */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    /* Note that t carries the word count in and the packed flags out.  */
    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,   /* MO_8, MO_16 */
    0x1111111111111111ull, 0x0101010101010101ull    /* MO_32, MO_64 */
};
 304
 305/*
 306 *** SVE Logical - Unpredicated Group
 307 */
 308
 309static bool do_zzz_fn(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *gvec_fn)
 310{
 311    if (sve_access_check(s)) {
 312        gen_gvec_fn_zzz(s, gvec_fn, a->esz, a->rd, a->rn, a->rm);
 313    }
 314    return true;
 315}
 316
/* AND, ORR, EOR, BIC (vectors, unpredicated): pure bitwise operations,
 * expanded with the generic gvec logical expanders.
 */
static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_and);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_or);
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_xor);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_andc);
}
 336
 337static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
 338{
 339    TCGv_i64 t = tcg_temp_new_i64();
 340    uint64_t mask = dup_const(MO_8, 0xff >> sh);
 341
 342    tcg_gen_xor_i64(t, n, m);
 343    tcg_gen_shri_i64(d, t, sh);
 344    tcg_gen_shli_i64(t, t, 8 - sh);
 345    tcg_gen_andi_i64(d, d, mask);
 346    tcg_gen_andi_i64(t, t, ~mask);
 347    tcg_gen_or_i64(d, d, t);
 348    tcg_temp_free_i64(t);
 349}
 350
 351static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
 352{
 353    TCGv_i64 t = tcg_temp_new_i64();
 354    uint64_t mask = dup_const(MO_16, 0xffff >> sh);
 355
 356    tcg_gen_xor_i64(t, n, m);
 357    tcg_gen_shri_i64(d, t, sh);
 358    tcg_gen_shli_i64(t, t, 16 - sh);
 359    tcg_gen_andi_i64(d, d, mask);
 360    tcg_gen_andi_i64(t, t, ~mask);
 361    tcg_gen_or_i64(d, d, t);
 362    tcg_temp_free_i64(t);
 363}
 364
/* For 32-bit, 64-bit, and vector lanes, XAR reduces directly to
 * xor followed by rotate-right by the immediate.
 */
static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh)
{
    tcg_gen_xor_i32(d, n, m);
    tcg_gen_rotri_i32(d, d, sh);
}

static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh)
{
    tcg_gen_xor_i64(d, n, m);
    tcg_gen_rotri_i64(d, d, sh);
}

static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                        TCGv_vec m, int64_t sh)
{
    tcg_gen_xor_vec(vece, d, n, m);
    tcg_gen_rotri_vec(vece, d, d, sh);
}
 383
/*
 * Expand XAR: per element, d = rotr(n ^ m, shift).
 * Accepts both the SVE2 immediate range (1 .. esize) and the
 * AdvSIMD SHA3 range (0 .. esize-1); shift == esize becomes 0,
 * which degenerates to a plain XOR.
 */
void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                  uint32_t rm_ofs, int64_t shift,
                  uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3i ops[4] = {
        { .fni8 = gen_xar8_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_b,
          .opt_opc = vecop,
          .vece = MO_8 },
        { .fni8 = gen_xar16_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_h,
          .opt_opc = vecop,
          .vece = MO_16 },
        { .fni4 = gen_xar_i32,
          .fniv = gen_xar_vec,
          .fno = gen_helper_sve2_xar_s,
          .opt_opc = vecop,
          .vece = MO_32 },
        /* NOTE: the 64-bit case uses the generic gvec helper rather
           than an sve2-specific one.  */
        { .fni8 = gen_xar_i64,
          .fniv = gen_xar_vec,
          .fno = gen_helper_gvec_xar_d,
          .opt_opc = vecop,
          .vece = MO_64 }
    };
    int esize = 8 << vece;

    /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */
    tcg_debug_assert(shift >= 0);
    tcg_debug_assert(shift <= esize);
    shift &= esize - 1;  /* fold shift == esize onto 0 */

    if (shift == 0) {
        /* xar with no rotate devolves to xor. */
        tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz);
    } else {
        tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz,
                        shift, &ops[vece]);
    }
}
 426
 427static bool trans_XAR(DisasContext *s, arg_rrri_esz *a)
 428{
 429    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
 430        return false;
 431    }
 432    if (sve_access_check(s)) {
 433        unsigned vsz = vec_full_reg_size(s);
 434        gen_gvec_xar(a->esz, vec_full_reg_offset(s, a->rd),
 435                     vec_full_reg_offset(s, a->rn),
 436                     vec_full_reg_offset(s, a->rm), a->imm, vsz, vsz);
 437    }
 438    return true;
 439}
 440
 441static bool do_sve2_zzzz_fn(DisasContext *s, arg_rrrr_esz *a, GVecGen4Fn *fn)
 442{
 443    if (!dc_isar_feature(aa64_sve2, s)) {
 444        return false;
 445    }
 446    if (sve_access_check(s)) {
 447        gen_gvec_fn_zzzz(s, fn, a->esz, a->rd, a->rn, a->rm, a->ra);
 448    }
 449    return true;
 450}
 451
 452static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
 453{
 454    tcg_gen_xor_i64(d, n, m);
 455    tcg_gen_xor_i64(d, d, k);
 456}
 457
 458static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
 459                         TCGv_vec m, TCGv_vec k)
 460{
 461    tcg_gen_xor_vec(vece, d, n, m);
 462    tcg_gen_xor_vec(vece, d, d, k);
 463}
 464
 465static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
 466                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
 467{
 468    static const GVecGen4 op = {
 469        .fni8 = gen_eor3_i64,
 470        .fniv = gen_eor3_vec,
 471        .fno = gen_helper_sve2_eor3,
 472        .vece = MO_64,
 473        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
 474    };
 475    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
 476}
 477
 478static bool trans_EOR3(DisasContext *s, arg_rrrr_esz *a)
 479{
 480    return do_sve2_zzzz_fn(s, a, gen_eor3);
 481}
 482
 483static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
 484{
 485    tcg_gen_andc_i64(d, m, k);
 486    tcg_gen_xor_i64(d, d, n);
 487}
 488
 489static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
 490                         TCGv_vec m, TCGv_vec k)
 491{
 492    tcg_gen_andc_vec(vece, d, m, k);
 493    tcg_gen_xor_vec(vece, d, d, n);
 494}
 495
 496static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
 497                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
 498{
 499    static const GVecGen4 op = {
 500        .fni8 = gen_bcax_i64,
 501        .fniv = gen_bcax_vec,
 502        .fno = gen_helper_sve2_bcax,
 503        .vece = MO_64,
 504        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
 505    };
 506    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
 507}
 508
 509static bool trans_BCAX(DisasContext *s, arg_rrrr_esz *a)
 510{
 511    return do_sve2_zzzz_fn(s, a, gen_bcax);
 512}
 513
 514static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
 515                    uint32_t a, uint32_t oprsz, uint32_t maxsz)
 516{
 517    /* BSL differs from the generic bitsel in argument ordering. */
 518    tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
 519}
 520
 521static bool trans_BSL(DisasContext *s, arg_rrrr_esz *a)
 522{
 523    return do_sve2_zzzz_fn(s, a, gen_bsl);
 524}
 525
/*
 * BSL1N: bitsel with the first data operand inverted:
 * d = (~n & k) | (m & ~k).
 * Note both expansions clobber their n and m inputs.
 */
static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_andc_i64(n, k, n);      /* n := k & ~n */
    tcg_gen_andc_i64(m, m, k);      /* m := m & ~k */
    tcg_gen_or_i64(d, n, m);
}

static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        /* bitsel(k, ~n, m) = (k & ~n) | (~k & m) */
        tcg_gen_not_vec(vece, n, n);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_andc_vec(vece, n, k, n);
        tcg_gen_andc_vec(vece, m, m, k);
        tcg_gen_or_vec(vece, d, n, m);
    }
}

static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl1n_i64,
        .fniv = gen_bsl1n_vec,
        .fno = gen_helper_sve2_bsl1n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL1N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl1n);
}
 563
/*
 * BSL2N: bitsel with the second data operand inverted:
 * d = (n & k) | (~m & ~k).
 * Note both expansions clobber their n and m inputs.
 */
static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    /*
     * Z[dn] = (n & k) | (~m & ~k)
     *       = (n & k) | ~(m | k)
     */
    tcg_gen_and_i64(n, n, k);
    if (TCG_TARGET_HAS_orc_i64) {
        tcg_gen_or_i64(m, m, k);
        tcg_gen_orc_i64(d, n, m);
    } else {
        tcg_gen_nor_i64(m, m, k);
        tcg_gen_or_i64(d, n, m);
    }
}

static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    if (TCG_TARGET_HAS_bitsel_vec) {
        /* bitsel(k, n, ~m) = (k & n) | (~k & ~m) */
        tcg_gen_not_vec(vece, m, m);
        tcg_gen_bitsel_vec(vece, d, k, n, m);
    } else {
        tcg_gen_and_vec(vece, n, n, k);
        tcg_gen_or_vec(vece, m, m, k);
        tcg_gen_orc_vec(vece, d, n, m);
    }
}

static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                      uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_bsl2n_i64,
        .fniv = gen_bsl2n_vec,
        .fno = gen_helper_sve2_bsl2n,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_BSL2N(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_bsl2n);
}
 610
/*
 * NBSL: inverted bitsel, d = ~((n & k) | (m & ~k)).
 * Note the i64 expansion clobbers its n and m inputs.
 */
static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
{
    tcg_gen_and_i64(n, n, k);       /* n := n & k */
    tcg_gen_andc_i64(m, m, k);      /* m := m & ~k */
    tcg_gen_nor_i64(d, n, m);
}

static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
                          TCGv_vec m, TCGv_vec k)
{
    /* Select normally, then invert the result.  */
    tcg_gen_bitsel_vec(vece, d, k, n, m);
    tcg_gen_not_vec(vece, d, d);
}

static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
                     uint32_t a, uint32_t oprsz, uint32_t maxsz)
{
    static const GVecGen4 op = {
        .fni8 = gen_nbsl_i64,
        .fniv = gen_nbsl_vec,
        .fno = gen_helper_sve2_nbsl,
        .vece = MO_64,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
}

static bool trans_NBSL(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sve2_zzzz_fn(s, a, gen_nbsl);
}
 642
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

/* ADD, SUB: plain element-wise add/subtract.  */
static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_add);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sub);
}

/* SQADD/SQSUB/UQADD/UQSUB: signed/unsigned saturating add/subtract,
 * via the generic saturating gvec expanders.
 */
static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ssadd);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_sssub);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_usadd);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_fn(s, a, tcg_gen_gvec_ussub);
}
 676
 677/*
 678 *** SVE Integer Arithmetic - Binary Predicated Group
 679 */
 680
 681static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 682{
 683    if (fn == NULL) {
 684        return false;
 685    }
 686    if (sve_access_check(s)) {
 687        gen_gvec_ool_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0);
 688    }
 689    return true;
 690}
 691
/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    gen_gvec_ool_zzzp(s, fns[esz], rd, rn, rm, pg, 0);
}
 703
/* Expand trans_<NAME>_zpzz, dispatching to the per-element-size
 * out-of-line helpers gen_helper_sve_<name>_zpzz_{b,h,s,d}.
 */
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

/* Bitwise logical, predicated.  */
DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

/* Add/subtract, predicated.  */
DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

/* Min/max/absolute-difference, predicated.  */
DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

/* Multiply and multiply-high, predicated.  */
DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

/* Shifts by vector, predicated.  */
DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)
 736
/* SDIV/UDIV: only 32-bit and 64-bit element sizes are allocated,
 * hence the NULL entries for the byte and halfword sizes.
 */
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}
 752
 753static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
 754{
 755    if (sve_access_check(s)) {
 756        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
 757    }
 758    return true;
 759}
 760
 761#undef DO_ZPZZ
 762
 763/*
 764 *** SVE Integer Arithmetic - Unary Predicated Group
 765 */
 766
 767static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
 768{
 769    if (fn == NULL) {
 770        return false;
 771    }
 772    if (sve_access_check(s)) {
 773        gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, 0);
 774    }
 775    return true;
 776}
 777
/* Expand trans_<NAME>, dispatching to the per-element-size
 * out-of-line helpers gen_helper_sve_<name>_{b,h,s,d}.
 */
#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)
 795
/* FABS/FNEG: the byte element size (index 0) is unallocated,
 * hence the NULL entry.
 */
static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* SXTB/UXTB: extend from byte, so the destination element size
 * must be larger than a byte (NULL for index 0).
 */
static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* SXTH/UXTH: extend from halfword; destination must be word or
 * doubleword (NULL for indices 0 and 1).
 */
static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* SXTW/UXTW: extend from word; only the doubleword form exists.  */
static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}
 869
 870#undef DO_ZPZ
 871
/*
 *** SVE Integer Reduction Group
 */

/* Out-of-line reduction helper: (result, Zn, Pg, desc).  */
typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Expand a predicated reduction of Zn into the scalar Vd.
 * A NULL fn denotes an unallocated element-size encoding.
 */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    /* Pass pointers into CPUARMState for Zn and Pg.  */
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    /* Store the scalar result into Vd.  */
    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
 908
/* Expand trans_<NAME>, a predicated reduction, dispatching to the
 * per-element-size helpers gen_helper_sve_<name>_{b,h,s,d}.
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

/* SADDV: the doubleword element size is unallocated, hence NULL.  */
static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}
 937
 938#undef DO_VPZ
 939
 940/*
 941 *** SVE Shift by Immediate - Predicated Group
 942 */
 943
 944/*
 945 * Copy Zn into Zd, storing zeros into inactive elements.
 946 * If invert, store zeros into the active elements.
 947 */
 948static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
 949                        int esz, bool invert)
 950{
 951    static gen_helper_gvec_3 * const fns[4] = {
 952        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
 953        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
 954    };
 955
 956    if (sve_access_check(s)) {
 957        gen_gvec_ool_zzp(s, fns[esz], rd, rn, pg, invert);
 958    }
 959    return true;
 960}
 961
 962static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
 963                        gen_helper_gvec_3 *fn)
 964{
 965    if (sve_access_check(s)) {
 966        gen_gvec_ool_zzp(s, fn, a->rd, a->rn, a->pg, a->imm);
 967    }
 968    return true;
 969}
 970
/* ASR (immediate, predicated).  */
static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /*
     * Shift by element size is architecturally valid.  For
     * arithmetic right-shift, it's the same as by one less.
     */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

/* LSR (immediate, predicated).  */
static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /*
     * Shift by element size is architecturally valid.
     * For logical shifts, it is a zeroing operation.
     */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/* LSL (immediate, predicated).  */
static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /*
     * Shift by element size is architecturally valid.
     * For logical shifts, it is a zeroing operation.
     */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/* ASRD: arithmetic shift right for divide (rounds toward zero).  */
static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /*
     * Shift by element size is architecturally valid.  For arithmetic
     * right shift for division, it is a zeroing operation.
     */
    if (a->imm >= (8 << a->esz)) {
        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
1040
/* SQSHL (immediate, predicated): SVE2 only.  */
static bool trans_SQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqshl_zpzi_b, gen_helper_sve2_sqshl_zpzi_h,
        gen_helper_sve2_sqshl_zpzi_s, gen_helper_sve2_sqshl_zpzi_d,
    };
    /* a->esz < 0 is an invalid tsz encoding -- see tszimm_esz.  */
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}

/* UQSHL (immediate, predicated): SVE2 only.  */
static bool trans_UQSHL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_uqshl_zpzi_b, gen_helper_sve2_uqshl_zpzi_h,
        gen_helper_sve2_uqshl_zpzi_s, gen_helper_sve2_uqshl_zpzi_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}

/* SRSHR: rounding shift right (immediate, predicated): SVE2 only.  */
static bool trans_SRSHR(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_srshr_b, gen_helper_sve2_srshr_h,
        gen_helper_sve2_srshr_s, gen_helper_sve2_srshr_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}

/* URSHR: unsigned rounding shift right (immediate, predicated): SVE2 only.  */
static bool trans_URSHR(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_urshr_b, gen_helper_sve2_urshr_h,
        gen_helper_sve2_urshr_s, gen_helper_sve2_urshr_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}

/* SQSHLU: signed saturating shift left unsigned (immediate): SVE2 only.  */
static bool trans_SQSHLU(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqshlu_b, gen_helper_sve2_sqshlu_h,
        gen_helper_sve2_sqshlu_s, gen_helper_sve2_sqshlu_d,
    };
    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return do_zpzi_ool(s, a, fns[a->esz]);
}
1100
1101/*
1102 *** SVE Bitwise Shift - Predicated Group
1103 */
1104
/*
 * Predicated shifts of B/H/S elements by a wide (64-bit) shift count
 * from Zm.  There is no D form, so esz == 3 (and the invalid esz < 0)
 * is rejected before indexing fns[].
 */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
1123
1124/*
1125 *** SVE Bitwise Shift - Unpredicated Group
1126 */
1127
/*
 * Expand an unpredicated shift by immediate using a gvec expander,
 * clamping or zeroing for the out-of-range (== element size) case.
 * @asr selects the arithmetic-right-shift clamping behaviour.
 */
static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /*
         * Shift by element size is architecturally valid.  For
         * arithmetic right-shift, it's the same as by one less.
         * Otherwise it is a zeroing operation.
         */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
1154
/* ASR (immediate, unpredicated).  */
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

/* LSR (immediate, unpredicated).  */
static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

/* LSL (immediate, unpredicated).  */
static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
1169
1170static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
1171{
1172    if (fn == NULL) {
1173        return false;
1174    }
1175    if (sve_access_check(s)) {
1176        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
1177    }
1178    return true;
1179}
1180
/*
 * Unpredicated shifts by a wide (64-bit) shift count from Zm.
 * fns[3] is NULL: there is no D form, and do_zzw_ool rejects it.
 */
#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)           \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW
1196
1197/*
1198 *** SVE Integer Multiply-Add Group
1199 */
1200
/*
 * Expand a predicated multiply-accumulate (Zd = Za +/- Zn * Zm, per
 * the helper) via a 5-operand out-of-line call.
 */
static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

/* Expand MLA/MLS for all four element sizes.  */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
1230
1231/*
1232 *** SVE Index Generation Group
1233 */
1234
/*
 * Expand INDEX: Zd.<esz>[n] = start + n * incr.
 * The 64-bit form passes the i64 operands straight to the helper;
 * narrower forms pass only the low 32 bits of start and incr.
 */
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        /* Truncate the 64-bit inputs for the narrow helpers.  */
        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}
1265
/* INDEX (immediate start, immediate increment).  */
static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

/* INDEX (immediate start, register increment).  */
static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        /* cpu_reg values are not owned here and must not be freed.  */
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

/* INDEX (register start, immediate increment).  */
static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

/* INDEX (register start, register increment).  */
static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}
1309
1310/*
1311 *** SVE Stack Allocation Group
1312 */
1313
1314static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
1315{
1316    if (sve_access_check(s)) {
1317        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1318        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1319        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
1320    }
1321    return true;
1322}
1323
1324static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
1325{
1326    if (sve_access_check(s)) {
1327        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
1328        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
1329        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
1330    }
1331    return true;
1332}
1333
1334static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
1335{
1336    if (sve_access_check(s)) {
1337        TCGv_i64 reg = cpu_reg(s, a->rd);
1338        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
1339    }
1340    return true;
1341}
1342
1343/*
1344 *** SVE Compute Vector Address Group
1345 */
1346
/*
 * Expand an ADR vector-address computation via an out-of-line helper,
 * passing the shift amount through the simd data field.
 */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
    }
    return true;
}

/* ADR with 32-bit packed offsets.  */
static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

/* ADR with 64-bit packed offsets.  */
static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

/* ADR with sign-extended 32-bit offsets.  */
static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

/* ADR with zero-extended 32-bit offsets.  */
static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
1374
1375/*
1376 *** SVE Integer Misc - Unpredicated Group
1377 */
1378
/* FEXPA: no byte form, so esz == 0 (fns[0] == NULL) is rejected.  */
static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
    }
    return true;
}

/* FTSSEL: no byte form, so esz == 0 (fns[0] == NULL) is rejected.  */
static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
    }
    return true;
}
1412
1413/*
1414 *** SVE Predicate Logical Operations Group
1415 */
1416
/*
 * Expand a predicate logical operation; if a->s, also set NZCV from
 * a PTEST of the result against the governing predicate.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (!a->s) {
        /* No flags to set: just expand the operation.  */
        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        return true;
    }

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1472
/* pd = pn & pm & pg, 64 bits at a time.  */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* pd = pn & pm & pg, vector form.  */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /*
     * When not setting flags, fold repeated operands into a plain
     * move or a two-operand AND.
     */
    if (!a->s) {
        if (!sve_access_check(s)) {
            return true;
        }
        if (a->rn == a->rm) {
            if (a->pg == a->rn) {
                /* pn & pn & pn == pn.  */
                do_mov_p(s, a->rd, a->rn);
            } else {
                /* pn & pn & pg == pn & pg.  */
                gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->pg);
            }
            return true;
        } else if (a->pg == a->rn || a->pg == a->rm) {
            /* The guard equals one operand: it drops out of the AND.  */
            gen_gvec_fn_ppp(s, tcg_gen_gvec_and, a->rd, a->rn, a->rm);
            return true;
        }
    }
    return do_pppp_flags(s, a, &op);
}
1513
/* pd = (pn & ~pm) & pg, 64 bits at a time.  */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* pd = (pn & ~pm) & pg, vector form.  */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* When pg == pn and no flags, (pn & ~pm) & pn == pn & ~pm.  */
    if (!a->s && a->pg == a->rn) {
        if (sve_access_check(s)) {
            gen_gvec_fn_ppp(s, tcg_gen_gvec_andc, a->rd, a->rn, a->rm);
        }
        return true;
    }
    return do_pppp_flags(s, a, &op);
}
1544
/* pd = (pn ^ pm) & pg, 64 bits at a time.  */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* pd = (pn ^ pm) & pg, vector form.  */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1568
/*
 * SEL (predicates): pd = pg ? pn : pm, expanded with the generic
 * bitselect.  The flag-setting form does not exist (a->s rejected).
 */
static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    if (a->s) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_bitsel(MO_8, pred_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            pred_full_reg_offset(s, a->rn),
                            pred_full_reg_offset(s, a->rm), psz, psz);
    }
    return true;
}
1583
/* pd = (pn | pm) & pg, 64 bits at a time.  */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* pd = (pn | pm) & pg, vector form.  */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };

    /* With pg == pn == pm and no flags, (pn | pn) & pn == pn.  */
    if (!a->s && a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    }
    return do_pppp_flags(s, a, &op);
}
1611
/* pd = (pn | ~pm) & pg, 64 bits at a time.  */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* pd = (pn | ~pm) & pg, vector form.  */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1635
/* pd = ~(pn | pm) & pg, 64 bits at a time.  */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* pd = ~(pn | pm) & pg, vector form.  */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1659
/* pd = ~(pn & pm) & pg, 64 bits at a time.  */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* pd = ~(pn & pm) & pg, vector form.  */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    return do_pppp_flags(s, a, &op);
}
1683
1684/*
1685 *** SVE Predicate Misc Group
1686 */
1687
/*
 * PTEST: set NZCV from pn tested against governing predicate pg.
 * A single 64-bit word is handled inline; larger predicates go
 * through the general do_predtest expansion.
 */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1711
1712/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        /*
         * Largest power of two that does not exceed the element count
         * (open-coded pow2floor; elements is non-zero for any valid
         * vector length).
         */
        for (bound = 1; bound <= elements / 2; bound <<= 1) {
            continue;
        }
        return bound;
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        /* Fixed counts 1..8 encode directly.  */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        /* Powers of two from 16 to 256.  */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    /* Fixed counts apply only when the vector is long enough.  */
    return elements >= bound ? bound : 0;
}
1749
1750/* This handles all of the predicate initialization instructions,
1751 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
1752 * so that decode_pred_count returns 0.  For SETFFR, we will have
1753 * set RD == 16 == FFR.
1754 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        /* A partial final word keeps only the bits below setsz.  */
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The predicate fits in one word: a single store suffices.  */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* No partial final word: try a single gvec dup over the
           whole predicate, zeroing any tail via maxsz > oprsz.  */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup_imm(MO_64, ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    /* Otherwise store word by word: full words, then the partial
       final word, then zeros for the remainder of the predicate.  */
    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        /* N = !empty, Z = empty, C = !empty, V = 0 -- all computable
           at translate time since the pattern is constant.  */
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1829
/* PTRUE, PTRUES (via a->s).  */
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}
1846
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

/* RDFFR (unpredicated): a plain predicate move from the FFR.  */
static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

/* WRFFR: a plain predicate move to the FFR.  */
static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
1868
/*
 * Expand PFIRST/PNEXT via an out-of-line helper that both updates
 * Pd and returns the flags word, which is then applied to NZCV.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc = 0;

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    /* t carries the descriptor in and the flags result out.  */
    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
1907
1908/*
1909 *** SVE Element Count Group
1910 */
1911
1912/* Perform an inline saturating addition of a 32-bit value within
1913 * a 64-bit register.  The second operand is known to be positive,
1914 * which halves the comparisions we must perform to bound the result.
1915 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtraction can only underflow toward the lower bound.  */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        /* Addition can only overflow toward the upper bound.  */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    /* Clamp the 64-bit result to the 32-bit bound.  */
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
1941
1942/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: underflow iff reg < val; clamp to 0.  */
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            /* Unsigned add: overflow iff sum < reg; clamp to ~0.  */
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
1988
/* Similarly with a vector and a scalar operand: saturating add/subtract
 * the scalar VAL to every element of Zn, writing Zd.  The helpers only
 * implement addition, so subtraction passes the negated scalar -- except
 * for the unsigned 64-bit case, which has a dedicated uqsubi helper
 * (the negated value would not fit the unsigned domain).
 */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    /* Pass env-relative pointers to the destination and source vectors.  */
    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        /* B and H helpers take the scalar as a 32-bit value.  */
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        /* The S helper takes the scalar as a 64-bit value.  */
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                /* Unsigned 64-bit: cannot negate, use the subtract helper.  */
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
2073
2074static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
2075{
2076    if (sve_access_check(s)) {
2077        unsigned fullsz = vec_full_reg_size(s);
2078        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2079        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
2080    }
2081    return true;
2082}
2083
2084static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
2085{
2086    if (sve_access_check(s)) {
2087        unsigned fullsz = vec_full_reg_size(s);
2088        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2089        int inc = numelem * a->imm * (a->d ? -1 : 1);
2090        TCGv_i64 reg = cpu_reg(s, a->rd);
2091
2092        tcg_gen_addi_i64(reg, reg, inc);
2093    }
2094    return true;
2095}
2096
2097static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
2098{
2099    if (!sve_access_check(s)) {
2100        return true;
2101    }
2102
2103    unsigned fullsz = vec_full_reg_size(s);
2104    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2105    int inc = numelem * a->imm;
2106    TCGv_i64 reg = cpu_reg(s, a->rd);
2107
2108    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
2109    if (inc == 0) {
2110        if (a->u) {
2111            tcg_gen_ext32u_i64(reg, reg);
2112        } else {
2113            tcg_gen_ext32s_i64(reg, reg);
2114        }
2115    } else {
2116        TCGv_i64 t = tcg_const_i64(inc);
2117        do_sat_addsub_32(reg, t, a->u, a->d);
2118        tcg_temp_free_i64(t);
2119    }
2120    return true;
2121}
2122
2123static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
2124{
2125    if (!sve_access_check(s)) {
2126        return true;
2127    }
2128
2129    unsigned fullsz = vec_full_reg_size(s);
2130    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2131    int inc = numelem * a->imm;
2132    TCGv_i64 reg = cpu_reg(s, a->rd);
2133
2134    if (inc != 0) {
2135        TCGv_i64 t = tcg_const_i64(inc);
2136        do_sat_addsub_64(reg, t, a->u, a->d);
2137        tcg_temp_free_i64(t);
2138    }
2139    return true;
2140}
2141
2142static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
2143{
2144    if (a->esz == 0) {
2145        return false;
2146    }
2147
2148    unsigned fullsz = vec_full_reg_size(s);
2149    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2150    int inc = numelem * a->imm;
2151
2152    if (inc != 0) {
2153        if (sve_access_check(s)) {
2154            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
2155            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
2156                              vec_full_reg_offset(s, a->rn),
2157                              t, fullsz, fullsz);
2158            tcg_temp_free_i64(t);
2159        }
2160    } else {
2161        do_mov_z(s, a->rd, a->rn);
2162    }
2163    return true;
2164}
2165
2166static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
2167{
2168    if (a->esz == 0) {
2169        return false;
2170    }
2171
2172    unsigned fullsz = vec_full_reg_size(s);
2173    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
2174    int inc = numelem * a->imm;
2175
2176    if (inc != 0) {
2177        if (sve_access_check(s)) {
2178            TCGv_i64 t = tcg_const_i64(inc);
2179            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
2180            tcg_temp_free_i64(t);
2181        }
2182    } else {
2183        do_mov_z(s, a->rd, a->rn);
2184    }
2185    return true;
2186}
2187
2188/*
2189 *** SVE Bitwise Immediate Group
2190 */
2191
2192static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
2193{
2194    uint64_t imm;
2195    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2196                                extract32(a->dbm, 0, 6),
2197                                extract32(a->dbm, 6, 6))) {
2198        return false;
2199    }
2200    if (sve_access_check(s)) {
2201        unsigned vsz = vec_full_reg_size(s);
2202        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
2203                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
2204    }
2205    return true;
2206}
2207
2208static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
2209{
2210    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
2211}
2212
2213static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
2214{
2215    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
2216}
2217
2218static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
2219{
2220    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
2221}
2222
2223static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
2224{
2225    uint64_t imm;
2226    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
2227                                extract32(a->dbm, 0, 6),
2228                                extract32(a->dbm, 6, 6))) {
2229        return false;
2230    }
2231    if (sve_access_check(s)) {
2232        do_dupi_z(s, a->rd, imm);
2233    }
2234    return true;
2235}
2236
2237/*
2238 *** SVE Integer Wide Immediate - Predicated Group
2239 */
2240
/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 * Copies VAL into the active elements of Zd, preserving the inactive
 * elements from Zn, under the control of predicate PG.
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    /* One out-of-line helper per element size, indexed by esz.  */
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    /* Helpers take env-relative pointers to the registers.  */
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}
2269
2270static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
2271{
2272    if (a->esz == 0) {
2273        return false;
2274    }
2275    if (sve_access_check(s)) {
2276        /* Decode the VFP immediate.  */
2277        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
2278        TCGv_i64 t_imm = tcg_const_i64(imm);
2279        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
2280        tcg_temp_free_i64(t_imm);
2281    }
2282    return true;
2283}
2284
2285static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
2286{
2287    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
2288        return false;
2289    }
2290    if (sve_access_check(s)) {
2291        TCGv_i64 t_imm = tcg_const_i64(a->imm);
2292        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
2293        tcg_temp_free_i64(t_imm);
2294    }
2295    return true;
2296}
2297
2298static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
2299{
2300    static gen_helper_gvec_2i * const fns[4] = {
2301        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
2302        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
2303    };
2304
2305    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
2306        return false;
2307    }
2308    if (sve_access_check(s)) {
2309        unsigned vsz = vec_full_reg_size(s);
2310        TCGv_i64 t_imm = tcg_const_i64(a->imm);
2311        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
2312                            pred_full_reg_offset(s, a->pg),
2313                            t_imm, vsz, vsz, 0, fns[a->esz]);
2314        tcg_temp_free_i64(t_imm);
2315    }
2316    return true;
2317}
2318
2319/*
2320 *** SVE Permute Extract Group
2321 */
2322
/* Extract a vector: Zd = Zn[imm .. vsz) : Zm[0 .. imm), in bytes.  */
static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    /* An index beyond the vector length leaves Zn unshifted.  */
    unsigned n_ofs = imm >= vsz ? 0 : imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, rd);
    unsigned n = vec_full_reg_offset(s, rn);
    unsigned m = vec_full_reg_offset(s, rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     * Overlap constraints: writing D must not clobber M (m != d); and
     * when D aliases N, the destination bytes [0, n_siz) must not
     * overlap the source bytes [n_ofs, n_ofs + n_siz), i.e. n_siz <= n_ofs.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        /* Zd[0 ..) = Zn[n_ofs ..); then Zd[n_siz ..) = Zm[0 .. n_ofs).  */
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fall back to the out-of-line helper, passing n_ofs as data.  */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
2352
2353static bool trans_EXT(DisasContext *s, arg_EXT *a)
2354{
2355    return do_EXT(s, a->rd, a->rn, a->rm, a->imm);
2356}
2357
2358static bool trans_EXT_sve2(DisasContext *s, arg_rri *a)
2359{
2360    if (!dc_isar_feature(aa64_sve2, s)) {
2361        return false;
2362    }
2363    return do_EXT(s, a->rd, a->rn, (a->rn + 1) % 32, a->imm);
2364}
2365
2366/*
2367 *** SVE Permute - Unpredicated Group
2368 */
2369
2370static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
2371{
2372    if (sve_access_check(s)) {
2373        unsigned vsz = vec_full_reg_size(s);
2374        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2375                             vsz, vsz, cpu_reg_sp(s, a->rn));
2376    }
2377    return true;
2378}
2379
/* DUP (indexed): replicate element Zn[index] into every element of Zd.  */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    /* A set bit in the low 5 bits is required to encode an element size.  */
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        /* The lowest set bit of imm encodes the element size; the bits
         * above it form the element index.
         */
        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /*
             * While dup_mem handles 128-bit elements, dup_imm does not.
             * Thankfully element size doesn't matter for splatting zero.
             */
            tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2406
2407static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2408{
2409    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2410    static gen_insr * const fns[4] = {
2411        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2412        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2413    };
2414    unsigned vsz = vec_full_reg_size(s);
2415    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2416    TCGv_ptr t_zd = tcg_temp_new_ptr();
2417    TCGv_ptr t_zn = tcg_temp_new_ptr();
2418
2419    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2420    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2421
2422    fns[a->esz](t_zd, t_zn, val, desc);
2423
2424    tcg_temp_free_ptr(t_zd);
2425    tcg_temp_free_ptr(t_zn);
2426    tcg_temp_free_i32(desc);
2427}
2428
2429static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2430{
2431    if (sve_access_check(s)) {
2432        TCGv_i64 t = tcg_temp_new_i64();
2433        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2434        do_insr_i64(s, a, t);
2435        tcg_temp_free_i64(t);
2436    }
2437    return true;
2438}
2439
2440static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
2441{
2442    if (sve_access_check(s)) {
2443        do_insr_i64(s, a, cpu_reg(s, a->rm));
2444    }
2445    return true;
2446}
2447
2448static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
2449{
2450    static gen_helper_gvec_2 * const fns[4] = {
2451        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2452        gen_helper_sve_rev_s, gen_helper_sve_rev_d
2453    };
2454
2455    if (sve_access_check(s)) {
2456        gen_gvec_ool_zz(s, fns[a->esz], a->rd, a->rn, 0);
2457    }
2458    return true;
2459}
2460
2461static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
2462{
2463    static gen_helper_gvec_3 * const fns[4] = {
2464        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2465        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2466    };
2467
2468    if (sve_access_check(s)) {
2469        gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
2470    }
2471    return true;
2472}
2473
2474static bool trans_TBL_sve2(DisasContext *s, arg_rrr_esz *a)
2475{
2476    static gen_helper_gvec_4 * const fns[4] = {
2477        gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
2478        gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d
2479    };
2480
2481    if (!dc_isar_feature(aa64_sve2, s)) {
2482        return false;
2483    }
2484    if (sve_access_check(s)) {
2485        gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn,
2486                          (a->rn + 1) % 32, a->rm, 0);
2487    }
2488    return true;
2489}
2490
2491static bool trans_TBX(DisasContext *s, arg_rrr_esz *a)
2492{
2493    static gen_helper_gvec_3 * const fns[4] = {
2494        gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
2495        gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
2496    };
2497
2498    if (!dc_isar_feature(aa64_sve2, s)) {
2499        return false;
2500    }
2501    if (sve_access_check(s)) {
2502        gen_gvec_ool_zzz(s, fns[a->esz], a->rd, a->rn, a->rm, 0);
2503    }
2504    return true;
2505}
2506
2507static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
2508{
2509    static gen_helper_gvec_2 * const fns[4][2] = {
2510        { NULL, NULL },
2511        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2512        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2513        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2514    };
2515
2516    if (a->esz == 0) {
2517        return false;
2518    }
2519    if (sve_access_check(s)) {
2520        unsigned vsz = vec_full_reg_size(s);
2521        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2522                           vec_full_reg_offset(s, a->rn)
2523                           + (a->h ? vsz / 2 : 0),
2524                           vsz, vsz, 0, fns[a->esz][a->u]);
2525    }
2526    return true;
2527}
2528
2529/*
2530 *** SVE Permute - Predicates Group
2531 */
2532
/* Invoke a three-operand predicate-permute helper (ZIP/UZP/TRN on
 * predicate registers).  HIGH_ODD selects the high/odd variant and is
 * passed to the helper in the descriptor DATA field.
 */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    uint32_t desc = 0;

    /* Predicate sizes do not fit simd_desc; use the PREDDESC layout.  */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}
2565
/* Invoke a two-operand predicate-permute helper (REV/PUNPK on predicate
 * registers).  HIGH_ODD selects the high/odd variant via descriptor DATA.
 */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    uint32_t desc = 0;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Predicate sizes do not fit simd_desc; use the PREDDESC layout.  */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    desc = FIELD_DP32(desc, PREDDESC, DATA, high_odd);
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}
2594
2595static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
2596{
2597    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2598}
2599
2600static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
2601{
2602    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2603}
2604
2605static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
2606{
2607    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2608}
2609
2610static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
2611{
2612    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2613}
2614
2615static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
2616{
2617    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2618}
2619
2620static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
2621{
2622    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2623}
2624
2625static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
2626{
2627    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2628}
2629
2630static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
2631{
2632    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2633}
2634
2635static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
2636{
2637    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2638}
2639
2640/*
2641 *** SVE Permute - Interleaving Group
2642 */
2643
2644static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2645{
2646    static gen_helper_gvec_3 * const fns[4] = {
2647        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2648        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2649    };
2650
2651    if (sve_access_check(s)) {
2652        unsigned vsz = vec_full_reg_size(s);
2653        unsigned high_ofs = high ? vsz / 2 : 0;
2654        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2655                           vec_full_reg_offset(s, a->rn) + high_ofs,
2656                           vec_full_reg_offset(s, a->rm) + high_ofs,
2657                           vsz, vsz, 0, fns[a->esz]);
2658    }
2659    return true;
2660}
2661
2662static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2663                            gen_helper_gvec_3 *fn)
2664{
2665    if (sve_access_check(s)) {
2666        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, data);
2667    }
2668    return true;
2669}
2670
2671static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
2672{
2673    return do_zip(s, a, false);
2674}
2675
2676static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
2677{
2678    return do_zip(s, a, true);
2679}
2680
2681static bool do_zip_q(DisasContext *s, arg_rrr_esz *a, bool high)
2682{
2683    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
2684        return false;
2685    }
2686    if (sve_access_check(s)) {
2687        unsigned vsz = vec_full_reg_size(s);
2688        unsigned high_ofs = high ? QEMU_ALIGN_DOWN(vsz, 32) / 2 : 0;
2689        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2690                           vec_full_reg_offset(s, a->rn) + high_ofs,
2691                           vec_full_reg_offset(s, a->rm) + high_ofs,
2692                           vsz, vsz, 0, gen_helper_sve2_zip_q);
2693    }
2694    return true;
2695}
2696
2697static bool trans_ZIP1_q(DisasContext *s, arg_rrr_esz *a)
2698{
2699    return do_zip_q(s, a, false);
2700}
2701
2702static bool trans_ZIP2_q(DisasContext *s, arg_rrr_esz *a)
2703{
2704    return do_zip_q(s, a, true);
2705}
2706
/* UZP helpers, indexed by element size; shared by UZP1 and UZP2.  */
static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};
2711
2712static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
2713{
2714    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2715}
2716
2717static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
2718{
2719    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2720}
2721
2722static bool trans_UZP1_q(DisasContext *s, arg_rrr_esz *a)
2723{
2724    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
2725        return false;
2726    }
2727    return do_zzz_data_ool(s, a, 0, gen_helper_sve2_uzp_q);
2728}
2729
2730static bool trans_UZP2_q(DisasContext *s, arg_rrr_esz *a)
2731{
2732    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
2733        return false;
2734    }
2735    return do_zzz_data_ool(s, a, 16, gen_helper_sve2_uzp_q);
2736}
2737
/* TRN helpers, indexed by element size; shared by TRN1 and TRN2.  */
static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};
2742
2743static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
2744{
2745    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2746}
2747
2748static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
2749{
2750    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2751}
2752
2753static bool trans_TRN1_q(DisasContext *s, arg_rrr_esz *a)
2754{
2755    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
2756        return false;
2757    }
2758    return do_zzz_data_ool(s, a, 0, gen_helper_sve2_trn_q);
2759}
2760
2761static bool trans_TRN2_q(DisasContext *s, arg_rrr_esz *a)
2762{
2763    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
2764        return false;
2765    }
2766    return do_zzz_data_ool(s, a, 16, gen_helper_sve2_trn_q);
2767}
2768
2769/*
2770 *** SVE Permute Vector - Predicated Group
2771 */
2772
static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
{
    /* Only word and doubleword element sizes exist; the NULL entries
     * select byte/halfword, presumably rejected inside do_zpz_ool
     * (not visible here -- confirm).
     */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2780
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 * RET receives the byte offset of the last active element of PG.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned desc = 0;

    /* Pack the exact predicate size and element size into the descriptor.  */
    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
    desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}
2805
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* A power-of-2 vector size wraps with a simple mask.  */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise reset to 0 when the offset reaches the vector size.  */
        TCGv_i32 max = tcg_const_i32(vsz);
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
2824
/* If LAST < 0, set LAST to the offset of the last element in the vector.  */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        /* Masking maps the negative "not found" value into range too.  */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Select vsz - esize (the final element) when LAST is negative.  */
        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
2840
2841/* Load an unsigned element of ESZ from BASE+OFS.  */
2842static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2843{
2844    TCGv_i64 r = tcg_temp_new_i64();
2845
2846    switch (esz) {
2847    case 0:
2848        tcg_gen_ld8u_i64(r, base, ofs);
2849        break;
2850    case 1:
2851        tcg_gen_ld16u_i64(r, base, ofs);
2852        break;
2853    case 2:
2854        tcg_gen_ld32u_i64(r, base, ofs);
2855        break;
2856    case 3:
2857        tcg_gen_ld_i64(r, base, ofs);
2858        break;
2859    default:
2860        g_assert_not_reached();
2861    }
2862    return r;
2863}
2864
/* Load an unsigned element of ESZ from RM[LAST].
 * NOTE: on big-endian hosts LAST is modified in place; all current
 * callers free LAST immediately afterwards, so this is safe.
 */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering.  See vec_reg_offset.  */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2890
/* Compute CLAST for a Zreg: broadcast the element of Zm selected by the
 * last active element of PG (or the one after it, for !BEFORE) into Zd;
 * if no element is active, Zd instead receives Zn unchanged.
 */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* A local temp is required because LAST lives across the branch.  */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    /* last < 0 means no active element.  */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        /* No active element: copy Zn to Zd.  */
        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
2938
2939static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
2940{
2941    return do_clast_vector(s, a, false);
2942}
2943
2944static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
2945{
2946    return do_clast_vector(s, a, true);
2947}
2948
/* Compute CLAST for a scalar: replace REG_VAL with the selected element
 * of Zm when PG has an active element, otherwise leave it unchanged.
 */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp, zero;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing.  */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    /* Keep REG_VAL unless the original last offset was >= 0.  */
    zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);

    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
2981
/* Compute CLAST for a Vreg: the fallback value when no element is
 * active is the current low element of Vd itself.
 */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        /* Load the current element 0 of Vd as the default result.  */
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        /* write_fp_dreg zeroes the high bits of the destination.  */
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}
2996
2997static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
2998{
2999    return do_clast_fp(s, a, false);
3000}
3001
3002static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
3003{
3004    return do_clast_fp(s, a, true);
3005}
3006
/* Compute CLAST for a Xreg: the fallback value when no element is
 * active is the current Xd, zero-extended to the element size.
 */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Narrow Xd in place to the element width before the conditional
     * replacement in do_clast_scalar.
     */
    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        /* Already full width.  */
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}
3036
static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    /* CLASTA (scalar): "after" form.  */
    return do_clast_general(s, a, false);
}

static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    /* CLASTB (scalar): "before" form.  */
    return do_clast_general(s, a, true);
}
3046
/* Compute LAST for a scalar.  */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    /*
     * Return a new temporary holding the selected element of Zrm:
     * the last active element for BEFORE, or the one following it
     * (with wraparound handled by incr_last_active) otherwise.
     * The caller owns (and must free) the returned temporary.
     */
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}
3065
3066/* Compute LAST for a Vreg.  */
3067static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
3068{
3069    if (sve_access_check(s)) {
3070        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
3071        write_fp_dreg(s, a->rd, val);
3072        tcg_temp_free_i64(val);
3073    }
3074    return true;
3075}
3076
static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    /* LASTA (SIMD&FP register): "after" form.  */
    return do_last_fp(s, a, false);
}

static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    /* LASTB (SIMD&FP register): "before" form.  */
    return do_last_fp(s, a, true);
}
3086
3087/* Compute LAST for a Xreg.  */
3088static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
3089{
3090    if (sve_access_check(s)) {
3091        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
3092        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
3093        tcg_temp_free_i64(val);
3094    }
3095    return true;
3096}
3097
static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    /* LASTA (scalar): "after" form.  */
    return do_last_general(s, a, false);
}

static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    /* LASTB (scalar): "before" form.  */
    return do_last_general(s, a, true);
}
3107
static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
{
    /* CPY (scalar, merging): copy Xn/SP into active elements of Zd.  */
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
{
    /* CPY (SIMD&FP scalar, merging): copy element 0 of Vn into active
     * elements of Zd.
     */
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}
3126
static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
{
    /* REVB: byte reverse within elements; no byte-element form (NULL).  */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
{
    /* REVH: halfword reverse; only word and doubleword elements exist.  */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
{
    /* REVW: word reverse; only the doubleword element size is valid.  */
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
}

static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
{
    /* RBIT: bit reverse within elements, all element sizes.  */
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
3164
static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
{
    /* SPLICE: expanded entirely via the out-of-line helper.  */
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
                          a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

static bool trans_SPLICE_sve2(DisasContext *s, arg_rpr_esz *a)
{
    /* SVE2 constructive SPLICE: the second source is the next register
     * of the pair, (rn + 1) mod 32, rather than an independent Zm.
     */
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzp(s, gen_helper_sve_splice,
                          a->rd, a->rn, (a->rn + 1) % 32, a->pg, a->esz);
    }
    return true;
}
3185
3186/*
3187 *** SVE Integer Compare - Vectors Group
3188 */
3189
/*
 * Expand a vector-vs-vector integer compare producing a predicate,
 * then compute NZCV from the helper's return value via do_pred_flags.
 * GEN_FN == NULL marks an unallocated element-size encoding.
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    /* T is used both as the descriptor input and the flags output.  */
    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
3228
/*
 * Define trans_<NAME>_ppzz for the same-element-size compares; helpers
 * exist for all four element sizes.
 */
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ
3247
/*
 * Define trans_<NAME>_ppzw for the wide-element compares; there is no
 * doubleword form (NULL for MO_64), which do_ppzz_flags rejects.
 */
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
        gen_helper_sve_##name##_ppzw_s, NULL                              \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
3270
3271/*
3272 *** SVE Integer Compare - Immediate Groups
3273 */
3274
/*
 * Expand a vector-vs-immediate integer compare producing a predicate;
 * the immediate travels in the simd_desc data field.  NZCV is computed
 * from the helper's return value via do_pred_flags.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    /* T is used both as the descriptor input and the flags output.  */
    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
3310
/*
 * Define trans_<NAME>_ppzi for the compare-with-immediate forms;
 * helpers exist for all four element sizes.
 */
#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
    };                                                                    \
    return do_ppzi_flags(s, a, fns[a->esz]);                              \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
3333
3334/*
3335 *** SVE Partition Break Group
3336 */
3337
/*
 * Expand a three-predicate break operation.  A->s selects the
 * flag-setting variant (FN_S), which also updates NZCV via
 * do_pred_flags; otherwise the plain FN is used.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        fn_s(t, d, n, m, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, m, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
3372
/*
 * Expand a two-predicate break operation; as do_brk3 but without a
 * second source predicate.  A->s selects the flag-setting variant.
 */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(FIELD_DP32(0, PREDDESC, OPRSZ, vsz));

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        fn_s(t, d, n, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
3404
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
{
    /* BRKPA / BRKPAS (propagated break, after).  */
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}

static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
{
    /* BRKPB / BRKPBS (propagated break, before).  */
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}

static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
{
    /* BRKA / BRKAS, merging.  */
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}

static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
{
    /* BRKB / BRKBS, merging.  */
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}

static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
{
    /* BRKA / BRKAS, zeroing.  */
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}

static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
{
    /* BRKB / BRKBS, zeroing.  */
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}

static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
{
    /* BRKN / BRKNS (break next).  */
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
3439
3440/*
3441 *** SVE Predicate Count Group
3442 */
3443
/*
 * Store into VAL the number of elements active in both PN and PG,
 * counted at element size ESZ.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        /* The whole predicate fits in one i64: count it inline.  */
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        /* Larger predicates are counted out of line.  */
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc = 0;
        TCGv_i32 t_desc;

        desc = FIELD_DP32(desc, PREDDESC, OPRSZ, psz);
        desc = FIELD_DP32(desc, PREDDESC, ESZ, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
        t_desc = tcg_const_i32(desc);

        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
        tcg_temp_free_i32(t_desc);
    }
}
3485
static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
{
    /* CNTP: count active elements of Pn within Pg into Xd.  */
    if (sve_access_check(s)) {
        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
    }
    return true;
}
3493
static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
{
    /* INCP/DECP (scalar): adjust Xd by the active-element count.  */
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        if (a->d) {
            /* a->d set means decrement.  */
            tcg_gen_sub_i64(reg, reg, val);
        } else {
            tcg_gen_add_i64(reg, reg, val);
        }
        tcg_temp_free_i64(val);
    }
    return true;
}
3510
static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    /* INCP/DECP (vector): adjust each element of Zd by the count.
     * The byte element size is an unallocated encoding.
     */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 val = tcg_temp_new_i64();
        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;

        do_cntp(s, val, a->esz, a->pg, a->pg);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
    }
    return true;
}
3527
static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
{
    /* SQINCP/UQINCP/SQDECP/UQDECP (32-bit scalar): saturating adjust
     * of Xd by the active-element count; a->u/a->d select the variant.
     */
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_32(reg, val, a->u, a->d);
    }
    return true;
}

static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
{
    /* As above, saturating at 64 bits.  */
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_64(reg, val, a->u, a->d);
    }
    return true;
}
3551
static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
{
    /* SQINCP/UQINCP/SQDECP/UQDECP (vector): saturating adjust of each
     * element of Zd.  Byte elements are an unallocated encoding.
     */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_temp_new_i64();
        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
    }
    return true;
}
3564
3565/*
3566 *** SVE Integer Compare Scalars Group
3567 */
3568
/* CTERMEQ/CTERMNE: compare two scalars and update NF/VF.  */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    /* NF = (rn cond rm); CF is left unchanged.  */
    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF.  */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31.  */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3593
/*
 * WHILE{LT,LE,LO,LS,GT,GE,HI,HS}: build a predicate from two scalar
 * bounds.  The various conditions are compressed into a single count
 * of true iterations, which the whilel/whileg helper expands into a
 * predicate; NZCV is then set from the helper's return value.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned vsz = vec_full_reg_size(s);
    unsigned desc = 0;
    TCGCond cond;
    uint64_t maxval;
    /* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
    bool eq = a->eq == a->lt;

    /* The greater-than conditions are all SVE2. */
    if (!a->lt && !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    /* For the 32-bit forms, extend both operands per a->u.  */
    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    if (a->lt) {
        tcg_gen_sub_i64(t0, op1, op0);
        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
            cond = eq ? TCG_COND_LEU : TCG_COND_LTU;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
            cond = eq ? TCG_COND_LE : TCG_COND_LT;
        }
    } else {
        tcg_gen_sub_i64(t0, op0, op1);
        if (a->u) {
            maxval = 0;
            cond = eq ? TCG_COND_GEU : TCG_COND_GTU;
        } else {
            maxval = a->sf ? INT64_MIN : INT32_MIN;
            cond = eq ? TCG_COND_GE : TCG_COND_GT;
        }
    }

    tmax = tcg_const_i64(vsz >> a->esz);
    if (eq) {
        /* Equality means one more iteration.  */
        tcg_gen_addi_i64(t0, t0, 1);

        /*
         * For the less-than while, if op1 is maxval (and the only time
         * the addition above could overflow), then we produce an all-true
         * predicate by setting the count to the vector length.  This is
         * because the pseudocode is described as an increment + compare
         * loop, and the maximum integer would always compare true.
         * Similarly, the greater-than while has the same issue with the
         * minimum integer due to the decrement + compare loop.
         */
        tcg_gen_movi_i64(t1, maxval);
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum.  */
    tcg_gen_umin_i64(t0, t0, tmax);
    tcg_temp_free_i64(tmax);

    /* Set the count to zero if the condition is false.  */
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type.  */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits.  */
    tcg_gen_shli_i32(t2, t2, a->esz);

    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
    t3 = tcg_const_i32(desc);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    if (a->lt) {
        gen_helper_sve_whilel(t2, ptr, t2, t3);
    } else {
        gen_helper_sve_whileg(t2, ptr, t2, t3);
    }
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    tcg_temp_free_i32(t3);
    return true;
}
3707
3708static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
3709{
3710    TCGv_i64 op0, op1, diff, t1, tmax;
3711    TCGv_i32 t2, t3;
3712    TCGv_ptr ptr;
3713    unsigned vsz = vec_full_reg_size(s);
3714    unsigned desc = 0;
3715
3716    if (!dc_isar_feature(aa64_sve2, s)) {
3717        return false;
3718    }
3719    if (!sve_access_check(s)) {
3720        return true;
3721    }
3722
3723    op0 = read_cpu_reg(s, a->rn, 1);
3724    op1 = read_cpu_reg(s, a->rm, 1);
3725
3726    tmax = tcg_const_i64(vsz);
3727    diff = tcg_temp_new_i64();
3728
3729    if (a->rw) {
3730        /* WHILERW */
3731        /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
3732        t1 = tcg_temp_new_i64();
3733        tcg_gen_sub_i64(diff, op0, op1);
3734        tcg_gen_sub_i64(t1, op1, op0);
3735        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
3736        tcg_temp_free_i64(t1);
3737        /* Round down to a multiple of ESIZE.  */
3738        tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3739        /* If op1 == op0, diff == 0, and the condition is always true. */
3740        tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
3741    } else {
3742        /* WHILEWR */
3743        tcg_gen_sub_i64(diff, op1, op0);
3744        /* Round down to a multiple of ESIZE.  */
3745        tcg_gen_andi_i64(diff, diff, -1 << a->esz);
3746        /* If op0 >= op1, diff <= 0, the condition is always true. */
3747        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
3748    }
3749
3750    /* Bound to the maximum.  */
3751    tcg_gen_umin_i64(diff, diff, tmax);
3752    tcg_temp_free_i64(tmax);
3753
3754    /* Since we're bounded, pass as a 32-bit type.  */
3755    t2 = tcg_temp_new_i32();
3756    tcg_gen_extrl_i64_i32(t2, diff);
3757    tcg_temp_free_i64(diff);
3758
3759    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
3760    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
3761    t3 = tcg_const_i32(desc);
3762
3763    ptr = tcg_temp_new_ptr();
3764    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3765
3766    gen_helper_sve_whilel(t2, ptr, t2, t3);
3767    do_pred_flags(t2);
3768
3769    tcg_temp_free_ptr(ptr);
3770    tcg_temp_free_i32(t2);
3771    tcg_temp_free_i32(t3);
3772    return true;
3773}
3774
3775/*
3776 *** SVE Integer Wide Immediate - Unpredicated Group
3777 */
3778
static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
{
    /* FDUP: broadcast a VFP-encoded immediate to every element of Zd.
     * Byte elements are an unallocated encoding.
     */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);
        uint64_t imm;

        /* Decode the VFP immediate.  */
        imm = vfp_expand_imm(a->esz, a->imm);
        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, imm);
    }
    return true;
}
3795
static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
{
    /* DUP (immediate): broadcast IMM to every element of Zd.
     * Byte elements with insn bit 13 set (presumably the immediate
     * shift -- see the decode file) are an unallocated encoding.
     */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);

        tcg_gen_gvec_dup_imm(a->esz, dofs, vsz, vsz, a->imm);
    }
    return true;
}
3809
static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    /* ADD (immediate): Zd = Zn + imm.  Byte elements with insn bit 13
     * set (presumably the immediate shift) are an unallocated encoding.
     */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    /* SUB (immediate): implemented as addition of the negated imm.  */
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3828
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    /* SUBR (immediate): Zd = imm - Zn, expanded with the scalar as the
     * first operand (.scalar_first) via the GVecGen2s machinery.
     */
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opt_opc = vecop_list,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opt_opc = vecop_list,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opt_opc = vecop_list,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    /* Byte elements with insn bit 13 set are an unallocated encoding.  */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, c, &op[a->esz]);
        tcg_temp_free_i64(c);
    }
    return true;
}
3873
3874static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
3875{
3876    if (sve_access_check(s)) {
3877        unsigned vsz = vec_full_reg_size(s);
3878        tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3879                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3880    }
3881    return true;
3882}
3883
/*
 * Expand a saturating add/subtract of an immediate; U selects unsigned
 * and D selects subtraction.  Byte elements with insn bit 13 set are
 * an unallocated encoding.
 */
static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
{
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_const_i64(a->imm);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
        tcg_temp_free_i64(val);
    }
    return true;
}
3896
static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    /* SQADD (immediate): signed saturating add.  */
    return do_zzi_sat(s, a, false, false);
}

static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    /* UQADD (immediate): unsigned saturating add.  */
    return do_zzi_sat(s, a, true, false);
}

static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    /* SQSUB (immediate): signed saturating subtract.  */
    return do_zzi_sat(s, a, false, true);
}

static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    /* UQSUB (immediate): unsigned saturating subtract.  */
    return do_zzi_sat(s, a, true, true);
}
3916
3917static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3918{
3919    if (sve_access_check(s)) {
3920        unsigned vsz = vec_full_reg_size(s);
3921        TCGv_i64 c = tcg_const_i64(a->imm);
3922
3923        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3924                            vec_full_reg_offset(s, a->rn),
3925                            c, vsz, vsz, 0, fn);
3926        tcg_temp_free_i64(c);
3927    }
3928    return true;
3929}
3930
/*
 * Define trans_<NAME>_zzi for the min/max-with-immediate forms;
 * helpers exist for all four element sizes.
 */
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3947
static bool trans_DOT_zzzz(DisasContext *s, arg_DOT_zzzz *a)
{
    /* SDOT/UDOT (vectors): a->u selects unsigned, a->sz the element
     * size (byte or halfword inputs).
     */
    static gen_helper_gvec_4 * const fns[2][2] = {
        { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
        { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
    };

    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0);
    }
    return true;
}
3960
3961/*
3962 * SVE Multiply - Indexed
3963 */
3964
3965static bool do_zzxz_ool(DisasContext *s, arg_rrxr_esz *a,
3966                        gen_helper_gvec_4 *fn)
3967{
3968    if (fn == NULL) {
3969        return false;
3970    }
3971    if (sve_access_check(s)) {
3972        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index);
3973    }
3974    return true;
3975}
3976
/* Define an indexed-dot-product trans function forwarding to FUNC.  */
#define DO_RRXR(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a)  \
    { return do_zzxz_ool(s, a, FUNC); }

DO_RRXR(trans_SDOT_zzxw_s, gen_helper_gvec_sdot_idx_b)
DO_RRXR(trans_SDOT_zzxw_d, gen_helper_gvec_sdot_idx_h)
DO_RRXR(trans_UDOT_zzxw_s, gen_helper_gvec_udot_idx_b)
DO_RRXR(trans_UDOT_zzxw_d, gen_helper_gvec_udot_idx_h)

static bool trans_SUDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
{
    /* SUDOT (indexed): requires the I8MM extension.  */
    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
        return false;
    }
    return do_zzxz_ool(s, a, gen_helper_gvec_sudot_idx_b);
}

static bool trans_USDOT_zzxw_s(DisasContext *s, arg_rrxr_esz *a)
{
    /* USDOT (indexed): requires the I8MM extension.  */
    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
        return false;
    }
    return do_zzxz_ool(s, a, gen_helper_gvec_usdot_idx_b);
}
4001
4002#undef DO_RRXR
4003
4004static bool do_sve2_zzz_data(DisasContext *s, int rd, int rn, int rm, int data,
4005                             gen_helper_gvec_3 *fn)
4006{
4007    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
4008        return false;
4009    }
4010    if (sve_access_check(s)) {
4011        unsigned vsz = vec_full_reg_size(s);
4012        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
4013                           vec_full_reg_offset(s, rn),
4014                           vec_full_reg_offset(s, rm),
4015                           vsz, vsz, data, fn);
4016    }
4017    return true;
4018}
4019
/*
 * SVE2 indexed multiplies (MUL, SQDMULH, SQRDMULH): the element index
 * is passed unmodified to the helper via simd_data.
 */
#define DO_SVE2_RRX(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrx_esz *a)  \
    { return do_sve2_zzz_data(s, a->rd, a->rn, a->rm, a->index, FUNC); }

DO_SVE2_RRX(trans_MUL_zzx_h, gen_helper_gvec_mul_idx_h)
DO_SVE2_RRX(trans_MUL_zzx_s, gen_helper_gvec_mul_idx_s)
DO_SVE2_RRX(trans_MUL_zzx_d, gen_helper_gvec_mul_idx_d)

DO_SVE2_RRX(trans_SQDMULH_zzx_h, gen_helper_sve2_sqdmulh_idx_h)
DO_SVE2_RRX(trans_SQDMULH_zzx_s, gen_helper_sve2_sqdmulh_idx_s)
DO_SVE2_RRX(trans_SQDMULH_zzx_d, gen_helper_sve2_sqdmulh_idx_d)

DO_SVE2_RRX(trans_SQRDMULH_zzx_h, gen_helper_sve2_sqrdmulh_idx_h)
DO_SVE2_RRX(trans_SQRDMULH_zzx_s, gen_helper_sve2_sqrdmulh_idx_s)
DO_SVE2_RRX(trans_SQRDMULH_zzx_d, gen_helper_sve2_sqrdmulh_idx_d)

#undef DO_SVE2_RRX
4037
/*
 * SVE2 indexed long multiplies.  Bottom (*B) and top (*T) forms share a
 * helper; simd_data packs (index << 1) | TOP so the helper can select
 * which half of each source pair to use.
 */
#define DO_SVE2_RRX_TB(NAME, FUNC, TOP) \
    static bool NAME(DisasContext *s, arg_rrx_esz *a)           \
    {                                                           \
        return do_sve2_zzz_data(s, a->rd, a->rn, a->rm,         \
                                (a->index << 1) | TOP, FUNC);   \
    }

DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_s, gen_helper_sve2_sqdmull_idx_s, false)
DO_SVE2_RRX_TB(trans_SQDMULLB_zzx_d, gen_helper_sve2_sqdmull_idx_d, false)
DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_s, gen_helper_sve2_sqdmull_idx_s, true)
DO_SVE2_RRX_TB(trans_SQDMULLT_zzx_d, gen_helper_sve2_sqdmull_idx_d, true)

DO_SVE2_RRX_TB(trans_SMULLB_zzx_s, gen_helper_sve2_smull_idx_s, false)
DO_SVE2_RRX_TB(trans_SMULLB_zzx_d, gen_helper_sve2_smull_idx_d, false)
DO_SVE2_RRX_TB(trans_SMULLT_zzx_s, gen_helper_sve2_smull_idx_s, true)
DO_SVE2_RRX_TB(trans_SMULLT_zzx_d, gen_helper_sve2_smull_idx_d, true)

DO_SVE2_RRX_TB(trans_UMULLB_zzx_s, gen_helper_sve2_umull_idx_s, false)
DO_SVE2_RRX_TB(trans_UMULLB_zzx_d, gen_helper_sve2_umull_idx_d, false)
DO_SVE2_RRX_TB(trans_UMULLT_zzx_s, gen_helper_sve2_umull_idx_s, true)
DO_SVE2_RRX_TB(trans_UMULLT_zzx_d, gen_helper_sve2_umull_idx_d, true)

#undef DO_SVE2_RRX_TB
4061
4062static bool do_sve2_zzzz_data(DisasContext *s, int rd, int rn, int rm, int ra,
4063                              int data, gen_helper_gvec_4 *fn)
4064{
4065    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
4066        return false;
4067    }
4068    if (sve_access_check(s)) {
4069        unsigned vsz = vec_full_reg_size(s);
4070        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
4071                           vec_full_reg_offset(s, rn),
4072                           vec_full_reg_offset(s, rm),
4073                           vec_full_reg_offset(s, ra),
4074                           vsz, vsz, data, fn);
4075    }
4076    return true;
4077}
4078
/*
 * SVE2 indexed multiply-accumulate (MLA, MLS, SQRDMLAH, SQRDMLSH):
 * the element index is passed to the helper via simd_data.
 */
#define DO_SVE2_RRXR(NAME, FUNC) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a)  \
    { return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra, a->index, FUNC); }

DO_SVE2_RRXR(trans_MLA_zzxz_h, gen_helper_gvec_mla_idx_h)
DO_SVE2_RRXR(trans_MLA_zzxz_s, gen_helper_gvec_mla_idx_s)
DO_SVE2_RRXR(trans_MLA_zzxz_d, gen_helper_gvec_mla_idx_d)

DO_SVE2_RRXR(trans_MLS_zzxz_h, gen_helper_gvec_mls_idx_h)
DO_SVE2_RRXR(trans_MLS_zzxz_s, gen_helper_gvec_mls_idx_s)
DO_SVE2_RRXR(trans_MLS_zzxz_d, gen_helper_gvec_mls_idx_d)

DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_h, gen_helper_sve2_sqrdmlah_idx_h)
DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_s, gen_helper_sve2_sqrdmlah_idx_s)
DO_SVE2_RRXR(trans_SQRDMLAH_zzxz_d, gen_helper_sve2_sqrdmlah_idx_d)

DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_h, gen_helper_sve2_sqrdmlsh_idx_h)
DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_s, gen_helper_sve2_sqrdmlsh_idx_s)
DO_SVE2_RRXR(trans_SQRDMLSH_zzxz_d, gen_helper_sve2_sqrdmlsh_idx_d)

#undef DO_SVE2_RRXR
4100
/*
 * SVE2 indexed long multiply-accumulate.  simd_data packs
 * (index << 1) | TOP, with TOP selecting the top-half sources for the
 * *T forms.  The addend passed is a->rd itself, i.e. these are treated
 * as destructive accumulate forms.
 * NOTE(review): this differs from DO_SVE2_RRXR_ROT below, which passes
 * a->ra -- confirm against sve.decode that ra == rd for these encodings.
 */
#define DO_SVE2_RRXR_TB(NAME, FUNC, TOP) \
    static bool NAME(DisasContext *s, arg_rrxr_esz *a)          \
    {                                                           \
        return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->rd, \
                                 (a->index << 1) | TOP, FUNC);  \
    }

DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_SQDMLALB_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_s, gen_helper_sve2_sqdmlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_SQDMLALT_zzxw_d, gen_helper_sve2_sqdmlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_SQDMLSLB_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_s, gen_helper_sve2_sqdmlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_SQDMLSLT_zzxw_d, gen_helper_sve2_sqdmlsl_idx_d, true)

DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_s, gen_helper_sve2_smlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_SMLALB_zzxw_d, gen_helper_sve2_smlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_s, gen_helper_sve2_smlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_SMLALT_zzxw_d, gen_helper_sve2_smlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_s, gen_helper_sve2_umlal_idx_s, false)
DO_SVE2_RRXR_TB(trans_UMLALB_zzxw_d, gen_helper_sve2_umlal_idx_d, false)
DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_s, gen_helper_sve2_umlal_idx_s, true)
DO_SVE2_RRXR_TB(trans_UMLALT_zzxw_d, gen_helper_sve2_umlal_idx_d, true)

DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_s, gen_helper_sve2_smlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_SMLSLB_zzxw_d, gen_helper_sve2_smlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_s, gen_helper_sve2_smlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_SMLSLT_zzxw_d, gen_helper_sve2_smlsl_idx_d, true)

DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_s, gen_helper_sve2_umlsl_idx_s, false)
DO_SVE2_RRXR_TB(trans_UMLSLB_zzxw_d, gen_helper_sve2_umlsl_idx_d, false)
DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_s, gen_helper_sve2_umlsl_idx_s, true)
DO_SVE2_RRXR_TB(trans_UMLSLT_zzxw_d, gen_helper_sve2_umlsl_idx_d, true)

#undef DO_SVE2_RRXR_TB
4139
/*
 * SVE2 indexed complex multiply-accumulate / complex dot product:
 * simd_data packs (index << 2) | rot so the helper receives both the
 * element index and the rotation.
 */
#define DO_SVE2_RRXR_ROT(NAME, FUNC) \
    static bool trans_##NAME(DisasContext *s, arg_##NAME *a)       \
    {                                                              \
        return do_sve2_zzzz_data(s, a->rd, a->rn, a->rm, a->ra,    \
                                 (a->index << 2) | a->rot, FUNC);  \
    }

DO_SVE2_RRXR_ROT(CMLA_zzxz_h, gen_helper_sve2_cmla_idx_h)
DO_SVE2_RRXR_ROT(CMLA_zzxz_s, gen_helper_sve2_cmla_idx_s)

DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_h, gen_helper_sve2_sqrdcmlah_idx_h)
DO_SVE2_RRXR_ROT(SQRDCMLAH_zzxz_s, gen_helper_sve2_sqrdcmlah_idx_s)

DO_SVE2_RRXR_ROT(CDOT_zzxw_s, gen_helper_sve2_cdot_idx_s)
DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)

#undef DO_SVE2_RRXR_ROT
4157
4158/*
4159 *** SVE Floating Point Multiply-Add Indexed Group
4160 */
4161
/*
 * Expand FMLA/FMLS (indexed).  simd_data packs (index << 1) | sub so a
 * single helper per element size handles both add and subtract forms.
 */
static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
{
    /* One helper per element size: h, s, d (indexed by esz - 1). */
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* fp16 operations use the FP16-specific float_status. */
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | sub,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4183
4184static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
4185{
4186    return do_FMLA_zzxz(s, a, false);
4187}
4188
4189static bool trans_FMLS_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
4190{
4191    return do_FMLA_zzxz(s, a, true);
4192}
4193
4194/*
4195 *** SVE Floating Point Multiply Indexed Group
4196 */
4197
static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
{
    /* One helper per element size: h, s, d (indexed by esz - 1). */
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_gvec_fmul_idx_h,
        gen_helper_gvec_fmul_idx_s,
        gen_helper_gvec_fmul_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* fp16 operations use the FP16-specific float_status. */
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->index, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4217
4218/*
4219 *** SVE Floating Point Fast Reduction Group
4220 */
4221
4222typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
4223                                  TCGv_ptr, TCGv_i32);
4224
/*
 * Expand a predicated floating-point horizontal reduction.
 * The vector size rounded up to a power of two is passed to the helper
 * in the desc data field (NOTE(review): presumably so the helper can
 * run a pairwise reduction over a padded element count -- confirm in
 * sve_helper.c).  The scalar result lands in zd via write_fp_dreg.
 */
static void do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned p2vsz = pow2ceil(vsz);
    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, p2vsz));
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    /* The helper takes pointers into env for the vector and predicate. */
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);
    tcg_temp_free_i32(t_desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
}
4251
/*
 * Fast FP reductions (FADDV etc.): esz == 0 (byte) is an invalid
 * encoding; otherwise dispatch to the h/s/d helper via do_reduce.
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
{                                                                        \
    static gen_helper_fp_reduce * const fns[3] = {                       \
        gen_helper_sve_##name##_h,                                       \
        gen_helper_sve_##name##_s,                                       \
        gen_helper_sve_##name##_d,                                       \
    };                                                                   \
    if (a->esz == 0) {                                                   \
        return false;                                                    \
    }                                                                    \
    if (sve_access_check(s)) {                                           \
        do_reduce(s, a, fns[a->esz - 1]);                                \
    }                                                                    \
    return true;                                                         \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
4274
4275/*
4276 *** SVE Floating Point Unary Operations - Unpredicated Group
4277 */
4278
4279static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
4280{
4281    unsigned vsz = vec_full_reg_size(s);
4282    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4283
4284    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
4285                       vec_full_reg_offset(s, a->rn),
4286                       status, vsz, vsz, 0, fn);
4287    tcg_temp_free_ptr(status);
4288}
4289
4290static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
4291{
4292    static gen_helper_gvec_2_ptr * const fns[3] = {
4293        gen_helper_gvec_frecpe_h,
4294        gen_helper_gvec_frecpe_s,
4295        gen_helper_gvec_frecpe_d,
4296    };
4297    if (a->esz == 0) {
4298        return false;
4299    }
4300    if (sve_access_check(s)) {
4301        do_zz_fp(s, a, fns[a->esz - 1]);
4302    }
4303    return true;
4304}
4305
4306static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
4307{
4308    static gen_helper_gvec_2_ptr * const fns[3] = {
4309        gen_helper_gvec_frsqrte_h,
4310        gen_helper_gvec_frsqrte_s,
4311        gen_helper_gvec_frsqrte_d,
4312    };
4313    if (a->esz == 0) {
4314        return false;
4315    }
4316    if (sve_access_check(s)) {
4317        do_zz_fp(s, a, fns[a->esz - 1]);
4318    }
4319    return true;
4320}
4321
4322/*
4323 *** SVE Floating Point Compare with Zero Group
4324 */
4325
4326static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
4327                      gen_helper_gvec_3_ptr *fn)
4328{
4329    unsigned vsz = vec_full_reg_size(s);
4330    TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4331
4332    tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
4333                       vec_full_reg_offset(s, a->rn),
4334                       pred_full_reg_offset(s, a->pg),
4335                       status, vsz, vsz, 0, fn);
4336    tcg_temp_free_ptr(status);
4337}
4338
/*
 * FP compare with zero: esz == 0 (byte) is an invalid encoding;
 * otherwise dispatch to the h/s/d helper via do_ppz_fp.
 */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
{                                                                 \
    static gen_helper_gvec_3_ptr * const fns[3] = {               \
        gen_helper_sve_##name##_h,                                \
        gen_helper_sve_##name##_s,                                \
        gen_helper_sve_##name##_d,                                \
    };                                                            \
    if (a->esz == 0) {                                            \
        return false;                                             \
    }                                                             \
    if (sve_access_check(s)) {                                    \
        do_ppz_fp(s, a, fns[a->esz - 1]);                         \
    }                                                             \
    return true;                                                  \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
4364
4365/*
4366 *** SVE floating-point trig multiply-add coefficient
4367 */
4368
static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
{
    /* One helper per element size: h, s, d (indexed by esz - 1). */
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_ftmad_h,
        gen_helper_sve_ftmad_s,
        gen_helper_sve_ftmad_d,
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* fp16 operations use the FP16-specific float_status. */
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        /* a->imm (the coefficient selector) is passed via simd_data. */
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->imm, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4391
4392/*
4393 *** SVE Floating Point Accumulating Reduction Group
4394 */
4395
/*
 * FADDA: ordered FP accumulation across zm, seeded from element 0 of zn.
 * The helper returns the running scalar, which is written back to zd.
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    /* One helper per element size: h, s, d (indexed by esz - 1). */
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    if (a->esz == 0) {
        /* Byte element size is an invalid encoding. */
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* Seed the accumulator with element 0 of zn. */
    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
4436
4437/*
4438 *** SVE Floating Point Arithmetic - Unpredicated Group
4439 */
4440
4441static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
4442                      gen_helper_gvec_3_ptr *fn)
4443{
4444    if (fn == NULL) {
4445        return false;
4446    }
4447    if (sve_access_check(s)) {
4448        unsigned vsz = vec_full_reg_size(s);
4449        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4450        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4451                           vec_full_reg_offset(s, a->rn),
4452                           vec_full_reg_offset(s, a->rm),
4453                           status, vsz, vsz, 0, fn);
4454        tcg_temp_free_ptr(status);
4455    }
4456    return true;
4457}
4458
4459
/*
 * Unpredicated FP arithmetic: the table is indexed directly by esz,
 * with NULL marking the invalid byte encoding.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3_ptr * const fns[4] = {                 \
        NULL, gen_helper_gvec_##name##_h,                           \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
    };                                                              \
    return do_zzz_fp(s, a, fns[a->esz]);                            \
}

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
4478
4479/*
4480 *** SVE Floating Point Arithmetic - Predicated Group
4481 */
4482
4483static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
4484                       gen_helper_gvec_4_ptr *fn)
4485{
4486    if (fn == NULL) {
4487        return false;
4488    }
4489    if (sve_access_check(s)) {
4490        unsigned vsz = vec_full_reg_size(s);
4491        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4492        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
4493                           vec_full_reg_offset(s, a->rn),
4494                           vec_full_reg_offset(s, a->rm),
4495                           pred_full_reg_offset(s, a->pg),
4496                           status, vsz, vsz, 0, fn);
4497        tcg_temp_free_ptr(status);
4498    }
4499    return true;
4500}
4501
/*
 * Predicated FP arithmetic: the table is indexed directly by esz,
 * with NULL marking the invalid byte encoding.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)          \
{                                                                   \
    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_zpzz_fp(s, a, fns[a->esz]);                           \
}

DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3
4525
4526typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
4527                                      TCGv_i64, TCGv_ptr, TCGv_i32);
4528
/*
 * Expand a predicated FP operation taking a 64-bit scalar operand.
 * The helper receives pointers into env for zd, zn and pg, the scalar
 * value, the selected float_status and a simd descriptor.
 */
static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    /* fp16 operations use the FP16-specific float_status. */
    status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_i32(desc);
    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}
4553
4554static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
4555                      gen_helper_sve_fp2scalar *fn)
4556{
4557    TCGv_i64 temp = tcg_const_i64(imm);
4558    do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
4559    tcg_temp_free_i64(temp);
4560}
4561
/*
 * FP arithmetic with an architecturally-defined immediate: a->imm is a
 * one-bit selector choosing between the two constants (const0/const1)
 * in the per-element-size val table; esz == 0 is an invalid encoding.
 */
#define DO_FP_IMM(NAME, name, const0, const1) \
static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_sve_fp2scalar * const fns[3] = {                    \
        gen_helper_sve_##name##_h,                                        \
        gen_helper_sve_##name##_s,                                        \
        gen_helper_sve_##name##_d                                         \
    };                                                                    \
    static uint64_t const val[3][2] = {                                   \
        { float16_##const0, float16_##const1 },                           \
        { float32_##const0, float32_##const1 },                           \
        { float64_##const0, float64_##const1 },                           \
    };                                                                    \
    if (a->esz == 0) {                                                    \
        return false;                                                     \
    }                                                                     \
    if (sve_access_check(s)) {                                            \
        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
    }                                                                     \
    return true;                                                          \
}

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
4594
4595static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
4596                      gen_helper_gvec_4_ptr *fn)
4597{
4598    if (fn == NULL) {
4599        return false;
4600    }
4601    if (sve_access_check(s)) {
4602        unsigned vsz = vec_full_reg_size(s);
4603        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4604        tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
4605                           vec_full_reg_offset(s, a->rn),
4606                           vec_full_reg_offset(s, a->rm),
4607                           pred_full_reg_offset(s, a->pg),
4608                           status, vsz, vsz, 0, fn);
4609        tcg_temp_free_ptr(status);
4610    }
4611    return true;
4612}
4613
/*
 * Predicated FP comparisons: the table is indexed directly by esz,
 * with NULL marking the invalid byte encoding.
 */
#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)     \
{                                                                     \
    static gen_helper_gvec_4_ptr * const fns[4] = {                   \
        NULL, gen_helper_sve_##name##_h,                              \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
    };                                                                \
    return do_fp_cmp(s, a, fns[a->esz]);                              \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
4633
static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
{
    /* One helper per element size: h, s, d (indexed by esz - 1). */
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_sve_fcadd_h,
        gen_helper_sve_fcadd_s,
        gen_helper_sve_fcadd_d
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* fp16 operations use the FP16-specific float_status. */
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        /* The rotation is passed to the helper via simd_data. */
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, a->rot, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4657
4658static bool do_fmla(DisasContext *s, arg_rprrr_esz *a,
4659                    gen_helper_gvec_5_ptr *fn)
4660{
4661    if (a->esz == 0) {
4662        return false;
4663    }
4664    if (sve_access_check(s)) {
4665        unsigned vsz = vec_full_reg_size(s);
4666        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
4667        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
4668                           vec_full_reg_offset(s, a->rn),
4669                           vec_full_reg_offset(s, a->rm),
4670                           vec_full_reg_offset(s, a->ra),
4671                           pred_full_reg_offset(s, a->pg),
4672                           status, vsz, vsz, 0, fn);
4673        tcg_temp_free_ptr(status);
4674    }
4675    return true;
4676}
4677
/*
 * Predicated FP multiply-add variants: the table is indexed directly
 * by esz, with NULL marking the invalid byte encoding.
 */
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
{                                                                    \
    static gen_helper_gvec_5_ptr * const fns[4] = {                  \
        NULL, gen_helper_sve_##name##_h,                             \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
    };                                                               \
    return do_fmla(s, a, fns[a->esz]);                               \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
4694
static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
{
    /* Indexed directly by esz; byte (esz 0) is an invalid encoding. */
    static gen_helper_gvec_5_ptr * const fns[4] = {
        NULL,
        gen_helper_sve_fcmla_zpzzz_h,
        gen_helper_sve_fcmla_zpzzz_s,
        gen_helper_sve_fcmla_zpzzz_d,
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* fp16 operations use the FP16-specific float_status. */
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        /* The rotation is passed to the helper via simd_data. */
        tcg_gen_gvec_5_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, a->rot, fns[a->esz]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4720
static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
{
    /* Only half and single element sizes exist (indexed by esz - 1). */
    static gen_helper_gvec_4_ptr * const fns[2] = {
        gen_helper_gvec_fcmlah_idx,
        gen_helper_gvec_fcmlas_idx,
    };

    /* The decoder guarantees esz in {1, 2} and a destructive form. */
    tcg_debug_assert(a->esz == 1 || a->esz == 2);
    tcg_debug_assert(a->rd == a->ra);
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* fp16 operations use the FP16-specific float_status. */
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        /* simd_data packs index * 4 + rot. */
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz,
                           a->index * 4 + a->rot,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4744
4745/*
4746 *** SVE Floating Point Unary Operations Predicated Group
4747 */
4748
4749static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4750                       bool is_fp16, gen_helper_gvec_3_ptr *fn)
4751{
4752    if (sve_access_check(s)) {
4753        unsigned vsz = vec_full_reg_size(s);
4754        TCGv_ptr status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
4755        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4756                           vec_full_reg_offset(s, rn),
4757                           pred_full_reg_offset(s, pg),
4758                           status, vsz, vsz, 0, fn);
4759        tcg_temp_free_ptr(status);
4760    }
4761    return true;
4762}
4763
/*
 * FP precision conversions (FCVT, BFCVT): each wrapper selects the
 * helper for one source/destination size pair.  All pass is_fp16 ==
 * false, i.e. the standard FPST_FPCR float_status.
 */
static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
}

static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
}

static bool trans_BFCVT(DisasContext *s, arg_rpr_esz *a)
{
    /* BFCVT is gated on FEAT_BF16. */
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvt);
}

static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
}

static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
}

static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
}
4801
/*
 * FP-to-integer conversions (FCVTZS/FCVTZU), one wrapper per
 * source/destination size pair.  The h* variants pass is_fp16 == true
 * so the helper runs with the FP16-specific float_status.
 */
static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
}

static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
}

static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
}

static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
}

static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
}

static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
}

static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
}

static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
}

static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
}

static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
}

static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
}

static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
}

static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
}

static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
}
4871
/* FRINT* helpers, indexed by a->esz - 1 (MO_16, MO_32, MO_64). */
static gen_helper_gvec_3_ptr * const frint_fns[3] = {
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};
4877
4878static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
4879{
4880    if (a->esz == 0) {
4881        return false;
4882    }
4883    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4884                      frint_fns[a->esz - 1]);
4885}
4886
4887static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
4888{
4889    static gen_helper_gvec_3_ptr * const fns[3] = {
4890        gen_helper_sve_frintx_h,
4891        gen_helper_sve_frintx_s,
4892        gen_helper_sve_frintx_d
4893    };
4894    if (a->esz == 0) {
4895        return false;
4896    }
4897    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4898}
4899
/*
 * Expand a predicated FRINT* that uses an explicit rounding MODE
 * rather than the one currently in FPCR: install MODE into the
 * fpstatus, run FN, then restore the previous rounding mode.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
                          int mode, gen_helper_gvec_3_ptr *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i32 tmode = tcg_const_i32(mode);
        TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

        /* set_rmode writes the previous mode back into tmode ... */
        gen_helper_set_rmode(tmode, tmode, status);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);

        /* ... so this second call restores the original rounding mode. */
        gen_helper_set_rmode(tmode, tmode, status);
        tcg_temp_free_i32(tmode);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4921
4922static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
4923{
4924    if (a->esz == 0) {
4925        return false;
4926    }
4927    return do_frint_mode(s, a, float_round_nearest_even, frint_fns[a->esz - 1]);
4928}
4929
4930static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
4931{
4932    if (a->esz == 0) {
4933        return false;
4934    }
4935    return do_frint_mode(s, a, float_round_up, frint_fns[a->esz - 1]);
4936}
4937
4938static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
4939{
4940    if (a->esz == 0) {
4941        return false;
4942    }
4943    return do_frint_mode(s, a, float_round_down, frint_fns[a->esz - 1]);
4944}
4945
4946static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
4947{
4948    if (a->esz == 0) {
4949        return false;
4950    }
4951    return do_frint_mode(s, a, float_round_to_zero, frint_fns[a->esz - 1]);
4952}
4953
4954static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
4955{
4956    if (a->esz == 0) {
4957        return false;
4958    }
4959    return do_frint_mode(s, a, float_round_ties_away, frint_fns[a->esz - 1]);
4960}
4961
4962static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
4963{
4964    static gen_helper_gvec_3_ptr * const fns[3] = {
4965        gen_helper_sve_frecpx_h,
4966        gen_helper_sve_frecpx_s,
4967        gen_helper_sve_frecpx_d
4968    };
4969    if (a->esz == 0) {
4970        return false;
4971    }
4972    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4973}
4974
4975static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
4976{
4977    static gen_helper_gvec_3_ptr * const fns[3] = {
4978        gen_helper_sve_fsqrt_h,
4979        gen_helper_sve_fsqrt_s,
4980        gen_helper_sve_fsqrt_d
4981    };
4982    if (a->esz == 0) {
4983        return false;
4984    }
4985    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4986}
4987
4988static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4989{
4990    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4991}
4992
4993static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4994{
4995    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4996}
4997
4998static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4999{
5000    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
5001}
5002
5003static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
5004{
5005    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
5006}
5007
5008static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
5009{
5010    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
5011}
5012
5013static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
5014{
5015    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
5016}
5017
5018static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
5019{
5020    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
5021}
5022
5023static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
5024{
5025    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
5026}
5027
5028static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
5029{
5030    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
5031}
5032
5033static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
5034{
5035    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
5036}
5037
5038static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
5039{
5040    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
5041}
5042
5043static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
5044{
5045    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
5046}
5047
5048static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
5049{
5050    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
5051}
5052
5053static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
5054{
5055    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
5056}
5057
5058/*
5059 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
5060 */
5061
/*
 * Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 * Used for both LDR (vector) and LDR (predicate); the data is a
 * little-endian byte stream regardless of element size.
 */

static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /*
     * Number of memory operations if fully unrolled: one per aligned
     * doubleword plus one per set bit of the tail (len_remain is
     * 0, 2, 4 or 6; a 6-byte tail needs a 4-byte and a 2-byte access).
     */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0, t1;

    /* Perform the MTE check on the whole transfer up front. */
    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /*
     * Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small transfer: unroll the doubleword copies inline. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Large transfer: emit a run-time loop over the doublewords. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        t0 = tcg_temp_new_i64();
        tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);

        /* Store to cpu_env at byte offset vofs + i. */
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /*
     * Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32(len_remain) is the MO_SIZE for a single access. */
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6-byte tail: a 4-byte load then a 2-byte load, merged. */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
        tcg_temp_free_i64(t0);
    }
}
5152
/*
 * Similarly for stores: write LEN bytes from the register at VOFS
 * to the address Rn + IMM, as a little-endian byte stream.
 */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Memory-op count if fully unrolled; see do_ldr for the formula. */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 dirty_addr, clean_addr, t0;

    /* Perform the MTE check on the whole transfer up front. */
    dirty_addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
    clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
    tcg_temp_free_i64(dirty_addr);

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small transfer: unroll the doubleword copies inline. */
        int i;

        t0 = tcg_temp_new_i64();
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        }
        tcg_temp_free_i64(t0);
    } else {
        /* Large transfer: emit a run-time loop over the doublewords. */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        /* Copy the clean address into a local temp, live across the loop. */
        t0 = clean_addr;
        clean_addr = new_tmp_a64_local(s);
        tcg_gen_mov_i64(clean_addr, t0);

        gen_set_label(loop);

        /* Load from cpu_env at byte offset vofs + i. */
        t0 = tcg_temp_new_i64();
        tp = tcg_temp_new_ptr();
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_ld_i64(t0, tp, vofs);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_temp_free_ptr(tp);

        tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
        tcg_gen_addi_i64(clean_addr, clean_addr, 8);
        tcg_temp_free_i64(t0);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2.  */
    if (len_remain) {
        t0 = tcg_temp_new_i64();
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32(len_remain) is the MO_SIZE for a single access. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx,
                                MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6-byte tail: a 4-byte store then a 2-byte store. */
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
            tcg_gen_addi_i64(clean_addr, clean_addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_temp_free_i64(t0);
    }
}
5237
5238static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
5239{
5240    if (sve_access_check(s)) {
5241        int size = vec_full_reg_size(s);
5242        int off = vec_full_reg_offset(s, a->rd);
5243        do_ldr(s, off, size, a->rn, a->imm * size);
5244    }
5245    return true;
5246}
5247
5248static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
5249{
5250    if (sve_access_check(s)) {
5251        int size = pred_full_reg_size(s);
5252        int off = pred_full_reg_offset(s, a->rd);
5253        do_ldr(s, off, size, a->rn, a->imm * size);
5254    }
5255    return true;
5256}
5257
5258static bool trans_STR_zri(DisasContext *s, arg_rri *a)
5259{
5260    if (sve_access_check(s)) {
5261        int size = vec_full_reg_size(s);
5262        int off = vec_full_reg_offset(s, a->rd);
5263        do_str(s, off, size, a->rn, a->imm * size);
5264    }
5265    return true;
5266}
5267
5268static bool trans_STR_pri(DisasContext *s, arg_rri *a)
5269{
5270    if (sve_access_check(s)) {
5271        int size = pred_full_reg_size(s);
5272        int off = pred_full_reg_offset(s, a->rd);
5273        do_str(s, off, size, a->rn, a->imm * size);
5274    }
5275    return true;
5276}
5277
5278/*
5279 *** SVE Memory - Contiguous Load Group
5280 */
5281
/*
 * The memory mode of the dtype: the size and signedness of the
 * element as it exists in memory, before widening to the vector
 * element size.
 */
static const MemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_Q
};

/* Log2 of the memory access size of the dtype. */
#define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
5291
/*
 * The vector element size of dtype: log2 of the destination element
 * size, which may be wider than the memory access size (e.g. ld1bdu:
 * byte in memory, doubleword in the register).
 */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
5299
/*
 * Expand a predicated contiguous load/store: build the combined
 * simd/MTE descriptor, point t_pg at the governing predicate, and
 * invoke the out-of-line helper FN.  MTE_N is a transfer-size
 * multiplier folded into the MTE SIZEM1 field (presumably the
 * number of msz-sized accesses per element -- confirm against the
 * helper's use of the descriptor).
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                       int dtype, uint32_t mte_n, bool is_write,
                       gen_helper_gvec_mem *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 t_desc;
    int desc = 0;

    /*
     * For e.g. LD4, there are not enough arguments to pass all 4
     * registers as pointers, so encode the regno into the data field.
     * For consistency, do this even for LD1.
     */
    if (s->mte_active[0]) {
        int msz = dtype_msz(dtype);

        /* MTE enabled: the helper performs the tag check itself. */
        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (mte_n << msz) - 1);
        /* Leave room for the simd_desc in the low bits. */
        desc <<= SVE_MTEDESC_SHIFT;
    } else {
        /* No MTE: strip any tag bits from the address up front. */
        addr = clean_data_tbi(s, addr);
    }

    desc = simd_desc(vsz, vsz, zt | desc);
    t_desc = tcg_const_i32(desc);
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fn(cpu_env, t_pg, addr, t_desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}
5337
/*
 * Helpers for contiguous predicated LD1..LD4, indexed by
 * [mte active][big-endian][dtype][nreg].  NULL entries mark
 * dtype/nreg combinations with no instruction encoding (only
 * same-size dtypes have the multi-register forms).
 */
static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
    { /* mte inactive, little-endian */
      { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
          gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
        { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
          gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
        { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
          gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

      /* mte inactive, big-endian */
      { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
          gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
        { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
          gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
        { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
          gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },

    { /* mte active, little-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_le_r_mte,
          gen_helper_sve_ld2hh_le_r_mte,
          gen_helper_sve_ld3hh_le_r_mte,
          gen_helper_sve_ld4hh_le_r_mte },
        { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_le_r_mte,
          gen_helper_sve_ld2ss_le_r_mte,
          gen_helper_sve_ld3ss_le_r_mte,
          gen_helper_sve_ld4ss_le_r_mte },
        { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_le_r_mte,
          gen_helper_sve_ld2dd_le_r_mte,
          gen_helper_sve_ld3dd_le_r_mte,
          gen_helper_sve_ld4dd_le_r_mte } },

      /* mte active, big-endian */
      { { gen_helper_sve_ld1bb_r_mte,
          gen_helper_sve_ld2bb_r_mte,
          gen_helper_sve_ld3bb_r_mte,
          gen_helper_sve_ld4bb_r_mte },
        { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_be_r_mte,
          gen_helper_sve_ld2hh_be_r_mte,
          gen_helper_sve_ld3hh_be_r_mte,
          gen_helper_sve_ld4hh_be_r_mte },
        { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_be_r_mte,
          gen_helper_sve_ld2ss_be_r_mte,
          gen_helper_sve_ld3ss_be_r_mte,
          gen_helper_sve_ld4ss_be_r_mte },
        { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_be_r_mte,
          gen_helper_sve_ld2dd_be_r_mte,
          gen_helper_sve_ld3dd_be_r_mte,
          gen_helper_sve_ld4dd_be_r_mte } } },
};
5456
5457static void do_ld_zpa(DisasContext *s, int zt, int pg,
5458                      TCGv_i64 addr, int dtype, int nreg)
5459{
5460    gen_helper_gvec_mem *fn
5461        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
5462
5463    /*
5464     * While there are holes in the table, they are not
5465     * accessible via the instruction encoding.
5466     */
5467    assert(fn != NULL);
5468    do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
5469}
5470
5471static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
5472{
5473    if (a->rm == 31) {
5474        return false;
5475    }
5476    if (sve_access_check(s)) {
5477        TCGv_i64 addr = new_tmp_a64(s);
5478        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5479        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5480        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5481    }
5482    return true;
5483}
5484
5485static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
5486{
5487    if (sve_access_check(s)) {
5488        int vsz = vec_full_reg_size(s);
5489        int elements = vsz >> dtype_esz[a->dtype];
5490        TCGv_i64 addr = new_tmp_a64(s);
5491
5492        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5493                         (a->imm * elements * (a->nreg + 1))
5494                         << dtype_msz(a->dtype));
5495        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
5496    }
5497    return true;
5498}
5499
/*
 * LDFF1 (first-fault, scalar plus scalar).  Helpers are indexed by
 * [mte active][big-endian][dtype]; every dtype has a single-register
 * form, so there are no NULL holes here.
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_le_r,
            gen_helper_sve_ldff1hh_le_r,
            gen_helper_sve_ldff1hsu_le_r,
            gen_helper_sve_ldff1hdu_le_r,

            gen_helper_sve_ldff1hds_le_r,
            gen_helper_sve_ldff1hss_le_r,
            gen_helper_sve_ldff1ss_le_r,
            gen_helper_sve_ldff1sdu_le_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldff1bb_r,
            gen_helper_sve_ldff1bhu_r,
            gen_helper_sve_ldff1bsu_r,
            gen_helper_sve_ldff1bdu_r,

            gen_helper_sve_ldff1sds_be_r,
            gen_helper_sve_ldff1hh_be_r,
            gen_helper_sve_ldff1hsu_be_r,
            gen_helper_sve_ldff1hdu_be_r,

            gen_helper_sve_ldff1hds_be_r,
            gen_helper_sve_ldff1hss_be_r,
            gen_helper_sve_ldff1ss_be_r,
            gen_helper_sve_ldff1sdu_be_r,

            gen_helper_sve_ldff1bds_r,
            gen_helper_sve_ldff1bss_r,
            gen_helper_sve_ldff1bhs_r,
            gen_helper_sve_ldff1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_le_r_mte,
            gen_helper_sve_ldff1hh_le_r_mte,
            gen_helper_sve_ldff1hsu_le_r_mte,
            gen_helper_sve_ldff1hdu_le_r_mte,

            gen_helper_sve_ldff1hds_le_r_mte,
            gen_helper_sve_ldff1hss_le_r_mte,
            gen_helper_sve_ldff1ss_le_r_mte,
            gen_helper_sve_ldff1sdu_le_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldff1bb_r_mte,
            gen_helper_sve_ldff1bhu_r_mte,
            gen_helper_sve_ldff1bsu_r_mte,
            gen_helper_sve_ldff1bdu_r_mte,

            gen_helper_sve_ldff1sds_be_r_mte,
            gen_helper_sve_ldff1hh_be_r_mte,
            gen_helper_sve_ldff1hsu_be_r_mte,
            gen_helper_sve_ldff1hdu_be_r_mte,

            gen_helper_sve_ldff1hds_be_r_mte,
            gen_helper_sve_ldff1hss_be_r_mte,
            gen_helper_sve_ldff1ss_be_r_mte,
            gen_helper_sve_ldff1sdu_be_r_mte,

            gen_helper_sve_ldff1bds_r_mte,
            gen_helper_sve_ldff1bss_r_mte,
            gen_helper_sve_ldff1bhs_r_mte,
            gen_helper_sve_ldff1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        /* Single register: mte_n == 1. */
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
5597
/*
 * LDNF1 (non-fault, scalar plus immediate).  Helpers are indexed by
 * [mte active][big-endian][dtype], as for LDFF1.
 */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][2][16] = {
        { /* mte inactive, little-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_le_r,
            gen_helper_sve_ldnf1hh_le_r,
            gen_helper_sve_ldnf1hsu_le_r,
            gen_helper_sve_ldnf1hdu_le_r,

            gen_helper_sve_ldnf1hds_le_r,
            gen_helper_sve_ldnf1hss_le_r,
            gen_helper_sve_ldnf1ss_le_r,
            gen_helper_sve_ldnf1sdu_le_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_le_r },

          /* mte inactive, big-endian */
          { gen_helper_sve_ldnf1bb_r,
            gen_helper_sve_ldnf1bhu_r,
            gen_helper_sve_ldnf1bsu_r,
            gen_helper_sve_ldnf1bdu_r,

            gen_helper_sve_ldnf1sds_be_r,
            gen_helper_sve_ldnf1hh_be_r,
            gen_helper_sve_ldnf1hsu_be_r,
            gen_helper_sve_ldnf1hdu_be_r,

            gen_helper_sve_ldnf1hds_be_r,
            gen_helper_sve_ldnf1hss_be_r,
            gen_helper_sve_ldnf1ss_be_r,
            gen_helper_sve_ldnf1sdu_be_r,

            gen_helper_sve_ldnf1bds_r,
            gen_helper_sve_ldnf1bss_r,
            gen_helper_sve_ldnf1bhs_r,
            gen_helper_sve_ldnf1dd_be_r } },

        { /* mte active, little-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_le_r_mte,
            gen_helper_sve_ldnf1hh_le_r_mte,
            gen_helper_sve_ldnf1hsu_le_r_mte,
            gen_helper_sve_ldnf1hdu_le_r_mte,

            gen_helper_sve_ldnf1hds_le_r_mte,
            gen_helper_sve_ldnf1hss_le_r_mte,
            gen_helper_sve_ldnf1ss_le_r_mte,
            gen_helper_sve_ldnf1sdu_le_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_le_r_mte },

          /* mte active, big-endian */
          { gen_helper_sve_ldnf1bb_r_mte,
            gen_helper_sve_ldnf1bhu_r_mte,
            gen_helper_sve_ldnf1bsu_r_mte,
            gen_helper_sve_ldnf1bdu_r_mte,

            gen_helper_sve_ldnf1sds_be_r_mte,
            gen_helper_sve_ldnf1hh_be_r_mte,
            gen_helper_sve_ldnf1hsu_be_r_mte,
            gen_helper_sve_ldnf1hdu_be_r_mte,

            gen_helper_sve_ldnf1hds_be_r_mte,
            gen_helper_sve_ldnf1hss_be_r_mte,
            gen_helper_sve_ldnf1ss_be_r_mte,
            gen_helper_sve_ldnf1sdu_be_r_mte,

            gen_helper_sve_ldnf1bds_r_mte,
            gen_helper_sve_ldnf1bss_r_mte,
            gen_helper_sve_ldnf1bhs_r_mte,
            gen_helper_sve_ldnf1dd_be_r_mte } },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        /* Single register: mte_n == 1. */
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
5698
5699static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
5700{
5701    unsigned vsz = vec_full_reg_size(s);
5702    TCGv_ptr t_pg;
5703    int poff;
5704
5705    /* Load the first quadword using the normal predicated load helpers.  */
5706    poff = pred_full_reg_offset(s, pg);
5707    if (vsz > 16) {
5708        /*
5709         * Zero-extend the first 16 bits of the predicate into a temporary.
5710         * This avoids triggering an assert making sure we don't have bits
5711         * set within a predicate beyond VQ, but we have lowered VQ to 1
5712         * for this load operation.
5713         */
5714        TCGv_i64 tmp = tcg_temp_new_i64();
5715#ifdef HOST_WORDS_BIGENDIAN
5716        poff += 6;
5717#endif
5718        tcg_gen_ld16u_i64(tmp, cpu_env, poff);
5719
5720        poff = offsetof(CPUARMState, vfp.preg_tmp);
5721        tcg_gen_st_i64(tmp, cpu_env, poff);
5722        tcg_temp_free_i64(tmp);
5723    }
5724
5725    t_pg = tcg_temp_new_ptr();
5726    tcg_gen_addi_ptr(t_pg, cpu_env, poff);
5727
5728    gen_helper_gvec_mem *fn
5729        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
5730    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(16, 16, zt)));
5731
5732    tcg_temp_free_ptr(t_pg);
5733
5734    /* Replicate that first quadword.  */
5735    if (vsz > 16) {
5736        int doff = vec_full_reg_offset(s, zt);
5737        tcg_gen_gvec_dup_mem(4, doff + 16, doff, vsz - 16, vsz - 16);
5738    }
5739}
5740
5741static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
5742{
5743    if (a->rm == 31) {
5744        return false;
5745    }
5746    if (sve_access_check(s)) {
5747        int msz = dtype_msz(a->dtype);
5748        TCGv_i64 addr = new_tmp_a64(s);
5749        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
5750        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5751        do_ldrq(s, a->rd, a->pg, addr, a->dtype);
5752    }
5753    return true;
5754}
5755
5756static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
5757{
5758    if (sve_access_check(s)) {
5759        TCGv_i64 addr = new_tmp_a64(s);
5760        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
5761        do_ldrq(s, a->rd, a->pg, addr, a->dtype);
5762    }
5763    return true;
5764}
5765
/*
 * Load one 32-byte octaword via the ordinary predicated load path, then
 * replicate it across the vector in 32-byte units (LD1RO), zeroing any
 * final partial unit.
 */
static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned vsz_r32;
    TCGv_ptr t_pg;
    int poff, doff;

    if (vsz < 32) {
        /*
         * Note that this UNDEFINED check comes after CheckSVEEnabled()
         * in the ARM pseudocode, which is the sve_access_check() done
         * in our caller.  We should not now return false from the caller.
         */
        unallocated_encoding(s);
        return;
    }

    /* Load the first octaword using the normal predicated load helpers.  */

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 32) {
        /*
         * Zero-extend the first 32 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 2
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#ifdef HOST_WORDS_BIGENDIAN
        /* The low 32 bits of the 64-bit predicate word are at offset 4.  */
        poff += 4;
#endif
        tcg_gen_ld32u_i64(tmp, cpu_env, poff);

        /* Park the truncated predicate in the preg_tmp scratch slot.  */
        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    /* Perform the load with a fixed 32-byte (VQ=2) descriptor.  */
    gen_helper_gvec_mem *fn
        = ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
    fn(cpu_env, t_pg, addr, tcg_constant_i32(simd_desc(32, 32, zt)));

    tcg_temp_free_ptr(t_pg);

    /*
     * Replicate that first octaword.
     * The replication happens in units of 32; if the full vector size
     * is not a multiple of 32, the final bits are zeroed.
     */
    doff = vec_full_reg_offset(s, zt);
    vsz_r32 = QEMU_ALIGN_DOWN(vsz, 32);
    if (vsz >= 64) {
        tcg_gen_gvec_dup_mem(5, doff + 32, doff, vsz_r32 - 32, vsz_r32 - 32);
    }
    /* Zero the remaining (vsz % 32) bytes, if any.  */
    vsz -= vsz_r32;
    if (vsz) {
        tcg_gen_gvec_dup_imm(MO_64, doff + vsz_r32, vsz, vsz, 0);
    }
}
5828
5829static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
5830{
5831    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5832        return false;
5833    }
5834    if (a->rm == 31) {
5835        return false;
5836    }
5837    if (sve_access_check(s)) {
5838        TCGv_i64 addr = new_tmp_a64(s);
5839        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
5840        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5841        do_ldro(s, a->rd, a->pg, addr, a->dtype);
5842    }
5843    return true;
5844}
5845
5846static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
5847{
5848    if (!dc_isar_feature(aa64_sve_f64mm, s)) {
5849        return false;
5850    }
5851    if (sve_access_check(s)) {
5852        TCGv_i64 addr = new_tmp_a64(s);
5853        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
5854        do_ldro(s, a->rd, a->pg, addr, a->dtype);
5855    }
5856    return true;
5857}
5858
/*
 * Load and broadcast element (LD1R).  One element is loaded from
 * rn + (imm << msz) and broadcast to every element of zd; inactive
 * elements are then zeroed.  If the governing predicate has no bits
 * set, the load is skipped entirely.
 */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over;
    TCGv_i64 temp, clean_addr;

    if (!sve_access_check(s)) {
        return true;
    }

    over = gen_new_label();

    /* If the guarding predicate has no bits set, no load occurs.  */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Wider predicate: branch past the load when find_last_active
         * reports no active element (negative result).
         */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data.  */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    clean_addr = gen_mte_check1(s, temp, false, true, msz);

    tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
                        finalize_memop(s, dtype_mop[a->dtype]));

    /* Broadcast to *all* elements.  */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements.  */
    gen_set_label(over);
    return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
}
5910
/*
 * Emit a contiguous predicated store (ST1..ST4), selecting the helper
 * by MTE state, endianness, memory element size and either the register
 * element size (single-register) or the register count (multi-register).
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    /* ST1 helpers, indexed by [mte][be][msz][esz]; NULL where msz > esz.  */
    static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
        { { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_le_r,
              gen_helper_sve_st1hs_le_r,
              gen_helper_sve_st1hd_le_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r,
              gen_helper_sve_st1sd_le_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r } },
          { { gen_helper_sve_st1bb_r,
              gen_helper_sve_st1bh_r,
              gen_helper_sve_st1bs_r,
              gen_helper_sve_st1bd_r },
            { NULL,
              gen_helper_sve_st1hh_be_r,
              gen_helper_sve_st1hs_be_r,
              gen_helper_sve_st1hd_be_r },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r,
              gen_helper_sve_st1sd_be_r },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r } } },

        { { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_le_r_mte,
              gen_helper_sve_st1hs_le_r_mte,
              gen_helper_sve_st1hd_le_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_le_r_mte,
              gen_helper_sve_st1sd_le_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_le_r_mte } },
          { { gen_helper_sve_st1bb_r_mte,
              gen_helper_sve_st1bh_r_mte,
              gen_helper_sve_st1bs_r_mte,
              gen_helper_sve_st1bd_r_mte },
            { NULL,
              gen_helper_sve_st1hh_be_r_mte,
              gen_helper_sve_st1hs_be_r_mte,
              gen_helper_sve_st1hd_be_r_mte },
            { NULL, NULL,
              gen_helper_sve_st1ss_be_r_mte,
              gen_helper_sve_st1sd_be_r_mte },
            { NULL, NULL, NULL,
              gen_helper_sve_st1dd_be_r_mte } } },
    };
    /* ST2/ST3/ST4 helpers, indexed by [mte][be][nreg - 1][msz].  */
    static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
        { { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_le_r,
              gen_helper_sve_st2ss_le_r,
              gen_helper_sve_st2dd_le_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_le_r,
              gen_helper_sve_st3ss_le_r,
              gen_helper_sve_st3dd_le_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_le_r,
              gen_helper_sve_st4ss_le_r,
              gen_helper_sve_st4dd_le_r } },
          { { gen_helper_sve_st2bb_r,
              gen_helper_sve_st2hh_be_r,
              gen_helper_sve_st2ss_be_r,
              gen_helper_sve_st2dd_be_r },
            { gen_helper_sve_st3bb_r,
              gen_helper_sve_st3hh_be_r,
              gen_helper_sve_st3ss_be_r,
              gen_helper_sve_st3dd_be_r },
            { gen_helper_sve_st4bb_r,
              gen_helper_sve_st4hh_be_r,
              gen_helper_sve_st4ss_be_r,
              gen_helper_sve_st4dd_be_r } } },
        { { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_le_r_mte,
              gen_helper_sve_st2ss_le_r_mte,
              gen_helper_sve_st2dd_le_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_le_r_mte,
              gen_helper_sve_st3ss_le_r_mte,
              gen_helper_sve_st3dd_le_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_le_r_mte,
              gen_helper_sve_st4ss_le_r_mte,
              gen_helper_sve_st4dd_le_r_mte } },
          { { gen_helper_sve_st2bb_r_mte,
              gen_helper_sve_st2hh_be_r_mte,
              gen_helper_sve_st2ss_be_r_mte,
              gen_helper_sve_st2dd_be_r_mte },
            { gen_helper_sve_st3bb_r_mte,
              gen_helper_sve_st3hh_be_r_mte,
              gen_helper_sve_st3ss_be_r_mte,
              gen_helper_sve_st3dd_be_r_mte },
            { gen_helper_sve_st4bb_r_mte,
              gen_helper_sve_st4hh_be_r_mte,
              gen_helper_sve_st4ss_be_r_mte,
              gen_helper_sve_st4dd_be_r_mte } } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[s->mte_active[0]][be][msz][esz];
        nreg = 1;
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
}
6034
6035static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
6036{
6037    if (a->rm == 31 || a->msz > a->esz) {
6038        return false;
6039    }
6040    if (sve_access_check(s)) {
6041        TCGv_i64 addr = new_tmp_a64(s);
6042        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
6043        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
6044        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
6045    }
6046    return true;
6047}
6048
6049static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
6050{
6051    if (a->msz > a->esz) {
6052        return false;
6053    }
6054    if (sve_access_check(s)) {
6055        int vsz = vec_full_reg_size(s);
6056        int elements = vsz >> a->esz;
6057        TCGv_i64 addr = new_tmp_a64(s);
6058
6059        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
6060                         (a->imm * elements * (a->nreg + 1)) << a->msz);
6061        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
6062    }
6063    return true;
6064}
6065
6066/*
6067 *** SVE gather loads / scatter stores
6068 */
6069
6070static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
6071                       int scale, TCGv_i64 scalar, int msz, bool is_write,
6072                       gen_helper_gvec_mem_scatter *fn)
6073{
6074    unsigned vsz = vec_full_reg_size(s);
6075    TCGv_ptr t_zm = tcg_temp_new_ptr();
6076    TCGv_ptr t_pg = tcg_temp_new_ptr();
6077    TCGv_ptr t_zt = tcg_temp_new_ptr();
6078    TCGv_i32 t_desc;
6079    int desc = 0;
6080
6081    if (s->mte_active[0]) {
6082        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
6083        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
6084        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
6085        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
6086        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << msz) - 1);
6087        desc <<= SVE_MTEDESC_SHIFT;
6088    }
6089    desc = simd_desc(vsz, vsz, desc | scale);
6090    t_desc = tcg_const_i32(desc);
6091
6092    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
6093    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
6094    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
6095    fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
6096
6097    tcg_temp_free_ptr(t_zt);
6098    tcg_temp_free_ptr(t_zm);
6099    tcg_temp_free_ptr(t_pg);
6100    tcg_temp_free_i32(t_desc);
6101}
6102
/*
 * Gather-load helpers for 32-bit elements.
 * Indexed by [mte][be][ff][xs][u][msz]:
 *   mte - MTE inactive (0) / active (1)
 *   be  - little (0) / big (1) endian data
 *   ff  - normal (0) / first-fault (1)
 *   xs  - selects the *_zsu (0) vs *_zss (1) offset-form helpers
 *   u   - sign-extending *s (0) vs zero-extending *u (1) loads
 *   msz - memory element size: byte, half, word
 * NULL where the combination is not provided (sign-extended word
 * into a word element).
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn32[2][2][2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            /* Non-fault */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_le_zsu,
                  gen_helper_sve_ldss_le_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_le_zss,
                  gen_helper_sve_ldss_le_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_le_zsu,
                  gen_helper_sve_ldffss_le_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_le_zss,
                  gen_helper_sve_ldffss_le_zss, } } } },

        { /* Big-endian */
            /* Non-fault */
            { { { gen_helper_sve_ldbss_zsu,
                  gen_helper_sve_ldhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu,
                  gen_helper_sve_ldhsu_be_zsu,
                  gen_helper_sve_ldss_be_zsu, } },
              { { gen_helper_sve_ldbss_zss,
                  gen_helper_sve_ldhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldbsu_zss,
                  gen_helper_sve_ldhsu_be_zss,
                  gen_helper_sve_ldss_be_zss, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu,
                  gen_helper_sve_ldffhss_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu,
                  gen_helper_sve_ldffhsu_be_zsu,
                  gen_helper_sve_ldffss_be_zsu, } },
              { { gen_helper_sve_ldffbss_zss,
                  gen_helper_sve_ldffhss_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss,
                  gen_helper_sve_ldffhsu_be_zss,
                  gen_helper_sve_ldffss_be_zss, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            /* Non-fault */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_le_zsu_mte,
                  gen_helper_sve_ldss_le_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_le_zss_mte,
                  gen_helper_sve_ldss_le_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_le_zsu_mte,
                  gen_helper_sve_ldffss_le_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_le_zss_mte,
                  gen_helper_sve_ldffss_le_zss_mte, } } } },

        { /* Big-endian */
            /* Non-fault */
            { { { gen_helper_sve_ldbss_zsu_mte,
                  gen_helper_sve_ldhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zsu_mte,
                  gen_helper_sve_ldhsu_be_zsu_mte,
                  gen_helper_sve_ldss_be_zsu_mte, } },
              { { gen_helper_sve_ldbss_zss_mte,
                  gen_helper_sve_ldhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbsu_zss_mte,
                  gen_helper_sve_ldhsu_be_zss_mte,
                  gen_helper_sve_ldss_be_zss_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbss_zsu_mte,
                  gen_helper_sve_ldffhss_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zsu_mte,
                  gen_helper_sve_ldffhsu_be_zsu_mte,
                  gen_helper_sve_ldffss_be_zsu_mte, } },
              { { gen_helper_sve_ldffbss_zss_mte,
                  gen_helper_sve_ldffhss_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbsu_zss_mte,
                  gen_helper_sve_ldffhsu_be_zss_mte,
                  gen_helper_sve_ldffss_be_zss_mte, } } } } },
};
6219
/*
 * Gather-load helpers for 64-bit elements, indexed like gather_load_fn32
 * by [mte][be][ff][xs][u][msz].  Note that we overload xs=2 to indicate
 * 64-bit offset (*_zd helpers); xs=0/1 select the *_zsu/*_zss forms.
 * NULL where the combination is not provided (sign-extended doubleword
 * into a doubleword element).
 */
static gen_helper_gvec_mem_scatter * const
gather_load_fn64[2][2][2][3][2][4] = {
    { /* MTE Inactive */
        { /* Little-endian */
            /* Non-fault */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_le_zsu,
                  gen_helper_sve_ldsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_le_zsu,
                  gen_helper_sve_ldsdu_le_zsu,
                  gen_helper_sve_lddd_le_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_le_zss,
                  gen_helper_sve_ldsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_le_zss,
                  gen_helper_sve_ldsdu_le_zss,
                  gen_helper_sve_lddd_le_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_le_zd,
                  gen_helper_sve_ldsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_le_zd,
                  gen_helper_sve_ldsdu_le_zd,
                  gen_helper_sve_lddd_le_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_le_zsu,
                  gen_helper_sve_ldffsds_le_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_le_zsu,
                  gen_helper_sve_ldffsdu_le_zsu,
                  gen_helper_sve_ldffdd_le_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_le_zss,
                  gen_helper_sve_ldffsds_le_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_le_zss,
                  gen_helper_sve_ldffsdu_le_zss,
                  gen_helper_sve_ldffdd_le_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_le_zd,
                  gen_helper_sve_ldffsds_le_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_le_zd,
                  gen_helper_sve_ldffsdu_le_zd,
                  gen_helper_sve_ldffdd_le_zd, } } } },
        { /* Big-endian */
            /* Non-fault */
            { { { gen_helper_sve_ldbds_zsu,
                  gen_helper_sve_ldhds_be_zsu,
                  gen_helper_sve_ldsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu,
                  gen_helper_sve_ldhdu_be_zsu,
                  gen_helper_sve_ldsdu_be_zsu,
                  gen_helper_sve_lddd_be_zsu, } },
              { { gen_helper_sve_ldbds_zss,
                  gen_helper_sve_ldhds_be_zss,
                  gen_helper_sve_ldsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldbdu_zss,
                  gen_helper_sve_ldhdu_be_zss,
                  gen_helper_sve_ldsdu_be_zss,
                  gen_helper_sve_lddd_be_zss, } },
              { { gen_helper_sve_ldbds_zd,
                  gen_helper_sve_ldhds_be_zd,
                  gen_helper_sve_ldsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldbdu_zd,
                  gen_helper_sve_ldhdu_be_zd,
                  gen_helper_sve_ldsdu_be_zd,
                  gen_helper_sve_lddd_be_zd, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu,
                  gen_helper_sve_ldffhds_be_zsu,
                  gen_helper_sve_ldffsds_be_zsu,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu,
                  gen_helper_sve_ldffhdu_be_zsu,
                  gen_helper_sve_ldffsdu_be_zsu,
                  gen_helper_sve_ldffdd_be_zsu, } },
              { { gen_helper_sve_ldffbds_zss,
                  gen_helper_sve_ldffhds_be_zss,
                  gen_helper_sve_ldffsds_be_zss,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss,
                  gen_helper_sve_ldffhdu_be_zss,
                  gen_helper_sve_ldffsdu_be_zss,
                  gen_helper_sve_ldffdd_be_zss, } },
              { { gen_helper_sve_ldffbds_zd,
                  gen_helper_sve_ldffhds_be_zd,
                  gen_helper_sve_ldffsds_be_zd,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd,
                  gen_helper_sve_ldffhdu_be_zd,
                  gen_helper_sve_ldffsdu_be_zd,
                  gen_helper_sve_ldffdd_be_zd, } } } } },
    { /* MTE Active */
        { /* Little-endian */
            /* Non-fault */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_le_zsu_mte,
                  gen_helper_sve_ldsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_le_zsu_mte,
                  gen_helper_sve_ldsdu_le_zsu_mte,
                  gen_helper_sve_lddd_le_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_le_zss_mte,
                  gen_helper_sve_ldsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_le_zss_mte,
                  gen_helper_sve_ldsdu_le_zss_mte,
                  gen_helper_sve_lddd_le_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_le_zd_mte,
                  gen_helper_sve_ldsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_le_zd_mte,
                  gen_helper_sve_ldsdu_le_zd_mte,
                  gen_helper_sve_lddd_le_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_le_zsu_mte,
                  gen_helper_sve_ldffsds_le_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_le_zsu_mte,
                  gen_helper_sve_ldffsdu_le_zsu_mte,
                  gen_helper_sve_ldffdd_le_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_le_zss_mte,
                  gen_helper_sve_ldffsds_le_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_le_zss_mte,
                  gen_helper_sve_ldffsdu_le_zss_mte,
                  gen_helper_sve_ldffdd_le_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_le_zd_mte,
                  gen_helper_sve_ldffsds_le_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_le_zd_mte,
                  gen_helper_sve_ldffsdu_le_zd_mte,
                  gen_helper_sve_ldffdd_le_zd_mte, } } } },
        { /* Big-endian */
            /* Non-fault */
            { { { gen_helper_sve_ldbds_zsu_mte,
                  gen_helper_sve_ldhds_be_zsu_mte,
                  gen_helper_sve_ldsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zsu_mte,
                  gen_helper_sve_ldhdu_be_zsu_mte,
                  gen_helper_sve_ldsdu_be_zsu_mte,
                  gen_helper_sve_lddd_be_zsu_mte, } },
              { { gen_helper_sve_ldbds_zss_mte,
                  gen_helper_sve_ldhds_be_zss_mte,
                  gen_helper_sve_ldsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zss_mte,
                  gen_helper_sve_ldhdu_be_zss_mte,
                  gen_helper_sve_ldsdu_be_zss_mte,
                  gen_helper_sve_lddd_be_zss_mte, } },
              { { gen_helper_sve_ldbds_zd_mte,
                  gen_helper_sve_ldhds_be_zd_mte,
                  gen_helper_sve_ldsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldbdu_zd_mte,
                  gen_helper_sve_ldhdu_be_zd_mte,
                  gen_helper_sve_ldsdu_be_zd_mte,
                  gen_helper_sve_lddd_be_zd_mte, } } },

            /* First-fault */
            { { { gen_helper_sve_ldffbds_zsu_mte,
                  gen_helper_sve_ldffhds_be_zsu_mte,
                  gen_helper_sve_ldffsds_be_zsu_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zsu_mte,
                  gen_helper_sve_ldffhdu_be_zsu_mte,
                  gen_helper_sve_ldffsdu_be_zsu_mte,
                  gen_helper_sve_ldffdd_be_zsu_mte, } },
              { { gen_helper_sve_ldffbds_zss_mte,
                  gen_helper_sve_ldffhds_be_zss_mte,
                  gen_helper_sve_ldffsds_be_zss_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zss_mte,
                  gen_helper_sve_ldffhdu_be_zss_mte,
                  gen_helper_sve_ldffsdu_be_zss_mte,
                  gen_helper_sve_ldffdd_be_zss_mte, } },
              { { gen_helper_sve_ldffbds_zd_mte,
                  gen_helper_sve_ldffhds_be_zd_mte,
                  gen_helper_sve_ldffsds_be_zd_mte,
                  NULL, },
                { gen_helper_sve_ldffbdu_zd_mte,
                  gen_helper_sve_ldffhdu_be_zd_mte,
                  gen_helper_sve_ldffsdu_be_zd_mte,
                  gen_helper_sve_ldffdd_be_zd_mte, } } } } },
};
6430
6431static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
6432{
6433    gen_helper_gvec_mem_scatter *fn = NULL;
6434    bool be = s->be_data == MO_BE;
6435    bool mte = s->mte_active[0];
6436
6437    if (!sve_access_check(s)) {
6438        return true;
6439    }
6440
6441    switch (a->esz) {
6442    case MO_32:
6443        fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
6444        break;
6445    case MO_64:
6446        fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
6447        break;
6448    }
6449    assert(fn != NULL);
6450
6451    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
6452               cpu_reg_sp(s, a->rn), a->msz, false, fn);
6453    return true;
6454}
6455
6456static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
6457{
6458    gen_helper_gvec_mem_scatter *fn = NULL;
6459    bool be = s->be_data == MO_BE;
6460    bool mte = s->mte_active[0];
6461    TCGv_i64 imm;
6462
6463    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
6464        return false;
6465    }
6466    if (!sve_access_check(s)) {
6467        return true;
6468    }
6469
6470    switch (a->esz) {
6471    case MO_32:
6472        fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
6473        break;
6474    case MO_64:
6475        fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
6476        break;
6477    }
6478    assert(fn != NULL);
6479
6480    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
6481     * by loading the immediate into the scalar parameter.
6482     */
6483    imm = tcg_const_i64(a->imm << a->msz);
6484    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn);
6485    tcg_temp_free_i64(imm);
6486    return true;
6487}
6488
/*
 * LDNT1 (gather, non-temporal): SVE2 only.  QEMU gives the non-temporal
 * hint no special treatment, so decode reuses the ordinary gather path.
 */
static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return trans_LD1_zprz(s, a);
}
6496
/*
 * Scatter-store helpers for 32-bit vector elements.
 * Indexed by [mte][be][xs][msz]; xs selects unsigned (zsu) versus
 * signed (zss) extension of the 32-bit index vector, msz the memory
 * element size (byte stores have no endian variants).
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
    { /* MTE Inactive */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_le_zsu,
              gen_helper_sve_stss_le_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_le_zss,
              gen_helper_sve_stss_le_zss, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu,
              gen_helper_sve_sths_be_zsu,
              gen_helper_sve_stss_be_zsu, },
            { gen_helper_sve_stbs_zss,
              gen_helper_sve_sths_be_zss,
              gen_helper_sve_stss_be_zss, } } },
    { /* MTE Active */
        { /* Little-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_le_zsu_mte,
              gen_helper_sve_stss_le_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_le_zss_mte,
              gen_helper_sve_stss_le_zss_mte, } },
        { /* Big-endian */
            { gen_helper_sve_stbs_zsu_mte,
              gen_helper_sve_sths_be_zsu_mte,
              gen_helper_sve_stss_be_zsu_mte, },
            { gen_helper_sve_stbs_zss_mte,
              gen_helper_sve_sths_be_zss_mte,
              gen_helper_sve_stss_be_zss_mte, } } },
};
6530
/* Note that we overload xs=2 to indicate 64-bit offset.  */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
    { /* MTE Inactive */
         { /* Little-endian */
             { gen_helper_sve_stbd_zsu,
               gen_helper_sve_sthd_le_zsu,
               gen_helper_sve_stsd_le_zsu,
               gen_helper_sve_stdd_le_zsu, },
             { gen_helper_sve_stbd_zss,
               gen_helper_sve_sthd_le_zss,
               gen_helper_sve_stsd_le_zss,
               gen_helper_sve_stdd_le_zss, },
             { gen_helper_sve_stbd_zd,
               gen_helper_sve_sthd_le_zd,
               gen_helper_sve_stsd_le_zd,
               gen_helper_sve_stdd_le_zd, } },
         { /* Big-endian */
             { gen_helper_sve_stbd_zsu,
               gen_helper_sve_sthd_be_zsu,
               gen_helper_sve_stsd_be_zsu,
               gen_helper_sve_stdd_be_zsu, },
             { gen_helper_sve_stbd_zss,
               gen_helper_sve_sthd_be_zss,
               gen_helper_sve_stsd_be_zss,
               gen_helper_sve_stdd_be_zss, },
             { gen_helper_sve_stbd_zd,
               gen_helper_sve_sthd_be_zd,
               gen_helper_sve_stsd_be_zd,
               gen_helper_sve_stdd_be_zd, } } },
    { /* MTE Active */
         { /* Little-endian */
             { gen_helper_sve_stbd_zsu_mte,
               gen_helper_sve_sthd_le_zsu_mte,
               gen_helper_sve_stsd_le_zsu_mte,
               gen_helper_sve_stdd_le_zsu_mte, },
             { gen_helper_sve_stbd_zss_mte,
               gen_helper_sve_sthd_le_zss_mte,
               gen_helper_sve_stsd_le_zss_mte,
               gen_helper_sve_stdd_le_zss_mte, },
             { gen_helper_sve_stbd_zd_mte,
               gen_helper_sve_sthd_le_zd_mte,
               gen_helper_sve_stsd_le_zd_mte,
               gen_helper_sve_stdd_le_zd_mte, } },
         { /* Big-endian */
             { gen_helper_sve_stbd_zsu_mte,
               gen_helper_sve_sthd_be_zsu_mte,
               gen_helper_sve_stsd_be_zsu_mte,
               gen_helper_sve_stdd_be_zsu_mte, },
             { gen_helper_sve_stbd_zss_mte,
               gen_helper_sve_sthd_be_zss_mte,
               gen_helper_sve_stsd_be_zss_mte,
               gen_helper_sve_stdd_be_zss_mte, },
             { gen_helper_sve_stbd_zd_mte,
               gen_helper_sve_sthd_be_zd_mte,
               gen_helper_sve_stsd_be_zd_mte,
               gen_helper_sve_stdd_be_zd_mte, } } },
};
6588
6589static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
6590{
6591    gen_helper_gvec_mem_scatter *fn;
6592    bool be = s->be_data == MO_BE;
6593    bool mte = s->mte_active[0];
6594
6595    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
6596        return false;
6597    }
6598    if (!sve_access_check(s)) {
6599        return true;
6600    }
6601    switch (a->esz) {
6602    case MO_32:
6603        fn = scatter_store_fn32[mte][be][a->xs][a->msz];
6604        break;
6605    case MO_64:
6606        fn = scatter_store_fn64[mte][be][a->xs][a->msz];
6607        break;
6608    default:
6609        g_assert_not_reached();
6610    }
6611    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
6612               cpu_reg_sp(s, a->rn), a->msz, true, fn);
6613    return true;
6614}
6615
6616static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
6617{
6618    gen_helper_gvec_mem_scatter *fn = NULL;
6619    bool be = s->be_data == MO_BE;
6620    bool mte = s->mte_active[0];
6621    TCGv_i64 imm;
6622
6623    if (a->esz < a->msz) {
6624        return false;
6625    }
6626    if (!sve_access_check(s)) {
6627        return true;
6628    }
6629
6630    switch (a->esz) {
6631    case MO_32:
6632        fn = scatter_store_fn32[mte][be][0][a->msz];
6633        break;
6634    case MO_64:
6635        fn = scatter_store_fn64[mte][be][2][a->msz];
6636        break;
6637    }
6638    assert(fn != NULL);
6639
6640    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
6641     * by loading the immediate into the scalar parameter.
6642     */
6643    imm = tcg_const_i64(a->imm << a->msz);
6644    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn);
6645    tcg_temp_free_i64(imm);
6646    return true;
6647}
6648
/*
 * STNT1 (scatter, non-temporal): SVE2 only.  QEMU gives the non-temporal
 * hint no special treatment, so decode reuses the ordinary scatter path.
 */
static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    return trans_ST1_zprz(s, a);
}

/*
 * Prefetches
 */

static bool trans_PRF(DisasContext *s, arg_PRF *a)
{
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}

static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
{
    /* Register 31 as the offset is not allowed for this form.  */
    if (a->rm == 31) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}
6677
6678/*
6679 * Move Prefix
6680 *
6681 * TODO: The implementation so far could handle predicated merging movprfx.
6682 * The helper functions as written take an extra source register to
6683 * use in the operation, but the result is only written when predication
6684 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
6685 * to allow the final write back to the destination to be unconditional.
6686 * For predicated zeroing movprfx, we need to rearrange the helpers to
6687 * allow the final write back to zero inactives.
6688 *
6689 * In the meantime, just emit the moves.
6690 */
6691
6692static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
6693{
6694    return do_mov_z(s, a->rd, a->rn);
6695}
6696
6697static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
6698{
6699    if (sve_access_check(s)) {
6700        do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
6701    }
6702    return true;
6703}
6704
6705static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
6706{
6707    return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
6708}
6709
6710/*
6711 * SVE2 Integer Multiply - Unpredicated
6712 */
6713
6714static bool trans_MUL_zzz(DisasContext *s, arg_rrr_esz *a)
6715{
6716    if (!dc_isar_feature(aa64_sve2, s)) {
6717        return false;
6718    }
6719    if (sve_access_check(s)) {
6720        gen_gvec_fn_zzz(s, tcg_gen_gvec_mul, a->esz, a->rd, a->rn, a->rm);
6721    }
6722    return true;
6723}
6724
6725static bool do_sve2_zzz_ool(DisasContext *s, arg_rrr_esz *a,
6726                            gen_helper_gvec_3 *fn)
6727{
6728    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
6729        return false;
6730    }
6731    if (sve_access_check(s)) {
6732        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
6733    }
6734    return true;
6735}
6736
/* SMULH (vectors, unpredicated): signed multiply returning high half.  */
static bool trans_SMULH_zzz(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
        gen_helper_gvec_smulh_s, gen_helper_gvec_smulh_d,
    };
    return do_sve2_zzz_ool(s, a, fns[a->esz]);
}

/* UMULH (vectors, unpredicated): unsigned multiply returning high half.  */
static bool trans_UMULH_zzz(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_umulh_b, gen_helper_gvec_umulh_h,
        gen_helper_gvec_umulh_s, gen_helper_gvec_umulh_d,
    };
    return do_sve2_zzz_ool(s, a, fns[a->esz]);
}

/* PMUL (vectors, unpredicated): polynomial multiply, byte elements only.  */
static bool trans_PMUL_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_zzz_ool(s, a, gen_helper_gvec_pmul_b);
}

/* SQDMULH: signed saturating doubling multiply returning high half.  */
static bool trans_SQDMULH_zzz(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
        gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
    };
    return do_sve2_zzz_ool(s, a, fns[a->esz]);
}

/* SQRDMULH: as SQDMULH, with rounding.  */
static bool trans_SQRDMULH_zzz(DisasContext *s, arg_rrr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
        gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
    };
    return do_sve2_zzz_ool(s, a, fns[a->esz]);
}
6777
6778/*
6779 * SVE2 Integer - Predicated
6780 */
6781
6782static bool do_sve2_zpzz_ool(DisasContext *s, arg_rprr_esz *a,
6783                             gen_helper_gvec_4 *fn)
6784{
6785    if (!dc_isar_feature(aa64_sve2, s)) {
6786        return false;
6787    }
6788    return do_zpzz_ool(s, a, fn);
6789}
6790
6791static bool trans_SADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6792{
6793    static gen_helper_gvec_4 * const fns[3] = {
6794        gen_helper_sve2_sadalp_zpzz_h,
6795        gen_helper_sve2_sadalp_zpzz_s,
6796        gen_helper_sve2_sadalp_zpzz_d,
6797    };
6798    if (a->esz == 0) {
6799        return false;
6800    }
6801    return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6802}
6803
6804static bool trans_UADALP_zpzz(DisasContext *s, arg_rprr_esz *a)
6805{
6806    static gen_helper_gvec_4 * const fns[3] = {
6807        gen_helper_sve2_uadalp_zpzz_h,
6808        gen_helper_sve2_uadalp_zpzz_s,
6809        gen_helper_sve2_uadalp_zpzz_d,
6810    };
6811    if (a->esz == 0) {
6812        return false;
6813    }
6814    return do_sve2_zpzz_ool(s, a, fns[a->esz - 1]);
6815}
6816
6817/*
6818 * SVE2 integer unary operations (predicated)
6819 */
6820
6821static bool do_sve2_zpz_ool(DisasContext *s, arg_rpr_esz *a,
6822                            gen_helper_gvec_3 *fn)
6823{
6824    if (!dc_isar_feature(aa64_sve2, s)) {
6825        return false;
6826    }
6827    return do_zpz_ool(s, a, fn);
6828}
6829
6830static bool trans_URECPE(DisasContext *s, arg_rpr_esz *a)
6831{
6832    if (a->esz != 2) {
6833        return false;
6834    }
6835    return do_sve2_zpz_ool(s, a, gen_helper_sve2_urecpe_s);
6836}
6837
6838static bool trans_URSQRTE(DisasContext *s, arg_rpr_esz *a)
6839{
6840    if (a->esz != 2) {
6841        return false;
6842    }
6843    return do_sve2_zpz_ool(s, a, gen_helper_sve2_ursqrte_s);
6844}
6845
/* SQABS: signed saturating absolute value, predicated.  */
static bool trans_SQABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqabs_b, gen_helper_sve2_sqabs_h,
        gen_helper_sve2_sqabs_s, gen_helper_sve2_sqabs_d,
    };
    return do_sve2_zpz_ool(s, a, fns[a->esz]);
}

/* SQNEG: signed saturating negate, predicated.  */
static bool trans_SQNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve2_sqneg_b, gen_helper_sve2_sqneg_h,
        gen_helper_sve2_sqneg_s, gen_helper_sve2_sqneg_d,
    };
    return do_sve2_zpz_ool(s, a, fns[a->esz]);
}
6863
/*
 * Expand one SVE2 predicated ZPZZ operation for all four element sizes,
 * dispatching to the out-of-line helpers via do_sve2_zpzz_ool.
 */
#define DO_SVE2_ZPZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve2_##name##_zpzz_b, gen_helper_sve2_##name##_zpzz_h, \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d, \
    };                                                                    \
    return do_sve2_zpzz_ool(s, a, fns[a->esz]);                           \
}

/* Signed saturating/rounding shifts.  */
DO_SVE2_ZPZZ(SQSHL, sqshl)
DO_SVE2_ZPZZ(SQRSHL, sqrshl)
DO_SVE2_ZPZZ(SRSHL, srshl)

/* Unsigned saturating/rounding shifts.  */
DO_SVE2_ZPZZ(UQSHL, uqshl)
DO_SVE2_ZPZZ(UQRSHL, uqrshl)
DO_SVE2_ZPZZ(URSHL, urshl)

/* Signed halving add/sub.  */
DO_SVE2_ZPZZ(SHADD, shadd)
DO_SVE2_ZPZZ(SRHADD, srhadd)
DO_SVE2_ZPZZ(SHSUB, shsub)

/* Unsigned halving add/sub.  */
DO_SVE2_ZPZZ(UHADD, uhadd)
DO_SVE2_ZPZZ(URHADD, urhadd)
DO_SVE2_ZPZZ(UHSUB, uhsub)

/* Pairwise arithmetic.  */
DO_SVE2_ZPZZ(ADDP, addp)
DO_SVE2_ZPZZ(SMAXP, smaxp)
DO_SVE2_ZPZZ(UMAXP, umaxp)
DO_SVE2_ZPZZ(SMINP, sminp)
DO_SVE2_ZPZZ(UMINP, uminp)

/* Saturating add/sub.  */
DO_SVE2_ZPZZ(SQADD_zpzz, sqadd)
DO_SVE2_ZPZZ(UQADD_zpzz, uqadd)
DO_SVE2_ZPZZ(SQSUB_zpzz, sqsub)
DO_SVE2_ZPZZ(UQSUB_zpzz, uqsub)
DO_SVE2_ZPZZ(SUQADD, suqadd)
DO_SVE2_ZPZZ(USQADD, usqadd)
6902
6903/*
6904 * SVE2 Widening Integer Arithmetic
6905 */
6906
6907static bool do_sve2_zzw_ool(DisasContext *s, arg_rrr_esz *a,
6908                            gen_helper_gvec_3 *fn, int data)
6909{
6910    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
6911        return false;
6912    }
6913    if (sve_access_check(s)) {
6914        unsigned vsz = vec_full_reg_size(s);
6915        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
6916                           vec_full_reg_offset(s, a->rn),
6917                           vec_full_reg_offset(s, a->rm),
6918                           vsz, vsz, data, fn);
6919    }
6920    return true;
6921}
6922
/*
 * Expand an SVE2 widening ZZZ operation.  SEL1/SEL2 select the top (odd)
 * or bottom (even) halves of the two narrow source operands; they are
 * packed into the two low bits of the helper's data argument.  Byte
 * elements are not allocated (fns[0] == NULL).
 */
#define DO_SVE2_ZZZ_TB(NAME, name, SEL1, SEL2) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)               \
{                                                                       \
    static gen_helper_gvec_3 * const fns[4] = {                         \
        NULL,                       gen_helper_sve2_##name##_h,         \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,         \
    };                                                                  \
    return do_sve2_zzw_ool(s, a, fns[a->esz], (SEL2 << 1) | SEL1);      \
}

/* Bottom x bottom.  */
DO_SVE2_ZZZ_TB(SADDLB, saddl, false, false)
DO_SVE2_ZZZ_TB(SSUBLB, ssubl, false, false)
DO_SVE2_ZZZ_TB(SABDLB, sabdl, false, false)

DO_SVE2_ZZZ_TB(UADDLB, uaddl, false, false)
DO_SVE2_ZZZ_TB(USUBLB, usubl, false, false)
DO_SVE2_ZZZ_TB(UABDLB, uabdl, false, false)

/* Top x top.  */
DO_SVE2_ZZZ_TB(SADDLT, saddl, true, true)
DO_SVE2_ZZZ_TB(SSUBLT, ssubl, true, true)
DO_SVE2_ZZZ_TB(SABDLT, sabdl, true, true)

DO_SVE2_ZZZ_TB(UADDLT, uaddl, true, true)
DO_SVE2_ZZZ_TB(USUBLT, usubl, true, true)
DO_SVE2_ZZZ_TB(UABDLT, uabdl, true, true)

/* Mixed bottom/top forms.  */
DO_SVE2_ZZZ_TB(SADDLBT, saddl, false, true)
DO_SVE2_ZZZ_TB(SSUBLBT, ssubl, false, true)
DO_SVE2_ZZZ_TB(SSUBLTB, ssubl, true, false)

DO_SVE2_ZZZ_TB(SQDMULLB_zzz, sqdmull_zzz, false, false)
DO_SVE2_ZZZ_TB(SQDMULLT_zzz, sqdmull_zzz, true, true)

DO_SVE2_ZZZ_TB(SMULLB_zzz, smull_zzz, false, false)
DO_SVE2_ZZZ_TB(SMULLT_zzz, smull_zzz, true, true)

DO_SVE2_ZZZ_TB(UMULLB_zzz, umull_zzz, false, false)
DO_SVE2_ZZZ_TB(UMULLT_zzz, umull_zzz, true, true)
6961
6962static bool do_eor_tb(DisasContext *s, arg_rrr_esz *a, bool sel1)
6963{
6964    static gen_helper_gvec_3 * const fns[4] = {
6965        gen_helper_sve2_eoril_b, gen_helper_sve2_eoril_h,
6966        gen_helper_sve2_eoril_s, gen_helper_sve2_eoril_d,
6967    };
6968    return do_sve2_zzw_ool(s, a, fns[a->esz], (!sel1 << 1) | sel1);
6969}
6970
6971static bool trans_EORBT(DisasContext *s, arg_rrr_esz *a)
6972{
6973    return do_eor_tb(s, a, false);
6974}
6975
6976static bool trans_EORTB(DisasContext *s, arg_rrr_esz *a)
6977{
6978    return do_eor_tb(s, a, true);
6979}
6980
/*
 * PMULLB/PMULLT: polynomial multiply long.  The 128-bit result form
 * (esz == 0) additionally requires the pmull128 feature; the 32-bit
 * element size is not allocated, which the NULL table entry rejects
 * inside do_sve2_zzw_ool.
 */
static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
        NULL,                    gen_helper_sve2_pmull_d,
    };
    if (a->esz == 0 && !dc_isar_feature(aa64_sve2_pmull128, s)) {
        return false;
    }
    return do_sve2_zzw_ool(s, a, fns[a->esz], sel);
}

static bool trans_PMULLB(DisasContext *s, arg_rrr_esz *a)
{
    return do_trans_pmull(s, a, false);
}

static bool trans_PMULLT(DisasContext *s, arg_rrr_esz *a)
{
    return do_trans_pmull(s, a, true);
}
7002
/*
 * Expand an SVE2 "wide plus narrow" ZZZ operation: the first source is
 * already wide, SEL2 selects the top or bottom half of the second.
 * Byte elements are not allocated (fns[0] == NULL).
 */
#define DO_SVE2_ZZZ_WTB(NAME, name, SEL2) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)       \
{                                                               \
    static gen_helper_gvec_3 * const fns[4] = {                 \
        NULL,                       gen_helper_sve2_##name##_h, \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d, \
    };                                                          \
    return do_sve2_zzw_ool(s, a, fns[a->esz], SEL2);            \
}

DO_SVE2_ZZZ_WTB(SADDWB, saddw, false)
DO_SVE2_ZZZ_WTB(SADDWT, saddw, true)
DO_SVE2_ZZZ_WTB(SSUBWB, ssubw, false)
DO_SVE2_ZZZ_WTB(SSUBWT, ssubw, true)

DO_SVE2_ZZZ_WTB(UADDWB, uaddw, false)
DO_SVE2_ZZZ_WTB(UADDWT, uaddw, true)
DO_SVE2_ZZZ_WTB(USUBWB, usubw, false)
DO_SVE2_ZZZ_WTB(USUBWT, usubw, true)
7022
/*
 * Vector expansion of SSHLLB/SSHLLT.  The immediate packs the left
 * shift amount in bits [n:1] and the top/bottom selector in bit 0;
 * halfbits is the width of the narrow source element within the wide
 * destination element.
 */
static void gen_sshll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int top = imm & 1;
    int shl = imm >> 1;
    int halfbits = 4 << vece;

    if (top) {
        if (shl == halfbits) {
            /* Shifting the high half fully left leaves it in place:
               just mask off the low half.  */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Sign-extend the high half down, then shift into place.  */
            tcg_gen_sari_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        /* Move the low half up, then one arithmetic right shift both
           sign-extends it and applies the left shift.  */
        tcg_gen_shli_vec(vece, d, n, halfbits);
        tcg_gen_sari_vec(vece, d, d, halfbits - shl);
    }
}
7044
7045static void gen_ushll_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int imm)
7046{
7047    int halfbits = 4 << vece;
7048    int top = imm & 1;
7049    int shl = (imm >> 1);
7050    int shift;
7051    uint64_t mask;
7052
7053    mask = MAKE_64BIT_MASK(0, halfbits);
7054    mask <<= shl;
7055    mask = dup_const(vece, mask);
7056
7057    shift = shl - top * halfbits;
7058    if (shift < 0) {
7059        tcg_gen_shri_i64(d, n, -shift);
7060    } else {
7061        tcg_gen_shli_i64(d, n, shift);
7062    }
7063    tcg_gen_andi_i64(d, d, mask);
7064}
7065
/* Per-element-size adapters matching the GVecGen2i .fni8 signature.  */
static void gen_ushll16_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_16, d, n, imm);
}

static void gen_ushll32_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_32, d, n, imm);
}

static void gen_ushll64_i64(TCGv_i64 d, TCGv_i64 n, int64_t imm)
{
    gen_ushll_i64(MO_64, d, n, imm);
}
7080
/*
 * Vector expansion of USHLLB/USHLLT; immediate encoding as for
 * gen_sshll_vec, but zero-extending.
 */
static void gen_ushll_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t imm)
{
    int halfbits = 4 << vece;
    int top = imm & 1;
    int shl = imm >> 1;

    if (top) {
        if (shl == halfbits) {
            /* High half fully shifted left stays in place: mask only.  */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(halfbits, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Zero-extend the high half down, then shift into place.  */
            tcg_gen_shri_vec(vece, d, n, halfbits);
            tcg_gen_shli_vec(vece, d, d, shl);
        }
    } else {
        if (shl == 0) {
            /* No shift: just mask off the high half.  */
            TCGv_vec t = tcg_temp_new_vec_matching(d);
            tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
            tcg_gen_and_vec(vece, d, n, t);
            tcg_temp_free_vec(t);
        } else {
            /* Move the low half up, then a logical right shift both
               zero-extends it and applies the left shift.  */
            tcg_gen_shli_vec(vece, d, n, halfbits);
            tcg_gen_shri_vec(vece, d, d, halfbits - shl);
        }
    }
}
7109
/*
 * Shared expansion for {S,U}SHLL{B,T}: widen the bottom (sel = false)
 * or top (sel = true) halves, shifting left by a->imm, signed (uns =
 * false) or unsigned (uns = true).  The immediate passed to the GVec
 * expander packs the shift in bits [n:1] and sel in bit 0.
 */
static bool do_sve2_shll_tb(DisasContext *s, arg_rri_esz *a,
                            bool sel, bool uns)
{
    static const TCGOpcode sshll_list[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, 0
    };
    static const TCGOpcode ushll_list[] = {
        INDEX_op_shli_vec, INDEX_op_shri_vec, 0
    };
    /* Indexed by [uns][esz - 1]; only the unsigned forms have an
       integer (.fni8) fallback.  */
    static const GVecGen2i ops[2][3] = {
        { { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_h,
            .vece = MO_16 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_s,
            .vece = MO_32 },
          { .fniv = gen_sshll_vec,
            .opt_opc = sshll_list,
            .fno = gen_helper_sve2_sshll_d,
            .vece = MO_64 } },
        { { .fni8 = gen_ushll16_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_h,
            .vece = MO_16 },
          { .fni8 = gen_ushll32_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_s,
            .vece = MO_32 },
          { .fni8 = gen_ushll64_i64,
            .fniv = gen_ushll_vec,
            .opt_opc = ushll_list,
            .fno = gen_helper_sve2_ushll_d,
            .vece = MO_64 } },
    };

    if (a->esz < 0 || a->esz > 2 || !dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, (a->imm << 1) | sel,
                        &ops[uns][a->esz]);
    }
    return true;
}

static bool trans_SSHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, false);
}

static bool trans_SSHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, false);
}

static bool trans_USHLLB(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, false, true);
}

static bool trans_USHLLT(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_shll_tb(s, a, true, true);
}
7181
7182static bool trans_BEXT(DisasContext *s, arg_rrr_esz *a)
7183{
7184    static gen_helper_gvec_3 * const fns[4] = {
7185        gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
7186        gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
7187    };
7188    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
7189        return false;
7190    }
7191    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
7192}
7193
7194static bool trans_BDEP(DisasContext *s, arg_rrr_esz *a)
7195{
7196    static gen_helper_gvec_3 * const fns[4] = {
7197        gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
7198        gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
7199    };
7200    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
7201        return false;
7202    }
7203    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
7204}
7205
7206static bool trans_BGRP(DisasContext *s, arg_rrr_esz *a)
7207{
7208    static gen_helper_gvec_3 * const fns[4] = {
7209        gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
7210        gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
7211    };
7212    if (!dc_isar_feature(aa64_sve2_bitperm, s)) {
7213        return false;
7214    }
7215    return do_sve2_zzw_ool(s, a, fns[a->esz], 0);
7216}
7217
/*
 * CADD/SQCADD: complex integer add, with rot selecting the 90 (false)
 * or 270 (true) degree rotation, and sq the saturating form.
 */
static bool do_cadd(DisasContext *s, arg_rrr_esz *a, bool sq, bool rot)
{
    static gen_helper_gvec_3 * const fns[2][4] = {
        { gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
          gen_helper_sve2_cadd_s, gen_helper_sve2_cadd_d },
        { gen_helper_sve2_sqcadd_b, gen_helper_sve2_sqcadd_h,
          gen_helper_sve2_sqcadd_s, gen_helper_sve2_sqcadd_d },
    };
    return do_sve2_zzw_ool(s, a, fns[sq][a->esz], rot);
}

static bool trans_CADD_rot90(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, false, false);
}

static bool trans_CADD_rot270(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, false, true);
}

static bool trans_SQCADD_rot90(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, true, false);
}

static bool trans_SQCADD_rot270(DisasContext *s, arg_rrr_esz *a)
{
    return do_cadd(s, a, true, true);
}
7248
7249static bool do_sve2_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
7250                             gen_helper_gvec_4 *fn, int data)
7251{
7252    if (fn == NULL || !dc_isar_feature(aa64_sve2, s)) {
7253        return false;
7254    }
7255    if (sve_access_check(s)) {
7256        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
7257    }
7258    return true;
7259}
7260
/*
 * {S,U}ABAL{B,T}: absolute difference and accumulate long.  uns picks
 * the unsigned form, sel the top (true) or bottom (false) halves; byte
 * elements are not allocated.
 */
static bool do_abal(DisasContext *s, arg_rrrr_esz *a, bool uns, bool sel)
{
    static gen_helper_gvec_4 * const fns[2][4] = {
        { NULL,                    gen_helper_sve2_sabal_h,
          gen_helper_sve2_sabal_s, gen_helper_sve2_sabal_d },
        { NULL,                    gen_helper_sve2_uabal_h,
          gen_helper_sve2_uabal_s, gen_helper_sve2_uabal_d },
    };
    return do_sve2_zzzz_ool(s, a, fns[uns][a->esz], sel);
}

static bool trans_SABALB(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, false, false);
}

static bool trans_SABALT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, false, true);
}

static bool trans_UABALB(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, true, false);
}

static bool trans_UABALT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_abal(s, a, true, true);
}
7291
/*
 * ADCLB/ADCLT: add with carry long; sel selects the top (odd) element
 * pairs.  Only 32-bit and 64-bit containers exist.
 */
static bool do_adcl(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    static gen_helper_gvec_4 * const fns[2] = {
        gen_helper_sve2_adcl_s,
        gen_helper_sve2_adcl_d,
    };
    /*
     * Note that in this case the ESZ field encodes both size and sign.
     * Split out 'subtract' into bit 1 of the data field for the helper.
     */
    return do_sve2_zzzz_ool(s, a, fns[a->esz & 1], (a->esz & 2) | sel);
}

static bool trans_ADCLB(DisasContext *s, arg_rrrr_esz *a)
{
    return do_adcl(s, a, false);
}

static bool trans_ADCLT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_adcl(s, a, true);
}
7314
7315static bool do_sve2_fn2i(DisasContext *s, arg_rri_esz *a, GVecGen2iFn *fn)
7316{
7317    if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) {
7318        return false;
7319    }
7320    if (sve_access_check(s)) {
7321        unsigned vsz = vec_full_reg_size(s);
7322        unsigned rd_ofs = vec_full_reg_offset(s, a->rd);
7323        unsigned rn_ofs = vec_full_reg_offset(s, a->rn);
7324        fn(a->esz, rd_ofs, rn_ofs, a->imm, vsz, vsz);
7325    }
7326    return true;
7327}
7328
/* SSRA: signed shift right and accumulate.  */
static bool trans_SSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ssra);
}

/* USRA: unsigned shift right and accumulate.  */
static bool trans_USRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_usra);
}

/* SRSRA: signed rounding shift right and accumulate.  */
static bool trans_SRSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_srsra);
}

/* URSRA: unsigned rounding shift right and accumulate.  */
static bool trans_URSRA(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_ursra);
}

/* SRI: shift right and insert.  */
static bool trans_SRI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sri);
}

/* SLI: shift left and insert.  */
static bool trans_SLI(DisasContext *s, arg_rri_esz *a)
{
    return do_sve2_fn2i(s, a, gen_gvec_sli);
}
7358
/* Expand an SVE2 three-operand op via a shared GVec function.  */
static bool do_sve2_fn_zzz(DisasContext *s, arg_rrr_esz *a, GVecGen3Fn *fn)
{
    if (!dc_isar_feature(aa64_sve2, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, fn, a->esz, a->rd, a->rn, a->rm);
    }
    return true;
}

/* SABA: signed absolute difference and accumulate.  */
static bool trans_SABA(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_fn_zzz(s, a, gen_gvec_saba);
}

/* UABA: unsigned absolute difference and accumulate.  */
static bool trans_UABA(DisasContext *s, arg_rrr_esz *a)
{
    return do_sve2_fn_zzz(s, a, gen_gvec_uaba);
}
7379
7380static bool do_sve2_narrow_extract(DisasContext *s, arg_rri_esz *a,
7381                                   const GVecGen2 ops[3])
7382{
7383    if (a->esz < 0 || a->esz > MO_32 || a->imm != 0 ||
7384        !dc_isar_feature(aa64_sve2, s)) {
7385        return false;
7386    }
7387    if (sve_access_check(s)) {
7388        unsigned vsz = vec_full_reg_size(s);
7389        tcg_gen_gvec_2(vec_full_reg_offset(s, a->rd),
7390                        vec_full_reg_offset(s, a->rn),
7391                        vsz, vsz, &ops[a->esz]);
7392    }
7393    return true;
7394}
7395
/* Opcodes required by the vector expansions of SQXTNB/SQXTNT.  */
static const TCGOpcode sqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
};

/*
 * SQXTNB vector expansion: saturate each wide element to the signed
 * range of the narrow type, then keep only its low half.
 */
static void gen_sqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;
    int64_t mask = (1ull << halfbits) - 1;
    int64_t min = -1ull << (halfbits - 1);
    int64_t max = -min - 1;

    /* Clamp to [min, max] ...  */
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, d, n, t);
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, d, d, t);
    /* ... and discard the high half of each element.  */
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, d, t);
    tcg_temp_free_vec(t);
}
7416
/* SQXTNB: signed saturating extract narrow, writing the even elements.  */
static bool trans_SQXTNB(DisasContext *s, arg_rri_esz *a)
{
    static const GVecGen2 ops[3] = {
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_h,
          .vece = MO_16 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_s,
          .vece = MO_32 },
        { .fniv = gen_sqxtnb_vec,
          .opt_opc = sqxtn_list,
          .fno = gen_helper_sve2_sqxtnb_d,
          .vece = MO_64 },
    };
    return do_sve2_narrow_extract(s, a, ops);
}
7435
/*
 * Inline expansion of SQXTNT: saturate each wide element of n to the
 * signed narrow range, then shift it into the high (top/odd) half of
 * each wide lane while preserving the existing low half of d (bitsel
 * with the low-half mask selects d's bottom and n's top).
 * Clobbers n, which is a per-expansion temporary here.
 */
static void gen_sqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;                /* narrow element width */
    int64_t mask = (1ull << halfbits) - 1;   /* low-half lane mask */
    int64_t min = -1ull << (halfbits - 1);   /* narrow signed minimum */
    int64_t max = -min - 1;                  /* narrow signed maximum */

    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);         /* clamp from below */
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);         /* clamp from above */
    tcg_gen_shli_vec(vece, n, n, halfbits);  /* move into top half */
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);    /* bottom from d, top from n */
    tcg_temp_free_vec(t);
}
7453
7454static bool trans_SQXTNT(DisasContext *s, arg_rri_esz *a)
7455{
7456    static const GVecGen2 ops[3] = {
7457        { .fniv = gen_sqxtnt_vec,
7458          .opt_opc = sqxtn_list,
7459          .load_dest = true,
7460          .fno = gen_helper_sve2_sqxtnt_h,
7461          .vece = MO_16 },
7462        { .fniv = gen_sqxtnt_vec,
7463          .opt_opc = sqxtn_list,
7464          .load_dest = true,
7465          .fno = gen_helper_sve2_sqxtnt_s,
7466          .vece = MO_32 },
7467        { .fniv = gen_sqxtnt_vec,
7468          .opt_opc = sqxtn_list,
7469          .load_dest = true,
7470          .fno = gen_helper_sve2_sqxtnt_d,
7471          .vece = MO_64 },
7472    };
7473    return do_sve2_narrow_extract(s, a, ops);
7474}
7475
/*
 * Opcodes required by the UQXTNB/UQXTNT inline expansions below;
 * shli is only used by the "top" form.
 */
static const TCGOpcode uqxtn_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, 0
};
7479
/*
 * Inline expansion of UQXTNB: unsigned saturate each wide element of n
 * to the narrow maximum.  The clamped value already fits in the low
 * half of the lane, so no explicit masking of the high half is needed.
 */
static void gen_uqxtnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;                /* narrow element width */
    int64_t max = (1ull << halfbits) - 1;    /* narrow unsigned maximum */

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
7490
7491static bool trans_UQXTNB(DisasContext *s, arg_rri_esz *a)
7492{
7493    static const GVecGen2 ops[3] = {
7494        { .fniv = gen_uqxtnb_vec,
7495          .opt_opc = uqxtn_list,
7496          .fno = gen_helper_sve2_uqxtnb_h,
7497          .vece = MO_16 },
7498        { .fniv = gen_uqxtnb_vec,
7499          .opt_opc = uqxtn_list,
7500          .fno = gen_helper_sve2_uqxtnb_s,
7501          .vece = MO_32 },
7502        { .fniv = gen_uqxtnb_vec,
7503          .opt_opc = uqxtn_list,
7504          .fno = gen_helper_sve2_uqxtnb_d,
7505          .vece = MO_64 },
7506    };
7507    return do_sve2_narrow_extract(s, a, ops);
7508}
7509
/*
 * Inline expansion of UQXTNT: unsigned saturate each wide element of n,
 * shift it into the high (top) half of the lane, and merge with the
 * preserved low half of d via bitsel on the low-half mask.
 * Clobbers n, which is a per-expansion temporary here.
 */
static void gen_uqxtnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;                /* narrow element width */
    int64_t max = (1ull << halfbits) - 1;    /* narrow unsigned maximum */

    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);
    tcg_gen_shli_vec(vece, n, n, halfbits);  /* move into top half */
    tcg_gen_bitsel_vec(vece, d, t, d, n);    /* bottom from d, top from n */
    tcg_temp_free_vec(t);
}
7522
7523static bool trans_UQXTNT(DisasContext *s, arg_rri_esz *a)
7524{
7525    static const GVecGen2 ops[3] = {
7526        { .fniv = gen_uqxtnt_vec,
7527          .opt_opc = uqxtn_list,
7528          .load_dest = true,
7529          .fno = gen_helper_sve2_uqxtnt_h,
7530          .vece = MO_16 },
7531        { .fniv = gen_uqxtnt_vec,
7532          .opt_opc = uqxtn_list,
7533          .load_dest = true,
7534          .fno = gen_helper_sve2_uqxtnt_s,
7535          .vece = MO_32 },
7536        { .fniv = gen_uqxtnt_vec,
7537          .opt_opc = uqxtn_list,
7538          .load_dest = true,
7539          .fno = gen_helper_sve2_uqxtnt_d,
7540          .vece = MO_64 },
7541    };
7542    return do_sve2_narrow_extract(s, a, ops);
7543}
7544
/*
 * Opcodes required by the SQXTUNB/SQXTUNT inline expansions below;
 * shli is only used by the "top" form.
 */
static const TCGOpcode sqxtun_list[] = {
    INDEX_op_shli_vec, INDEX_op_umin_vec, INDEX_op_smax_vec, 0
};
7548
/*
 * Inline expansion of SQXTUNB: clamp each signed wide element of n to
 * [0, narrow unsigned max].  The clamped value is non-negative and fits
 * in the low half of the lane, so no further masking is needed.
 */
static void gen_sqxtunb_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;                /* narrow element width */
    int64_t max = (1ull << halfbits) - 1;    /* narrow unsigned maximum */

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, d, n, t);         /* clamp negatives to zero */
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, d, d, t);         /* clamp from above */
    tcg_temp_free_vec(t);
}
7561
7562static bool trans_SQXTUNB(DisasContext *s, arg_rri_esz *a)
7563{
7564    static const GVecGen2 ops[3] = {
7565        { .fniv = gen_sqxtunb_vec,
7566          .opt_opc = sqxtun_list,
7567          .fno = gen_helper_sve2_sqxtunb_h,
7568          .vece = MO_16 },
7569        { .fniv = gen_sqxtunb_vec,
7570          .opt_opc = sqxtun_list,
7571          .fno = gen_helper_sve2_sqxtunb_s,
7572          .vece = MO_32 },
7573        { .fniv = gen_sqxtunb_vec,
7574          .opt_opc = sqxtun_list,
7575          .fno = gen_helper_sve2_sqxtunb_d,
7576          .vece = MO_64 },
7577    };
7578    return do_sve2_narrow_extract(s, a, ops);
7579}
7580
/*
 * Inline expansion of SQXTUNT: clamp each signed wide element of n to
 * [0, narrow unsigned max], shift into the high (top) half of the lane,
 * and merge with the preserved low half of d via bitsel.
 * Clobbers n, which is a per-expansion temporary here.
 */
static void gen_sqxtunt_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;                /* narrow element width */
    int64_t max = (1ull << halfbits) - 1;    /* narrow unsigned maximum */

    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);         /* clamp negatives to zero */
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_umin_vec(vece, n, n, t);         /* clamp from above */
    tcg_gen_shli_vec(vece, n, n, halfbits);  /* move into top half */
    tcg_gen_bitsel_vec(vece, d, t, d, n);    /* bottom from d, top from n */
    tcg_temp_free_vec(t);
}
7595
7596static bool trans_SQXTUNT(DisasContext *s, arg_rri_esz *a)
7597{
7598    static const GVecGen2 ops[3] = {
7599        { .fniv = gen_sqxtunt_vec,
7600          .opt_opc = sqxtun_list,
7601          .load_dest = true,
7602          .fno = gen_helper_sve2_sqxtunt_h,
7603          .vece = MO_16 },
7604        { .fniv = gen_sqxtunt_vec,
7605          .opt_opc = sqxtun_list,
7606          .load_dest = true,
7607          .fno = gen_helper_sve2_sqxtunt_s,
7608          .vece = MO_32 },
7609        { .fniv = gen_sqxtunt_vec,
7610          .opt_opc = sqxtun_list,
7611          .load_dest = true,
7612          .fno = gen_helper_sve2_sqxtunt_d,
7613          .vece = MO_64 },
7614    };
7615    return do_sve2_narrow_extract(s, a, ops);
7616}
7617
7618static bool do_sve2_shr_narrow(DisasContext *s, arg_rri_esz *a,
7619                               const GVecGen2i ops[3])
7620{
7621    if (a->esz < 0 || a->esz > MO_32 || !dc_isar_feature(aa64_sve2, s)) {
7622        return false;
7623    }
7624    assert(a->imm > 0 && a->imm <= (8 << a->esz));
7625    if (sve_access_check(s)) {
7626        unsigned vsz = vec_full_reg_size(s);
7627        tcg_gen_gvec_2i(vec_full_reg_offset(s, a->rd),
7628                        vec_full_reg_offset(s, a->rn),
7629                        vsz, vsz, a->imm, &ops[a->esz]);
7630    }
7631    return true;
7632}
7633
/*
 * Integer expansion of SHRNB over a 64-bit chunk: shift the whole chunk
 * right by shr, then mask to the low half of each wide lane.  Bits that
 * cross a lane boundary during the shift are removed by the mask, which
 * is valid because shr <= halfbits (asserted in do_sve2_shr_narrow).
 */
static void gen_shrnb_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;    /* narrow element width */
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shri_i64(d, n, shr);
    tcg_gen_andi_i64(d, d, mask);
}
7642
/* SHRNB, 16-bit wide elements narrowing to bytes. */
static void gen_shrnb16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_16, d, n, shr);
}

/* SHRNB, 32-bit wide elements narrowing to halfwords. */
static void gen_shrnb32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_32, d, n, shr);
}

/* SHRNB, 64-bit wide elements narrowing to words. */
static void gen_shrnb64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnb_i64(MO_64, d, n, shr);
}
7657
/*
 * Vector expansion of SHRNB: per-lane shift right, then keep the low
 * (bottom) half of each wide lane.  Clobbers n (per-expansion temp).
 */
static void gen_shrnb_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;    /* narrow element width */
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_and_vec(vece, d, n, t);
    tcg_temp_free_vec(t);
}
7669
7670static bool trans_SHRNB(DisasContext *s, arg_rri_esz *a)
7671{
7672    static const TCGOpcode vec_list[] = { INDEX_op_shri_vec, 0 };
7673    static const GVecGen2i ops[3] = {
7674        { .fni8 = gen_shrnb16_i64,
7675          .fniv = gen_shrnb_vec,
7676          .opt_opc = vec_list,
7677          .fno = gen_helper_sve2_shrnb_h,
7678          .vece = MO_16 },
7679        { .fni8 = gen_shrnb32_i64,
7680          .fniv = gen_shrnb_vec,
7681          .opt_opc = vec_list,
7682          .fno = gen_helper_sve2_shrnb_s,
7683          .vece = MO_32 },
7684        { .fni8 = gen_shrnb64_i64,
7685          .fniv = gen_shrnb_vec,
7686          .opt_opc = vec_list,
7687          .fno = gen_helper_sve2_shrnb_d,
7688          .vece = MO_64 },
7689    };
7690    return do_sve2_shr_narrow(s, a, ops);
7691}
7692
/*
 * Integer expansion of SHRNT over a 64-bit chunk: position each wide
 * element's (n >> shr) result in the high half of its lane by shifting
 * left by halfbits - shr, mask to the high halves, and merge with the
 * preserved low halves of d.  Valid because 0 < shr <= halfbits.
 * Clobbers n (per-expansion temp).
 */
static void gen_shrnt_i64(unsigned vece, TCGv_i64 d, TCGv_i64 n, int shr)
{
    int halfbits = 4 << vece;    /* narrow element width */
    uint64_t mask = dup_const(vece, MAKE_64BIT_MASK(0, halfbits));

    tcg_gen_shli_i64(n, n, halfbits - shr);
    tcg_gen_andi_i64(n, n, ~mask);           /* keep top halves of n */
    tcg_gen_andi_i64(d, d, mask);            /* keep bottom halves of d */
    tcg_gen_or_i64(d, d, n);
}
7703
/* SHRNT, 16-bit wide elements narrowing to bytes. */
static void gen_shrnt16_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_16, d, n, shr);
}

/* SHRNT, 32-bit wide elements narrowing to halfwords. */
static void gen_shrnt32_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    gen_shrnt_i64(MO_32, d, n, shr);
}

/*
 * SHRNT, 64-bit wide elements: only one wide element per chunk, so
 * deposit the shifted value directly into the top 32 bits of d.
 */
static void gen_shrnt64_i64(TCGv_i64 d, TCGv_i64 n, int64_t shr)
{
    tcg_gen_shri_i64(n, n, shr);
    tcg_gen_deposit_i64(d, d, n, 32, 32);
}
7719
/*
 * Vector expansion of SHRNT: shift each wide element left by
 * halfbits - shr to land (n >> shr) in the top half of the lane, then
 * bitsel merges d's bottom halves with n's top halves.
 * Clobbers n (per-expansion temp).
 */
static void gen_shrnt_vec(unsigned vece, TCGv_vec d, TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;    /* narrow element width */
    uint64_t mask = MAKE_64BIT_MASK(0, halfbits);

    tcg_gen_shli_vec(vece, n, n, halfbits - shr);
    tcg_gen_dupi_vec(vece, t, mask);
    tcg_gen_bitsel_vec(vece, d, t, d, n);    /* bottom from d, top from n */
    tcg_temp_free_vec(t);
}
7731
7732static bool trans_SHRNT(DisasContext *s, arg_rri_esz *a)
7733{
7734    static const TCGOpcode vec_list[] = { INDEX_op_shli_vec, 0 };
7735    static const GVecGen2i ops[3] = {
7736        { .fni8 = gen_shrnt16_i64,
7737          .fniv = gen_shrnt_vec,
7738          .opt_opc = vec_list,
7739          .load_dest = true,
7740          .fno = gen_helper_sve2_shrnt_h,
7741          .vece = MO_16 },
7742        { .fni8 = gen_shrnt32_i64,
7743          .fniv = gen_shrnt_vec,
7744          .opt_opc = vec_list,
7745          .load_dest = true,
7746          .fno = gen_helper_sve2_shrnt_s,
7747          .vece = MO_32 },
7748        { .fni8 = gen_shrnt64_i64,
7749          .fniv = gen_shrnt_vec,
7750          .opt_opc = vec_list,
7751          .load_dest = true,
7752          .fno = gen_helper_sve2_shrnt_d,
7753          .vece = MO_64 },
7754    };
7755    return do_sve2_shr_narrow(s, a, ops);
7756}
7757
7758static bool trans_RSHRNB(DisasContext *s, arg_rri_esz *a)
7759{
7760    static const GVecGen2i ops[3] = {
7761        { .fno = gen_helper_sve2_rshrnb_h },
7762        { .fno = gen_helper_sve2_rshrnb_s },
7763        { .fno = gen_helper_sve2_rshrnb_d },
7764    };
7765    return do_sve2_shr_narrow(s, a, ops);
7766}
7767
7768static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
7769{
7770    static const GVecGen2i ops[3] = {
7771        { .fno = gen_helper_sve2_rshrnt_h },
7772        { .fno = gen_helper_sve2_rshrnt_s },
7773        { .fno = gen_helper_sve2_rshrnt_d },
7774    };
7775    return do_sve2_shr_narrow(s, a, ops);
7776}
7777
/*
 * Inline expansion of SQSHRUNB: arithmetic shift right, clamp to
 * [0, narrow unsigned max].  The clamped result fits in the low half of
 * the lane, so no masking is needed.  Clobbers n (per-expansion temp).
 */
static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;    /* narrow element width */

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);         /* clamp negatives to zero */
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);         /* clamp from above */
    tcg_temp_free_vec(t);
}
7791
7792static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
7793{
7794    static const TCGOpcode vec_list[] = {
7795        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
7796    };
7797    static const GVecGen2i ops[3] = {
7798        { .fniv = gen_sqshrunb_vec,
7799          .opt_opc = vec_list,
7800          .fno = gen_helper_sve2_sqshrunb_h,
7801          .vece = MO_16 },
7802        { .fniv = gen_sqshrunb_vec,
7803          .opt_opc = vec_list,
7804          .fno = gen_helper_sve2_sqshrunb_s,
7805          .vece = MO_32 },
7806        { .fniv = gen_sqshrunb_vec,
7807          .opt_opc = vec_list,
7808          .fno = gen_helper_sve2_sqshrunb_d,
7809          .vece = MO_64 },
7810    };
7811    return do_sve2_shr_narrow(s, a, ops);
7812}
7813
/*
 * Inline expansion of SQSHRUNT: arithmetic shift right, clamp to
 * [0, narrow unsigned max], shift into the top half of the lane, and
 * merge with the preserved bottom halves of d via bitsel.
 * Clobbers n (per-expansion temp).
 */
static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;    /* narrow element width */

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, 0);
    tcg_gen_smax_vec(vece, n, n, t);         /* clamp negatives to zero */
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);         /* clamp from above */
    tcg_gen_shli_vec(vece, n, n, halfbits);  /* move into top half */
    tcg_gen_bitsel_vec(vece, d, t, d, n);    /* bottom from d, top from n */
    tcg_temp_free_vec(t);
}
7829
7830static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
7831{
7832    static const TCGOpcode vec_list[] = {
7833        INDEX_op_shli_vec, INDEX_op_sari_vec,
7834        INDEX_op_smax_vec, INDEX_op_umin_vec, 0
7835    };
7836    static const GVecGen2i ops[3] = {
7837        { .fniv = gen_sqshrunt_vec,
7838          .opt_opc = vec_list,
7839          .load_dest = true,
7840          .fno = gen_helper_sve2_sqshrunt_h,
7841          .vece = MO_16 },
7842        { .fniv = gen_sqshrunt_vec,
7843          .opt_opc = vec_list,
7844          .load_dest = true,
7845          .fno = gen_helper_sve2_sqshrunt_s,
7846          .vece = MO_32 },
7847        { .fniv = gen_sqshrunt_vec,
7848          .opt_opc = vec_list,
7849          .load_dest = true,
7850          .fno = gen_helper_sve2_sqshrunt_d,
7851          .vece = MO_64 },
7852    };
7853    return do_sve2_shr_narrow(s, a, ops);
7854}
7855
7856static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
7857{
7858    static const GVecGen2i ops[3] = {
7859        { .fno = gen_helper_sve2_sqrshrunb_h },
7860        { .fno = gen_helper_sve2_sqrshrunb_s },
7861        { .fno = gen_helper_sve2_sqrshrunb_d },
7862    };
7863    return do_sve2_shr_narrow(s, a, ops);
7864}
7865
7866static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
7867{
7868    static const GVecGen2i ops[3] = {
7869        { .fno = gen_helper_sve2_sqrshrunt_h },
7870        { .fno = gen_helper_sve2_sqrshrunt_s },
7871        { .fno = gen_helper_sve2_sqrshrunt_d },
7872    };
7873    return do_sve2_shr_narrow(s, a, ops);
7874}
7875
/*
 * Inline expansion of SQSHRNB: arithmetic shift right, clamp to the
 * signed narrow range, then mask to the low (bottom) half of each lane
 * to strip the sign extension.  Clobbers n (per-expansion temp).
 */
static void gen_sqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;    /* narrow element width */
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);  /* narrow signed max */
    int64_t min = -max - 1;                          /* narrow signed min */

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);         /* clamp from below */
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);         /* clamp from above */
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_and_vec(vece, d, n, t);          /* keep only the low half */
    tcg_temp_free_vec(t);
}
7893
7894static bool trans_SQSHRNB(DisasContext *s, arg_rri_esz *a)
7895{
7896    static const TCGOpcode vec_list[] = {
7897        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7898    };
7899    static const GVecGen2i ops[3] = {
7900        { .fniv = gen_sqshrnb_vec,
7901          .opt_opc = vec_list,
7902          .fno = gen_helper_sve2_sqshrnb_h,
7903          .vece = MO_16 },
7904        { .fniv = gen_sqshrnb_vec,
7905          .opt_opc = vec_list,
7906          .fno = gen_helper_sve2_sqshrnb_s,
7907          .vece = MO_32 },
7908        { .fniv = gen_sqshrnb_vec,
7909          .opt_opc = vec_list,
7910          .fno = gen_helper_sve2_sqshrnb_d,
7911          .vece = MO_64 },
7912    };
7913    return do_sve2_shr_narrow(s, a, ops);
7914}
7915
/*
 * Inline expansion of SQSHRNT: arithmetic shift right, clamp to the
 * signed narrow range, shift into the top half of the lane, and merge
 * with the preserved bottom halves of d via bitsel.
 * Clobbers n (per-expansion temp).
 */
static void gen_sqshrnt_vec(unsigned vece, TCGv_vec d,
                             TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;    /* narrow element width */
    int64_t max = MAKE_64BIT_MASK(0, halfbits - 1);  /* narrow signed max */
    int64_t min = -max - 1;                          /* narrow signed min */

    tcg_gen_sari_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, min);
    tcg_gen_smax_vec(vece, n, n, t);         /* clamp from below */
    tcg_gen_dupi_vec(vece, t, max);
    tcg_gen_smin_vec(vece, n, n, t);         /* clamp from above */
    tcg_gen_shli_vec(vece, n, n, halfbits);  /* move into top half */
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_bitsel_vec(vece, d, t, d, n);    /* bottom from d, top from n */
    tcg_temp_free_vec(t);
}
7934
7935static bool trans_SQSHRNT(DisasContext *s, arg_rri_esz *a)
7936{
7937    static const TCGOpcode vec_list[] = {
7938        INDEX_op_shli_vec, INDEX_op_sari_vec,
7939        INDEX_op_smax_vec, INDEX_op_smin_vec, 0
7940    };
7941    static const GVecGen2i ops[3] = {
7942        { .fniv = gen_sqshrnt_vec,
7943          .opt_opc = vec_list,
7944          .load_dest = true,
7945          .fno = gen_helper_sve2_sqshrnt_h,
7946          .vece = MO_16 },
7947        { .fniv = gen_sqshrnt_vec,
7948          .opt_opc = vec_list,
7949          .load_dest = true,
7950          .fno = gen_helper_sve2_sqshrnt_s,
7951          .vece = MO_32 },
7952        { .fniv = gen_sqshrnt_vec,
7953          .opt_opc = vec_list,
7954          .load_dest = true,
7955          .fno = gen_helper_sve2_sqshrnt_d,
7956          .vece = MO_64 },
7957    };
7958    return do_sve2_shr_narrow(s, a, ops);
7959}
7960
7961static bool trans_SQRSHRNB(DisasContext *s, arg_rri_esz *a)
7962{
7963    static const GVecGen2i ops[3] = {
7964        { .fno = gen_helper_sve2_sqrshrnb_h },
7965        { .fno = gen_helper_sve2_sqrshrnb_s },
7966        { .fno = gen_helper_sve2_sqrshrnb_d },
7967    };
7968    return do_sve2_shr_narrow(s, a, ops);
7969}
7970
7971static bool trans_SQRSHRNT(DisasContext *s, arg_rri_esz *a)
7972{
7973    static const GVecGen2i ops[3] = {
7974        { .fno = gen_helper_sve2_sqrshrnt_h },
7975        { .fno = gen_helper_sve2_sqrshrnt_s },
7976        { .fno = gen_helper_sve2_sqrshrnt_d },
7977    };
7978    return do_sve2_shr_narrow(s, a, ops);
7979}
7980
/*
 * Inline expansion of UQSHRNB: logical shift right, then unsigned
 * saturate to the narrow maximum.  The result fits in the low half of
 * the lane, so no masking is needed.  Clobbers n (per-expansion temp).
 */
static void gen_uqshrnb_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;    /* narrow element width */

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, d, n, t);         /* clamp from above */
    tcg_temp_free_vec(t);
}
7992
7993static bool trans_UQSHRNB(DisasContext *s, arg_rri_esz *a)
7994{
7995    static const TCGOpcode vec_list[] = {
7996        INDEX_op_shri_vec, INDEX_op_umin_vec, 0
7997    };
7998    static const GVecGen2i ops[3] = {
7999        { .fniv = gen_uqshrnb_vec,
8000          .opt_opc = vec_list,
8001          .fno = gen_helper_sve2_uqshrnb_h,
8002          .vece = MO_16 },
8003        { .fniv = gen_uqshrnb_vec,
8004          .opt_opc = vec_list,
8005          .fno = gen_helper_sve2_uqshrnb_s,
8006          .vece = MO_32 },
8007        { .fniv = gen_uqshrnb_vec,
8008          .opt_opc = vec_list,
8009          .fno = gen_helper_sve2_uqshrnb_d,
8010          .vece = MO_64 },
8011    };
8012    return do_sve2_shr_narrow(s, a, ops);
8013}
8014
/*
 * Inline expansion of UQSHRNT: logical shift right, unsigned saturate,
 * shift into the top half of the lane, and merge with the preserved
 * bottom halves of d via bitsel.  Clobbers n (per-expansion temp).
 */
static void gen_uqshrnt_vec(unsigned vece, TCGv_vec d,
                            TCGv_vec n, int64_t shr)
{
    TCGv_vec t = tcg_temp_new_vec_matching(d);
    int halfbits = 4 << vece;    /* narrow element width */

    tcg_gen_shri_vec(vece, n, n, shr);
    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
    tcg_gen_umin_vec(vece, n, n, t);         /* clamp from above */
    tcg_gen_shli_vec(vece, n, n, halfbits);  /* move into top half */
    tcg_gen_bitsel_vec(vece, d, t, d, n);    /* bottom from d, top from n */
    tcg_temp_free_vec(t);
}
8028
8029static bool trans_UQSHRNT(DisasContext *s, arg_rri_esz *a)
8030{
8031    static const TCGOpcode vec_list[] = {
8032        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_umin_vec, 0
8033    };
8034    static const GVecGen2i ops[3] = {
8035        { .fniv = gen_uqshrnt_vec,
8036          .opt_opc = vec_list,
8037          .load_dest = true,
8038          .fno = gen_helper_sve2_uqshrnt_h,
8039          .vece = MO_16 },
8040        { .fniv = gen_uqshrnt_vec,
8041          .opt_opc = vec_list,
8042          .load_dest = true,
8043          .fno = gen_helper_sve2_uqshrnt_s,
8044          .vece = MO_32 },
8045        { .fniv = gen_uqshrnt_vec,
8046          .opt_opc = vec_list,
8047          .load_dest = true,
8048          .fno = gen_helper_sve2_uqshrnt_d,
8049          .vece = MO_64 },
8050    };
8051    return do_sve2_shr_narrow(s, a, ops);
8052}
8053
8054static bool trans_UQRSHRNB(DisasContext *s, arg_rri_esz *a)
8055{
8056    static const GVecGen2i ops[3] = {
8057        { .fno = gen_helper_sve2_uqrshrnb_h },
8058        { .fno = gen_helper_sve2_uqrshrnb_s },
8059        { .fno = gen_helper_sve2_uqrshrnb_d },
8060    };
8061    return do_sve2_shr_narrow(s, a, ops);
8062}
8063
8064static bool trans_UQRSHRNT(DisasContext *s, arg_rri_esz *a)
8065{
8066    static const GVecGen2i ops[3] = {
8067        { .fno = gen_helper_sve2_uqrshrnt_h },
8068        { .fno = gen_helper_sve2_uqrshrnt_s },
8069        { .fno = gen_helper_sve2_uqrshrnt_d },
8070    };
8071    return do_sve2_shr_narrow(s, a, ops);
8072}
8073
/*
 * Expand a trans function for an SVE2 narrowing three-register
 * operation, dispatching by element size (no byte variant: the fns[]
 * slot for MO_8 is NULL, which do_sve2_zzz_ool rejects).
 */
#define DO_SVE2_ZZZ_NARROW(NAME, name)                                    \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)                 \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        NULL,                       gen_helper_sve2_##name##_h,           \
        gen_helper_sve2_##name##_s, gen_helper_sve2_##name##_d,           \
    };                                                                    \
    return do_sve2_zzz_ool(s, a, fns[a->esz]);                            \
}

/* Add (rounding) high-half narrowing, bottom and top. */
DO_SVE2_ZZZ_NARROW(ADDHNB, addhnb)
DO_SVE2_ZZZ_NARROW(ADDHNT, addhnt)
DO_SVE2_ZZZ_NARROW(RADDHNB, raddhnb)
DO_SVE2_ZZZ_NARROW(RADDHNT, raddhnt)

/* Subtract (rounding) high-half narrowing, bottom and top. */
DO_SVE2_ZZZ_NARROW(SUBHNB, subhnb)
DO_SVE2_ZZZ_NARROW(SUBHNT, subhnt)
DO_SVE2_ZZZ_NARROW(RSUBHNB, rsubhnb)
DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
8093
8094static bool do_sve2_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
8095                               gen_helper_gvec_flags_4 *fn)
8096{
8097    if (!dc_isar_feature(aa64_sve2, s)) {
8098        return false;
8099    }
8100    return do_ppzz_flags(s, a, fn);
8101}
8102
/*
 * Expand a trans function for MATCH/NMATCH, which exist only for byte
 * and halfword elements (the MO_32/MO_64 slots are NULL and rejected).
 */
#define DO_SVE2_PPZZ_MATCH(NAME, name)                                      \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
{                                                                           \
    static gen_helper_gvec_flags_4 * const fns[4] = {                       \
        gen_helper_sve2_##name##_ppzz_b, gen_helper_sve2_##name##_ppzz_h,   \
        NULL,                            NULL                               \
    };                                                                      \
    return do_sve2_ppzz_flags(s, a, fns[a->esz]);                           \
}

DO_SVE2_PPZZ_MATCH(MATCH, match)
DO_SVE2_PPZZ_MATCH(NMATCH, nmatch)
8115
8116static bool trans_HISTCNT(DisasContext *s, arg_rprr_esz *a)
8117{
8118    static gen_helper_gvec_4 * const fns[2] = {
8119        gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
8120    };
8121    if (a->esz < 2) {
8122        return false;
8123    }
8124    return do_sve2_zpzz_ool(s, a, fns[a->esz - 2]);
8125}
8126
8127static bool trans_HISTSEG(DisasContext *s, arg_rrr_esz *a)
8128{
8129    if (a->esz != 0) {
8130        return false;
8131    }
8132    return do_sve2_zzz_ool(s, a, gen_helper_sve2_histseg);
8133}
8134
8135static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
8136                            gen_helper_gvec_4_ptr *fn)
8137{
8138    if (!dc_isar_feature(aa64_sve2, s)) {
8139        return false;
8140    }
8141    return do_zpzz_fp(s, a, fn);
8142}
8143
/*
 * Expand a trans function for an SVE2 predicated FP pairwise operation;
 * no byte variant exists (the MO_8 slot is NULL and rejected).
 */
#define DO_SVE2_ZPZZ_FP(NAME, name)                                         \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)                  \
{                                                                           \
    static gen_helper_gvec_4_ptr * const fns[4] = {                         \
        NULL,                            gen_helper_sve2_##name##_zpzz_h,   \
        gen_helper_sve2_##name##_zpzz_s, gen_helper_sve2_##name##_zpzz_d    \
    };                                                                      \
    return do_sve2_zpzz_fp(s, a, fns[a->esz]);                              \
}

DO_SVE2_ZPZZ_FP(FADDP, faddp)
DO_SVE2_ZPZZ_FP(FMAXNMP, fmaxnmp)
DO_SVE2_ZPZZ_FP(FMINNMP, fminnmp)
DO_SVE2_ZPZZ_FP(FMAXP, fmaxp)
DO_SVE2_ZPZZ_FP(FMINP, fminp)
8159
8160/*
8161 * SVE Integer Multiply-Add (unpredicated)
8162 */
8163
8164static bool trans_FMMLA(DisasContext *s, arg_rrrr_esz *a)
8165{
8166    gen_helper_gvec_4_ptr *fn;
8167
8168    switch (a->esz) {
8169    case MO_32:
8170        if (!dc_isar_feature(aa64_sve_f32mm, s)) {
8171            return false;
8172        }
8173        fn = gen_helper_fmmla_s;
8174        break;
8175    case MO_64:
8176        if (!dc_isar_feature(aa64_sve_f64mm, s)) {
8177            return false;
8178        }
8179        fn = gen_helper_fmmla_d;
8180        break;
8181    default:
8182        return false;
8183    }
8184
8185    if (sve_access_check(s)) {
8186        unsigned vsz = vec_full_reg_size(s);
8187        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
8188        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8189                           vec_full_reg_offset(s, a->rn),
8190                           vec_full_reg_offset(s, a->rm),
8191                           vec_full_reg_offset(s, a->ra),
8192                           status, vsz, vsz, 0, fn);
8193        tcg_temp_free_ptr(status);
8194    }
8195    return true;
8196}
8197
8198static bool do_sqdmlal_zzzw(DisasContext *s, arg_rrrr_esz *a,
8199                            bool sel1, bool sel2)
8200{
8201    static gen_helper_gvec_4 * const fns[] = {
8202        NULL,                           gen_helper_sve2_sqdmlal_zzzw_h,
8203        gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
8204    };
8205    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
8206}
8207
8208static bool do_sqdmlsl_zzzw(DisasContext *s, arg_rrrr_esz *a,
8209                            bool sel1, bool sel2)
8210{
8211    static gen_helper_gvec_4 * const fns[] = {
8212        NULL,                           gen_helper_sve2_sqdmlsl_zzzw_h,
8213        gen_helper_sve2_sqdmlsl_zzzw_s, gen_helper_sve2_sqdmlsl_zzzw_d,
8214    };
8215    return do_sve2_zzzz_ool(s, a, fns[a->esz], (sel2 << 1) | sel1);
8216}
8217
/* SQDMLALB: both sources taken from the bottom (even) elements. */
static bool trans_SQDMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, false, false);
}

/* SQDMLALT: both sources taken from the top (odd) elements. */
static bool trans_SQDMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, true, true);
}

/* SQDMLALBT: first source from the bottom, second from the top. */
static bool trans_SQDMLALBT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlal_zzzw(s, a, false, true);
}

/* SQDMLSLB: both sources taken from the bottom (even) elements. */
static bool trans_SQDMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, false, false);
}

/* SQDMLSLT: both sources taken from the top (odd) elements. */
static bool trans_SQDMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, true, true);
}

/* SQDMLSLBT: first source from the bottom, second from the top. */
static bool trans_SQDMLSLBT(DisasContext *s, arg_rrrr_esz *a)
{
    return do_sqdmlsl_zzzw(s, a, false, true);
}
8247
8248static bool trans_SQRDMLAH_zzzz(DisasContext *s, arg_rrrr_esz *a)
8249{
8250    static gen_helper_gvec_4 * const fns[] = {
8251        gen_helper_sve2_sqrdmlah_b, gen_helper_sve2_sqrdmlah_h,
8252        gen_helper_sve2_sqrdmlah_s, gen_helper_sve2_sqrdmlah_d,
8253    };
8254    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
8255}
8256
8257static bool trans_SQRDMLSH_zzzz(DisasContext *s, arg_rrrr_esz *a)
8258{
8259    static gen_helper_gvec_4 * const fns[] = {
8260        gen_helper_sve2_sqrdmlsh_b, gen_helper_sve2_sqrdmlsh_h,
8261        gen_helper_sve2_sqrdmlsh_s, gen_helper_sve2_sqrdmlsh_d,
8262    };
8263    return do_sve2_zzzz_ool(s, a, fns[a->esz], 0);
8264}
8265
8266static bool do_smlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8267{
8268    static gen_helper_gvec_4 * const fns[] = {
8269        NULL,                         gen_helper_sve2_smlal_zzzw_h,
8270        gen_helper_sve2_smlal_zzzw_s, gen_helper_sve2_smlal_zzzw_d,
8271    };
8272    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
8273}
8274
/* SMLALB: signed multiply-add long from bottom (even) elements. */
static bool trans_SMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlal_zzzw(s, a, false);
}

/* SMLALT: signed multiply-add long from top (odd) elements. */
static bool trans_SMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlal_zzzw(s, a, true);
}
8284
8285static bool do_umlal_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8286{
8287    static gen_helper_gvec_4 * const fns[] = {
8288        NULL,                         gen_helper_sve2_umlal_zzzw_h,
8289        gen_helper_sve2_umlal_zzzw_s, gen_helper_sve2_umlal_zzzw_d,
8290    };
8291    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
8292}
8293
/* UMLALB: unsigned multiply-add long from bottom (even) elements. */
static bool trans_UMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlal_zzzw(s, a, false);
}

/* UMLALT: unsigned multiply-add long from top (odd) elements. */
static bool trans_UMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlal_zzzw(s, a, true);
}
8303
8304static bool do_smlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8305{
8306    static gen_helper_gvec_4 * const fns[] = {
8307        NULL,                         gen_helper_sve2_smlsl_zzzw_h,
8308        gen_helper_sve2_smlsl_zzzw_s, gen_helper_sve2_smlsl_zzzw_d,
8309    };
8310    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
8311}
8312
/* SMLSLB: signed multiply-subtract long from bottom (even) elements. */
static bool trans_SMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlsl_zzzw(s, a, false);
}

/* SMLSLT: signed multiply-subtract long from top (odd) elements. */
static bool trans_SMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_smlsl_zzzw(s, a, true);
}
8322
8323static bool do_umlsl_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
8324{
8325    static gen_helper_gvec_4 * const fns[] = {
8326        NULL,                         gen_helper_sve2_umlsl_zzzw_h,
8327        gen_helper_sve2_umlsl_zzzw_s, gen_helper_sve2_umlsl_zzzw_d,
8328    };
8329    return do_sve2_zzzz_ool(s, a, fns[a->esz], sel);
8330}
8331
/* UMLSLB: unsigned multiply-subtract long from bottom (even) elements. */
static bool trans_UMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlsl_zzzw(s, a, false);
}

/* UMLSLT: unsigned multiply-subtract long from top (odd) elements. */
static bool trans_UMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_umlsl_zzzw(s, a, true);
}
8341
8342static bool trans_CMLA_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
8343{
8344    static gen_helper_gvec_4 * const fns[] = {
8345        gen_helper_sve2_cmla_zzzz_b, gen_helper_sve2_cmla_zzzz_h,
8346        gen_helper_sve2_cmla_zzzz_s, gen_helper_sve2_cmla_zzzz_d,
8347    };
8348
8349    if (!dc_isar_feature(aa64_sve2, s)) {
8350        return false;
8351    }
8352    if (sve_access_check(s)) {
8353        gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
8354    }
8355    return true;
8356}
8357
8358static bool trans_CDOT_zzzz(DisasContext *s, arg_CMLA_zzzz *a)
8359{
8360    if (!dc_isar_feature(aa64_sve2, s) || a->esz < MO_32) {
8361        return false;
8362    }
8363    if (sve_access_check(s)) {
8364        gen_helper_gvec_4 *fn = (a->esz == MO_32
8365                                 ? gen_helper_sve2_cdot_zzzz_s
8366                                 : gen_helper_sve2_cdot_zzzz_d);
8367        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->rot);
8368    }
8369    return true;
8370}
8371
8372static bool trans_SQRDCMLAH_zzzz(DisasContext *s, arg_SQRDCMLAH_zzzz *a)
8373{
8374    static gen_helper_gvec_4 * const fns[] = {
8375        gen_helper_sve2_sqrdcmlah_zzzz_b, gen_helper_sve2_sqrdcmlah_zzzz_h,
8376        gen_helper_sve2_sqrdcmlah_zzzz_s, gen_helper_sve2_sqrdcmlah_zzzz_d,
8377    };
8378
8379    if (!dc_isar_feature(aa64_sve2, s)) {
8380        return false;
8381    }
8382    if (sve_access_check(s)) {
8383        gen_gvec_ool_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot);
8384    }
8385    return true;
8386}
8387
8388static bool trans_USDOT_zzzz(DisasContext *s, arg_USDOT_zzzz *a)
8389{
8390    if (a->esz != 2 || !dc_isar_feature(aa64_sve_i8mm, s)) {
8391        return false;
8392    }
8393    if (sve_access_check(s)) {
8394        unsigned vsz = vec_full_reg_size(s);
8395        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
8396                           vec_full_reg_offset(s, a->rn),
8397                           vec_full_reg_offset(s, a->rm),
8398                           vec_full_reg_offset(s, a->ra),
8399                           vsz, vsz, 0, gen_helper_gvec_usdot_b);
8400    }
8401    return true;
8402}
8403
/*
 * AESMC / AESIMC: a->decrypt is forwarded to the shared crypto_aesmc
 * helper as simd_data (presumably selecting the inverse operation --
 * the helper body is not visible here).
 */
static bool trans_AESMC(DisasContext *s, arg_AESMC *a)
{
    if (!dc_isar_feature(aa64_sve2_aes, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Note rd is passed as both source and destination.  */
        gen_gvec_ool_zz(s, gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt);
    }
    return true;
}
8414
/*
 * AESE / AESD: single AES round step, sharing the crypto_aese helper;
 * @decrypt is passed through as simd_data.
 */
static bool do_aese(DisasContext *s, arg_rrr_esz *a, bool decrypt)
{
    if (!dc_isar_feature(aa64_sve2_aes, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, gen_helper_crypto_aese,
                         a->rd, a->rn, a->rm, decrypt);
    }
    return true;
}

static bool trans_AESE(DisasContext *s, arg_rrr_esz *a)
{
    return do_aese(s, a, false);
}

static bool trans_AESD(DisasContext *s, arg_rrr_esz *a)
{
    return do_aese(s, a, true);
}
8436
/*
 * SM4E / SM4EKEY: SM4 crypto operations, gated on the SVE2 SM4
 * feature; the two instructions differ only in the helper invoked.
 */
static bool do_sm4(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (!dc_isar_feature(aa64_sve2_sm4, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, 0);
    }
    return true;
}

static bool trans_SM4E(DisasContext *s, arg_rrr_esz *a)
{
    return do_sm4(s, a, gen_helper_crypto_sm4e);
}

static bool trans_SM4EKEY(DisasContext *s, arg_rrr_esz *a)
{
    return do_sm4(s, a, gen_helper_crypto_sm4ekey);
}
8457
/* RAX1 (SVE2 SHA-3 extension): always operates on 64-bit elements.  */
static bool trans_RAX1(DisasContext *s, arg_rrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve2_sha3, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_fn_zzz(s, gen_gvec_rax1, MO_64, a->rd, a->rn, a->rm);
    }
    return true;
}
8468
8469static bool trans_FCVTNT_sh(DisasContext *s, arg_rpr_esz *a)
8470{
8471    if (!dc_isar_feature(aa64_sve2, s)) {
8472        return false;
8473    }
8474    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_sh);
8475}
8476
8477static bool trans_BFCVTNT(DisasContext *s, arg_rpr_esz *a)
8478{
8479    if (!dc_isar_feature(aa64_sve_bf16, s)) {
8480        return false;
8481    }
8482    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_bfcvtnt);
8483}
8484
8485static bool trans_FCVTNT_ds(DisasContext *s, arg_rpr_esz *a)
8486{
8487    if (!dc_isar_feature(aa64_sve2, s)) {
8488        return false;
8489    }
8490    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtnt_ds);
8491}
8492
8493static bool trans_FCVTLT_hs(DisasContext *s, arg_rpr_esz *a)
8494{
8495    if (!dc_isar_feature(aa64_sve2, s)) {
8496        return false;
8497    }
8498    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_hs);
8499}
8500
8501static bool trans_FCVTLT_sd(DisasContext *s, arg_rpr_esz *a)
8502{
8503    if (!dc_isar_feature(aa64_sve2, s)) {
8504        return false;
8505    }
8506    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve2_fcvtlt_sd);
8507}
8508
8509static bool trans_FCVTX_ds(DisasContext *s, arg_rpr_esz *a)
8510{
8511    if (!dc_isar_feature(aa64_sve2, s)) {
8512        return false;
8513    }
8514    return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve_fcvt_ds);
8515}
8516
8517static bool trans_FCVTXNT_ds(DisasContext *s, arg_rpr_esz *a)
8518{
8519    if (!dc_isar_feature(aa64_sve2, s)) {
8520        return false;
8521    }
8522    return do_frint_mode(s, a, float_round_to_odd, gen_helper_sve2_fcvtnt_ds);
8523}
8524
8525static bool trans_FLOGB(DisasContext *s, arg_rpr_esz *a)
8526{
8527    static gen_helper_gvec_3_ptr * const fns[] = {
8528        NULL,               gen_helper_flogb_h,
8529        gen_helper_flogb_s, gen_helper_flogb_d
8530    };
8531
8532    if (!dc_isar_feature(aa64_sve2, s) || fns[a->esz] == NULL) {
8533        return false;
8534    }
8535    if (sve_access_check(s)) {
8536        TCGv_ptr status =
8537            fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8538        unsigned vsz = vec_full_reg_size(s);
8539
8540        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
8541                           vec_full_reg_offset(s, a->rn),
8542                           pred_full_reg_offset(s, a->pg),
8543                           status, vsz, vsz, 0, fns[a->esz]);
8544        tcg_temp_free_ptr(status);
8545    }
8546    return true;
8547}
8548
8549static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
8550{
8551    if (!dc_isar_feature(aa64_sve2, s)) {
8552        return false;
8553    }
8554    if (sve_access_check(s)) {
8555        unsigned vsz = vec_full_reg_size(s);
8556        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8557                           vec_full_reg_offset(s, a->rn),
8558                           vec_full_reg_offset(s, a->rm),
8559                           vec_full_reg_offset(s, a->ra),
8560                           cpu_env, vsz, vsz, (sel << 1) | sub,
8561                           gen_helper_sve2_fmlal_zzzw_s);
8562    }
8563    return true;
8564}
8565
8566static bool trans_FMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8567{
8568    return do_FMLAL_zzzw(s, a, false, false);
8569}
8570
8571static bool trans_FMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8572{
8573    return do_FMLAL_zzzw(s, a, false, true);
8574}
8575
8576static bool trans_FMLSLB_zzzw(DisasContext *s, arg_rrrr_esz *a)
8577{
8578    return do_FMLAL_zzzw(s, a, true, false);
8579}
8580
8581static bool trans_FMLSLT_zzzw(DisasContext *s, arg_rrrr_esz *a)
8582{
8583    return do_FMLAL_zzzw(s, a, true, true);
8584}
8585
8586static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
8587{
8588    if (!dc_isar_feature(aa64_sve2, s)) {
8589        return false;
8590    }
8591    if (sve_access_check(s)) {
8592        unsigned vsz = vec_full_reg_size(s);
8593        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
8594                           vec_full_reg_offset(s, a->rn),
8595                           vec_full_reg_offset(s, a->rm),
8596                           vec_full_reg_offset(s, a->ra),
8597                           cpu_env, vsz, vsz,
8598                           (a->index << 2) | (sel << 1) | sub,
8599                           gen_helper_sve2_fmlal_zzxw_s);
8600    }
8601    return true;
8602}
8603
8604static bool trans_FMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8605{
8606    return do_FMLAL_zzxw(s, a, false, false);
8607}
8608
8609static bool trans_FMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8610{
8611    return do_FMLAL_zzxw(s, a, false, true);
8612}
8613
8614static bool trans_FMLSLB_zzxw(DisasContext *s, arg_rrxr_esz *a)
8615{
8616    return do_FMLAL_zzxw(s, a, true, false);
8617}
8618
8619static bool trans_FMLSLT_zzxw(DisasContext *s, arg_rrxr_esz *a)
8620{
8621    return do_FMLAL_zzxw(s, a, true, true);
8622}
8623
/*
 * Expand a four-register out-of-line operation gated on the AA64
 * I8MM feature; @data is passed to the helper as simd_data.
 */
static bool do_i8mm_zzzz_ool(DisasContext *s, arg_rrrr_esz *a,
                             gen_helper_gvec_4 *fn, int data)
{
    if (!dc_isar_feature(aa64_sve_i8mm, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, data);
    }
    return true;
}

static bool trans_SMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_smmla_b, 0);
}

static bool trans_USMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_usmmla_b, 0);
}

static bool trans_UMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    return do_i8mm_zzzz_ool(s, a, gen_helper_gvec_ummla_b, 0);
}
8650
/* BFDOT (vector): BFloat16 dot product, gated on the BF16 feature.  */
static bool trans_BFDOT_zzzz(DisasContext *s, arg_rrrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot,
                          a->rd, a->rn, a->rm, a->ra, 0);
    }
    return true;
}
8662
/* BFDOT (indexed): the element index is passed as simd_data.  */
static bool trans_BFDOT_zzxz(DisasContext *s, arg_rrxr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfdot_idx,
                          a->rd, a->rn, a->rm, a->ra, a->index);
    }
    return true;
}
8674
/* BFMMLA: BFloat16 matrix multiply-accumulate.  */
static bool trans_BFMMLA(DisasContext *s, arg_rrrr_esz *a)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        gen_gvec_ool_zzzz(s, gen_helper_gvec_bfmmla,
                          a->rd, a->rn, a->rm, a->ra, 0);
    }
    return true;
}
8686
/*
 * BFMLAL[BT] (vector): BFloat16 multiply-add long into single.
 * @sel (bottom/top form) is passed to the helper as simd_data;
 * the helper takes the standard FP status pointer.
 */
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
        unsigned vsz = vec_full_reg_size(s);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, sel,
                           gen_helper_gvec_bfmlal);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool trans_BFMLALB_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_BFMLAL_zzzw(s, a, false);
}

static bool trans_BFMLALT_zzzw(DisasContext *s, arg_rrrr_esz *a)
{
    return do_BFMLAL_zzzw(s, a, true);
}
8716
/*
 * BFMLAL[BT] (indexed): as the vector form, with the element index
 * packed into simd_data above the sel bit.
 */
static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
{
    if (!dc_isar_feature(aa64_sve_bf16, s)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
        unsigned vsz = vec_full_reg_size(s);

        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | sel,
                           gen_helper_gvec_bfmlal_idx);
        tcg_temp_free_ptr(status);
    }
    return true;
}

static bool trans_BFMLALB_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_BFMLAL_zzxw(s, a, false);
}

static bool trans_BFMLALT_zzxw(DisasContext *s, arg_rrxr_esz *a)
{
    return do_BFMLAL_zzxw(s, a, true);
}
8746