qemu/target/arm/translate-sve.c
<<
>>
Prefs
   1/*
   2 * AArch64 SVE translation
   3 *
   4 * Copyright (c) 2018 Linaro, Ltd
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "cpu.h"
  22#include "exec/exec-all.h"
  23#include "tcg-op.h"
  24#include "tcg-op-gvec.h"
  25#include "tcg-gvec-desc.h"
  26#include "qemu/log.h"
  27#include "arm_ldst.h"
  28#include "translate.h"
  29#include "internals.h"
  30#include "exec/helper-proto.h"
  31#include "exec/helper-gen.h"
  32#include "exec/log.h"
  33#include "trace-tcg.h"
  34#include "translate-a64.h"
  35#include "fpu/softfloat.h"
  36
  37
  38typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
  39                         TCGv_i64, uint32_t, uint32_t);
  40
  41typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
  42                                     TCGv_ptr, TCGv_i32);
  43typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
  44                                     TCGv_ptr, TCGv_ptr, TCGv_i32);
  45
  46typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
  47typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
  48                                         TCGv_ptr, TCGv_i64, TCGv_i32);
  49
  50/*
  51 * Helpers for extracting complex instruction fields.
  52 */
  53
/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    /* 31 - clz32(x) is the index of the highest set tsz bit, i.e. the
     * element size; -1 when tsz == 0 (clz32(0) == 32), the unallocated case.
     */
    return 31 - clz32(x);
}

/* See e.g. ASR (immediate, predicated): shift amount for a right shift
 * is (2 * element size in bits) - tszimm.
 */
static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated): shift amount for a left shift
 * is tszimm - (element size in bits).
 */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}

/* Field decode helper: the field is encoded as value - 1
 * (presumably for the decode patterns that use it; confirm in decode-sve).
 */
static inline int plus1(int x)
{
    return x + 1;
}
  78
  79/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  80static inline int expand_imm_sh8s(int x)
  81{
  82    return (int8_t)x << (x & 0x100 ? 8 : 0);
  83}
  84
  85static inline int expand_imm_sh8u(int x)
  86{
  87    return (uint8_t)x << (x & 0x100 ? 8 : 0);
  88}
  89
  90/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  91 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  92 */
  93static inline int msz_dtype(int msz)
  94{
  95    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  96    return dtype[msz];
  97}
  98
  99/*
 100 * Include the generated decoder.
 101 */
 102
 103#include "decode-sve.inc.c"
 104
 105/*
 106 * Implement all of the translator functions referenced by the decoder.
 107 */
 108
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64.
 * (sve_len is the vector length in bytes; a predicate holds one bit
 * per vector byte, hence the division by 8.)
 */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}
 122
 123/* Round up the size of a register to a size allowed by
 124 * the tcg vector infrastructure.  Any operation which uses this
 125 * size may assume that the bits above pred_full_reg_size are zero,
 126 * and must leave them the same way.
 127 *
 128 * Note that this is not needed for the vector registers as they
 129 * are always properly sized for tcg vectors.
 130 */
 131static int size_for_gvec(int size)
 132{
 133    if (size <= 8) {
 134        return 8;
 135    } else {
 136        return QEMU_ALIGN_UP(size, 16);
 137    }
 138}
 139
/* Return the predicate register size rounded up to a size usable
 * by the tcg vector infrastructure (see size_for_gvec above).
 */
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
 144
/* Invoke a vector expander on two Zregs.
 * Returns true unconditionally: even when the access check fails it
 * has raised the exception, so the insn is not unallocated.
 */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.
 * Note: no access check here; the caller is responsible for it
 * (see e.g. do_shift_imm).
 */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}
 182
/* Invoke a vector expander on two Pregs.
 * Predicate registers use the gvec-rounded size (pred_gvec_reg_size),
 * relying on the high bits being zero as documented at size_for_gvec.
 */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}
 228
/* Set the cpu flags as per a return from an SVE helper.
 * The helper's return value packs the flags: N in the value itself,
 * Z-related state in bit 1, C in bit 0; V is always cleared.
 * (Bit assignments per the sve_predtest helpers; confirm in sve_helper.c.)
 */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest psuedofunction.  */

/* PredTest for a single 64-bit predicate word (d) against governing
 * predicate g; flags are set from the helper's packed result.
 */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* PredTest for full predicate registers at env offsets dofs/gofs,
 * spanning `words` 64-bit words.
 */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    /* t carries the word count in and the packed flags result out.  */
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
 265
/* For each element size, the bits within a predicate word that are active.
 * Indexed by esz: bytes use every bit, halfwords every 2nd bit,
 * words every 4th bit, doublewords every 8th bit.
 */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
 271
 272/*
 273 *** SVE Logical - Unpredicated Group
 274 */
 275
/* Bitwise operations are element-size agnostic, so esz 0 is passed.  */
static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    /* ORR Zd, Zn, Zn is the MOV alias; emit a plain vector move.  */
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}
 299
 300/*
 301 *** SVE Integer Arithmetic - Unpredicated Group
 302 */
 303
/* These all map directly onto generic gvec expanders; the saturating
 * forms use the signed/unsigned saturating add/sub expanders.
 */
static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}
 333
 334/*
 335 *** SVE Integer Arithmetic - Binary Predicated Group
 336 */
 337
 338static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 339{
 340    unsigned vsz = vec_full_reg_size(s);
 341    if (fn == NULL) {
 342        return false;
 343    }
 344    if (sve_access_check(s)) {
 345        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
 346                           vec_full_reg_offset(s, a->rn),
 347                           vec_full_reg_offset(s, a->rm),
 348                           pred_full_reg_offset(s, a->pg),
 349                           vsz, vsz, 0, fn);
 350    }
 351    return true;
 352}
 353
/* Select active elememnts from Zn and inactive elements from Zm,
 * storing the result in Zd.
 * Note: no access check here; callers must have performed it.
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}
 370
/* Expand one predicated binary trans_* function per macro use,
 * selecting the b/h/s/d out-of-line helper by element size.
 */
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

/* Bitwise logical, predicated.  */
DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

/* Integer add/subtract, predicated.  */
DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

/* Min/max/difference, predicated.  */
DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

/* Multiply, predicated.  */
DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

/* Shifts, predicated.  */
DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)
 403
/* Division is only provided for 32- and 64-bit elements; the NULL
 * entries make do_zpzz_ool reject byte and halfword encodings.
 */
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

/* SEL: choose active elements from Zn, inactive from Zm.  */
static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}
 427
 428#undef DO_ZPZZ
 429
 430/*
 431 *** SVE Integer Arithmetic - Unary Predicated Group
 432 */
 433
/* Invoke an out-of-line helper on two Zregs (Zd, Zn) governed by
 * predicate Pg.  Returns false for an unsupported element size
 * (fn == NULL); otherwise emits and returns true.
 */
static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}
 448
/* Expand one predicated unary trans_* function per macro use,
 * selecting the b/h/s/d out-of-line helper by element size.
 */
#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)
 466
/* FP absolute value / negation: no byte-sized FP format exists, so the
 * NULL entry rejects esz == 0 via do_zpz_ool.
 */
static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* Sign/zero extensions: only element sizes wider than the source
 * width are allocated, hence the leading NULL entries.
 */
static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* Word extensions: only the doubleword destination is allocated.  */
static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}
 540
 541#undef DO_ZPZ
 542
 543/*
 544 *** SVE Integer Reduction Group
 545 */
 546
/* Signature of a reduction helper: scalar result, Zn, Pg, descriptor.  */
typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Reduce Zn under predicate Pg into a scalar, written to the FP/vector
 * destination register Vd.  Returns false for an unsupported element
 * size (fn == NULL).
 */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    /* write_fp_dreg zeroes the high bits of the destination register.  */
    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}
 579
/* Expand one reduction trans_* function per macro use, selecting
 * the b/h/s/d reduction helper by element size.
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

/* SADDV has no doubleword form (the NULL entry rejects esz == 3);
 * presumably UADDV covers that case -- confirm against the decode.
 */
static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}
 608
 609#undef DO_VPZ
 610
 611/*
 612 *** SVE Shift by Immediate - Predicated Group
 613 */
 614
/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

/* Copy Zn into Zd, storing zeros into inactive elements.
 * Note: no access check here; callers must have performed it.
 */
static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}
 647
/* Invoke an out-of-line helper on Zd, Zn under predicate Pg, passing
 * the immediate (e.g. a shift count) through the descriptor's data field.
 */
static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}
 660
/* ASR (immediate, predicated).  */
static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

/* LSR (immediate, predicated).  */
static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/* LSL (immediate, predicated).  */
static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/* ASRD: arithmetic shift right for divide (rounds toward zero).  */
static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
 730
 731/*
 732 *** SVE Bitwise Shift - Predicated Group
 733 */
 734
/* Shift by wide (doubleword) shift amounts: only b/h/s element sizes
 * are allocated (a doubleword shifted by a doubleword is the ordinary
 * form), hence the esz >= 3 rejection and the 3-entry table.
 */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)
 753
 754/*
 755 *** SVE Bitwise Shift - Unpredicated Group
 756 */
 757
/* Emit an unpredicated shift-by-immediate via a gvec expander.
 * `asr` selects the arithmetic-right-shift clamping behaviour for
 * shift amounts equal to the element size.
 */
static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                /* Logical shift of a full element width zeroes it.  */
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
 784
/* ASR/LSR/LSL (immediate, unpredicated) via the generic shift expanders.  */
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
 799
/* Invoke an out-of-line helper on three Zregs, unpredicated.
 * Returns false for an unsupported element size (fn == NULL).
 */
static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

/* Shift by wide elements, unpredicated: doubleword elements are not
 * allocated (the NULL table entry rejects esz == 3).
 */
#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)           \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)
 830
 831/*
 832 *** SVE Integer Multiply-Add Group
 833 */
 834
/* Invoke an out-of-line helper on four Zregs (Zd, Za, Zn, Zm) governed
 * by predicate Pg -- the multiply-accumulate shape.
 */
static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

/* Expand one multiply-accumulate trans_* function per macro use,
 * selecting the b/h/s/d helper by element size.
 */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)
 864
 865/*
 866 *** SVE Index Generation Group
 867 */
 868
/* Emit INDEX: fill Zd with start, start+incr, start+2*incr, ...
 * The doubleword form takes the 64-bit operands directly; narrower
 * element sizes truncate start/incr to 32 bits for the helper.
 * No access check here; callers must have performed it.
 */
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}
 899
/* INDEX (immediates): both start and increment are immediates.  */
static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

/* INDEX (immediate start, register increment).
 * cpu_reg returns a non-owned TCGv, so only the constant is freed.
 */
static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

/* INDEX (register start, immediate increment).  */
static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

/* INDEX (registers): nothing to free, both operands are cpu regs.  */
static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}
 943
 944/*
 945 *** SVE Stack Allocation Group
 946 */
 947
 948static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
 949{
 950    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 951    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 952    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
 953    return true;
 954}
 955
 956static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
 957{
 958    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 959    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 960    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
 961    return true;
 962}
 963
 964static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
 965{
 966    TCGv_i64 reg = cpu_reg(s, a->rd);
 967    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
 968    return true;
 969}
 970
 971/*
 972 *** SVE Compute Vector Address Group
 973 */
 974
 975static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
 976{
 977    if (sve_access_check(s)) {
 978        unsigned vsz = vec_full_reg_size(s);
 979        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 980                           vec_full_reg_offset(s, a->rn),
 981                           vec_full_reg_offset(s, a->rm),
 982                           vsz, vsz, a->imm, fn);
 983    }
 984    return true;
 985}
 986
/* ADR, packed 32-bit element variant; expansion shared via do_adr.  */
static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}
 991
/* ADR, packed 64-bit element variant; expansion shared via do_adr.  */
static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}
 996
/* ADR, sign-extended 32-bit offset variant; expansion shared via do_adr.  */
static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}
1001
/* ADR, zero-extended 32-bit offset variant; expansion shared via do_adr.  */
static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
1006
1007/*
1008 *** SVE Integer Misc - Unpredicated Group
1009 */
1010
1011static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
1012{
1013    static gen_helper_gvec_2 * const fns[4] = {
1014        NULL,
1015        gen_helper_sve_fexpa_h,
1016        gen_helper_sve_fexpa_s,
1017        gen_helper_sve_fexpa_d,
1018    };
1019    if (a->esz == 0) {
1020        return false;
1021    }
1022    if (sve_access_check(s)) {
1023        unsigned vsz = vec_full_reg_size(s);
1024        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1025                           vec_full_reg_offset(s, a->rn),
1026                           vsz, vsz, 0, fns[a->esz]);
1027    }
1028    return true;
1029}
1030
1031static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
1032{
1033    static gen_helper_gvec_3 * const fns[4] = {
1034        NULL,
1035        gen_helper_sve_ftssel_h,
1036        gen_helper_sve_ftssel_s,
1037        gen_helper_sve_ftssel_d,
1038    };
1039    if (a->esz == 0) {
1040        return false;
1041    }
1042    if (sve_access_check(s)) {
1043        unsigned vsz = vec_full_reg_size(s);
1044        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1045                           vec_full_reg_offset(s, a->rn),
1046                           vec_full_reg_offset(s, a->rm),
1047                           vsz, vsz, 0, fns[a->esz]);
1048    }
1049    return true;
1050}
1051
1052/*
1053 *** SVE Predicate Logical Operations Group
1054 */
1055
/* Expand a predicate logical operation that also sets the NZCV flags
 * (the "s" forms).  The operation itself is described by GVEC_OP; the
 * flags are derived from the result and the governing predicate via
 * do_predtest1/do_predtest.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            /* Stash the governing predicate before it is clobbered.  */
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1106
/* Predicated AND, 64-bit chunk: pd = pn & pm & pg.  */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1112
/* Predicated AND, host-vector chunk: pd = pn & pm & pg.  */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1119
static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* Flag-setting form: no shortcuts, full flags path.  */
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        /* pn & pn & pg degenerates to pn & pg; and if pg == pn as
         * well, to a plain move of pn.
         */
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        /* One operand equals the guard; the extra AND is redundant.  */
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1142
/* Predicated BIC, 64-bit chunk: pd = (pn & ~pm) & pg.  */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1148
/* Predicated BIC, host-vector chunk: pd = (pn & ~pm) & pg.  */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1155
static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* Flag-setting form: full flags path.  */
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        /* (pn & ~pm) & pn == pn & ~pm; the extra AND is redundant.  */
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1172
/* Predicated EOR, 64-bit chunk: pd = (pn ^ pm) & pg.  */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1178
/* Predicated EOR, host-vector chunk: pd = (pn ^ pm) & pg.  */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1185
1186static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1187{
1188    static const GVecGen4 op = {
1189        .fni8 = gen_eor_pg_i64,
1190        .fniv = gen_eor_pg_vec,
1191        .fno = gen_helper_sve_eor_pppp,
1192        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1193    };
1194    if (a->s) {
1195        return do_pppp_flags(s, a, &op);
1196    } else {
1197        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1198    }
1199}
1200
/* Predicated SEL, 64-bit chunk: pd = (pn & pg) | (pm & ~pg).
 * Note that the PN and PM inputs are clobbered as scratch.
 */
static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}
1207
/* Predicated SEL, host-vector chunk: pd = (pn & pg) | (pm & ~pg).
 * Note that the PN and PM inputs are clobbered as scratch.
 */
static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}
1215
1216static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1217{
1218    static const GVecGen4 op = {
1219        .fni8 = gen_sel_pg_i64,
1220        .fniv = gen_sel_pg_vec,
1221        .fno = gen_helper_sve_sel_pppp,
1222        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1223    };
1224    if (a->s) {
1225        return false;
1226    } else {
1227        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1228    }
1229}
1230
/* Predicated ORR, 64-bit chunk: pd = (pn | pm) & pg.  */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1236
/* Predicated ORR, host-vector chunk: pd = (pn | pm) & pg.  */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1243
static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* Flag-setting form: full flags path.  */
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        /* (pn | pn) & pn == pn: reduces to a plain predicate move.  */
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1260
/* Predicated ORN, 64-bit chunk: pd = (pn | ~pm) & pg.  */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1266
/* Predicated ORN, host-vector chunk: pd = (pn | ~pm) & pg.  */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1273
1274static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1275{
1276    static const GVecGen4 op = {
1277        .fni8 = gen_orn_pg_i64,
1278        .fniv = gen_orn_pg_vec,
1279        .fno = gen_helper_sve_orn_pppp,
1280        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1281    };
1282    if (a->s) {
1283        return do_pppp_flags(s, a, &op);
1284    } else {
1285        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1286    }
1287}
1288
/* Predicated NOR, 64-bit chunk: pd = pg & ~(pn | pm).  */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}
1294
/* Predicated NOR, host-vector chunk: pd = pg & ~(pn | pm).  */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
1301
1302static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
1303{
1304    static const GVecGen4 op = {
1305        .fni8 = gen_nor_pg_i64,
1306        .fniv = gen_nor_pg_vec,
1307        .fno = gen_helper_sve_nor_pppp,
1308        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1309    };
1310    if (a->s) {
1311        return do_pppp_flags(s, a, &op);
1312    } else {
1313        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1314    }
1315}
1316
/* Predicated NAND, 64-bit chunk: pd = pg & ~(pn & pm).  */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}
1322
/* Predicated NAND, host-vector chunk: pd = pg & ~(pn & pm).  */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
1329
1330static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
1331{
1332    static const GVecGen4 op = {
1333        .fni8 = gen_nand_pg_i64,
1334        .fniv = gen_nand_pg_vec,
1335        .fno = gen_helper_sve_nand_pppp,
1336        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1337    };
1338    if (a->s) {
1339        return do_pppp_flags(s, a, &op);
1340    } else {
1341        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1342    }
1343}
1344
1345/*
1346 *** SVE Predicate Misc Group
1347 */
1348
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    /* PTEST: set NZCV from Pn under governing predicate Pg.  */
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            /* Single 64-bit word: test inline in temps.  */
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1372
/* See the ARM pseudocode DecodePredCount: map a 5-bit pattern to the
 * number of active elements, given FULLSZ bytes of vector and an
 * element size of 1 << ESZ bytes.  Fixed-count patterns yield zero
 * when the vector cannot hold that many elements.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern == 0x0) {               /* POW2 */
        return pow2floor(elements);
    } else if (pattern >= 0x1 && pattern <= 0x8) {  /* VL1..VL8 */
        bound = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {  /* VL16..VL256 */
        bound = 16 << (pattern - 0x9);
    } else if (pattern == 0x1d) {       /* MUL4 */
        return elements - elements % 4;
    } else if (pattern == 0x1e) {       /* MUL3 */
        return elements - elements % 3;
    } else if (pattern == 0x1f) {       /* ALL */
        return elements;
    } else {                            /* #uimm5 */
        return 0;
    }

    /* Fixed counts only apply when they fit in the vector.  */
    return elements >= bound ? bound : 0;
}
1410
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        /* No active elements: clear the whole register.  */
        lastword = word = 0;
        setsz = fullsz;
    } else {
        /* WORD is the repeated per-element bit pattern; LASTWORD is
         * the final, possibly partial, word.
         */
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate fits in one 64-bit store.  */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* No partial final word: try a single gvec dup when the set
         * region is a size the gvec machinery can represent exactly.
         */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    /* Fall back to word-at-a-time stores: full words, then the
     * partial last word, then zeros to the end of the register.
     */
    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1490
/* PTRUE (and PTRUES when a->s): set predicate elements per pattern.  */
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}
1495
static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}
1501
static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}
1507
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND:
     * Pd = FFR & FFR with Pg governing, which reads the FFR into Pd.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}
1519
/* RDFFR (unpredicated): Pd = FFR.  */
static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}
1524
/* WRFFR: FFR = Pn.  */
static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
1529
/* Common expansion for PFIRST and PNEXT: call GEN_FN on (Pd, Pg),
 * then copy the returned flags into NZCV.  The descriptor packs the
 * predicate size in words with the element size in SIMD_DATA.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    /* The helper reuses T: descriptor in, flags out.  */
    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}
1558
/* PFIRST: expansion shared with PNEXT via do_pfirst_pnext.  */
static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}
1563
/* PNEXT: expansion shared with PFIRST via do_pfirst_pnext.  */
static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
1568
1569/*
1570 *** SVE Element Count Group
1571 */
1572
/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 * U selects unsigned saturation, D selects subtraction.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtracting a positive value can only underflow.  */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        /* Adding a positive value can only overflow.  */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    /* Clamp to the bound if the 64-bit result went past it.  */
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
1602
/* Similarly with 64-bit values: saturating REG +/- VAL, VAL known
 * positive; U selects unsigned saturation, D selects subtraction.
 */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: clamp to 0 when reg < val.  */
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            /* Unsigned add: clamp to ~0 when the sum wrapped.  */
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction:
             * overflow iff operands differ in sign and the result's
             * sign differs from the minuend's.
             */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition:
             * overflow iff operands share a sign and the result's
             * sign differs from it.
             */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
1649
/* Similarly with a vector and a scalar operand: Zd = Zn +/- VAL with
 * saturation per element.  The helpers implement only addition, so
 * subtraction is expressed by negating VAL; for the unsigned 64-bit
 * case a dedicated subtract helper exists instead.
 */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        /* Byte elements: helper takes a 32-bit operand.  */
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        /* Halfword elements: helper takes a 32-bit operand.  */
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        /* Word elements: helper takes a 64-bit operand.  */
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            /* Unsigned 64-bit: negation would not fit, so a separate
             * subtract helper is used.
             */
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
1734
1735static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
1736{
1737    if (sve_access_check(s)) {
1738        unsigned fullsz = vec_full_reg_size(s);
1739        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1740        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1741    }
1742    return true;
1743}
1744
1745static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
1746{
1747    if (sve_access_check(s)) {
1748        unsigned fullsz = vec_full_reg_size(s);
1749        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1750        int inc = numelem * a->imm * (a->d ? -1 : 1);
1751        TCGv_i64 reg = cpu_reg(s, a->rd);
1752
1753        tcg_gen_addi_i64(reg, reg, inc);
1754    }
1755    return true;
1756}
1757
static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    /* Saturating INC/DEC of a 32-bit view of Xd.  */
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        /* A zero count still performs the 32->64 extension of Xd.  */
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}
1783
1784static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
1785{
1786    if (!sve_access_check(s)) {
1787        return true;
1788    }
1789
1790    unsigned fullsz = vec_full_reg_size(s);
1791    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1792    int inc = numelem * a->imm;
1793    TCGv_i64 reg = cpu_reg(s, a->rd);
1794
1795    if (inc != 0) {
1796        TCGv_i64 t = tcg_const_i64(inc);
1797        do_sat_addsub_64(reg, t, a->u, a->d);
1798        tcg_temp_free_i64(t);
1799    }
1800    return true;
1801}
1802
static bool trans_INCDEC_v(DisasContext *s, arg_incdec_cnt *a)
{
    /* INC/DEC (vector): Zd = Zn +/- count * imm per element.
     * Byte elements (esz == 0) are unallocated for the vector form.
     */
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm * (a->d ? -1 : 1);

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        /* Zero count: plain register move.  NOTE(review): do_mov_z is
         * presumably responsible for its own sve_access_check here --
         * confirm against its definition earlier in this file.
         */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
1826
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    /* Saturating INC/DEC (vector).  Byte elements (esz == 0) are
     * unallocated for the vector form.
     */
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        /* Zero count: plain register move.  NOTE(review): do_mov_z is
         * presumably responsible for its own sve_access_check here --
         * confirm against its definition earlier in this file.
         */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
1848
1849/*
1850 *** SVE Bitwise Immediate Group
1851 */
1852
1853static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1854{
1855    uint64_t imm;
1856    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1857                                extract32(a->dbm, 0, 6),
1858                                extract32(a->dbm, 6, 6))) {
1859        return false;
1860    }
1861    if (sve_access_check(s)) {
1862        unsigned vsz = vec_full_reg_size(s);
1863        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1864                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1865    }
1866    return true;
1867}
1868
/* AND (vector, bitmask immediate); expansion shared via do_zz_dbm.  */
static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}
1873
/* ORR (vector, bitmask immediate); expansion shared via do_zz_dbm.  */
static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}
1878
/* EOR (vector, bitmask immediate); expansion shared via do_zz_dbm.  */
static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}
1883
1884static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
1885{
1886    uint64_t imm;
1887    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1888                                extract32(a->dbm, 0, 6),
1889                                extract32(a->dbm, 6, 6))) {
1890        return false;
1891    }
1892    if (sve_access_check(s)) {
1893        do_dupi_z(s, a->rd, imm);
1894    }
1895    return true;
1896}
1897
1898/*
1899 *** SVE Integer Wide Immediate - Predicated Group
1900 */
1901
/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).  VAL holds the value to
 * copy into active elements of Zd; inactive elements keep Zn.
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}
1930
1931static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
1932{
1933    if (a->esz == 0) {
1934        return false;
1935    }
1936    if (sve_access_check(s)) {
1937        /* Decode the VFP immediate.  */
1938        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1939        TCGv_i64 t_imm = tcg_const_i64(imm);
1940        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1941        tcg_temp_free_i64(t_imm);
1942    }
1943    return true;
1944}
1945
static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    /* CPY (immediate, merging).  Reject the combination of byte
     * elements with insn bit 13 set (unallocated encoding).
     */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
1958
static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    /* CPY (immediate, zeroing): active elements get the immediate,
     * inactive elements are zeroed.
     */
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    /* Byte elements with insn bit 13 set: unallocated encoding.  */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
1979
1980/*
1981 *** SVE Permute Extract Group
1982 */
1983
static bool trans_EXT(DisasContext *s, arg_EXT *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    /* An immediate at or beyond the vector length selects offset 0.  */
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        /* Copy the tail of Zn first, then the head of Zm behind it.  */
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Out-of-line fallback; n_ofs rides in the descriptor data.  */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
2013
2014/*
2015 *** SVE Permute - Unpredicated Group
2016 */
2017
2018static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
2019{
2020    if (sve_access_check(s)) {
2021        unsigned vsz = vec_full_reg_size(s);
2022        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2023                             vsz, vsz, cpu_reg_sp(s, a->rn));
2024    }
2025    return true;
2026}
2027
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    /* No set bit in the low 5 bits of imm means no valid element size.  */
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        /* The element size is the position of the lowest set bit of
         * imm; the bits above that bit encode the element index.
         */
        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            /* In range: broadcast element [index] of Zn to Zd.  */
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /* Index beyond this vector length: the result is zero.  */
            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2050
2051static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2052{
2053    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2054    static gen_insr * const fns[4] = {
2055        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2056        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2057    };
2058    unsigned vsz = vec_full_reg_size(s);
2059    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2060    TCGv_ptr t_zd = tcg_temp_new_ptr();
2061    TCGv_ptr t_zn = tcg_temp_new_ptr();
2062
2063    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2064    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2065
2066    fns[a->esz](t_zd, t_zn, val, desc);
2067
2068    tcg_temp_free_ptr(t_zd);
2069    tcg_temp_free_ptr(t_zn);
2070    tcg_temp_free_i32(desc);
2071}
2072
2073static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2074{
2075    if (sve_access_check(s)) {
2076        TCGv_i64 t = tcg_temp_new_i64();
2077        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2078        do_insr_i64(s, a, t);
2079        tcg_temp_free_i64(t);
2080    }
2081    return true;
2082}
2083
2084static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
2085{
2086    if (sve_access_check(s)) {
2087        do_insr_i64(s, a, cpu_reg(s, a->rm));
2088    }
2089    return true;
2090}
2091
2092static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
2093{
2094    static gen_helper_gvec_2 * const fns[4] = {
2095        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2096        gen_helper_sve_rev_s, gen_helper_sve_rev_d
2097    };
2098
2099    if (sve_access_check(s)) {
2100        unsigned vsz = vec_full_reg_size(s);
2101        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2102                           vec_full_reg_offset(s, a->rn),
2103                           vsz, vsz, 0, fns[a->esz]);
2104    }
2105    return true;
2106}
2107
2108static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
2109{
2110    static gen_helper_gvec_3 * const fns[4] = {
2111        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2112        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2113    };
2114
2115    if (sve_access_check(s)) {
2116        unsigned vsz = vec_full_reg_size(s);
2117        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2118                           vec_full_reg_offset(s, a->rn),
2119                           vec_full_reg_offset(s, a->rm),
2120                           vsz, vsz, 0, fns[a->esz]);
2121    }
2122    return true;
2123}
2124
2125static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
2126{
2127    static gen_helper_gvec_2 * const fns[4][2] = {
2128        { NULL, NULL },
2129        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2130        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2131        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2132    };
2133
2134    if (a->esz == 0) {
2135        return false;
2136    }
2137    if (sve_access_check(s)) {
2138        unsigned vsz = vec_full_reg_size(s);
2139        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2140                           vec_full_reg_offset(s, a->rn)
2141                           + (a->h ? vsz / 2 : 0),
2142                           vsz, vsz, 0, fns[a->esz][a->u]);
2143    }
2144    return true;
2145}
2146
2147/*
2148 *** SVE Permute - Predicates Group
2149 */
2150
/* Expand a three-operand predicate permute (ZIP/UZP/TRN) out of line.
 * HIGH_ODD selects the second form of each pair and is passed to the
 * helper through the descriptor.
 */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    /* Hand-rolled descriptor: size-2 in the low bits, then esz and
     * the high_odd flag in the data field.
     */
    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}
2187
/* Expand a two-operand predicate permute (REV, PUNPK) out of line.
 * HIGH_ODD selects the high form and is passed via the descriptor.
 */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */

    /* Hand-rolled descriptor: size-2, then esz and the high_odd flag.  */
    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}
2221
2222static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
2223{
2224    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2225}
2226
2227static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
2228{
2229    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2230}
2231
2232static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
2233{
2234    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2235}
2236
2237static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
2238{
2239    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2240}
2241
2242static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
2243{
2244    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2245}
2246
2247static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
2248{
2249    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2250}
2251
2252static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
2253{
2254    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2255}
2256
2257static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
2258{
2259    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2260}
2261
2262static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
2263{
2264    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2265}
2266
2267/*
2268 *** SVE Permute - Interleaving Group
2269 */
2270
2271static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2272{
2273    static gen_helper_gvec_3 * const fns[4] = {
2274        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2275        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2276    };
2277
2278    if (sve_access_check(s)) {
2279        unsigned vsz = vec_full_reg_size(s);
2280        unsigned high_ofs = high ? vsz / 2 : 0;
2281        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2282                           vec_full_reg_offset(s, a->rn) + high_ofs,
2283                           vec_full_reg_offset(s, a->rm) + high_ofs,
2284                           vsz, vsz, 0, fns[a->esz]);
2285    }
2286    return true;
2287}
2288
2289static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2290                            gen_helper_gvec_3 *fn)
2291{
2292    if (sve_access_check(s)) {
2293        unsigned vsz = vec_full_reg_size(s);
2294        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2295                           vec_full_reg_offset(s, a->rn),
2296                           vec_full_reg_offset(s, a->rm),
2297                           vsz, vsz, data, fn);
2298    }
2299    return true;
2300}
2301
static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, true);
}

/* Out-of-line helpers for UZP1/UZP2, indexed by element size.  */
static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
{
    /* The data operand distinguishes the forms: 0 for UZP1.  */
    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
}

static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
{
    /* ... and one element size in bytes for UZP2.  */
    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
}

/* Out-of-line helpers for TRN1/TRN2, indexed by element size.  */
static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
{
    /* As for UZP: data 0 selects the TRN1 form.  */
    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
}

static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
{
    /* ... and one element size in bytes selects TRN2.  */
    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
}
2341
2342/*
2343 *** SVE Permute Vector - Predicated Group
2344 */
2345
static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
{
    /* COMPACT: no helper exists for byte or halfword elements.  */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2353
2354/* Call the helper that computes the ARM LastActiveElement pseudocode
2355 * function, scaled by the element size.  This includes the not found
2356 * indication; e.g. not found for esz=3 is -8.
2357 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned vsz = pred_full_reg_size(s);
    unsigned desc;

    /* Hand-rolled descriptor: predicate size - 2, plus esz.  */
    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}
2379
2380/* Increment LAST to the offset of the next element in the vector,
2381 * wrapping around to 0.
2382 */
2383static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2384{
2385    unsigned vsz = vec_full_reg_size(s);
2386
2387    tcg_gen_addi_i32(last, last, 1 << esz);
2388    if (is_power_of_2(vsz)) {
2389        tcg_gen_andi_i32(last, last, vsz - 1);
2390    } else {
2391        TCGv_i32 max = tcg_const_i32(vsz);
2392        TCGv_i32 zero = tcg_const_i32(0);
2393        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2394        tcg_temp_free_i32(max);
2395        tcg_temp_free_i32(zero);
2396    }
2397}
2398
2399/* If LAST < 0, set LAST to the offset of the last element in the vector.  */
2400static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2401{
2402    unsigned vsz = vec_full_reg_size(s);
2403
2404    if (is_power_of_2(vsz)) {
2405        tcg_gen_andi_i32(last, last, vsz - 1);
2406    } else {
2407        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2408        TCGv_i32 zero = tcg_const_i32(0);
2409        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2410        tcg_temp_free_i32(max);
2411        tcg_temp_free_i32(zero);
2412    }
2413}
2414
2415/* Load an unsigned element of ESZ from BASE+OFS.  */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    /* Returns a new temporary; the caller is responsible for freeing.  */
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:     /* byte */
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:     /* halfword */
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:     /* word */
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:     /* doubleword */
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}
2438
2439/* Load an unsigned element of ESZ from RM[LAST].  */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering.  See vec_reg_offset.  */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    /* Returns a new temporary; the caller must free it.  */
    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2464
2465/* Compute CLAST for a Zreg.  */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* LAST must be a local temp: it is live across the branch below.  */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        /* The A form selects the element after the last active one.  */
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    /* Broadcast the selected element to all of Zd.  */
    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        /* No active element: copy Zn to Zd instead.  */
        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
2512
static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
{
    /* CLASTA (vectors): before=false form.  */
    return do_clast_vector(s, a, false);
}

static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
{
    /* CLASTB (vectors): before=true form.  */
    return do_clast_vector(s, a, true);
}
2522
2523/* Compute CLAST for a scalar.  */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp, zero;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing.  */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    /* Keep REG_VAL unchanged when no element was active (cmp < 0).  */
    zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);

    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
2555
2556/* Compute CLAST for a Vreg.  */
2557static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2558{
2559    if (sve_access_check(s)) {
2560        int esz = a->esz;
2561        int ofs = vec_reg_offset(s, a->rd, 0, esz);
2562        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2563
2564        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2565        write_fp_dreg(s, a->rd, reg);
2566        tcg_temp_free_i64(reg);
2567    }
2568    return true;
2569}
2570
static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    /* CLASTA with a SIMD&FP scalar destination.  */
    return do_clast_fp(s, a, false);
}

static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    /* CLASTB with a SIMD&FP scalar destination.  */
    return do_clast_fp(s, a, true);
}
2580
2581/* Compute CLAST for a Xreg.  */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Narrow the current Xd value to the element size: this is what is
     * retained by do_clast_scalar when no predicate element is active.
     */
    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}
2610
static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    /* CLASTA with a general register destination.  */
    return do_clast_general(s, a, false);
}

static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    /* CLASTB with a general register destination.  */
    return do_clast_general(s, a, true);
}
2620
2621/* Compute LAST for a scalar.  */
2622static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2623                               int pg, int rm, bool before)
2624{
2625    TCGv_i32 last = tcg_temp_new_i32();
2626    TCGv_i64 ret;
2627
2628    find_last_active(s, last, esz, pg);
2629    if (before) {
2630        wrap_last_active(s, last, esz);
2631    } else {
2632        incr_last_active(s, last, esz);
2633    }
2634
2635    ret = load_last_active(s, last, rm, esz);
2636    tcg_temp_free_i32(last);
2637    return ret;
2638}
2639
2640/* Compute LAST for a Vreg.  */
2641static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2642{
2643    if (sve_access_check(s)) {
2644        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2645        write_fp_dreg(s, a->rd, val);
2646        tcg_temp_free_i64(val);
2647    }
2648    return true;
2649}
2650
static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    /* LASTA with a SIMD&FP scalar destination.  */
    return do_last_fp(s, a, false);
}

static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    /* LASTB with a SIMD&FP scalar destination.  */
    return do_last_fp(s, a, true);
}
2660
2661/* Compute LAST for a Xreg.  */
2662static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2663{
2664    if (sve_access_check(s)) {
2665        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2666        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2667        tcg_temp_free_i64(val);
2668    }
2669    return true;
2670}
2671
static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    /* LASTA with a general register destination.  */
    return do_last_general(s, a, false);
}

static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    /* LASTB with a general register destination.  */
    return do_last_general(s, a, true);
}
2681
2682static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2683{
2684    if (sve_access_check(s)) {
2685        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2686    }
2687    return true;
2688}
2689
2690static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2691{
2692    if (sve_access_check(s)) {
2693        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2694        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2695        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2696        tcg_temp_free_i64(t);
2697    }
2698    return true;
2699}
2700
static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
{
    /* REVB: no byte-sized element form exists.  */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2711
static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
{
    /* REVH: only word and doubleword element forms exist.  */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2722
2723static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
2724{
2725    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2726}
2727
static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
{
    /* RBIT: provided for every element size.  */
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2738
2739static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
2740{
2741    if (sve_access_check(s)) {
2742        unsigned vsz = vec_full_reg_size(s);
2743        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2744                           vec_full_reg_offset(s, a->rn),
2745                           vec_full_reg_offset(s, a->rm),
2746                           pred_full_reg_offset(s, a->pg),
2747                           vsz, vsz, a->esz, gen_helper_sve_splice);
2748    }
2749    return true;
2750}
2751
2752/*
2753 *** SVE Integer Compare - Vectors Group
2754 */
2755
/* Expand a predicated comparison of two vectors, updating the condition
 * flags from the result.  Returns false when no helper exists for the
 * requested element size.
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    /* T carries the descriptor into the helper and the predicate
     * test result back out.
     */
    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    /* Update the condition flags from the helper's result.  */
    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
2794
/* Expand a predicated vector-vector comparison for all element sizes.  */
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ

/* As above, but the _ppzw helpers have no doubleword form (NULL).  */
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
        gen_helper_sve_##name##_ppzw_s, NULL                              \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
2836
2837/*
2838 *** SVE Integer Compare - Immediate Groups
2839 */
2840
/* Expand a predicated comparison of a vector against an immediate,
 * updating the condition flags from the result.  Returns false when
 * no helper exists for the requested element size.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    /* The immediate rides in the descriptor data field; T carries the
     * descriptor in and the predicate test result out.
     */
    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    /* Update the condition flags from the helper's result.  */
    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
2876
/* Expand a predicated vector-vs-immediate comparison for all sizes.  */
#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
    };                                                                    \
    return do_ppzi_flags(s, a, fns[a->esz]);                              \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
2899
2900/*
2901 *** SVE Partition Break Group
2902 */
2903
/* Expand a three-operand predicate break; A->S selects the
 * flag-setting variant FN_S over FN.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    /* Hand-rolled descriptor; doubles as the flags result for FN_S.  */
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        fn_s(t, d, n, m, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, m, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
2938
/* Expand a two-operand predicate break; A->S selects the
 * flag-setting variant FN_S over FN.
 */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    /* Hand-rolled descriptor; doubles as the flags result for FN_S.  */
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        fn_s(t, d, n, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
2970
2971static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
2972{
2973    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2974}
2975
2976static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
2977{
2978    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2979}
2980
2981static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
2982{
2983    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2984}
2985
2986static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
2987{
2988    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2989}
2990
2991static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
2992{
2993    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2994}
2995
/* BRKB, zeroing form: two-operand BRK expander with the _z helpers.  */
static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}
3000
/* BRKN: propagate break to next partition; uses the two-operand expander.  */
static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
3005
3006/*
3007 *** SVE Predicate Count Group
3008 */
3009
/* Deposit into VAL the count of active elements of predicate PN,
 * governed by predicate PG, for element size ESZ.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        /* The whole predicate fits in one i64: count inline.  */
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            /* Restrict to the governing predicate.  */
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        tcg_gen_ctpop_i64(val, val);
    } else {
        /* Larger predicates: call the out-of-line helper.  */
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc;
        TCGv_i32 t_desc;

        /* Descriptor: predicate size - 2, with esz in the data field.  */
        desc = psz - 2;
        desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
        t_desc = tcg_const_i32(desc);

        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
        tcg_temp_free_i32(t_desc);
    }
}
3051
3052static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
3053{
3054    if (sve_access_check(s)) {
3055        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3056    }
3057    return true;
3058}
3059
3060static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
3061{
3062    if (sve_access_check(s)) {
3063        TCGv_i64 reg = cpu_reg(s, a->rd);
3064        TCGv_i64 val = tcg_temp_new_i64();
3065
3066        do_cntp(s, val, a->esz, a->pg, a->pg);
3067        if (a->d) {
3068            tcg_gen_sub_i64(reg, reg, val);
3069        } else {
3070            tcg_gen_add_i64(reg, reg, val);
3071        }
3072        tcg_temp_free_i64(val);
3073    }
3074    return true;
3075}
3076
3077static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3078{
3079    if (a->esz == 0) {
3080        return false;
3081    }
3082    if (sve_access_check(s)) {
3083        unsigned vsz = vec_full_reg_size(s);
3084        TCGv_i64 val = tcg_temp_new_i64();
3085        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3086
3087        do_cntp(s, val, a->esz, a->pg, a->pg);
3088        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3089                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3090    }
3091    return true;
3092}
3093
3094static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3095{
3096    if (sve_access_check(s)) {
3097        TCGv_i64 reg = cpu_reg(s, a->rd);
3098        TCGv_i64 val = tcg_temp_new_i64();
3099
3100        do_cntp(s, val, a->esz, a->pg, a->pg);
3101        do_sat_addsub_32(reg, val, a->u, a->d);
3102    }
3103    return true;
3104}
3105
3106static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3107{
3108    if (sve_access_check(s)) {
3109        TCGv_i64 reg = cpu_reg(s, a->rd);
3110        TCGv_i64 val = tcg_temp_new_i64();
3111
3112        do_cntp(s, val, a->esz, a->pg, a->pg);
3113        do_sat_addsub_64(reg, val, a->u, a->d);
3114    }
3115    return true;
3116}
3117
3118static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3119{
3120    if (a->esz == 0) {
3121        return false;
3122    }
3123    if (sve_access_check(s)) {
3124        TCGv_i64 val = tcg_temp_new_i64();
3125        do_cntp(s, val, a->esz, a->pg, a->pg);
3126        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3127    }
3128    return true;
3129}
3130
3131/*
3132 *** SVE Integer Compare Scalars Group
3133 */
3134
/* CTERMEQ/CTERMNE: compare two scalars and set NZCV for loop
 * termination.  NF is set from the comparison; VF is derived from
 * NF and the existing CF (which this insn does not modify).
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    /* a->ne selects CTERMNE vs CTERMEQ; a->sf selects 64- vs 32-bit.  */
    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    /* NF temporarily holds the boolean comparison result (0 or 1).  */
    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF.  */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31.  */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3159
/* WHILE{LT,LE,LO,LS}: construct a predicate that is true for as many
 * elements as the scalar loop condition (rn <cond> rm) holds, then set
 * the flags from that predicate via the sve_while helper.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned desc, vsz = vec_full_reg_size(s);
    TCGCond cond;

    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    /* For 32-bit operands, extend per the U (unsigned) flag.  */
    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    tcg_gen_sub_i64(t0, op1, op0);

    /* tmax = number of elements of size esz in the vector.  */
    tmax = tcg_const_i64(vsz >> a->esz);
    if (a->eq) {
        /* Equality means one more iteration.  */
        tcg_gen_addi_i64(t0, t0, 1);

        /* If op1 is max (un)signed integer (and the only time the addition
         * above could overflow), then we produce an all-true predicate by
         * setting the count to the vector length.  This is because the
         * pseudocode is described as an increment + compare loop, and the
         * max integer would always compare true.
         */
        tcg_gen_movi_i64(t1, (a->sf
                              ? (a->u ? UINT64_MAX : INT64_MAX)
                              : (a->u ? UINT32_MAX : INT32_MAX)));
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum.  */
    tcg_gen_umin_i64(t0, t0, tmax);
    tcg_temp_free_i64(tmax);

    /* Set the count to zero if the condition is false.  */
    cond = (a->u
            ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
            : (a->eq ? TCG_COND_LE : TCG_COND_LT));
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type.  */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits.  */
    tcg_gen_shli_i32(t2, t2, a->esz);

    /* Descriptor: predicate size - 2, with esz in the data field,
     * as for the other predicate helpers above.
     */
    desc = (vsz / 8) - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    t3 = tcg_const_i32(desc);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    /* The helper writes Pd and returns the packed NZCV in t2.  */
    gen_helper_sve_while(t2, ptr, t2, t3);
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    tcg_temp_free_i32(t3);
    return true;
}
3244
3245/*
3246 *** SVE Integer Wide Immediate - Unpredicated Group
3247 */
3248
3249static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3250{
3251    if (a->esz == 0) {
3252        return false;
3253    }
3254    if (sve_access_check(s)) {
3255        unsigned vsz = vec_full_reg_size(s);
3256        int dofs = vec_full_reg_offset(s, a->rd);
3257        uint64_t imm;
3258
3259        /* Decode the VFP immediate.  */
3260        imm = vfp_expand_imm(a->esz, a->imm);
3261        imm = dup_const(a->esz, imm);
3262
3263        tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3264    }
3265    return true;
3266}
3267
3268static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3269{
3270    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3271        return false;
3272    }
3273    if (sve_access_check(s)) {
3274        unsigned vsz = vec_full_reg_size(s);
3275        int dofs = vec_full_reg_offset(s, a->rd);
3276
3277        tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3278    }
3279    return true;
3280}
3281
3282static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
3283{
3284    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3285        return false;
3286    }
3287    if (sve_access_check(s)) {
3288        unsigned vsz = vec_full_reg_size(s);
3289        tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3290                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3291    }
3292    return true;
3293}
3294
/* SUB (vector, immediate) is ADD of the negated immediate.  Note this
 * mutates the decoded arg in place before delegating.
 */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3300
/* SUBR (vector, immediate): reversed subtract, Zd = imm - Zn.
 * The .scalar_first flag in each GVecGen2s entry makes the immediate
 * the first (minuend) operand of the subtraction.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    /* One expansion description per element size.  */
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opc = INDEX_op_sub_vec,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opc = INDEX_op_sub_vec,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opc = INDEX_op_sub_vec,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opc = INDEX_op_sub_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    /* Reject the shifted-immediate encoding (bit 13) for byte elements.  */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, c, &op[a->esz]);
        tcg_temp_free_i64(c);
    }
    return true;
}
3344
3345static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
3346{
3347    if (sve_access_check(s)) {
3348        unsigned vsz = vec_full_reg_size(s);
3349        tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3350                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3351    }
3352    return true;
3353}
3354
3355static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3356{
3357    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3358        return false;
3359    }
3360    if (sve_access_check(s)) {
3361        TCGv_i64 val = tcg_const_i64(a->imm);
3362        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3363        tcg_temp_free_i64(val);
3364    }
3365    return true;
3366}
3367
/* SQADD (immediate): u = false (signed), d = false (add).  */
static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, false);
}
3372
/* UQADD (immediate): u = true (unsigned), d = false (add).  */
static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, false);
}
3377
/* SQSUB (immediate): u = false (signed), d = true (subtract).  */
static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, true);
}
3382
/* UQSUB (immediate): u = true (unsigned), d = true (subtract).  */
static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, true);
}
3387
3388static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3389{
3390    if (sve_access_check(s)) {
3391        unsigned vsz = vec_full_reg_size(s);
3392        TCGv_i64 c = tcg_const_i64(a->imm);
3393
3394        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3395                            vec_full_reg_offset(s, a->rn),
3396                            c, vsz, vsz, 0, fn);
3397        tcg_temp_free_i64(c);
3398    }
3399    return true;
3400}
3401
/* Emit trans_<NAME>_zzi, dispatching by element size to the
 * sve_<name>i_{b,h,s,d} out-of-line helpers via do_zzi_ool.
 */
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3418
3419static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
3420{
3421    static gen_helper_gvec_3 * const fns[2][2] = {
3422        { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3423        { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3424    };
3425
3426    if (sve_access_check(s)) {
3427        unsigned vsz = vec_full_reg_size(s);
3428        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3429                           vec_full_reg_offset(s, a->rn),
3430                           vec_full_reg_offset(s, a->rm),
3431                           vsz, vsz, 0, fns[a->u][a->sz]);
3432    }
3433    return true;
3434}
3435
3436static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
3437{
3438    static gen_helper_gvec_3 * const fns[2][2] = {
3439        { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3440        { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3441    };
3442
3443    if (sve_access_check(s)) {
3444        unsigned vsz = vec_full_reg_size(s);
3445        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3446                           vec_full_reg_offset(s, a->rn),
3447                           vec_full_reg_offset(s, a->rm),
3448                           vsz, vsz, a->index, fns[a->u][a->sz]);
3449    }
3450    return true;
3451}
3452
3453
3454/*
3455 *** SVE Floating Point Multiply-Add Indexed Group
3456 */
3457
3458static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
3459{
3460    static gen_helper_gvec_4_ptr * const fns[3] = {
3461        gen_helper_gvec_fmla_idx_h,
3462        gen_helper_gvec_fmla_idx_s,
3463        gen_helper_gvec_fmla_idx_d,
3464    };
3465
3466    if (sve_access_check(s)) {
3467        unsigned vsz = vec_full_reg_size(s);
3468        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3469        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3470                           vec_full_reg_offset(s, a->rn),
3471                           vec_full_reg_offset(s, a->rm),
3472                           vec_full_reg_offset(s, a->ra),
3473                           status, vsz, vsz, (a->index << 1) | a->sub,
3474                           fns[a->esz - 1]);
3475        tcg_temp_free_ptr(status);
3476    }
3477    return true;
3478}
3479
3480/*
3481 *** SVE Floating Point Multiply Indexed Group
3482 */
3483
3484static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
3485{
3486    static gen_helper_gvec_3_ptr * const fns[3] = {
3487        gen_helper_gvec_fmul_idx_h,
3488        gen_helper_gvec_fmul_idx_s,
3489        gen_helper_gvec_fmul_idx_d,
3490    };
3491
3492    if (sve_access_check(s)) {
3493        unsigned vsz = vec_full_reg_size(s);
3494        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3495        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3496                           vec_full_reg_offset(s, a->rn),
3497                           vec_full_reg_offset(s, a->rm),
3498                           status, vsz, vsz, a->index, fns[a->esz - 1]);
3499        tcg_temp_free_ptr(status);
3500    }
3501    return true;
3502}
3503
3504/*
3505 *** SVE Floating Point Fast Reduction Group
3506 */
3507
typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);

/* Expand a predicated FP horizontal reduction of Zn, writing the
 * scalar result to the FP register Rd via write_fp_dreg.
 */
static void do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    /* The desc data field carries pow2ceil(vsz) — presumably sized for
     * the helper's binary reduction; confirm against sve_helper.c.
     */
    unsigned p2vsz = pow2ceil(vsz);
    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = get_fpstatus_ptr(a->esz == MO_16);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);
    tcg_temp_free_i32(t_desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
}
3537
/* Emit trans_<NAME> as a predicated FP reduction over the h/s/d
 * helpers; byte elements (esz == 0) are invalid for FP.
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
{                                                                        \
    static gen_helper_fp_reduce * const fns[3] = {                       \
        gen_helper_sve_##name##_h,                                       \
        gen_helper_sve_##name##_s,                                       \
        gen_helper_sve_##name##_d,                                       \
    };                                                                   \
    if (a->esz == 0) {                                                   \
        return false;                                                    \
    }                                                                    \
    if (sve_access_check(s)) {                                           \
        do_reduce(s, a, fns[a->esz - 1]);                                \
    }                                                                    \
    return true;                                                         \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
3560
3561/*
3562 *** SVE Floating Point Unary Operations - Unpredicated Group
3563 */
3564
3565static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3566{
3567    unsigned vsz = vec_full_reg_size(s);
3568    TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3569
3570    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3571                       vec_full_reg_offset(s, a->rn),
3572                       status, vsz, vsz, 0, fn);
3573    tcg_temp_free_ptr(status);
3574}
3575
3576static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3577{
3578    static gen_helper_gvec_2_ptr * const fns[3] = {
3579        gen_helper_gvec_frecpe_h,
3580        gen_helper_gvec_frecpe_s,
3581        gen_helper_gvec_frecpe_d,
3582    };
3583    if (a->esz == 0) {
3584        return false;
3585    }
3586    if (sve_access_check(s)) {
3587        do_zz_fp(s, a, fns[a->esz - 1]);
3588    }
3589    return true;
3590}
3591
3592static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3593{
3594    static gen_helper_gvec_2_ptr * const fns[3] = {
3595        gen_helper_gvec_frsqrte_h,
3596        gen_helper_gvec_frsqrte_s,
3597        gen_helper_gvec_frsqrte_d,
3598    };
3599    if (a->esz == 0) {
3600        return false;
3601    }
3602    if (sve_access_check(s)) {
3603        do_zz_fp(s, a, fns[a->esz - 1]);
3604    }
3605    return true;
3606}
3607
3608/*
3609 *** SVE Floating Point Compare with Zero Group
3610 */
3611
3612static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3613                      gen_helper_gvec_3_ptr *fn)
3614{
3615    unsigned vsz = vec_full_reg_size(s);
3616    TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3617
3618    tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3619                       vec_full_reg_offset(s, a->rn),
3620                       pred_full_reg_offset(s, a->pg),
3621                       status, vsz, vsz, 0, fn);
3622    tcg_temp_free_ptr(status);
3623}
3624
/* Emit trans_<NAME> for the FP compare-with-zero insns over the h/s/d
 * helpers; byte elements are invalid for FP.
 */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
{                                                                 \
    static gen_helper_gvec_3_ptr * const fns[3] = {               \
        gen_helper_sve_##name##_h,                                \
        gen_helper_sve_##name##_s,                                \
        gen_helper_sve_##name##_d,                                \
    };                                                            \
    if (a->esz == 0) {                                            \
        return false;                                             \
    }                                                             \
    if (sve_access_check(s)) {                                    \
        do_ppz_fp(s, a, fns[a->esz - 1]);                         \
    }                                                             \
    return true;                                                  \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
3650
3651/*
3652 *** SVE floating-point trig multiply-add coefficient
3653 */
3654
3655static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
3656{
3657    static gen_helper_gvec_3_ptr * const fns[3] = {
3658        gen_helper_sve_ftmad_h,
3659        gen_helper_sve_ftmad_s,
3660        gen_helper_sve_ftmad_d,
3661    };
3662
3663    if (a->esz == 0) {
3664        return false;
3665    }
3666    if (sve_access_check(s)) {
3667        unsigned vsz = vec_full_reg_size(s);
3668        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3669        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3670                           vec_full_reg_offset(s, a->rn),
3671                           vec_full_reg_offset(s, a->rm),
3672                           status, vsz, vsz, a->imm, fns[a->esz - 1]);
3673        tcg_temp_free_ptr(status);
3674    }
3675    return true;
3676}
3677
3678/*
3679 *** SVE Floating Point Accumulating Reduction Group
3680 */
3681
/* FADDA: ordered FP accumulation across the active elements of Zm,
 * seeded from the low element of Zn, result written to the FP
 * register Rd.  The helper is invoked by hand because it both takes
 * and returns an i64 value in addition to the usual pointers.
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    /* Byte elements are invalid for FP.  */
    if (a->esz == 0) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* The initial accumulator is the low element of Zn.  */
    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = get_fpstatus_ptr(a->esz == MO_16);
    t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    /* t_val is both input accumulator and output.  */
    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
3722
3723/*
3724 *** SVE Floating Point Arithmetic - Unpredicated Group
3725 */
3726
3727static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3728                      gen_helper_gvec_3_ptr *fn)
3729{
3730    if (fn == NULL) {
3731        return false;
3732    }
3733    if (sve_access_check(s)) {
3734        unsigned vsz = vec_full_reg_size(s);
3735        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3736        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3737                           vec_full_reg_offset(s, a->rn),
3738                           vec_full_reg_offset(s, a->rm),
3739                           status, vsz, vsz, 0, fn);
3740        tcg_temp_free_ptr(status);
3741    }
3742    return true;
3743}
3744
3745
/* Emit trans_<NAME> for the unpredicated FP three-operand insns; the
 * NULL slot makes byte elements invalid via do_zzz_fp.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3_ptr * const fns[4] = {                 \
        NULL, gen_helper_gvec_##name##_h,                           \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
    };                                                              \
    return do_zzz_fp(s, a, fns[a->esz]);                            \
}

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
3764
3765/*
3766 *** SVE Floating Point Arithmetic - Predicated Group
3767 */
3768
3769static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3770                       gen_helper_gvec_4_ptr *fn)
3771{
3772    if (fn == NULL) {
3773        return false;
3774    }
3775    if (sve_access_check(s)) {
3776        unsigned vsz = vec_full_reg_size(s);
3777        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3778        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3779                           vec_full_reg_offset(s, a->rn),
3780                           vec_full_reg_offset(s, a->rm),
3781                           pred_full_reg_offset(s, a->pg),
3782                           status, vsz, vsz, 0, fn);
3783        tcg_temp_free_ptr(status);
3784    }
3785    return true;
3786}
3787
/* Reuse the DO_FP3 name for the predicated FP three-operand insns,
 * now expanding via do_zpzz_fp with the sve_* helpers.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)          \
{                                                                   \
    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_zpzz_fp(s, a, fns[a->esz]);                           \
}

DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3
3811
typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                      TCGv_i64, TCGv_ptr, TCGv_i32);

/* Expand a predicated FP operation between vector Zn and a scalar
 * operand, writing Zd; the helper receives the scalar as an i64.
 */
static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = get_fpstatus_ptr(is_fp16);
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_i32(desc);
    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}
3839
3840static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3841                      gen_helper_sve_fp2scalar *fn)
3842{
3843    TCGv_i64 temp = tcg_const_i64(imm);
3844    do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3845    tcg_temp_free_i64(temp);
3846}
3847
3848#define DO_FP_IMM(NAME, name, const0, const1) \
3849static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)         \
3850{                                                                         \
3851    static gen_helper_sve_fp2scalar * const fns[3] = {                    \
3852        gen_helper_sve_##name##_h,                                        \
3853        gen_helper_sve_##name##_s,                                        \
3854        gen_helper_sve_##name##_d                                         \
3855    };                                                                    \
3856    static uint64_t const val[3][2] = {                                   \
3857        { float16_##const0, float16_##const1 },                           \
3858        { float32_##const0, float32_##const1 },                           \
3859        { float64_##const0, float64_##const1 },                           \
3860    };                                                                    \
3861    if (a->esz == 0) {                                                    \
3862        return false;                                                     \
3863    }                                                                     \
3864    if (sve_access_check(s)) {                                            \
3865        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
3866    }                                                                     \
3867    return true;                                                          \
3868}
3869
3870#define float16_two  make_float16(0x4000)
3871#define float32_two  make_float32(0x40000000)
3872#define float64_two  make_float64(0x4000000000000000ULL)
3873
3874DO_FP_IMM(FADD, fadds, half, one)
3875DO_FP_IMM(FSUB, fsubs, half, one)
3876DO_FP_IMM(FMUL, fmuls, half, two)
3877DO_FP_IMM(FSUBR, fsubrs, half, one)
3878DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3879DO_FP_IMM(FMINNM, fminnms, zero, one)
3880DO_FP_IMM(FMAX, fmaxs, zero, one)
3881DO_FP_IMM(FMIN, fmins, zero, one)
3882
3883#undef DO_FP_IMM
3884
3885static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3886                      gen_helper_gvec_4_ptr *fn)
3887{
3888    if (fn == NULL) {
3889        return false;
3890    }
3891    if (sve_access_check(s)) {
3892        unsigned vsz = vec_full_reg_size(s);
3893        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3894        tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3895                           vec_full_reg_offset(s, a->rn),
3896                           vec_full_reg_offset(s, a->rm),
3897                           pred_full_reg_offset(s, a->pg),
3898                           status, vsz, vsz, 0, fn);
3899        tcg_temp_free_ptr(status);
3900    }
3901    return true;
3902}
3903
/* Emit trans_<NAME>_ppzz for the FP vector compares; the NULL slot
 * makes byte elements invalid via do_fp_cmp.
 */
#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)     \
{                                                                     \
    static gen_helper_gvec_4_ptr * const fns[4] = {                   \
        NULL, gen_helper_sve_##name##_h,                              \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
    };                                                                \
    return do_fp_cmp(s, a, fns[a->esz]);                              \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
3923
3924static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
3925{
3926    static gen_helper_gvec_4_ptr * const fns[3] = {
3927        gen_helper_sve_fcadd_h,
3928        gen_helper_sve_fcadd_s,
3929        gen_helper_sve_fcadd_d
3930    };
3931
3932    if (a->esz == 0) {
3933        return false;
3934    }
3935    if (sve_access_check(s)) {
3936        unsigned vsz = vec_full_reg_size(s);
3937        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3938        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3939                           vec_full_reg_offset(s, a->rn),
3940                           vec_full_reg_offset(s, a->rm),
3941                           pred_full_reg_offset(s, a->pg),
3942                           status, vsz, vsz, a->rot, fns[a->esz - 1]);
3943        tcg_temp_free_ptr(status);
3944    }
3945    return true;
3946}
3947
/* Helper signature shared by the FMLA-family helpers: the operand
 * register numbers travel packed in the 32-bit descriptor, not as
 * separate pointer arguments.
 */
typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);

/*
 * Expand a predicated fused multiply-add (three vector sources plus
 * destination and governing predicate).  Returns false when FN is
 * NULL, i.e. for an unallocated element size.
 */
static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned desc;
    TCGv_i32 t_desc;
    TCGv_ptr pg = tcg_temp_new_ptr();

    /* We would need 7 operands to pass these arguments "properly".
     * So we encode all the register numbers into the descriptor.
     */
    desc = deposit32(a->rd, 5, 5, a->rn);     /* bits [4:0]=rd [9:5]=rn */
    desc = deposit32(desc, 10, 5, a->rm);     /* bits [14:10]=rm */
    desc = deposit32(desc, 15, 5, a->ra);     /* bits [19:15]=ra */
    desc = simd_desc(vsz, vsz, desc);

    t_desc = tcg_const_i32(desc);
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(cpu_env, pg, t_desc);
    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(pg);
    return true;
}
3979
/*
 * Expand one trans_* function per FMLA-family insn, dispatching on
 * element size; the NULL entry makes do_fmla reject esz == 0.
 */
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
{                                                                    \
    static gen_helper_sve_fmla * const fns[4] = {                    \
        NULL, gen_helper_sve_##name##_h,                             \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
    };                                                               \
    return do_fmla(s, a, fns[a->esz]);                               \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
3996
/*
 * FCMLA (vectors): predicated complex multiply-add with rotation.
 * Like do_fmla, all register numbers plus the rotation are packed
 * into the descriptor because there are too many operands to pass
 * each as a pointer.
 */
static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
{
    static gen_helper_sve_fmla * const fns[3] = {
        gen_helper_sve_fcmla_zpzzz_h,
        gen_helper_sve_fcmla_zpzzz_s,
        gen_helper_sve_fcmla_zpzzz_d,
    };

    /* No byte-sized FP format.  */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned desc;
        TCGv_i32 t_desc;
        TCGv_ptr pg = tcg_temp_new_ptr();

        /* We would need 7 operands to pass these arguments "properly".
         * So we encode all the register numbers into the descriptor.
         */
        desc = deposit32(a->rd, 5, 5, a->rn);
        desc = deposit32(desc, 10, 5, a->rm);
        desc = deposit32(desc, 15, 5, a->ra);
        desc = deposit32(desc, 20, 2, a->rot);
        /* NOTE(review): only 22 bits were deposited; the sign-extension
         * appears intended to keep the value within the signed range
         * that simd_desc accepts for its data field -- confirm against
         * simd_desc's assertion.
         */
        desc = sextract32(desc, 0, 22);
        desc = simd_desc(vsz, vsz, desc);

        t_desc = tcg_const_i32(desc);
        tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
        fns[a->esz - 1](cpu_env, pg, t_desc);
        tcg_temp_free_i32(t_desc);
        tcg_temp_free_ptr(pg);
    }
    return true;
}
4032
/*
 * FCMLA (indexed): complex multiply-add with one element selected by
 * index.  Unpredicated; shares the AdvSIMD indexed-fcmla helpers.
 */
static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_fcmlah_idx,
        gen_helper_gvec_fcmlas_idx,
    };

    /* Decode guarantees: only h/s element sizes, and rd tied to ra.  */
    tcg_debug_assert(a->esz == 1 || a->esz == 2);
    tcg_debug_assert(a->rd == a->ra);
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz,
                           /* index and rotation packed into gvec data */
                           a->index * 4 + a->rot,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4055
4056/*
4057 *** SVE Floating Point Unary Operations Predicated Group
4058 */
4059
4060static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4061                       bool is_fp16, gen_helper_gvec_3_ptr *fn)
4062{
4063    if (sve_access_check(s)) {
4064        unsigned vsz = vec_full_reg_size(s);
4065        TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4066        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4067                           vec_full_reg_offset(s, rn),
4068                           pred_full_reg_offset(s, pg),
4069                           status, vsz, vsz, 0, fn);
4070        tcg_temp_free_ptr(status);
4071    }
4072    return true;
4073}
4074
/*
 * FP conversions, predicated.  Suffix naming is <source><destination>
 * with h/s/d for half/single/double: FCVT converts between FP formats,
 * FCVTZS/FCVTZU convert FP to signed/unsigned integer (the helpers
 * perform the toward-zero rounding implied by the mnemonic).  The
 * boolean selects the FP16 float_status when the source is half
 * precision.
 */
static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
}

static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
}

static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
}

static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
}

static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
}

static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
}

static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
}

static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
}

static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
}

static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
}

static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
}

static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
}

static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
}

static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
}

static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
}

static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
}

static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
}

static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
}

static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
}
4174
/* Round-to-integral helpers for h/s/d, shared by FRINTI below and
 * the explicit-rounding-mode variants in do_frint_mode.
 */
static gen_helper_gvec_3_ptr * const frint_fns[3] = {
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};

/* FRINTI: round to integral without forcing a rounding mode
 * (contrast do_frint_mode, which installs one around the operation).
 */
static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {  /* no byte-sized FP format */
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      frint_fns[a->esz - 1]);
}
4189
4190static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
4191{
4192    static gen_helper_gvec_3_ptr * const fns[3] = {
4193        gen_helper_sve_frintx_h,
4194        gen_helper_sve_frintx_s,
4195        gen_helper_sve_frintx_d
4196    };
4197    if (a->esz == 0) {
4198        return false;
4199    }
4200    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4201}
4202
/*
 * Expand a round-to-integral operation under an explicit rounding
 * MODE, temporarily overriding the mode in the float_status.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i32 tmode = tcg_const_i32(mode);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);

        /* Install the requested mode; set_rmode writes the previous
         * mode back into tmode (it swaps), enabling the restore below.
         */
        gen_helper_set_rmode(tmode, tmode, status);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, frint_fns[a->esz - 1]);

        /* Restore the original rounding mode.  */
        gen_helper_set_rmode(tmode, tmode, status);
        tcg_temp_free_i32(tmode);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4226
/* The FRINT* variants with a fixed rounding mode: N = to nearest
 * (ties to even), P = toward +inf, M = toward -inf, Z = toward zero,
 * A = to nearest (ties away from zero).
 */
static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_nearest_even);
}

static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_up);
}

static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_down);
}

static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_to_zero);
}

static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_ties_away);
}
4251
/* FRECPX: floating-point reciprocal exponent (per-element, predicated);
 * esz == 0 is unallocated.
 */
static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_frecpx_h,
        gen_helper_sve_frecpx_s,
        gen_helper_sve_frecpx_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}

/* FSQRT: floating-point square root (per-element, predicated);
 * esz == 0 is unallocated.
 */
static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_fsqrt_h,
        gen_helper_sve_fsqrt_s,
        gen_helper_sve_fsqrt_d
    };
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
}
4277
/*
 * Integer-to-FP conversions, predicated.  SCVTF takes a signed source,
 * UCVTF unsigned; the <source><destination> suffix uses h/s/d as above.
 * The FP16 float_status is selected when the destination is half
 * precision.
 */
static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
}

static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
}

static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
}

static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
}

static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
}

static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
}

static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
}

static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
}

static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
}

static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
}

static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
}

static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
}

static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
}

static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
}
4347
4348/*
4349 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4350 */
4351
/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */

static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* Total number of memory ops: one per aligned 8-byte piece, plus
     * one per set bit of the 2/4/6-byte remainder (see switch below).
     */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small: fully unrolled sequence of 8-byte loads.  */
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        /* Large: emit a TCG-level loop over the aligned portion.
         * The counter must be a local temp to survive the branch.
         */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32 of 2/4/8 gives MO_16/MO_32/MO_64.  */
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte piece then a 2-byte piece, merged.  */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
4439
/* Similarly for stores.  */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* One memory op per aligned 8-byte piece, plus one per set bit
     * of the 2/4/6-byte remainder (see switch below).
     */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small: fully unrolled sequence of 8-byte stores.  */
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
        }
    } else {
        /* Large: emit a TCG-level loop; the counter is a local temp
         * so that it survives the backward branch.
         */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr t2, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        t2 = tcg_temp_new_ptr();
        tcg_gen_add_ptr(t2, cpu_env, i);
        tcg_gen_ld_i64(t0, t2, vofs);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tcg_gen_addi_ptr(t2, i, imm);
        tcg_gen_extu_ptr_i64(addr, t2);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
        tcg_temp_free_ptr(t2);

        tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_addi_ptr(i, i, 8);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2.  */
    if (len_remain) {
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32 of 2/4/8 gives MO_16/MO_32/MO_64.  */
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: low 4-byte piece, then high 2-byte piece.  */
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
4521
4522static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4523{
4524    if (sve_access_check(s)) {
4525        int size = vec_full_reg_size(s);
4526        int off = vec_full_reg_offset(s, a->rd);
4527        do_ldr(s, off, size, a->rn, a->imm * size);
4528    }
4529    return true;
4530}
4531
4532static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4533{
4534    if (sve_access_check(s)) {
4535        int size = pred_full_reg_size(s);
4536        int off = pred_full_reg_offset(s, a->rd);
4537        do_ldr(s, off, size, a->rn, a->imm * size);
4538    }
4539    return true;
4540}
4541
4542static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4543{
4544    if (sve_access_check(s)) {
4545        int size = vec_full_reg_size(s);
4546        int off = vec_full_reg_offset(s, a->rd);
4547        do_str(s, off, size, a->rn, a->imm * size);
4548    }
4549    return true;
4550}
4551
4552static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4553{
4554    if (sve_access_check(s)) {
4555        int size = pred_full_reg_size(s);
4556        int off = pred_full_reg_offset(s, a->rd);
4557        do_str(s, off, size, a->rn, a->imm * size);
4558    }
4559    return true;
4560}
4561
4562/*
4563 *** SVE Memory - Contiguous Load Group
4564 */
4565
/* The memory mode of the dtype.  Indexed by the insn's 4-bit dtype
 * field; sign-extending loads use the signed MO_* forms so the helper
 * widens correctly.
 */
static const TCGMemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_Q
};

/* The size (log2) of the memory element for a dtype.  */
#define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)

/* The vector element size of dtype, i.e. the (log2) size each loaded
 * value occupies in the destination vector after any extension.
 */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};

/* Build the TCGMemOpIdx (memop + mmu index) for a dtype, honoring the
 * current translation-time data endianness.
 */
static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype)
{
    return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s));
}
4588
4589static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4590                       int dtype, gen_helper_gvec_mem *fn)
4591{
4592    unsigned vsz = vec_full_reg_size(s);
4593    TCGv_ptr t_pg;
4594    TCGv_i32 t_desc;
4595    int desc;
4596
4597    /* For e.g. LD4, there are not enough arguments to pass all 4
4598     * registers as pointers, so encode the regno into the data field.
4599     * For consistency, do this even for LD1.
4600     */
4601    desc = sve_memopidx(s, dtype);
4602    desc |= zt << MEMOPIDX_SHIFT;
4603    desc = simd_desc(vsz, vsz, desc);
4604    t_desc = tcg_const_i32(desc);
4605    t_pg = tcg_temp_new_ptr();
4606
4607    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4608    fn(cpu_env, t_pg, addr, t_desc);
4609
4610    tcg_temp_free_ptr(t_pg);
4611    tcg_temp_free_i32(t_desc);
4612}
4613
/*
 * Dispatch a predicated contiguous load to the proper helper, indexed
 * by [big-endian][dtype][nreg].  Multi-register forms (LD2/3/4) exist
 * only for the same-size dtypes; extending forms have NREG == 0 only.
 */
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    static gen_helper_gvec_mem * const fns[2][16][4] = {
        /* Little-endian */
        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
            gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
          { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
            gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
          { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
            gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

        /* Big-endian */
        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
            gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
          { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
            gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
          { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
            gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
    };
    gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];

    /* While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, dtype, fn);
}
4676
4677static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
4678{
4679    if (a->rm == 31) {
4680        return false;
4681    }
4682    if (sve_access_check(s)) {
4683        TCGv_i64 addr = new_tmp_a64(s);
4684        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4685        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4686        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4687    }
4688    return true;
4689}
4690
4691static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
4692{
4693    if (sve_access_check(s)) {
4694        int vsz = vec_full_reg_size(s);
4695        int elements = vsz >> dtype_esz[a->dtype];
4696        TCGv_i64 addr = new_tmp_a64(s);
4697
4698        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4699                         (a->imm * elements * (a->nreg + 1))
4700                         << dtype_msz(a->dtype));
4701        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4702    }
4703    return true;
4704}
4705
/* LDFF1: first-fault contiguous load, scalar-plus-scalar form.
 * Single-register only, so the helper table is indexed just by
 * endianness and dtype; fault behavior lives in the helpers.
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][16] = {
        /* Little-endian */
        { gen_helper_sve_ldff1bb_r,
          gen_helper_sve_ldff1bhu_r,
          gen_helper_sve_ldff1bsu_r,
          gen_helper_sve_ldff1bdu_r,

          gen_helper_sve_ldff1sds_le_r,
          gen_helper_sve_ldff1hh_le_r,
          gen_helper_sve_ldff1hsu_le_r,
          gen_helper_sve_ldff1hdu_le_r,

          gen_helper_sve_ldff1hds_le_r,
          gen_helper_sve_ldff1hss_le_r,
          gen_helper_sve_ldff1ss_le_r,
          gen_helper_sve_ldff1sdu_le_r,

          gen_helper_sve_ldff1bds_r,
          gen_helper_sve_ldff1bss_r,
          gen_helper_sve_ldff1bhs_r,
          gen_helper_sve_ldff1dd_le_r },

        /* Big-endian */
        { gen_helper_sve_ldff1bb_r,
          gen_helper_sve_ldff1bhu_r,
          gen_helper_sve_ldff1bsu_r,
          gen_helper_sve_ldff1bdu_r,

          gen_helper_sve_ldff1sds_be_r,
          gen_helper_sve_ldff1hh_be_r,
          gen_helper_sve_ldff1hsu_be_r,
          gen_helper_sve_ldff1hdu_be_r,

          gen_helper_sve_ldff1hds_be_r,
          gen_helper_sve_ldff1hss_be_r,
          gen_helper_sve_ldff1ss_be_r,
          gen_helper_sve_ldff1sdu_be_r,

          gen_helper_sve_ldff1bds_r,
          gen_helper_sve_ldff1bss_r,
          gen_helper_sve_ldff1bhs_r,
          gen_helper_sve_ldff1dd_be_r },
    };

    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
                   fns[s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
4761
/* LDNF1: non-fault contiguous load, scalar-plus-immediate form.
 * Single-register only; the helper table is indexed by endianness
 * and dtype, and fault suppression lives in the helpers.
 */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][16] = {
        /* Little-endian */
        { gen_helper_sve_ldnf1bb_r,
          gen_helper_sve_ldnf1bhu_r,
          gen_helper_sve_ldnf1bsu_r,
          gen_helper_sve_ldnf1bdu_r,

          gen_helper_sve_ldnf1sds_le_r,
          gen_helper_sve_ldnf1hh_le_r,
          gen_helper_sve_ldnf1hsu_le_r,
          gen_helper_sve_ldnf1hdu_le_r,

          gen_helper_sve_ldnf1hds_le_r,
          gen_helper_sve_ldnf1hss_le_r,
          gen_helper_sve_ldnf1ss_le_r,
          gen_helper_sve_ldnf1sdu_le_r,

          gen_helper_sve_ldnf1bds_r,
          gen_helper_sve_ldnf1bss_r,
          gen_helper_sve_ldnf1bhs_r,
          gen_helper_sve_ldnf1dd_le_r },

        /* Big-endian */
        { gen_helper_sve_ldnf1bb_r,
          gen_helper_sve_ldnf1bhu_r,
          gen_helper_sve_ldnf1bsu_r,
          gen_helper_sve_ldnf1bdu_r,

          gen_helper_sve_ldnf1sds_be_r,
          gen_helper_sve_ldnf1hh_be_r,
          gen_helper_sve_ldnf1hsu_be_r,
          gen_helper_sve_ldnf1hdu_be_r,

          gen_helper_sve_ldnf1hds_be_r,
          gen_helper_sve_ldnf1hss_be_r,
          gen_helper_sve_ldnf1ss_be_r,
          gen_helper_sve_ldnf1sdu_be_r,

          gen_helper_sve_ldnf1bds_r,
          gen_helper_sve_ldnf1bss_r,
          gen_helper_sve_ldnf1bhs_r,
          gen_helper_sve_ldnf1dd_be_r },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
                   fns[s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
4820
/*
 * Load one 16-byte quadword and replicate it across the vector
 * (LD1RQ).  Implemented by running a normal LD1 helper with the
 * descriptor's vector length forced to 16 bytes, then broadcasting.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
{
    static gen_helper_gvec_mem * const fns[2][4] = {
        { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_le_r,
          gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
        { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_be_r,
          gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 t_desc;
    int desc, poff;

    /* Load the first quadword using the normal predicated load helpers.  */
    desc = sve_memopidx(s, msz_dtype(msz));
    desc |= zt << MEMOPIDX_SHIFT;
    /* oprsz/maxsz of 16 limits the helper to exactly one quadword.  */
    desc = simd_desc(16, 16, desc);
    t_desc = tcg_const_i32(desc);

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#ifdef HOST_WORDS_BIGENDIAN
        /* Big-endian hosts store the low 16 predicate bits 6 bytes in.  */
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);

    /* Replicate that first quadword.  */
    if (vsz > 16) {
        unsigned dofs = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
    }
}
4873
4874static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
4875{
4876    if (a->rm == 31) {
4877        return false;
4878    }
4879    if (sve_access_check(s)) {
4880        int msz = dtype_msz(a->dtype);
4881        TCGv_i64 addr = new_tmp_a64(s);
4882        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4883        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4884        do_ldrq(s, a->rd, a->pg, addr, msz);
4885    }
4886    return true;
4887}
4888
4889static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
4890{
4891    if (sve_access_check(s)) {
4892        TCGv_i64 addr = new_tmp_a64(s);
4893        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4894        do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4895    }
4896    return true;
4897}
4898
/* Load and broadcast element.  */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over = gen_new_label();
    TCGv_i64 temp;

    /* If the guarding predicate has no bits set, no load occurs.  */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        /* Skip the load entirely when no element is active. */
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Predicate wider than 64 bits: scan for any active element;
         * find_last_active returns a negative value when none is set.
         */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data.  */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
                        s->be_data | dtype_mop[a->dtype]);

    /* Broadcast to *all* elements.  */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements.  */
    gen_set_label(over);
    do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
    return true;
}
4947
/*
 * Emit a predicated contiguous store (ST1..ST4).  msz/esz are log2 of
 * the memory and register element sizes; nreg is the number of extra
 * registers beyond the first (0 for ST1, 1..3 for ST2..ST4).
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    /* ST1 helpers, indexed by [be][msz][esz].  NULL entries are the
     * invalid encodings where msz > esz (cannot truncate upward).
     */
    static gen_helper_gvec_mem * const fn_single[2][4][4] = {
        { { gen_helper_sve_st1bb_r,
            gen_helper_sve_st1bh_r,
            gen_helper_sve_st1bs_r,
            gen_helper_sve_st1bd_r },
          { NULL,
            gen_helper_sve_st1hh_le_r,
            gen_helper_sve_st1hs_le_r,
            gen_helper_sve_st1hd_le_r },
          { NULL, NULL,
            gen_helper_sve_st1ss_le_r,
            gen_helper_sve_st1sd_le_r },
          { NULL, NULL, NULL,
            gen_helper_sve_st1dd_le_r } },
        { { gen_helper_sve_st1bb_r,
            gen_helper_sve_st1bh_r,
            gen_helper_sve_st1bs_r,
            gen_helper_sve_st1bd_r },
          { NULL,
            gen_helper_sve_st1hh_be_r,
            gen_helper_sve_st1hs_be_r,
            gen_helper_sve_st1hd_be_r },
          { NULL, NULL,
            gen_helper_sve_st1ss_be_r,
            gen_helper_sve_st1sd_be_r },
          { NULL, NULL, NULL,
            gen_helper_sve_st1dd_be_r } },
    };
    /* ST2/ST3/ST4 helpers, indexed by [be][nreg - 1][msz]. */
    static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
        { { gen_helper_sve_st2bb_r,
            gen_helper_sve_st2hh_le_r,
            gen_helper_sve_st2ss_le_r,
            gen_helper_sve_st2dd_le_r },
          { gen_helper_sve_st3bb_r,
            gen_helper_sve_st3hh_le_r,
            gen_helper_sve_st3ss_le_r,
            gen_helper_sve_st3dd_le_r },
          { gen_helper_sve_st4bb_r,
            gen_helper_sve_st4hh_le_r,
            gen_helper_sve_st4ss_le_r,
            gen_helper_sve_st4dd_le_r } },
        { { gen_helper_sve_st2bb_r,
            gen_helper_sve_st2hh_be_r,
            gen_helper_sve_st2ss_be_r,
            gen_helper_sve_st2dd_be_r },
          { gen_helper_sve_st3bb_r,
            gen_helper_sve_st3hh_be_r,
            gen_helper_sve_st3ss_be_r,
            gen_helper_sve_st3dd_be_r },
          { gen_helper_sve_st4bb_r,
            gen_helper_sve_st4hh_be_r,
            gen_helper_sve_st4ss_be_r,
            gen_helper_sve_st4dd_be_r } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[be][msz][esz];
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(msz), fn);
}
5019
5020static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
5021{
5022    if (a->rm == 31 || a->msz > a->esz) {
5023        return false;
5024    }
5025    if (sve_access_check(s)) {
5026        TCGv_i64 addr = new_tmp_a64(s);
5027        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
5028        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5029        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5030    }
5031    return true;
5032}
5033
5034static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
5035{
5036    if (a->msz > a->esz) {
5037        return false;
5038    }
5039    if (sve_access_check(s)) {
5040        int vsz = vec_full_reg_size(s);
5041        int elements = vsz >> a->esz;
5042        TCGv_i64 addr = new_tmp_a64(s);
5043
5044        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5045                         (a->imm * elements * (a->nreg + 1)) << a->msz);
5046        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5047    }
5048    return true;
5049}
5050
5051/*
5052 *** SVE gather loads / scatter stores
5053 */
5054
5055static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
5056                       int scale, TCGv_i64 scalar, int msz,
5057                       gen_helper_gvec_mem_scatter *fn)
5058{
5059    unsigned vsz = vec_full_reg_size(s);
5060    TCGv_ptr t_zm = tcg_temp_new_ptr();
5061    TCGv_ptr t_pg = tcg_temp_new_ptr();
5062    TCGv_ptr t_zt = tcg_temp_new_ptr();
5063    TCGv_i32 t_desc;
5064    int desc;
5065
5066    desc = sve_memopidx(s, msz_dtype(msz));
5067    desc |= scale << MEMOPIDX_SHIFT;
5068    desc = simd_desc(vsz, vsz, desc);
5069    t_desc = tcg_const_i32(desc);
5070
5071    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5072    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5073    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
5074    fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
5075
5076    tcg_temp_free_ptr(t_zt);
5077    tcg_temp_free_ptr(t_zm);
5078    tcg_temp_free_ptr(t_pg);
5079    tcg_temp_free_i32(t_desc);
5080}
5081
/*
 * Gather-load helpers for 32-bit elements, indexed by [be][ff][xs][u][msz]:
 *   be:  big-endian memory layout
 *   ff:  first-fault variant
 *   xs:  signed (zss) vs unsigned (zsu) extension of the 32-bit offsets
 *   u:   zero-extend (1) vs sign-extend (0) the loaded value
 *   msz: log2 of the memory element size
 * The NULL entries (sign-extending a full-width 32-bit load) are
 * invalid encodings.
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_le_zsu,
            gen_helper_sve_ldss_le_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_le_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_le_zss,
            gen_helper_sve_ldss_le_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_le_zsu,
            gen_helper_sve_ldffss_le_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_le_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_le_zss,
            gen_helper_sve_ldffss_le_zss, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_be_zsu,
            gen_helper_sve_ldss_be_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_be_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_be_zss,
            gen_helper_sve_ldss_be_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_be_zsu,
            gen_helper_sve_ldffss_be_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_be_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_be_zss,
            gen_helper_sve_ldffss_be_zss, } } } },
};
5140
/*
 * Gather-load helpers for 64-bit elements, indexed like gather_load_fn32
 * by [be][ff][xs][u][msz], with msz running 0..3.
 * Note that we overload xs=2 to indicate 64-bit offset.
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_le_zsu,
            gen_helper_sve_ldsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_le_zsu,
            gen_helper_sve_ldsdu_le_zsu,
            gen_helper_sve_lddd_le_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_le_zss,
            gen_helper_sve_ldsds_le_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_le_zss,
            gen_helper_sve_ldsdu_le_zss,
            gen_helper_sve_lddd_le_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_le_zd,
            gen_helper_sve_ldsds_le_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_le_zd,
            gen_helper_sve_ldsdu_le_zd,
            gen_helper_sve_lddd_le_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_le_zsu,
            gen_helper_sve_ldffsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_le_zsu,
            gen_helper_sve_ldffsdu_le_zsu,
            gen_helper_sve_ldffdd_le_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_le_zss,
            gen_helper_sve_ldffsds_le_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_le_zss,
            gen_helper_sve_ldffsdu_le_zss,
            gen_helper_sve_ldffdd_le_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_le_zd,
            gen_helper_sve_ldffsds_le_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_le_zd,
            gen_helper_sve_ldffsdu_le_zd,
            gen_helper_sve_ldffdd_le_zd, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_be_zsu,
            gen_helper_sve_ldsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_be_zsu,
            gen_helper_sve_ldsdu_be_zsu,
            gen_helper_sve_lddd_be_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_be_zss,
            gen_helper_sve_ldsds_be_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_be_zss,
            gen_helper_sve_ldsdu_be_zss,
            gen_helper_sve_lddd_be_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_be_zd,
            gen_helper_sve_ldsds_be_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_be_zd,
            gen_helper_sve_ldsdu_be_zd,
            gen_helper_sve_lddd_be_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_be_zsu,
            gen_helper_sve_ldffsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_be_zsu,
            gen_helper_sve_ldffsdu_be_zsu,
            gen_helper_sve_ldffdd_be_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_be_zss,
            gen_helper_sve_ldffsds_be_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_be_zss,
            gen_helper_sve_ldffsdu_be_zss,
            gen_helper_sve_ldffdd_be_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_be_zd,
            gen_helper_sve_ldffsds_be_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_be_zd,
            gen_helper_sve_ldffsdu_be_zd,
            gen_helper_sve_ldffdd_be_zd, } } } },
};
5247
5248static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
5249{
5250    gen_helper_gvec_mem_scatter *fn = NULL;
5251    int be = s->be_data == MO_BE;
5252
5253    if (!sve_access_check(s)) {
5254        return true;
5255    }
5256
5257    switch (a->esz) {
5258    case MO_32:
5259        fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
5260        break;
5261    case MO_64:
5262        fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
5263        break;
5264    }
5265    assert(fn != NULL);
5266
5267    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5268               cpu_reg_sp(s, a->rn), a->msz, fn);
5269    return true;
5270}
5271
5272static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
5273{
5274    gen_helper_gvec_mem_scatter *fn = NULL;
5275    int be = s->be_data == MO_BE;
5276    TCGv_i64 imm;
5277
5278    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5279        return false;
5280    }
5281    if (!sve_access_check(s)) {
5282        return true;
5283    }
5284
5285    switch (a->esz) {
5286    case MO_32:
5287        fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
5288        break;
5289    case MO_64:
5290        fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
5291        break;
5292    }
5293    assert(fn != NULL);
5294
5295    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5296     * by loading the immediate into the scalar parameter.
5297     */
5298    imm = tcg_const_i64(a->imm << a->msz);
5299    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
5300    tcg_temp_free_i64(imm);
5301    return true;
5302}
5303
/*
 * Scatter-store helpers for 32-bit elements, indexed by [be][xs][msz]:
 *   be:  big-endian memory layout
 *   xs:  signed (zss) vs unsigned (zsu) extension of the 32-bit offsets
 *   msz: log2 of the memory element size
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = {
    /* Little-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_le_zsu,
        gen_helper_sve_stss_le_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_le_zss,
        gen_helper_sve_stss_le_zss, } },
    /* Big-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_be_zsu,
        gen_helper_sve_stss_be_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_be_zss,
        gen_helper_sve_stss_be_zss, } },
};
5321
/*
 * Scatter-store helpers for 64-bit elements, indexed by [be][xs][msz].
 * Note that we overload xs=2 to indicate 64-bit offset.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
    /* Little-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_le_zsu,
        gen_helper_sve_stsd_le_zsu,
        gen_helper_sve_stdd_le_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_le_zss,
        gen_helper_sve_stsd_le_zss,
        gen_helper_sve_stdd_le_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_le_zd,
        gen_helper_sve_stsd_le_zd,
        gen_helper_sve_stdd_le_zd, } },
    /* Big-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_be_zsu,
        gen_helper_sve_stsd_be_zsu,
        gen_helper_sve_stdd_be_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_be_zss,
        gen_helper_sve_stsd_be_zss,
        gen_helper_sve_stdd_be_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_be_zd,
        gen_helper_sve_stsd_be_zd,
        gen_helper_sve_stdd_be_zd, } },
};
5351
5352static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
5353{
5354    gen_helper_gvec_mem_scatter *fn;
5355    int be = s->be_data == MO_BE;
5356
5357    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5358        return false;
5359    }
5360    if (!sve_access_check(s)) {
5361        return true;
5362    }
5363    switch (a->esz) {
5364    case MO_32:
5365        fn = scatter_store_fn32[be][a->xs][a->msz];
5366        break;
5367    case MO_64:
5368        fn = scatter_store_fn64[be][a->xs][a->msz];
5369        break;
5370    default:
5371        g_assert_not_reached();
5372    }
5373    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5374               cpu_reg_sp(s, a->rn), a->msz, fn);
5375    return true;
5376}
5377
5378static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
5379{
5380    gen_helper_gvec_mem_scatter *fn = NULL;
5381    int be = s->be_data == MO_BE;
5382    TCGv_i64 imm;
5383
5384    if (a->esz < a->msz) {
5385        return false;
5386    }
5387    if (!sve_access_check(s)) {
5388        return true;
5389    }
5390
5391    switch (a->esz) {
5392    case MO_32:
5393        fn = scatter_store_fn32[be][0][a->msz];
5394        break;
5395    case MO_64:
5396        fn = scatter_store_fn64[be][2][a->msz];
5397        break;
5398    }
5399    assert(fn != NULL);
5400
5401    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5402     * by loading the immediate into the scalar parameter.
5403     */
5404    imm = tcg_const_i64(a->imm << a->msz);
5405    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
5406    tcg_temp_free_i64(imm);
5407    return true;
5408}
5409
5410/*
5411 * Prefetches
5412 */
5413
5414static bool trans_PRF(DisasContext *s, arg_PRF *a)
5415{
5416    /* Prefetch is a nop within QEMU.  */
5417    (void)sve_access_check(s);
5418    return true;
5419}
5420
5421static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
5422{
5423    if (a->rm == 31) {
5424        return false;
5425    }
5426    /* Prefetch is a nop within QEMU.  */
5427    (void)sve_access_check(s);
5428    return true;
5429}
5430
5431/*
5432 * Move Prefix
5433 *
5434 * TODO: The implementation so far could handle predicated merging movprfx.
5435 * The helper functions as written take an extra source register to
5436 * use in the operation, but the result is only written when predication
5437 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
5438 * to allow the final write back to the destination to be unconditional.
5439 * For predicated zeroing movprfx, we need to rearrange the helpers to
5440 * allow the final write back to zero inactives.
5441 *
5442 * In the meantime, just emit the moves.
5443 */
5444
/* Unpredicated MOVPRFX: emitted as a plain full-vector move.  */
static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    return do_mov_z(s, a->rd, a->rn);
}
5449
5450static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
5451{
5452    if (sve_access_check(s)) {
5453        do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5454    }
5455    return true;
5456}
5457
5458static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
5459{
5460    if (sve_access_check(s)) {
5461        do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5462    }
5463    return true;
5464}
5465