/* qemu/target/arm/translate-sve.c */
   1/*
   2 * AArch64 SVE translation
   3 *
   4 * Copyright (c) 2018 Linaro, Ltd
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "cpu.h"
  22#include "exec/exec-all.h"
  23#include "tcg-op.h"
  24#include "tcg-op-gvec.h"
  25#include "tcg-gvec-desc.h"
  26#include "qemu/log.h"
  27#include "arm_ldst.h"
  28#include "translate.h"
  29#include "internals.h"
  30#include "exec/helper-proto.h"
  31#include "exec/helper-gen.h"
  32#include "exec/log.h"
  33#include "trace-tcg.h"
  34#include "translate-a64.h"
  35#include "fpu/softfloat.h"
  36
  37
/* Expander for a gvec operation taking a 64-bit scalar operand:
 * (vece, dofs, aofs, scalar, oprsz, maxsz).  */
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

/* Out-of-line helpers that compute condition flags into a TCGv_i32,
 * from 3 or 4 vector/predicate pointers plus a simd descriptor.  */
typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Out-of-line memory helpers: (env, predicate, address, descriptor),
 * with an extra vector pointer for the scatter/gather form.  */
typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
  49
  50/*
  51 * Helpers for extracting complex instruction fields.
  52 */
  53
  54/* See e.g. ASR (immediate, predicated).
  55 * Returns -1 for unallocated encoding; diagnose later.
  56 */
  57static int tszimm_esz(int x)
  58{
  59    x >>= 3;  /* discard imm3 */
  60    return 31 - clz32(x);
  61}
  62
/* See e.g. LSR (immediate, predicated).  The tsz:imm3 encoding stores
 * (16 << esz) - shift, so invert to recover the shift count.  */
static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}
  67
  68/* See e.g. LSL (immediate, predicated).  */
  69static int tszimm_shl(int x)
  70{
  71    return x - (8 << tszimm_esz(x));
  72}
  73
  74static inline int plus1(int x)
  75{
  76    return x + 1;
  77}
  78
  79/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  80static inline int expand_imm_sh8s(int x)
  81{
  82    return (int8_t)x << (x & 0x100 ? 8 : 0);
  83}
  84
  85static inline int expand_imm_sh8u(int x)
  86{
  87    return (uint8_t)x << (x & 0x100 ? 8 : 0);
  88}
  89
  90/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  91 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  92 */
  93static inline int msz_dtype(int msz)
  94{
  95    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  96    return dtype[msz];
  97}
  98
  99/*
 100 * Include the generated decoder.
 101 */
 102
 103#include "decode-sve.inc.c"
 104
 105/*
 106 * Implement all of the translator functions referenced by the decoder.
 107 */
 108
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}
 116
/* Return the byte size of the whole predicate register, VL / 64.
 * A predicate holds one bit per vector byte, hence sve_len / 8.
 * NOTE(review): assumes s->sve_len is the vector length in bytes.
 */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}
 122
 123/* Round up the size of a register to a size allowed by
 124 * the tcg vector infrastructure.  Any operation which uses this
 125 * size may assume that the bits above pred_full_reg_size are zero,
 126 * and must leave them the same way.
 127 *
 128 * Note that this is not needed for the vector registers as they
 129 * are always properly sized for tcg vectors.
 130 */
 131static int size_for_gvec(int size)
 132{
 133    if (size <= 8) {
 134        return 8;
 135    } else {
 136        return QEMU_ALIGN_UP(size, 16);
 137    }
 138}
 139
/* Return the predicate register size rounded up for use with gvec ops.  */
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
 144
 145/* Invoke a vector expander on two Zregs.  */
 146static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
 147                         int esz, int rd, int rn)
 148{
 149    if (sve_access_check(s)) {
 150        unsigned vsz = vec_full_reg_size(s);
 151        gvec_fn(esz, vec_full_reg_offset(s, rd),
 152                vec_full_reg_offset(s, rn), vsz, vsz);
 153    }
 154    return true;
 155}
 156
 157/* Invoke a vector expander on three Zregs.  */
 158static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
 159                         int esz, int rd, int rn, int rm)
 160{
 161    if (sve_access_check(s)) {
 162        unsigned vsz = vec_full_reg_size(s);
 163        gvec_fn(esz, vec_full_reg_offset(s, rd),
 164                vec_full_reg_offset(s, rn),
 165                vec_full_reg_offset(s, rm), vsz, vsz);
 166    }
 167    return true;
 168}
 169
/* Invoke a vector move on two Zregs.  Element size is irrelevant
 * for a whole-register copy, so esz 0 is used.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}
 175
/* Initialize a Zreg with replications of a 64-bit immediate.
 * Note: no sve_access_check here -- callers must have already
 * performed it (see e.g. do_shift_imm).  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}
 182
 183/* Invoke a vector expander on two Pregs.  */
 184static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
 185                         int esz, int rd, int rn)
 186{
 187    if (sve_access_check(s)) {
 188        unsigned psz = pred_gvec_reg_size(s);
 189        gvec_fn(esz, pred_full_reg_offset(s, rd),
 190                pred_full_reg_offset(s, rn), psz, psz);
 191    }
 192    return true;
 193}
 194
 195/* Invoke a vector expander on three Pregs.  */
 196static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
 197                         int esz, int rd, int rn, int rm)
 198{
 199    if (sve_access_check(s)) {
 200        unsigned psz = pred_gvec_reg_size(s);
 201        gvec_fn(esz, pred_full_reg_offset(s, rd),
 202                pred_full_reg_offset(s, rn),
 203                pred_full_reg_offset(s, rm), psz, psz);
 204    }
 205    return true;
 206}
 207
 208/* Invoke a vector operation on four Pregs.  */
 209static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
 210                        int rd, int rn, int rm, int rg)
 211{
 212    if (sve_access_check(s)) {
 213        unsigned psz = pred_gvec_reg_size(s);
 214        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
 215                       pred_full_reg_offset(s, rn),
 216                       pred_full_reg_offset(s, rm),
 217                       pred_full_reg_offset(s, rg),
 218                       psz, psz, gvec_op);
 219    }
 220    return true;
 221}
 222
/* Invoke a vector move on two Pregs.  Element size is irrelevant
 * for a whole-register copy, so esz 0 is used.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}
 228
/* Set the cpu flags as per a return from an SVE helper.
 * The helper packs the flags into t: bit 1 feeds ZF, bit 0 feeds CF,
 * the whole value feeds NF, and V is always cleared.
 */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
 237
/* Subroutines computing the ARM PredTest pseudofunction.
 * Single-word form: d is the predicate data, g the governing predicate.
 */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
 247
/* As do_predtest1, but over @words predicate words located at
 * env offsets @dofs (data) and @gofs (governing predicate).
 */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    /* t doubles as the word-count input and the flags output.  */
    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
 265
/* For each element size, the bits within a predicate word that are active.
 * Indexed by esz: every bit (B), every 2nd bit (H), every 4th (S),
 * every 8th (D) -- one predicate bit per element byte.
 */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
 271
/*
 *** SVE Logical - Unpredicated Group
 */

/* Bitwise ops ignore element size, so esz 0 is passed throughout.  */
static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV is an alias of ORR with identical sources */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    /* BIC = AND with complement of the second operand.  */
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}
 299
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

/* The saturating forms map onto the generic gvec saturating expanders:
 * ssadd/sssub are signed, usadd/ussub unsigned.  */
static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}
 333
 334/*
 335 *** SVE Integer Arithmetic - Binary Predicated Group
 336 */
 337
 338static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 339{
 340    unsigned vsz = vec_full_reg_size(s);
 341    if (fn == NULL) {
 342        return false;
 343    }
 344    if (sve_access_check(s)) {
 345        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
 346                           vec_full_reg_offset(s, a->rn),
 347                           vec_full_reg_offset(s, a->rm),
 348                           pred_full_reg_offset(s, a->pg),
 349                           vsz, vsz, 0, fn);
 350    }
 351    return true;
 352}
 353
/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.  No access check: callers must have
 * already performed it (see trans_SEL_zpzz).
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}
 370
/* Expand trans_<NAME>_zpzz using the per-element-size out-of-line
 * helpers gen_helper_sve_<name>_zpzz_{b,h,s,d}, indexed by a->esz.  */
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

/* Divide is only provided for 32- and 64-bit elements; the NULL
 * entries make do_zpzz_ool reject b/h as unallocated encodings.  */
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

#undef DO_ZPZZ
 430
 431/*
 432 *** SVE Integer Arithmetic - Unary Predicated Group
 433 */
 434
 435static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
 436{
 437    if (fn == NULL) {
 438        return false;
 439    }
 440    if (sve_access_check(s)) {
 441        unsigned vsz = vec_full_reg_size(s);
 442        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 443                           vec_full_reg_offset(s, a->rn),
 444                           pred_full_reg_offset(s, a->pg),
 445                           vsz, vsz, 0, fn);
 446    }
 447    return true;
 448}
 449
/* Expand trans_<NAME> for a predicated unary Zop, indexed by a->esz.  */
#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

/* FABS/FNEG have no 8-bit form; NULL rejects esz==0 as unallocated.  */
static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* Sign/zero extension: the source width must be narrower than the
 * element size, so narrower esz entries are NULL (unallocated).  */
static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ
 543
 544/*
 545 *** SVE Integer Reduction Group
 546 */
 547
/* Helper signature for reductions: result, Zn pointer, Pg pointer, desc.  */
typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Expand a predicated reduction of Zn into scalar register Vd.
 * A NULL fn marks the encoding unallocated.
 */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    /* Write the 64-bit reduction result to the destination Vreg.  */
    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

/* SADDV has no 64-bit element form; NULL rejects esz==3.  */
static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ
 611
 612/*
 613 *** SVE Shift by Immediate - Predicated Group
 614 */
 615
 616/* Store zero into every active element of Zd.  We will use this for two
 617 * and three-operand predicated instructions for which logic dictates a
 618 * zero result.
 619 */
 620static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
 621{
 622    static gen_helper_gvec_2 * const fns[4] = {
 623        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
 624        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
 625    };
 626    if (sve_access_check(s)) {
 627        unsigned vsz = vec_full_reg_size(s);
 628        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 629                           pred_full_reg_offset(s, pg),
 630                           vsz, vsz, 0, fns[esz]);
 631    }
 632    return true;
 633}
 634
/* Copy Zn into Zd, storing zeros into inactive elements.
 * No access check: callers must have already performed it.
 */
static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}
 648
 649static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
 650                        gen_helper_gvec_3 *fn)
 651{
 652    if (sve_access_check(s)) {
 653        unsigned vsz = vec_full_reg_size(s);
 654        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 655                           vec_full_reg_offset(s, a->rn),
 656                           pred_full_reg_offset(s, a->pg),
 657                           vsz, vsz, a->imm, fn);
 658    }
 659    return true;
 660}
 661
static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
 731
 732/*
 733 *** SVE Bitwise Shift - Predicated Group
 734 */
 735
/* Expand a predicated shift with a 64-bit (wide) shift-amount element.
 * Only b/h/s element sizes exist; esz==3 (and the invalid -1 from
 * tszimm_esz) are rejected as unallocated.  */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
 755
 756/*
 757 *** SVE Bitwise Shift - Unpredicated Group
 758 */
 759
/* Expand an unpredicated shift-by-immediate via a gvec expander.
 * @asr: true for arithmetic right shift (saturate the count),
 *       false for logical shifts (overlarge count zeroes the result).
 */
static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
 786
static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
 801
 802static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
 803{
 804    if (fn == NULL) {
 805        return false;
 806    }
 807    if (sve_access_check(s)) {
 808        unsigned vsz = vec_full_reg_size(s);
 809        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 810                           vec_full_reg_offset(s, a->rn),
 811                           vec_full_reg_offset(s, a->rm),
 812                           vsz, vsz, 0, fn);
 813    }
 814    return true;
 815}
 816
/* Expand an unpredicated shift with wide (64-bit) shift-amount elements.
 * No 64-bit element form exists, hence the trailing NULL.  */
#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW
 833
 834/*
 835 *** SVE Integer Multiply-Add Group
 836 */
 837
 838static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
 839                         gen_helper_gvec_5 *fn)
 840{
 841    if (sve_access_check(s)) {
 842        unsigned vsz = vec_full_reg_size(s);
 843        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
 844                           vec_full_reg_offset(s, a->ra),
 845                           vec_full_reg_offset(s, a->rn),
 846                           vec_full_reg_offset(s, a->rm),
 847                           pred_full_reg_offset(s, a->pg),
 848                           vsz, vsz, 0, fn);
 849    }
 850    return true;
 851}
 852
/* Expand trans_<NAME> for a predicated multiply-add, indexed by a->esz.  */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
 867
 868/*
 869 *** SVE Index Generation Group
 870 */
 871
/* Fill Zd with start + i * incr for each element i.
 * For 64-bit elements the operands are used directly; narrower
 * element sizes truncate start/incr to their low 32 bits first.
 * No access check: callers must have already performed it.
 */
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}
 902
/* INDEX (immediates): both start and increment are immediates;
 * the temporaries are freed, unlike borrowed cpu_reg values below.  */
static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

/* INDEX (immediate start, register increment).  */
static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);  /* borrowed; not freed */
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

/* INDEX (register start, immediate increment).  */
static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);  /* borrowed; not freed */
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

/* INDEX (registers): both operands borrowed; nothing to free.  */
static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}
 946
 947/*
 948 *** SVE Stack Allocation Group
 949 */
 950
 951static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
 952{
 953    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 954    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 955    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
 956    return true;
 957}
 958
 959static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
 960{
 961    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 962    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 963    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
 964    return true;
 965}
 966
 967static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
 968{
 969    TCGv_i64 reg = cpu_reg(s, a->rd);
 970    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
 971    return true;
 972}
 973
 974/*
 975 *** SVE Compute Vector Address Group
 976 */
 977
 978static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
 979{
 980    if (sve_access_check(s)) {
 981        unsigned vsz = vec_full_reg_size(s);
 982        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 983                           vec_full_reg_offset(s, a->rn),
 984                           vec_full_reg_offset(s, a->rm),
 985                           vsz, vsz, a->imm, fn);
 986    }
 987    return true;
 988}
 989
/* ADR, 32-bit element variant (per the helper name).  */
static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}
 994
/* ADR, 64-bit element variant (per the helper name).  */
static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}
 999
/* ADR, sign-extended 32-bit offset variant (per the helper name).  */
static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}
1004
/* ADR, zero-extended 32-bit offset variant (per the helper name).  */
static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
1009
1010/*
1011 *** SVE Integer Misc - Unpredicated Group
1012 */
1013
1014static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1015{
1016    static gen_helper_gvec_2 * const fns[4] = {
1017        NULL,
1018        gen_helper_sve_fexpa_h,
1019        gen_helper_sve_fexpa_s,
1020        gen_helper_sve_fexpa_d,
1021    };
1022    if (a->esz == 0) {
1023        return false;
1024    }
1025    if (sve_access_check(s)) {
1026        unsigned vsz = vec_full_reg_size(s);
1027        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1028                           vec_full_reg_offset(s, a->rn),
1029                           vsz, vsz, 0, fns[a->esz]);
1030    }
1031    return true;
1032}
1033
1034static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
1035{
1036    static gen_helper_gvec_3 * const fns[4] = {
1037        NULL,
1038        gen_helper_sve_ftssel_h,
1039        gen_helper_sve_ftssel_s,
1040        gen_helper_sve_ftssel_d,
1041    };
1042    if (a->esz == 0) {
1043        return false;
1044    }
1045    if (sve_access_check(s)) {
1046        unsigned vsz = vec_full_reg_size(s);
1047        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1048                           vec_full_reg_offset(s, a->rn),
1049                           vec_full_reg_offset(s, a->rm),
1050                           vsz, vsz, 0, fns[a->esz]);
1051    }
1052    return true;
1053}
1054
1055/*
1056 *** SVE Predicate Logical Operations Group
1057 */
1058
/* Expand a flag-setting predicate logical operation (the ".s" forms).
 * The operation itself is described by gvec_op; NZCV is then set as by
 * a PTEST of the result against the governing predicate.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1109
/* AND (predicates), one 64-bit word: pd = pn & pm & pg.  */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1115
/* AND (predicates), vector form: pd = pn & pm & pg.  */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1122
1123static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1124{
1125    static const GVecGen4 op = {
1126        .fni8 = gen_and_pg_i64,
1127        .fniv = gen_and_pg_vec,
1128        .fno = gen_helper_sve_and_pppp,
1129        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1130    };
1131    if (a->s) {
1132        return do_pppp_flags(s, a, &op);
1133    } else if (a->rn == a->rm) {
1134        if (a->pg == a->rn) {
1135            return do_mov_p(s, a->rd, a->rn);
1136        } else {
1137            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1138        }
1139    } else if (a->pg == a->rn || a->pg == a->rm) {
1140        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1141    } else {
1142        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1143    }
1144}
1145
/* BIC (predicates), one 64-bit word: pd = (pn & ~pm) & pg.  */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1151
/* BIC (predicates), vector form: pd = (pn & ~pm) & pg.  */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1158
1159static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1160{
1161    static const GVecGen4 op = {
1162        .fni8 = gen_bic_pg_i64,
1163        .fniv = gen_bic_pg_vec,
1164        .fno = gen_helper_sve_bic_pppp,
1165        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1166    };
1167    if (a->s) {
1168        return do_pppp_flags(s, a, &op);
1169    } else if (a->pg == a->rn) {
1170        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1171    } else {
1172        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1173    }
1174}
1175
/* EOR (predicates), one 64-bit word: pd = (pn ^ pm) & pg.  */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1181
/* EOR (predicates), vector form: pd = (pn ^ pm) & pg.  */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1188
1189static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1190{
1191    static const GVecGen4 op = {
1192        .fni8 = gen_eor_pg_i64,
1193        .fniv = gen_eor_pg_vec,
1194        .fno = gen_helper_sve_eor_pppp,
1195        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1196    };
1197    if (a->s) {
1198        return do_pppp_flags(s, a, &op);
1199    } else {
1200        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1201    }
1202}
1203
/* SEL (predicates), one 64-bit word: pd = (pn & pg) | (pm & ~pg).
 * NOTE(review): clobbers pn and pm — assumes the gvec expansion
 * supplies them as scratch temporaries; confirm.
 */
static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}
1210
/* SEL (predicates), vector form: pd = (pn & pg) | (pm & ~pg).
 * NOTE(review): clobbers pn and pm, as in the i64 form.
 */
static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}
1218
1219static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1220{
1221    static const GVecGen4 op = {
1222        .fni8 = gen_sel_pg_i64,
1223        .fniv = gen_sel_pg_vec,
1224        .fno = gen_helper_sve_sel_pppp,
1225        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1226    };
1227    if (a->s) {
1228        return false;
1229    } else {
1230        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1231    }
1232}
1233
/* ORR (predicates), one 64-bit word: pd = (pn | pm) & pg.  */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1239
/* ORR (predicates), vector form: pd = (pn | pm) & pg.  */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1246
1247static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1248{
1249    static const GVecGen4 op = {
1250        .fni8 = gen_orr_pg_i64,
1251        .fniv = gen_orr_pg_vec,
1252        .fno = gen_helper_sve_orr_pppp,
1253        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1254    };
1255    if (a->s) {
1256        return do_pppp_flags(s, a, &op);
1257    } else if (a->pg == a->rn && a->rn == a->rm) {
1258        return do_mov_p(s, a->rd, a->rn);
1259    } else {
1260        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1261    }
1262}
1263
/* ORN (predicates), one 64-bit word: pd = (pn | ~pm) & pg.  */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1269
/* ORN (predicates), vector form: pd = (pn | ~pm) & pg.  */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1276
1277static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1278{
1279    static const GVecGen4 op = {
1280        .fni8 = gen_orn_pg_i64,
1281        .fniv = gen_orn_pg_vec,
1282        .fno = gen_helper_sve_orn_pppp,
1283        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1284    };
1285    if (a->s) {
1286        return do_pppp_flags(s, a, &op);
1287    } else {
1288        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1289    }
1290}
1291
/* NOR (predicates), one 64-bit word: pd = pg & ~(pn | pm).  */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}
1297
/* NOR (predicates), vector form: pd = pg & ~(pn | pm).  */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
1304
1305static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1306{
1307    static const GVecGen4 op = {
1308        .fni8 = gen_nor_pg_i64,
1309        .fniv = gen_nor_pg_vec,
1310        .fno = gen_helper_sve_nor_pppp,
1311        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1312    };
1313    if (a->s) {
1314        return do_pppp_flags(s, a, &op);
1315    } else {
1316        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1317    }
1318}
1319
/* NAND (predicates), one 64-bit word: pd = pg & ~(pn & pm).  */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}
1325
/* NAND (predicates), vector form: pd = pg & ~(pn & pm).  */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
1332
1333static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1334{
1335    static const GVecGen4 op = {
1336        .fni8 = gen_nand_pg_i64,
1337        .fniv = gen_nand_pg_vec,
1338        .fno = gen_helper_sve_nand_pppp,
1339        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1340    };
1341    if (a->s) {
1342        return do_pppp_flags(s, a, &op);
1343    } else {
1344        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1345    }
1346}
1347
1348/*
1349 *** SVE Predicate Misc Group
1350 */
1351
/* PTEST: set NZCV from a test of predicate pn under governing pg.  */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        /* Number of 64-bit words covering the predicate register.  */
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            /* The whole predicate fits in one word: test it inline.  */
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1375
/* See the ARM pseudocode DecodePredCount: map a 5-bit predicate
 * pattern to the number of active elements, given the number of
 * elements available at this vector length and element size.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern == 0x0) {
        /* POW2: largest power of two that fits.  */
        return pow2floor(elements);
    } else if (pattern >= 0x1 && pattern <= 0x8) {
        /* VL1 ... VL8: the pattern value is the count itself.  */
        bound = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16, VL32, VL64, VL128, VL256.  */
        bound = 16 << (pattern - 9);
    } else if (pattern == 0x1d) {
        /* MUL4: largest multiple of 4.  */
        return elements - elements % 4;
    } else if (pattern == 0x1e) {
        /* MUL3: largest multiple of 3.  */
        return elements - elements % 3;
    } else if (pattern == 0x1f) {
        /* ALL.  */
        return elements;
    } else {
        /* #uimm5: all remaining encodings yield zero elements.  */
        return 0;
    }

    /* Fixed VL patterns apply only if that many elements exist.  */
    return elements >= bound ? bound : 0;
}
1413
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* Partial final word: keep only the bits covering the
             * active elements.
             */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The entire predicate register is a single word.  */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* Every stored word is identical: try a 64-bit splat.  */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    /* General case: full words, then the partial last word, then
     * zero out the rest of the register.
     */
    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        /* ZF mirrors NF: the Z flag is set exactly when no element
         * was set (word == 0).
         */
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1493
/* PTRUE (and PTRUES when a->s): initialize pd from the pattern.  */
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}
1498
/* SETFFR: set every element of the FFR.  */
static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}
1504
/* PFALSE: clear every element of pd.  */
static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}
1510
/* RDFFR (predicated): pd = FFR & pg, optionally setting flags.  */
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}
1522
/* RDFFR (unpredicated): copy the FFR into pd.  */
static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}
1527
/* WRFFR: copy pn into the FFR.  */
static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
1532
/* Common expansion for PFIRST and PNEXT.  desc packs the predicate
 * size in bytes with the element size.  The helper updates pd in
 * place; its return value is handed to do_pred_flags to set NZCV.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    /* t is both the descriptor input and the flags output.  */
    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}
1561
/* PFIRST: set the first active element of pd.  */
static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}
1566
/* PNEXT: advance pd to the next active element.  */
static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
1571
1572/*
1573 *** SVE Element Count Group
1574 */
1575
/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtracting a positive value can only underflow.  */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        /* Adding a positive value can only overflow.  */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    /* Clamp to the bound if the 64-bit result went past it.  */
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
1605
1606/* Similarly with 64-bit values.  */
1607static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1608{
1609    TCGv_i64 t0 = tcg_temp_new_i64();
1610    TCGv_i64 t1 = tcg_temp_new_i64();
1611    TCGv_i64 t2;
1612
1613    if (u) {
1614        if (d) {
1615            tcg_gen_sub_i64(t0, reg, val);
1616            tcg_gen_movi_i64(t1, 0);
1617            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1618        } else {
1619            tcg_gen_add_i64(t0, reg, val);
1620            tcg_gen_movi_i64(t1, -1);
1621            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1622        }
1623    } else {
1624        if (d) {
1625            /* Detect signed overflow for subtraction.  */
1626            tcg_gen_xor_i64(t0, reg, val);
1627            tcg_gen_sub_i64(t1, reg, val);
1628            tcg_gen_xor_i64(reg, reg, t0);
1629            tcg_gen_and_i64(t0, t0, reg);
1630
1631            /* Bound the result.  */
1632            tcg_gen_movi_i64(reg, INT64_MIN);
1633            t2 = tcg_const_i64(0);
1634            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1635        } else {
1636            /* Detect signed overflow for addition.  */
1637            tcg_gen_xor_i64(t0, reg, val);
1638            tcg_gen_add_i64(reg, reg, val);
1639            tcg_gen_xor_i64(t1, reg, val);
1640            tcg_gen_andc_i64(t0, t1, t0);
1641
1642            /* Bound the result.  */
1643            tcg_gen_movi_i64(t1, INT64_MAX);
1644            t2 = tcg_const_i64(0);
1645            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1646        }
1647        tcg_temp_free_i64(t2);
1648    }
1649    tcg_temp_free_i64(t0);
1650    tcg_temp_free_i64(t1);
1651}
1652
/* Similarly with a vector and a scalar operand: saturating add (or
 * subtract when d) of val into each element of zn, result in zd.
 * Unsigned when u; val is passed to the helpers, negated for the
 * signed subtract forms since only *qaddi helpers exist below MO_64.
 */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        /* Byte elements: pass the (possibly negated) value as i32.  */
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        /* Halfword elements: as for bytes.  */
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        /* Word elements: pass the (possibly negated) value as i64.  */
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        /* Doubleword elements: a dedicated unsigned-subtract helper
         * exists; the signed subtract negates into the add helper.
         */
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
1737
1738static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1739{
1740    if (sve_access_check(s)) {
1741        unsigned fullsz = vec_full_reg_size(s);
1742        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1743        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1744    }
1745    return true;
1746}
1747
1748static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1749{
1750    if (sve_access_check(s)) {
1751        unsigned fullsz = vec_full_reg_size(s);
1752        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1753        int inc = numelem * a->imm * (a->d ? -1 : 1);
1754        TCGv_i64 reg = cpu_reg(s, a->rd);
1755
1756        tcg_gen_addi_i64(reg, reg, inc);
1757    }
1758    return true;
1759}
1760
1761static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1762                               uint32_t insn)
1763{
1764    if (!sve_access_check(s)) {
1765        return true;
1766    }
1767
1768    unsigned fullsz = vec_full_reg_size(s);
1769    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1770    int inc = numelem * a->imm;
1771    TCGv_i64 reg = cpu_reg(s, a->rd);
1772
1773    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1774    if (inc == 0) {
1775        if (a->u) {
1776            tcg_gen_ext32u_i64(reg, reg);
1777        } else {
1778            tcg_gen_ext32s_i64(reg, reg);
1779        }
1780    } else {
1781        TCGv_i64 t = tcg_const_i64(inc);
1782        do_sat_addsub_32(reg, t, a->u, a->d);
1783        tcg_temp_free_i64(t);
1784    }
1785    return true;
1786}
1787
1788static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1789                               uint32_t insn)
1790{
1791    if (!sve_access_check(s)) {
1792        return true;
1793    }
1794
1795    unsigned fullsz = vec_full_reg_size(s);
1796    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1797    int inc = numelem * a->imm;
1798    TCGv_i64 reg = cpu_reg(s, a->rd);
1799
1800    if (inc != 0) {
1801        TCGv_i64 t = tcg_const_i64(inc);
1802        do_sat_addsub_64(reg, t, a->u, a->d);
1803        tcg_temp_free_i64(t);
1804    }
1805    return true;
1806}
1807
/* INC/DEC (vector, non-saturating): zd = zn +/- scaled count per
 * element.  No byte-element form exists.
 */
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        /* Count of zero devolves to a vector move.  NOTE(review):
         * assumes do_mov_z performs its own sve_access_check — confirm.
         */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
1831
/* SQINC/SQDEC/UQINC/UQDEC (vector): saturating per-element add or
 * subtract of the scaled count.  No byte-element form exists.
 */
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        /* Count of zero devolves to a vector move.  NOTE(review):
         * assumes do_mov_z performs its own sve_access_check — confirm.
         */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
1854
1855/*
1856 *** SVE Bitwise Immediate Group
1857 */
1858
1859static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1860{
1861    uint64_t imm;
1862    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1863                                extract32(a->dbm, 0, 6),
1864                                extract32(a->dbm, 6, 6))) {
1865        return false;
1866    }
1867    if (sve_access_check(s)) {
1868        unsigned vsz = vec_full_reg_size(s);
1869        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1870                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1871    }
1872    return true;
1873}
1874
/* AND (vector, bitmask immediate).  */
static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}
1879
/* ORR (vector, bitmask immediate).  */
static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}
1884
/* EOR (vector, bitmask immediate).  */
static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}
1889
1890static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1891{
1892    uint64_t imm;
1893    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1894                                extract32(a->dbm, 0, 6),
1895                                extract32(a->dbm, 6, 6))) {
1896        return false;
1897    }
1898    if (sve_access_check(s)) {
1899        do_dupi_z(s, a->rd, imm);
1900    }
1901    return true;
1902}
1903
1904/*
1905 *** SVE Integer Wide Immediate - Predicated Group
1906 */
1907
/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).  val is broadcast into
 * the active elements of rd; inactive elements are taken from rn.
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}
1936
/* FCPY: merging copy of an expanded VFP immediate to active elements.  */
static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
{
    /* There is no byte-sized floating-point format.  */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
1951
/* CPY (immediate), merging: active elements get IMM, others keep Zn.  */
static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    /* Insn bit 13 requests a left-shifted immediate, which is
     * invalid for byte elements.
     */
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/* CPY (immediate), zeroing: active elements get IMM, others are zeroed.  */
static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
{
    /* Out-of-line helpers, indexed by log2 of the element size.  */
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    /* As above, a shifted immediate is invalid for byte elements.  */
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
1985
1986/*
1987 *** SVE Permute Extract Group
1988 */
1989
/* EXT: extract a vector from the Zn:Zm pair, starting at byte a->imm.
 * An out-of-range index degenerates to a copy of Zn (n_ofs = 0).
 */
static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;   /* start byte within Zn */
    unsigned n_siz = vsz - n_ofs;                  /* bytes taken from Zn */
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        /* First move the tail of Zn down to the start of Zd, then
         * append the head of Zm.  The overlap check above guarantees
         * the first move does not clobber bytes still to be read.
         */
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fall back to the out-of-line helper, passing n_ofs as data.  */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
2019
2020/*
2021 *** SVE Permute - Unpredicated Group
2022 */
2023
/* DUP (scalar): broadcast a general register (or SP) to all elements.  */
static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

/* DUP (indexed): broadcast element [index] of Zn to all elements of Zd.
 * The element size and index are encoded together in a->imm.
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
{
    /* With no set bit in the low 5 bits there is no valid element
     * size; the encoding is unallocated.
     */
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        /* The position of the lowest set bit gives the element size;
         * the bits above it give the element index.
         */
        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /* Index beyond the current vector length: result is zero.  */
            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2056
/* INSR: shift Zn up by one element and insert VAL at the bottom,
 * writing the result to Zd.
 */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    /* Out-of-line helpers, indexed by log2 of the element size.  */
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_i32(desc);
}

/* INSR (SIMD&FP scalar): insert the low 64 bits of Vm.  */
static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

/* INSR (scalar): insert the general register Xm.  */
static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}
2097
/* REV (vector): reverse the order of elements within Zn.  */
static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    /* Out-of-line helpers, indexed by log2 of the element size.  */
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
        gen_helper_sve_rev_s, gen_helper_sve_rev_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
2113
/* TBL: table lookup — select elements of Zn using indices from Zm.  */
static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    /* Out-of-line helpers, indexed by log2 of the element size.  */
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
2130
/* SUNPK{LO,HI} / UUNPK{LO,HI}: sign- or zero-extend the low or high
 * half of Zn into full-width elements of Zd.
 */
static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
{
    /* Indexed by [destination element size][unsigned].  There is no
     * byte-sized destination, hence the NULL first row.
     */
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* For the HI forms, start reading at the midpoint of Zn.  */
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
2152
2153/*
2154 *** SVE Permute - Predicates Group
2155 */
2156
/* Expand a three-operand predicate permute out of line.  HIGH_ODD
 * selects the 1/2 form of the insn and is encoded into the descriptor
 * at SIMD_DATA_SHIFT + 2, with the element size at SIMD_DATA_SHIFT.
 */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    /* Hand-rolled descriptor: predicate size in the low bits.  */
    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}
2193
/* Expand a two-operand predicate permute out of line.  Descriptor
 * layout matches do_perm_pred3 above.
 */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}
2227
/* ZIP1 (predicates): interleave from the low halves of Pn and Pm.  */
static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

/* ZIP2 (predicates): interleave from the high halves of Pn and Pm.  */
static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

/* UZP1 (predicates): concatenate the even-numbered elements.  */
static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

/* UZP2 (predicates): concatenate the odd-numbered elements.  */
static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

/* TRN1 (predicates): transpose starting with the even elements.  */
static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

/* TRN2 (predicates): transpose starting with the odd elements.  */
static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

/* REV (predicate): reverse the element order of Pn.  */
static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

/* PUNPKLO: unpack and widen the low half of Pn.  */
static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

/* PUNPKHI: unpack and widen the high half of Pn.  */
static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}
2272
2273/*
2274 *** SVE Permute - Interleaving Group
2275 */
2276
/* ZIP1/ZIP2 (vectors): interleave elements of Zn and Zm, reading from
 * the low halves (HIGH false) or the high halves (HIGH true).
 */
static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
{
    /* Out-of-line helpers, indexed by log2 of the element size.  */
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* For ZIP2, bias both source offsets to the vector midpoint.  */
        unsigned high_ofs = high ? vsz / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
2294
2295static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2296                            gen_helper_gvec_3 *fn)
2297{
2298    if (sve_access_check(s)) {
2299        unsigned vsz = vec_full_reg_size(s);
2300        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2301                           vec_full_reg_offset(s, a->rn),
2302                           vec_full_reg_offset(s, a->rm),
2303                           vsz, vsz, data, fn);
2304    }
2305    return true;
2306}
2307
/* ZIP1 (vectors): interleave from the low halves of Zn and Zm.  */
static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zip(s, a, false);
}

/* ZIP2 (vectors): interleave from the high halves of Zn and Zm.  */
static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zip(s, a, true);
}

/* UZP helpers, indexed by log2 of the element size.  */
static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

/* UZP1/UZP2: the data operand (0, or the element size in bytes) tells
 * the helper which alternate elements to select.
 */
static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
}

static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
}

/* TRN helpers, indexed by log2 of the element size.  */
static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

/* TRN1/TRN2: the data operand distinguishes the two forms, as above.  */
static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
}

static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
}
2347
2348/*
2349 *** SVE Permute Vector - Predicated Group
2350 */
2351
/* COMPACT: pack the active elements of Zn into the low part of Zd.
 * Only word and doubleword element sizes exist; the NULL entries
 * cause do_zpz_ool to reject byte/halfword encodings.
 */
static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2359
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned vsz = pred_full_reg_size(s);
    unsigned desc;

    /* Hand-rolled descriptor: exact predicate size, plus the element
     * size in the data field.
     */
    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}
2385
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Power-of-two vector size: wrap with a mask.  */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise wrap with a compare: last = (last >= vsz ? 0 : last).
         * Unsigned compare also catches a negative (not-found) input.
         */
        TCGv_i32 max = tcg_const_i32(vsz);
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}

/* If LAST < 0, set LAST to the offset of the last element in the vector.  */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        /* The mask maps the not-found value to the final element.  */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* last = (last < 0 ? vsz - esize : last).  */
        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
2420
2421/* Load an unsigned element of ESZ from BASE+OFS.  */
2422static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2423{
2424    TCGv_i64 r = tcg_temp_new_i64();
2425
2426    switch (esz) {
2427    case 0:
2428        tcg_gen_ld8u_i64(r, base, ofs);
2429        break;
2430    case 1:
2431        tcg_gen_ld16u_i64(r, base, ofs);
2432        break;
2433    case 2:
2434        tcg_gen_ld32u_i64(r, base, ofs);
2435        break;
2436    case 3:
2437        tcg_gen_ld_i64(r, base, ofs);
2438        break;
2439    default:
2440        g_assert_not_reached();
2441    }
2442    return r;
2443}
2444
/* Load an unsigned element of ESZ from RM[LAST].
 * Note that LAST is clobbered on big-endian hosts.
 */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering.  See vec_reg_offset.  */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2470
/* Compute CLAST for a Zreg: broadcast the selected element of Zm to
 * all of Zd; if no element is active, Zd is a copy of Zn instead.
 */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* A local temp is required because LAST is live across the branch.  */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        /* Not-found path: copy Zn to Zd.  */
        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
2518
/* CLASTA (vectors): conditionally extract the element after the last.  */
static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    return do_clast_vector(s, a, false);
}

/* CLASTB (vectors): conditionally extract the last active element.  */
static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    return do_clast_vector(s, a, true);
}
2528
/* Compute CLAST for a scalar: replace REG_VAL with the selected element
 * of Zm, or leave it unchanged if no element is active.
 */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp, zero;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing.  */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);

    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
2561
/* Compute CLAST for a Vreg: the SIMD&FP destination also serves as the
 * fallback value when no element is active.
 */
static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        int esz = a->esz;
        int ofs = vec_reg_offset(s, a->rd, 0, esz);
        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);

        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
        /* Writing via write_fp_dreg zero-extends into the full Vd.  */
        write_fp_dreg(s, a->rd, reg);
        tcg_temp_free_i64(reg);
    }
    return true;
}

/* CLASTA (SIMD&FP scalar).  */
static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_fp(s, a, false);
}

/* CLASTB (SIMD&FP scalar).  */
static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_fp(s, a, true);
}
2586
/* Compute CLAST for a Xreg: Xd is both the fallback value and the
 * destination, zero-extended from the element size first.
 */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Narrow the existing register contents to the element size,
     * matching the width of the element that may replace them.
     */
    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}

/* CLASTA (scalar).  */
static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_general(s, a, false);
}

/* CLASTB (scalar).  */
static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_clast_general(s, a, true);
}
2626
/* Compute LAST for a scalar.  Unlike CLAST there is no fallback value:
 * a not-found index is wrapped to a valid element instead.
 */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}
2645
/* Compute LAST for a Vreg.  */
static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        write_fp_dreg(s, a->rd, val);
        tcg_temp_free_i64(val);
    }
    return true;
}

/* LASTA (SIMD&FP scalar).  */
static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_fp(s, a, false);
}

/* LASTB (SIMD&FP scalar).  */
static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_fp(s, a, true);
}

/* Compute LAST for a Xreg.  */
static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    if (sve_access_check(s)) {
        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
        tcg_temp_free_i64(val);
    }
    return true;
}

/* LASTA (scalar).  */
static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_general(s, a, false);
}

/* LASTB (scalar).  */
static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_last_general(s, a, true);
}
2687
/* CPY (scalar), merging: copy Xn (or SP) to the active elements of Zd.  */
static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
    }
    return true;
}

/* CPY (SIMD&FP scalar), merging: copy element 0 of Vn to the active
 * elements of Zd.
 */
static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
        tcg_temp_free_i64(t);
    }
    return true;
}
2706
/* REVB: reverse bytes within each element; no byte-element form.  */
static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* REVH: reverse halfwords within each word or doubleword element.  */
static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* REVW: reverse words within each doubleword element only.  */
static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
}

/* RBIT: reverse bits within each element.  */
static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2744
/* SPLICE: concatenate the active segment of Zn with leading elements
 * of Zm.  The element size is passed to the helper in simd_data.
 */
static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->esz, gen_helper_sve_splice);
    }
    return true;
}
2757
2758/*
2759 *** SVE Integer Compare - Vectors Group
2760 */
2761
/* Expand a predicated vector-vector compare, setting NZCV from the
 * flags value the helper returns.  A NULL GEN_FN marks an unallocated
 * element-size encoding.
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    /* T serves as both descriptor input and flags output.  */
    gen_fn(t, pd, zn, zm, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
2800
/* Expand the integer compare (vectors) insns, selecting the helper
 * by element size.
 */
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ
2820
/* Expand the integer compare (wide elements) insns.  There is no
 * doubleword form — the second operand is already 64-bit — hence the
 * NULL entry, which do_ppzz_flags rejects.
 */
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
        gen_helper_sve_##name##_ppzw_s, NULL                              \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
2844
2845/*
2846 *** SVE Integer Compare - Immediate Groups
2847 */
2848
/* Expand a predicated vector-immediate compare, passing the immediate
 * through simd_data and setting NZCV from the helper's return value.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    /* The immediate rides in the data field of the descriptor.  */
    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    /* T serves as both descriptor input and flags output.  */
    gen_fn(t, pd, zn, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
2884
/* Expand the integer compare (immediate) insns, selecting the helper
 * by element size.
 */
#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
    };                                                                    \
    return do_ppzi_flags(s, a, fns[a->esz]);                              \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
2908
2909/*
2910 *** SVE Partition Break Group
2911 */
2912
/* Expand a three-predicate BRK operation; a->s selects the flag-setting
 * variant, whose helper returns the NZCV value for do_pred_flags.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* T serves as both descriptor input and flags output.  */
        fn_s(t, d, n, m, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, m, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
2947
/*
 * Expand a 2-operand predicate partition-break insn (BRKA/BRKB/BRKN):
 * Pd = fn(Pn) under governing predicate Pg.  When a->s is set, use the
 * flag-setting helper variant fn_s and update NZCV from its result.
 */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    /* Raw descriptor: predicate size in bytes, biased by 2.  */
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* t is reused: descriptor on input, NZCV bits on output.  */
        fn_s(t, d, n, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
2979
/* BRKPA, BRKPAS: break after first true condition, propagating.  */
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}
2984
/* BRKPB, BRKPBS: break before first true condition, propagating.  */
static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}
2989
/* BRKA, BRKAS (merging form).  */
static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}
2994
/* BRKB, BRKBS (merging form).  */
static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}
2999
/* BRKA, BRKAS (zeroing form).  */
static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}
3004
/* BRKB, BRKBS (zeroing form).  */
static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}
3009
/* BRKN, BRKNS: propagate break to next partition.  */
static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
3014
3015/*
3016 *** SVE Predicate Count Group
3017 */
3018
/*
 * Compute CNTP: deposit into VAL the number of active elements of size
 * ESZ in predicate PN, as governed by predicate PG.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        /* The whole predicate fits in one i64: count inline.  */
        uint64_t psz_mask;

        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            /* Apply the governing predicate; skip when pn == pg.  */
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        /* One predicate bit remains per active element: popcount.  */
        tcg_gen_ctpop_i64(val, val);
    } else {
        /* Larger predicates go out of line to the cntp helper.  */
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc;
        TCGv_i32 t_desc;

        /* Raw descriptor: predicate size biased by 2, plus esz.  */
        desc = psz - 2;
        desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
        t_desc = tcg_const_i32(desc);

        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
        tcg_temp_free_i32(t_desc);
    }
}
3060
3061static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3062{
3063    if (sve_access_check(s)) {
3064        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3065    }
3066    return true;
3067}
3068
/*
 * INCP/DECP (scalar): increment or decrement Xd by the number of
 * active predicate elements; a->d selects decrement.
 */
static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
                            uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        do_cntp(s, val, a->esz, a->pg, a->pg);
        if (a->d) {
            tcg_gen_sub_i64(reg, reg, val);
        } else {
            tcg_gen_add_i64(reg, reg, val);
        }
        tcg_temp_free_i64(val);
    }
    return true;
}
3086
3087static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3088                            uint32_t insn)
3089{
3090    if (a->esz == 0) {
3091        return false;
3092    }
3093    if (sve_access_check(s)) {
3094        unsigned vsz = vec_full_reg_size(s);
3095        TCGv_i64 val = tcg_temp_new_i64();
3096        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3097
3098        do_cntp(s, val, a->esz, a->pg, a->pg);
3099        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3100                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3101    }
3102    return true;
3103}
3104
3105static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3106                                uint32_t insn)
3107{
3108    if (sve_access_check(s)) {
3109        TCGv_i64 reg = cpu_reg(s, a->rd);
3110        TCGv_i64 val = tcg_temp_new_i64();
3111
3112        do_cntp(s, val, a->esz, a->pg, a->pg);
3113        do_sat_addsub_32(reg, val, a->u, a->d);
3114    }
3115    return true;
3116}
3117
3118static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3119                                uint32_t insn)
3120{
3121    if (sve_access_check(s)) {
3122        TCGv_i64 reg = cpu_reg(s, a->rd);
3123        TCGv_i64 val = tcg_temp_new_i64();
3124
3125        do_cntp(s, val, a->esz, a->pg, a->pg);
3126        do_sat_addsub_64(reg, val, a->u, a->d);
3127    }
3128    return true;
3129}
3130
3131static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3132                             uint32_t insn)
3133{
3134    if (a->esz == 0) {
3135        return false;
3136    }
3137    if (sve_access_check(s)) {
3138        TCGv_i64 val = tcg_temp_new_i64();
3139        do_cntp(s, val, a->esz, a->pg, a->pg);
3140        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3141    }
3142    return true;
3143}
3144
3145/*
3146 *** SVE Integer Compare Scalars Group
3147 */
3148
/*
 * CTERMEQ/CTERMNE: compare two scalars and set NF/VF for the loop
 * termination test.  CF is not written here; the existing CF value is
 * consumed in the VF computation.
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    /* NF = (rn cond rm), as a 0/1 value for now.  */
    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF.  */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31.  */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3173
/*
 * WHILE{LT,LE,LO,LS}: construct a predicate of the loop iterations for
 * which the scalar condition holds, then set NZCV from the result.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
    TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned desc, vsz = vec_full_reg_size(s);
    TCGCond cond;

    /* For 32-bit forms, extend the operands per signedness (a->u).  */
    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     *
     * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
     * 2**64 iterations, overflowing to 0.  Of course, predicate registers
     * aren't that large, so any value >= predicate size is sufficient.
     */
    tcg_gen_sub_i64(t0, op1, op0);

    /* t0 = MIN(op1 - op0, vsz).  */
    tcg_gen_movi_i64(t1, vsz);
    tcg_gen_umin_i64(t0, t0, t1);
    if (a->eq) {
        /* Equality means one more iteration.  */
        tcg_gen_addi_i64(t0, t0, 1);
    }

    /* t0 = (condition true ? t0 : 0).  */
    cond = (a->u
            ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
            : (a->eq ? TCG_COND_LE : TCG_COND_LT));
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);

    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);

    /* Raw descriptor: predicate size (vsz / 8) biased by 2, plus esz.  */
    desc = (vsz / 8) - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    t3 = tcg_const_i32(desc);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    /* t2 is reused: iteration count on input, NZCV bits on output.  */
    gen_helper_sve_while(t2, ptr, t2, t3);
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    tcg_temp_free_i32(t3);
    return true;
}
3243
3244/*
3245 *** SVE Integer Wide Immediate - Unpredicated Group
3246 */
3247
/*
 * FDUP: broadcast an 8-bit VFP-encoded floating-point immediate to
 * every element of Zd.  Byte elements (esz == 0) are invalid.
 */
static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);
        uint64_t imm;

        /* Decode the VFP immediate.  */
        imm = vfp_expand_imm(a->esz, a->imm);
        /* Replicate the value across a 64-bit lane, then the vector.  */
        imm = dup_const(a->esz, imm);

        tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
    }
    return true;
}
3266
/*
 * DUP (immediate): broadcast an integer immediate to every element.
 * NOTE(review): insn bit 13 appears to be the immediate-shift bit,
 * invalid for byte elements -- confirm against the SVE encoding.
 */
static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        int dofs = vec_full_reg_offset(s, a->rd);

        tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
    }
    return true;
}
3280
/*
 * ADD (vector, immediate).  The shifted-immediate form (insn bit 13)
 * is invalid for byte elements.
 */
static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}
3293
/* SUB (vector, immediate): negate the immediate and reuse ADD.  */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    /* Mutating a->imm in place is fine: a is decoded per insn.  */
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a, insn);
}
3299
/*
 * SUBR (vector, immediate): Zd = imm - Zn, i.e. reversed subtract.
 * scalar_first in each GVecGen2s puts the immediate on the left.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opc = INDEX_op_sub_vec,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opc = INDEX_op_sub_vec,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opc = INDEX_op_sub_vec,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opc = INDEX_op_sub_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    /* Shifted-immediate form (insn bit 13) invalid for byte elements.  */
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, c, &op[a->esz]);
        tcg_temp_free_i64(c);
    }
    return true;
}
3343
3344static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3345{
3346    if (sve_access_check(s)) {
3347        unsigned vsz = vec_full_reg_size(s);
3348        tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3349                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3350    }
3351    return true;
3352}
3353
/*
 * Common expansion for SQADD/UQADD/SQSUB/UQSUB (vector, immediate):
 * u selects unsigned saturation, d selects subtraction.  The
 * shifted-immediate form (insn bit 13) is invalid for byte elements.
 */
static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
                       bool u, bool d)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_const_i64(a->imm);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
        tcg_temp_free_i64(val);
    }
    return true;
}
3367
/* SQADD (vector, immediate): signed saturating add.  */
static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, false, false);
}
3372
/* UQADD (vector, immediate): unsigned saturating add.  */
static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, true, false);
}
3377
/* SQSUB (vector, immediate): signed saturating subtract.  */
static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, false, true);
}
3382
/* UQSUB (vector, immediate): unsigned saturating subtract.  */
static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, true, true);
}
3387
/*
 * Expand a two-operand + immediate SVE insn via an out-of-line helper
 * that takes the immediate as a TCGv_i64 operand.
 */
static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);

        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            vec_full_reg_offset(s, a->rn),
                            c, vsz, vsz, 0, fn);
        tcg_temp_free_i64(c);
    }
    return true;
}
3401
/*
 * Expand an SVE min/max-with-immediate insn NAME by element size via
 * do_zzi_ool().
 */
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a,         \
                               uint32_t insn)                           \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}
3412
/* Signed/unsigned min/max against immediate.  */
DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3419
3420static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a, uint32_t insn)
3421{
3422    static gen_helper_gvec_3 * const fns[2][2] = {
3423        { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3424        { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3425    };
3426
3427    if (sve_access_check(s)) {
3428        unsigned vsz = vec_full_reg_size(s);
3429        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3430                           vec_full_reg_offset(s, a->rn),
3431                           vec_full_reg_offset(s, a->rm),
3432                           vsz, vsz, 0, fns[a->u][a->sz]);
3433    }
3434    return true;
3435}
3436
3437static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a, uint32_t insn)
3438{
3439    static gen_helper_gvec_3 * const fns[2][2] = {
3440        { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3441        { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3442    };
3443
3444    if (sve_access_check(s)) {
3445        unsigned vsz = vec_full_reg_size(s);
3446        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3447                           vec_full_reg_offset(s, a->rn),
3448                           vec_full_reg_offset(s, a->rm),
3449                           vsz, vsz, a->index, fns[a->u][a->sz]);
3450    }
3451    return true;
3452}
3453
3454
3455/*
3456 *** SVE Floating Point Multiply-Add Indexed Group
3457 */
3458
/*
 * FMLA/FMLS (indexed): fused multiply-add with an indexed element.
 * The index and the subtract flag are packed into the desc data.
 */
static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a, uint32_t insn)
{
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Half-precision uses the FP16-specific status flags.  */
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | a->sub,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
3480
3481/*
3482 *** SVE Floating Point Multiply Indexed Group
3483 */
3484
/* FMUL (indexed): multiply each element of Zn by Zm[index].  */
static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a, uint32_t insn)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_gvec_fmul_idx_h,
        gen_helper_gvec_fmul_idx_s,
        gen_helper_gvec_fmul_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Half-precision uses the FP16-specific status flags.  */
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->index, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
3504
3505/*
3506 *** SVE Floating Point Fast Reduction Group
3507 */
3508
3509typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3510                                  TCGv_ptr, TCGv_i32);
3511
/*
 * Expand a predicated floating-point horizontal reduction, writing the
 * scalar result to the FP register a->rd.
 */
static void do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    /* maxsz is rounded up to a power of two; presumably the helper
     * performs a power-of-2 tree reduction -- TODO confirm.
     */
    unsigned p2vsz = pow2ceil(vsz);
    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    /* Half-precision uses the FP16-specific status flags.  */
    status = get_fpstatus_ptr(a->esz == MO_16);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);
    tcg_temp_free_i32(t_desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
}
3538
/*
 * Expand an SVE FP horizontal reduction NAME via do_reduce().
 * Byte elements (esz == 0) are invalid for FP operations.
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_fp_reduce * const fns[3] = {                       \
        gen_helper_sve_##name##_h,                                       \
        gen_helper_sve_##name##_s,                                       \
        gen_helper_sve_##name##_d,                                       \
    };                                                                   \
    if (a->esz == 0) {                                                   \
        return false;                                                    \
    }                                                                    \
    if (sve_access_check(s)) {                                           \
        do_reduce(s, a, fns[a->esz - 1]);                                \
    }                                                                    \
    return true;                                                         \
}
3555
/* FP add/min/max reductions, including the NaN-propagating variants.  */
DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
3561
3562/*
3563 *** SVE Floating Point Unary Operations - Unpredicated Group
3564 */
3565
/* Expand an unpredicated two-operand FP insn via out-of-line helper.  */
static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    /* Half-precision uses the FP16-specific status flags.  */
    TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);

    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       status, vsz, vsz, 0, fn);
    tcg_temp_free_ptr(status);
}
3576
/* FRECPE: reciprocal estimate.  Byte elements are invalid.  */
static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2_ptr * const fns[3] = {
        gen_helper_gvec_frecpe_h,
        gen_helper_gvec_frecpe_s,
        gen_helper_gvec_frecpe_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        do_zz_fp(s, a, fns[a->esz - 1]);
    }
    return true;
}
3592
/* FRSQRTE: reciprocal square root estimate.  Byte elements invalid.  */
static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2_ptr * const fns[3] = {
        gen_helper_gvec_frsqrte_h,
        gen_helper_gvec_frsqrte_s,
        gen_helper_gvec_frsqrte_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        do_zz_fp(s, a, fns[a->esz - 1]);
    }
    return true;
}
3608
3609/*
3610 *** SVE Floating Point Compare with Zero Group
3611 */
3612
/*
 * Expand a predicated FP compare-with-zero insn: result is a predicate
 * register, inputs are a vector and the governing predicate.
 */
static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_gvec_3_ptr *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    /* Half-precision uses the FP16-specific status flags.  */
    TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);

    tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       pred_full_reg_offset(s, a->pg),
                       status, vsz, vsz, 0, fn);
    tcg_temp_free_ptr(status);
}
3625
/*
 * Expand an SVE FP compare-with-zero insn NAME via do_ppz_fp().
 * Byte elements (esz == 0) are invalid for FP operations.
 */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                 \
    static gen_helper_gvec_3_ptr * const fns[3] = {               \
        gen_helper_sve_##name##_h,                                \
        gen_helper_sve_##name##_s,                                \
        gen_helper_sve_##name##_d,                                \
    };                                                            \
    if (a->esz == 0) {                                            \
        return false;                                             \
    }                                                             \
    if (sve_access_check(s)) {                                    \
        do_ppz_fp(s, a, fns[a->esz - 1]);                         \
    }                                                             \
    return true;                                                  \
}
3642
/* All six FP compare-against-zero conditions.  */
DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
3651
3652/*
3653 *** SVE floating-point trig multiply-add coefficient
3654 */
3655
/*
 * FTMAD: trig multiply-add coefficient; the 3-bit immediate selects
 * the coefficient and is passed in the desc data.  Byte elements are
 * invalid.
 */
static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a, uint32_t insn)
{
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_sve_ftmad_h,
        gen_helper_sve_ftmad_s,
        gen_helper_sve_ftmad_d,
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Half-precision uses the FP16-specific status flags.  */
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->imm, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
3678
3679/*
3680 *** SVE Floating Point Accumulating Reduction Group
3681 */
3682
/*
 * FADDA: strictly-ordered accumulating FP add.  The initial value is
 * element 0 of Zn; the helper folds in the active elements of Zm and
 * the scalar result is written back to the FP register a->rd.
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    /* Byte elements are invalid for FP operations.  */
    if (a->esz == 0) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* Load the initial accumulator from element 0 of Zn.  */
    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    /* Half-precision uses the FP16-specific status flags.  */
    t_fpst = get_fpstatus_ptr(a->esz == MO_16);
    t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    /* t_val is both accumulator input and result output.  */
    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
3723
3724/*
3725 *** SVE Floating Point Arithmetic - Unpredicated Group
3726 */
3727
/*
 * Expand an unpredicated three-operand FP insn via out-of-line helper.
 * A NULL fn marks an invalid element size in the dispatch tables.
 */
static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
                      gen_helper_gvec_3_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Half-precision uses the FP16-specific status flags.  */
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
3745
3746
/*
 * Expand an unpredicated FP arithmetic insn NAME via do_zzz_fp();
 * the NULL slot rejects byte elements.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_3_ptr * const fns[4] = {                 \
        NULL, gen_helper_gvec_##name##_h,                           \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
    };                                                              \
    return do_zzz_fp(s, a, fns[a->esz]);                            \
}
3756
/* Unpredicated FP add/sub/mul and the step functions.  */
DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
3765
3766/*
3767 *** SVE Floating Point Arithmetic - Predicated Group
3768 */
3769
/*
 * Expand a predicated three-operand FP insn via out-of-line helper.
 * A NULL fn marks an invalid element size in the dispatch tables.
 */
static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
                       gen_helper_gvec_4_ptr *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Half-precision uses the FP16-specific status flags.  */
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, fn);
        tcg_temp_free_ptr(status);
    }
    return true;
}
3788
/*
 * Expand a predicated FP arithmetic insn NAME via do_zpzz_fp();
 * the NULL slot rejects byte elements.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_zpzz_fp(s, a, fns[a->esz]);                           \
}
3798
/* The full set of predicated FP binary operations.  */
DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3
3812
3813typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3814                                      TCGv_i64, TCGv_ptr, TCGv_i32);
3815
/*
 * Expand a predicated FP operation between vector Zn and a scalar
 * operand, writing to Zd under predicate pg.
 */
static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = get_fpstatus_ptr(is_fp16);
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_i32(desc);
    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}
3840
/* Wrap do_fp_scalar for an immediate operand held in a constant temp.  */
static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
                      gen_helper_sve_fp2scalar *fn)
{
    TCGv_i64 temp = tcg_const_i64(imm);
    do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
    tcg_temp_free_i64(temp);
}
3848
/*
 * Expand an SVE FP arithmetic-with-immediate insn NAME.  The 1-bit
 * a->imm selects between the two allowed constants (const0/const1),
 * expanded per element size.  Byte elements are invalid.
 */
#define DO_FP_IMM(NAME, name, const0, const1) \
static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_sve_fp2scalar * const fns[3] = {                    \
        gen_helper_sve_##name##_h,                                        \
        gen_helper_sve_##name##_s,                                        \
        gen_helper_sve_##name##_d                                         \
    };                                                                    \
    static uint64_t const val[3][2] = {                                   \
        { float16_##const0, float16_##const1 },                           \
        { float32_##const0, float32_##const1 },                           \
        { float64_##const0, float64_##const1 },                           \
    };                                                                    \
    if (a->esz == 0) {                                                    \
        return false;                                                     \
    }                                                                     \
    if (sve_access_check(s)) {                                            \
        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
    }                                                                     \
    return true;                                                          \
}
3871
3872#define float16_two  make_float16(0x4000)
3873#define float32_two  make_float32(0x40000000)
3874#define float64_two  make_float64(0x4000000000000000ULL)
3875
3876DO_FP_IMM(FADD, fadds, half, one)
3877DO_FP_IMM(FSUB, fsubs, half, one)
3878DO_FP_IMM(FMUL, fmuls, half, two)
3879DO_FP_IMM(FSUBR, fsubrs, half, one)
3880DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3881DO_FP_IMM(FMINNM, fminnms, zero, one)
3882DO_FP_IMM(FMAX, fmaxs, zero, one)
3883DO_FP_IMM(FMIN, fmins, zero, one)
3884
3885#undef DO_FP_IMM
3886
3887static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3888                      gen_helper_gvec_4_ptr *fn)
3889{
3890    if (fn == NULL) {
3891        return false;
3892    }
3893    if (sve_access_check(s)) {
3894        unsigned vsz = vec_full_reg_size(s);
3895        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3896        tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3897                           vec_full_reg_offset(s, a->rn),
3898                           vec_full_reg_offset(s, a->rm),
3899                           pred_full_reg_offset(s, a->pg),
3900                           status, vsz, vsz, 0, fn);
3901        tcg_temp_free_ptr(status);
3902    }
3903    return true;
3904}
3905
/* Expand a trans function for each predicated FP compare.  The fns table
 * is indexed by element size; byte elements (fns[0] == NULL) are invalid
 * and rejected by do_fp_cmp.
 */
#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,     \
                                uint32_t insn)                        \
{                                                                     \
    static gen_helper_gvec_4_ptr * const fns[4] = {                   \
        NULL, gen_helper_sve_##name##_h,                              \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
    };                                                                \
    return do_fp_cmp(s, a, fns[a->esz]);                              \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
3926
3927static bool trans_FCADD(DisasContext *s, arg_FCADD *a, uint32_t insn)
3928{
3929    static gen_helper_gvec_4_ptr * const fns[3] = {
3930        gen_helper_sve_fcadd_h,
3931        gen_helper_sve_fcadd_s,
3932        gen_helper_sve_fcadd_d
3933    };
3934
3935    if (a->esz == 0) {
3936        return false;
3937    }
3938    if (sve_access_check(s)) {
3939        unsigned vsz = vec_full_reg_size(s);
3940        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3941        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3942                           vec_full_reg_offset(s, a->rn),
3943                           vec_full_reg_offset(s, a->rm),
3944                           pred_full_reg_offset(s, a->pg),
3945                           status, vsz, vsz, a->rot, fns[a->esz - 1]);
3946        tcg_temp_free_ptr(status);
3947    }
3948    return true;
3949}
3950
3951typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3952
3953static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3954{
3955    if (fn == NULL) {
3956        return false;
3957    }
3958    if (!sve_access_check(s)) {
3959        return true;
3960    }
3961
3962    unsigned vsz = vec_full_reg_size(s);
3963    unsigned desc;
3964    TCGv_i32 t_desc;
3965    TCGv_ptr pg = tcg_temp_new_ptr();
3966
3967    /* We would need 7 operands to pass these arguments "properly".
3968     * So we encode all the register numbers into the descriptor.
3969     */
3970    desc = deposit32(a->rd, 5, 5, a->rn);
3971    desc = deposit32(desc, 10, 5, a->rm);
3972    desc = deposit32(desc, 15, 5, a->ra);
3973    desc = simd_desc(vsz, vsz, desc);
3974
3975    t_desc = tcg_const_i32(desc);
3976    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3977    fn(cpu_env, pg, t_desc);
3978    tcg_temp_free_i32(t_desc);
3979    tcg_temp_free_ptr(pg);
3980    return true;
3981}
3982
/* Expand a trans function for each FMLA variant; all register numbers
 * are encoded into the descriptor by do_fmla.  Byte elements are
 * invalid (fns[0] == NULL).
 */
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                    \
    static gen_helper_sve_fmla * const fns[4] = {                    \
        NULL, gen_helper_sve_##name##_h,                             \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
    };                                                               \
    return do_fmla(s, a, fns[a->esz]);                               \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
3999
/* FCMLA (vectors): predicated floating-point complex multiply-add,
 * with the rotation encoded alongside the register numbers.
 */
static bool trans_FCMLA_zpzzz(DisasContext *s,
                              arg_FCMLA_zpzzz *a, uint32_t insn)
{
    static gen_helper_sve_fmla * const fns[3] = {
        gen_helper_sve_fcmla_zpzzz_h,
        gen_helper_sve_fcmla_zpzzz_s,
        gen_helper_sve_fcmla_zpzzz_d,
    };

    /* Byte elements are not supported.  */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned desc;
        TCGv_i32 t_desc;
        TCGv_ptr pg = tcg_temp_new_ptr();

        /* We would need 7 operands to pass these arguments "properly".
         * So we encode all the register numbers into the descriptor.
         */
        desc = deposit32(a->rd, 5, 5, a->rn);
        desc = deposit32(desc, 10, 5, a->rm);
        desc = deposit32(desc, 15, 5, a->ra);
        desc = deposit32(desc, 20, 2, a->rot);
        /* NOTE(review): the 22-bit payload is sign-extended here,
         * presumably so that a set bit 21 (the high bit of ROT) stays
         * within the signed range of simd_desc's data field -- confirm
         * against simd_desc()'s range check.
         */
        desc = sextract32(desc, 0, 22);
        desc = simd_desc(vsz, vsz, desc);

        t_desc = tcg_const_i32(desc);
        tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
        fns[a->esz - 1](cpu_env, pg, t_desc);
        tcg_temp_free_i32(t_desc);
        tcg_temp_free_ptr(pg);
    }
    return true;
}
4036
4037static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a, uint32_t insn)
4038{
4039    static gen_helper_gvec_3_ptr * const fns[2] = {
4040        gen_helper_gvec_fcmlah_idx,
4041        gen_helper_gvec_fcmlas_idx,
4042    };
4043
4044    tcg_debug_assert(a->esz == 1 || a->esz == 2);
4045    tcg_debug_assert(a->rd == a->ra);
4046    if (sve_access_check(s)) {
4047        unsigned vsz = vec_full_reg_size(s);
4048        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4049        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4050                           vec_full_reg_offset(s, a->rn),
4051                           vec_full_reg_offset(s, a->rm),
4052                           status, vsz, vsz,
4053                           a->index * 4 + a->rot,
4054                           fns[a->esz - 1]);
4055        tcg_temp_free_ptr(status);
4056    }
4057    return true;
4058}
4059
4060/*
4061 *** SVE Floating Point Unary Operations Predicated Group
4062 */
4063
4064static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4065                       bool is_fp16, gen_helper_gvec_3_ptr *fn)
4066{
4067    if (sve_access_check(s)) {
4068        unsigned vsz = vec_full_reg_size(s);
4069        TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4070        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4071                           vec_full_reg_offset(s, rn),
4072                           pred_full_reg_offset(s, pg),
4073                           status, vsz, vsz, 0, fn);
4074        tcg_temp_free_ptr(status);
4075    }
4076    return true;
4077}
4078
/* FCVT: conversions among half/single/double precision.  The is_fp16
 * flag to do_zpz_ptr selects the FP16 status word whenever one of the
 * operands is half precision.
 */
static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvt_sh);
}

static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
}

static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvt_dh);
}

static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
}

static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
}

/* FCVTZS/FCVTZU: convert FP to signed/unsigned integer, rounding toward
 * zero.  One wrapper per source/destination size combination.
 */
static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
}

static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
}

static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
}

static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
}

static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
}

static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
}

static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
}

static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
}

static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
}

static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
}

static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
}

static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
}

static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
}

static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
}
4178
/* Round-to-integral helpers, indexed by element size - 1.  Shared by
 * FRINTI (current rounding mode) and do_frint_mode (explicit mode).
 */
static gen_helper_gvec_3_ptr * const frint_fns[3] = {
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};

/* FRINTI: round to integral without forcing a rounding mode
 * (contrast do_frint_mode below).  Byte elements are invalid.
 */
static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      frint_fns[a->esz - 1]);
}
4193
4194static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4195{
4196    static gen_helper_gvec_3_ptr * const fns[3] = {
4197        gen_helper_sve_frintx_h,
4198        gen_helper_sve_frintx_s,
4199        gen_helper_sve_frintx_d
4200    };
4201    if (a->esz == 0) {
4202        return false;
4203    }
4204    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4205}
4206
/* Expand FRINT* with an explicit rounding mode: temporarily install
 * MODE in the FP status, perform the rounding, then restore the old
 * mode.  Byte elements are invalid.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i32 tmode = tcg_const_i32(mode);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);

        /* set_rmode writes the previous rounding mode back into tmode,
         * so the identical call below restores the original mode.
         */
        gen_helper_set_rmode(tmode, tmode, status);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, frint_fns[a->esz - 1]);

        gen_helper_set_rmode(tmode, tmode, status);
        tcg_temp_free_i32(tmode);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4230
/* FRINTN: round to nearest, ties to even.  */
static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_frint_mode(s, a, float_round_nearest_even);
}

/* FRINTP: round toward +infinity.  */
static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_frint_mode(s, a, float_round_up);
}

/* FRINTM: round toward -infinity.  */
static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_frint_mode(s, a, float_round_down);
}

/* FRINTZ: round toward zero.  */
static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_frint_mode(s, a, float_round_to_zero);
}

/* FRINTA: round to nearest, ties away from zero.  */
static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_frint_mode(s, a, float_round_ties_away);
}
4255
4256static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4257{
4258    static gen_helper_gvec_3_ptr * const fns[3] = {
4259        gen_helper_sve_frecpx_h,
4260        gen_helper_sve_frecpx_s,
4261        gen_helper_sve_frecpx_d
4262    };
4263    if (a->esz == 0) {
4264        return false;
4265    }
4266    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4267}
4268
4269static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4270{
4271    static gen_helper_gvec_3_ptr * const fns[3] = {
4272        gen_helper_sve_fsqrt_h,
4273        gen_helper_sve_fsqrt_s,
4274        gen_helper_sve_fsqrt_d
4275    };
4276    if (a->esz == 0) {
4277        return false;
4278    }
4279    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4280}
4281
/* SCVTF: signed integer to floating-point conversions, one wrapper per
 * source/destination size combination.
 */
static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
}

static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
}

static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
}

static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
}

static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
}

static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
}

static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
}

/* UCVTF: unsigned integer to floating-point conversions.  */
static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
}

static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
}

static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
}

static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
}

static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
}

static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
}

static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
}
4351
4352/*
4353 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4354 */
4355
/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 * Used for both LDR (vector) and LDR (predicate); LEN need not be a
 * multiple of 8, but is always a multiple of 2.
 */

static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    /* Total number of loads emitted: one per aligned dword, plus one
     * or two for the tail (ctpop8 counts the pieces of 2/4/6).
     */
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        /* Small register: fully unrolled, one load per aligned dword.  */
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        /* Large register: emit a run-time loop over the aligned part,
         * with I (a byte offset) as a local temp surviving branches.
         */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        /* Store to env[vofs + i]; the increment of I happens before the
         * store but TP was computed from the pre-increment value.
         */
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* Power-of-two tail: one load of exactly that size.  */
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6-byte tail: a 4-byte then a 2-byte load, merged.  */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
4444
/* Similarly for stores: write LEN bytes of the register at VOFS to the
 * address Rn + IMM.  See do_ldr for the unrolling strategy.
 */
static void do_str(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    /* Total number of stores: one per aligned dword plus the tail.  */
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        /* Small register: fully unrolled.  */
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
        }
    } else {
        /* Large register: run-time loop, with the byte offset I as a
         * local temp surviving branches.
         */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr t2, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Load the dword at env[vofs + i].  */
        t2 = tcg_temp_new_ptr();
        tcg_gen_add_ptr(t2, cpu_env, i);
        tcg_gen_ld_i64(t0, t2, vofs);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tcg_gen_addi_ptr(t2, i, imm);
        tcg_gen_extu_ptr_i64(addr, t2);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
        tcg_temp_free_ptr(t2);

        tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_addi_ptr(i, i, 8);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2.  */
    if (len_remain) {
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* Power-of-two tail: one store of exactly that size.  */
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6-byte tail: a 4-byte then a 2-byte store.  */
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
4527
/* LDR (vector): the immediate offset is scaled by the vector size.  */
static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

/* LDR (predicate): as above, scaled by the predicate size.  */
static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_ldr(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

/* STR (vector).  */
static bool trans_STR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = vec_full_reg_size(s);
        int off = vec_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}

/* STR (predicate).  */
static bool trans_STR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int size = pred_full_reg_size(s);
        int off = pred_full_reg_offset(s, a->rd);
        do_str(s, off, size, a->rn, a->imm * size);
    }
    return true;
}
4567
4568/*
4569 *** SVE Memory - Contiguous Load Group
4570 */
4571
/* The memory mode of the dtype.  Indexed by the 4-bit dtype field;
 * rows parallel the helper table in do_ld_zpa, so e.g. dtype 4
 * (ld1sds) is a sign-extending 32-bit load.
 */
static const TCGMemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_Q
};

#define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)

/* The vector element size of dtype, i.e. the size of the destination
 * element after any extension.  Same indexing as dtype_mop.
 */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
4589
4590static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4591                       gen_helper_gvec_mem *fn)
4592{
4593    unsigned vsz = vec_full_reg_size(s);
4594    TCGv_ptr t_pg;
4595    TCGv_i32 desc;
4596
4597    /* For e.g. LD4, there are not enough arguments to pass all 4
4598     * registers as pointers, so encode the regno into the data field.
4599     * For consistency, do this even for LD1.
4600     */
4601    desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
4602    t_pg = tcg_temp_new_ptr();
4603
4604    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4605    fn(cpu_env, t_pg, addr, desc);
4606
4607    tcg_temp_free_ptr(t_pg);
4608    tcg_temp_free_i32(desc);
4609}
4610
/* Expand a predicated contiguous load of NREG+1 registers.  The helper
 * table is indexed [dtype][nreg]; helper names encode the memory and
 * register element sizes plus extension, e.g. ld1bsu = load bytes into
 * word elements, zero-extended.  Multi-register forms exist only for
 * same-size loads.
 */
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    static gen_helper_gvec_mem * const fns[16][4] = {
        { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
          gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
        { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
          gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
        { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
          gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
        { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },

        { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
        { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
          gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
    };
    gen_helper_gvec_mem *fn = fns[dtype][nreg];

    /* While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, fn);
}
4647
4648static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4649{
4650    if (a->rm == 31) {
4651        return false;
4652    }
4653    if (sve_access_check(s)) {
4654        TCGv_i64 addr = new_tmp_a64(s);
4655        tcg_gen_muli_i64(addr, cpu_reg(s, a->rm),
4656                         (a->nreg + 1) << dtype_msz(a->dtype));
4657        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4658        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4659    }
4660    return true;
4661}
4662
4663static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4664{
4665    if (sve_access_check(s)) {
4666        int vsz = vec_full_reg_size(s);
4667        int elements = vsz >> dtype_esz[a->dtype];
4668        TCGv_i64 addr = new_tmp_a64(s);
4669
4670        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4671                         (a->imm * elements * (a->nreg + 1))
4672                         << dtype_msz(a->dtype));
4673        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4674    }
4675    return true;
4676}
4677
/* First-fault contiguous load (LDFF1), scalar-plus-scalar form.
 * One helper per dtype; same naming scheme as the do_ld_zpa table.
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
{
    static gen_helper_gvec_mem * const fns[16] = {
        gen_helper_sve_ldff1bb_r,
        gen_helper_sve_ldff1bhu_r,
        gen_helper_sve_ldff1bsu_r,
        gen_helper_sve_ldff1bdu_r,

        gen_helper_sve_ldff1sds_r,
        gen_helper_sve_ldff1hh_r,
        gen_helper_sve_ldff1hsu_r,
        gen_helper_sve_ldff1hdu_r,

        gen_helper_sve_ldff1hds_r,
        gen_helper_sve_ldff1hss_r,
        gen_helper_sve_ldff1ss_r,
        gen_helper_sve_ldff1sdu_r,

        gen_helper_sve_ldff1bds_r,
        gen_helper_sve_ldff1bss_r,
        gen_helper_sve_ldff1bhs_r,
        gen_helper_sve_ldff1dd_r,
    };

    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        /* Address is Rn + Rm scaled by the memory element size.  */
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
    }
    return true;
}
4710
/* Non-fault contiguous load (LDNF1), scalar-plus-immediate form.
 * One helper per dtype; same naming scheme as the do_ld_zpa table.
 */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
{
    static gen_helper_gvec_mem * const fns[16] = {
        gen_helper_sve_ldnf1bb_r,
        gen_helper_sve_ldnf1bhu_r,
        gen_helper_sve_ldnf1bsu_r,
        gen_helper_sve_ldnf1bdu_r,

        gen_helper_sve_ldnf1sds_r,
        gen_helper_sve_ldnf1hh_r,
        gen_helper_sve_ldnf1hsu_r,
        gen_helper_sve_ldnf1hdu_r,

        gen_helper_sve_ldnf1hds_r,
        gen_helper_sve_ldnf1hss_r,
        gen_helper_sve_ldnf1ss_r,
        gen_helper_sve_ldnf1sdu_r,

        gen_helper_sve_ldnf1bds_r,
        gen_helper_sve_ldnf1bss_r,
        gen_helper_sve_ldnf1bhs_r,
        gen_helper_sve_ldnf1dd_r,
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        /* The immediate counts whole vectors of 'elements' elements.  */
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
    }
    return true;
}
4746
/* Load one 128-bit quantity into ZT under control of Pg, then
 * replicate it across the rest of the vector (LD1RQ).
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
{
    static gen_helper_gvec_mem * const fns[4] = {
        gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
        gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 desc;

    /* Load the first quadword using the normal predicated load helpers.
     * The descriptor claims a 16-byte vector so the helper touches
     * only the first quadword.
     */
    desc = tcg_const_i32(simd_desc(16, 16, zt));
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    fns[msz](cpu_env, t_pg, addr, desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    /* Replicate that first quadword (vece 4 == 16-byte elements).  */
    if (vsz > 16) {
        unsigned dofs = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
    }
}
4773
4774static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4775{
4776    if (a->rm == 31) {
4777        return false;
4778    }
4779    if (sve_access_check(s)) {
4780        int msz = dtype_msz(a->dtype);
4781        TCGv_i64 addr = new_tmp_a64(s);
4782        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4783        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4784        do_ldrq(s, a->rd, a->pg, addr, msz);
4785    }
4786    return true;
4787}
4788
4789static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4790{
4791    if (sve_access_check(s)) {
4792        TCGv_i64 addr = new_tmp_a64(s);
4793        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4794        do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4795    }
4796    return true;
4797}
4798
/* Load and broadcast element.
 * LD1R: load one scalar element from Xn|SP + imm and replicate it into
 * every *active* element of Zd; inactive elements are zeroed.
 */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    TCGLabel *over = gen_new_label();
    TCGv_i64 temp;

    /* If the guarding predicate has no bits set, no load occurs.  */
    if (psz <= 8) {
        /* The whole predicate register fits in a single i64 load.
         * Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        /* Skip the load (and the fault it could raise) when empty.  */
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Wide predicate: probe for any active element instead.
         * NOTE(review): relies on find_last_active returning a negative
         * value when no element is active -- confirm at its definition.
         */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data.  */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << esz);
    tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
                        s->be_data | dtype_mop[a->dtype]);

    /* Broadcast to *all* elements.  */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements.  (Reached on both paths: the branch
     * above lands here having skipped only the load and broadcast.)
     */
    gen_set_label(over);
    do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
    return true;
}
4846
/*
 * Common back end for contiguous stores ST1..ST4.
 * msz = log2 memory element size, esz = log2 register element size,
 * nreg = number of registers stored minus one (0 selects ST1).
 * For ST1, msz <= esz (narrowing stores); for ST2-4, msz == esz.
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    /* Indexed by [msz][esz]; lower-triangle entries (msz > esz) are
     * invalid encodings and hence NULL.
     */
    static gen_helper_gvec_mem * const fn_single[4][4] = {
        { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
          gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
        { NULL,                   gen_helper_sve_st1hh_r,
          gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
        { NULL, NULL,
          gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
        { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
    };
    /* Indexed by [nreg - 1][msz].  */
    static gen_helper_gvec_mem * const fn_multiple[3][4] = {
        { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
          gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
        { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
          gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
        { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
          gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
    };
    gen_helper_gvec_mem *fn;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[msz][esz];
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, fn);
}
4880
4881static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
4882{
4883    if (a->rm == 31 || a->msz > a->esz) {
4884        return false;
4885    }
4886    if (sve_access_check(s)) {
4887        TCGv_i64 addr = new_tmp_a64(s);
4888        tcg_gen_muli_i64(addr, cpu_reg(s, a->rm), (a->nreg + 1) << a->msz);
4889        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4890        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4891    }
4892    return true;
4893}
4894
4895static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
4896{
4897    if (a->msz > a->esz) {
4898        return false;
4899    }
4900    if (sve_access_check(s)) {
4901        int vsz = vec_full_reg_size(s);
4902        int elements = vsz >> a->esz;
4903        TCGv_i64 addr = new_tmp_a64(s);
4904
4905        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4906                         (a->imm * elements * (a->nreg + 1)) << a->msz);
4907        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4908    }
4909    return true;
4910}
4911
4912/*
4913 *** SVE gather loads / scatter stores
4914 */
4915
/*
 * Common back end for gather loads and scatter stores.
 * zt = data vector, pg = governing predicate, zm = vector of offsets,
 * scalar = base address, fn = the scatter/gather helper.
 * scale (log2 of the per-offset scaling) is carried to the helper in
 * the data field of the simd descriptor.
 */
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
                       TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();

    /* Pass env-relative pointers to the three register operands.  */
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}
4935
/* Indexed by [ff][xs][u][msz].
 * ff: 0 selects the normal (ld*) helpers, 1 the first-fault (ldff*) ones.
 * xs: offset-register extension form (the _zsu vs _zss helper variants;
 *     presumably zero- vs sign-extended 32-bit offsets -- confirm at the
 *     helper definitions).
 * u:  0 selects the sign-extending loads (*ss), 1 the zero-extending
 *     (*su) ones; a sign-extending 32->32 load is meaningless, hence
 *     the NULL entries at msz == 2, u == 0.
 * msz: log2 of the memory element size.
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = {
    { { { gen_helper_sve_ldbss_zsu,
          gen_helper_sve_ldhss_zsu,
          NULL, },
        { gen_helper_sve_ldbsu_zsu,
          gen_helper_sve_ldhsu_zsu,
          gen_helper_sve_ldssu_zsu, } },
      { { gen_helper_sve_ldbss_zss,
          gen_helper_sve_ldhss_zss,
          NULL, },
        { gen_helper_sve_ldbsu_zss,
          gen_helper_sve_ldhsu_zss,
          gen_helper_sve_ldssu_zss, } } },

    { { { gen_helper_sve_ldffbss_zsu,
          gen_helper_sve_ldffhss_zsu,
          NULL, },
        { gen_helper_sve_ldffbsu_zsu,
          gen_helper_sve_ldffhsu_zsu,
          gen_helper_sve_ldffssu_zsu, } },
      { { gen_helper_sve_ldffbss_zss,
          gen_helper_sve_ldffhss_zss,
          NULL, },
        { gen_helper_sve_ldffbsu_zss,
          gen_helper_sve_ldffhsu_zss,
          gen_helper_sve_ldffssu_zss, } } }
};
4964
/* Indexed by [ff][xs][u][msz], as for gather_load_fn32.
 * Note that we overload xs=2 to indicate 64-bit offset (_zd helpers);
 * xs=0/1 are the 32-bit-offset _zsu/_zss forms.  The NULL entries are
 * the meaningless sign-extending 64->64 loads (msz == 3, u == 0).
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = {
    { { { gen_helper_sve_ldbds_zsu,
          gen_helper_sve_ldhds_zsu,
          gen_helper_sve_ldsds_zsu,
          NULL, },
        { gen_helper_sve_ldbdu_zsu,
          gen_helper_sve_ldhdu_zsu,
          gen_helper_sve_ldsdu_zsu,
          gen_helper_sve_ldddu_zsu, } },
      { { gen_helper_sve_ldbds_zss,
          gen_helper_sve_ldhds_zss,
          gen_helper_sve_ldsds_zss,
          NULL, },
        { gen_helper_sve_ldbdu_zss,
          gen_helper_sve_ldhdu_zss,
          gen_helper_sve_ldsdu_zss,
          gen_helper_sve_ldddu_zss, } },
      { { gen_helper_sve_ldbds_zd,
          gen_helper_sve_ldhds_zd,
          gen_helper_sve_ldsds_zd,
          NULL, },
        { gen_helper_sve_ldbdu_zd,
          gen_helper_sve_ldhdu_zd,
          gen_helper_sve_ldsdu_zd,
          gen_helper_sve_ldddu_zd, } } },

    { { { gen_helper_sve_ldffbds_zsu,
          gen_helper_sve_ldffhds_zsu,
          gen_helper_sve_ldffsds_zsu,
          NULL, },
        { gen_helper_sve_ldffbdu_zsu,
          gen_helper_sve_ldffhdu_zsu,
          gen_helper_sve_ldffsdu_zsu,
          gen_helper_sve_ldffddu_zsu, } },
      { { gen_helper_sve_ldffbds_zss,
          gen_helper_sve_ldffhds_zss,
          gen_helper_sve_ldffsds_zss,
          NULL, },
        { gen_helper_sve_ldffbdu_zss,
          gen_helper_sve_ldffhdu_zss,
          gen_helper_sve_ldffsdu_zss,
          gen_helper_sve_ldffddu_zss, } },
      { { gen_helper_sve_ldffbds_zd,
          gen_helper_sve_ldffhds_zd,
          gen_helper_sve_ldffsds_zd,
          NULL, },
        { gen_helper_sve_ldffbdu_zd,
          gen_helper_sve_ldffhdu_zd,
          gen_helper_sve_ldffsdu_zd,
          gen_helper_sve_ldffddu_zd, } } }
};
5017
5018static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
5019{
5020    gen_helper_gvec_mem_scatter *fn = NULL;
5021
5022    if (!sve_access_check(s)) {
5023        return true;
5024    }
5025
5026    switch (a->esz) {
5027    case MO_32:
5028        fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz];
5029        break;
5030    case MO_64:
5031        fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz];
5032        break;
5033    }
5034    assert(fn != NULL);
5035
5036    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5037               cpu_reg_sp(s, a->rn), fn);
5038    return true;
5039}
5040
5041static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
5042{
5043    gen_helper_gvec_mem_scatter *fn = NULL;
5044    TCGv_i64 imm;
5045
5046    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5047        return false;
5048    }
5049    if (!sve_access_check(s)) {
5050        return true;
5051    }
5052
5053    switch (a->esz) {
5054    case MO_32:
5055        fn = gather_load_fn32[a->ff][0][a->u][a->msz];
5056        break;
5057    case MO_64:
5058        fn = gather_load_fn64[a->ff][2][a->u][a->msz];
5059        break;
5060    }
5061    assert(fn != NULL);
5062
5063    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5064     * by loading the immediate into the scalar parameter.
5065     */
5066    imm = tcg_const_i64(a->imm << a->msz);
5067    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
5068    tcg_temp_free_i64(imm);
5069    return true;
5070}
5071
/* Indexed by [xs][msz].
 * xs selects the offset-extension form (_zsu vs _zss helper variants);
 * msz is log2 of the memory element size (b/h/s).
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][3] = {
    { gen_helper_sve_stbs_zsu,
      gen_helper_sve_sths_zsu,
      gen_helper_sve_stss_zsu, },
    { gen_helper_sve_stbs_zss,
      gen_helper_sve_sths_zss,
      gen_helper_sve_stss_zss, },
};
5081
/* Indexed by [xs][msz], as for scatter_store_fn32.
 * Note that we overload xs=2 to indicate 64-bit offset (_zd helpers).
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[3][4] = {
    { gen_helper_sve_stbd_zsu,
      gen_helper_sve_sthd_zsu,
      gen_helper_sve_stsd_zsu,
      gen_helper_sve_stdd_zsu, },
    { gen_helper_sve_stbd_zss,
      gen_helper_sve_sthd_zss,
      gen_helper_sve_stsd_zss,
      gen_helper_sve_stdd_zss, },
    { gen_helper_sve_stbd_zd,
      gen_helper_sve_sthd_zd,
      gen_helper_sve_stsd_zd,
      gen_helper_sve_stdd_zd, },
};
5097
5098static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
5099{
5100    gen_helper_gvec_mem_scatter *fn;
5101
5102    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5103        return false;
5104    }
5105    if (!sve_access_check(s)) {
5106        return true;
5107    }
5108    switch (a->esz) {
5109    case MO_32:
5110        fn = scatter_store_fn32[a->xs][a->msz];
5111        break;
5112    case MO_64:
5113        fn = scatter_store_fn64[a->xs][a->msz];
5114        break;
5115    default:
5116        g_assert_not_reached();
5117    }
5118    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5119               cpu_reg_sp(s, a->rn), fn);
5120    return true;
5121}
5122
5123static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
5124{
5125    gen_helper_gvec_mem_scatter *fn = NULL;
5126    TCGv_i64 imm;
5127
5128    if (a->esz < a->msz) {
5129        return false;
5130    }
5131    if (!sve_access_check(s)) {
5132        return true;
5133    }
5134
5135    switch (a->esz) {
5136    case MO_32:
5137        fn = scatter_store_fn32[0][a->msz];
5138        break;
5139    case MO_64:
5140        fn = scatter_store_fn64[2][a->msz];
5141        break;
5142    }
5143    assert(fn != NULL);
5144
5145    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5146     * by loading the immediate into the scalar parameter.
5147     */
5148    imm = tcg_const_i64(a->imm << a->msz);
5149    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
5150    tcg_temp_free_i64(imm);
5151    return true;
5152}
5153
5154/*
5155 * Prefetches
5156 */
5157
/* PRF* (scalar/immediate forms): prefetch hint.  */
static bool trans_PRF(DisasContext *s, arg_PRF *a, uint32_t insn)
{
    /* Prefetch is a nop within QEMU.  We still perform the SVE access
     * check so that a disabled SVE unit traps as architected.  */
    (void)sve_access_check(s);
    return true;
}
5164
/* PRF* (scalar plus scalar form): prefetch hint.  */
static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a, uint32_t insn)
{
    /* The encoding with rm == 31 is reserved.  */
    if (a->rm == 31) {
        return false;
    }
    /* Prefetch is a nop within QEMU.  */
    (void)sve_access_check(s);
    return true;
}
5174
5175/*
5176 * Move Prefix
5177 *
5178 * TODO: The implementation so far could handle predicated merging movprfx.
5179 * The helper functions as written take an extra source register to
5180 * use in the operation, but the result is only written when predication
5181 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
5182 * to allow the final write back to the destination to be unconditional.
5183 * For predicated zeroing movprfx, we need to rearrange the helpers to
5184 * allow the final write back to zero inactives.
5185 *
5186 * In the meantime, just emit the moves.
5187 */
5188
/* Unpredicated MOVPRFX: per the note above, implemented as a plain
 * vector move; the fusion opportunity is not (yet) exploited.
 */
static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a, uint32_t insn)
{
    return do_mov_z(s, a->rd, a->rn);
}
5193
/* Predicated merging MOVPRFX: select Zn where the predicate is true,
 * keep the existing Zd elements elsewhere.
 */
static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
    }
    return true;
}
5201
/* Predicated zeroing MOVPRFX: copy active elements of Zn to Zd and
 * zero the inactive ones.
 */
static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
    }
    return true;
}
5209