/* qemu/target/arm/translate-sve.c */
   1/*
   2 * AArch64 SVE translation
   3 *
   4 * Copyright (c) 2018 Linaro, Ltd
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "cpu.h"
  22#include "exec/exec-all.h"
  23#include "tcg-op.h"
  24#include "tcg-op-gvec.h"
  25#include "tcg-gvec-desc.h"
  26#include "qemu/log.h"
  27#include "arm_ldst.h"
  28#include "translate.h"
  29#include "internals.h"
  30#include "exec/helper-proto.h"
  31#include "exec/helper-gen.h"
  32#include "exec/log.h"
  33#include "trace-tcg.h"
  34#include "translate-a64.h"
  35#include "fpu/softfloat.h"
  36
  37
/* Expander for a gvec operation whose third operand is a TCGv_i64 scalar.  */
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

/* Out-of-line helpers that return condition flags in a TCGv_i32
 * (see do_pred_flags for how the result is consumed).
 */
typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Out-of-line helpers for memory operations; the scatter form takes
 * an extra vector-of-addresses operand.
 */
typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
  49
  50/*
  51 * Helpers for extracting complex instruction fields.
  52 */
  53
  54/* See e.g. ASR (immediate, predicated).
  55 * Returns -1 for unallocated encoding; diagnose later.
  56 */
  57static int tszimm_esz(int x)
  58{
  59    x >>= 3;  /* discard imm3 */
  60    return 31 - clz32(x);
  61}
  62
  63static int tszimm_shr(int x)
  64{
  65    return (16 << tszimm_esz(x)) - x;
  66}
  67
  68/* See e.g. LSL (immediate, predicated).  */
  69static int tszimm_shl(int x)
  70{
  71    return x - (8 << tszimm_esz(x));
  72}
  73
  74static inline int plus1(int x)
  75{
  76    return x + 1;
  77}
  78
  79/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  80static inline int expand_imm_sh8s(int x)
  81{
  82    return (int8_t)x << (x & 0x100 ? 8 : 0);
  83}
  84
  85static inline int expand_imm_sh8u(int x)
  86{
  87    return (uint8_t)x << (x & 0x100 ? 8 : 0);
  88}
  89
  90/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  91 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  92 */
  93static inline int msz_dtype(int msz)
  94{
  95    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  96    return dtype[msz];
  97}
  98
  99/*
 100 * Include the generated decoder.
 101 */
 102
 103#include "decode-sve.inc.c"
 104
/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64.  */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

/* The predicate register size rounded up for use with gvec expanders.  */
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
 144
/* Invoke a vector expander on two Zregs.
 *
 * Note for this and all of the expanders below: when sve_access_check
 * returns false, nothing is emitted here but we still return true,
 * i.e. the insn decoded successfully.
 */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.
 * No sve_access_check here: callers gate this themselves
 * (see e.g. do_shift_imm).
 */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs.
 * Predicates use pred_gvec_reg_size, the tcg-rounded size.
 */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}
 228
/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);       /* N from the sign of the full value */
    tcg_gen_andi_i32(cpu_ZF, t, 2);   /* ZF from bit 1 of the result */
    tcg_gen_andi_i32(cpu_CF, t, 1);   /* CF from bit 0 of the result */
    tcg_gen_movi_i32(cpu_VF, 0);      /* V is always clear */
}

/* Subroutines computing the ARM PredTest pseudofunction.  */

/* PredTest on a single 64-bit predicate word d with governing mask g.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* PredTest on WORDS predicate words located at CPUARMState offsets
 * dofs (data) and gofs (governing predicate).
 */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    /* Note that t carries the word count in and the flags result out.  */
    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
 271
/*
 *** SVE Logical - Unpredicated Group
 */

/* The bitwise operations are element-size agnostic, so all are
 * expanded with esz = 0.
 */
static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

/* Signed/unsigned saturating add/subtract, via the gvec expanders.  */
static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}
 329
 330/*
 331 *** SVE Integer Arithmetic - Binary Predicated Group
 332 */
 333
 334static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 335{
 336    unsigned vsz = vec_full_reg_size(s);
 337    if (fn == NULL) {
 338        return false;
 339    }
 340    if (sve_access_check(s)) {
 341        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
 342                           vec_full_reg_offset(s, a->rn),
 343                           vec_full_reg_offset(s, a->rm),
 344                           pred_full_reg_offset(s, a->pg),
 345                           vsz, vsz, 0, fn);
 346    }
 347    return true;
 348}
 349
/* Select active elements from Zn and inactive elements from Zm,
 * storing the result in Zd.  Note there is no sve_access_check here;
 * callers perform it themselves (see trans_SEL_zpzz).
 */
static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
{
    static gen_helper_gvec_4 * const fns[4] = {
        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}
 366
/* Expand a predicated integer binary operation for each element size,
 * dispatching to the out-of-line helpers sve_<name>_zpzz_<esz>.
 */
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

/* Divide exists only for 32-bit and 64-bit elements; the NULL entries
 * make do_zpzz_ool report the encoding as unallocated.
 */
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

/* SEL performs the access check itself; do_sel_z does not repeat it.  */
static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
{
    if (sve_access_check(s)) {
        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
    }
    return true;
}

#undef DO_ZPZZ
 425
 426/*
 427 *** SVE Integer Arithmetic - Unary Predicated Group
 428 */
 429
/* Expand a predicated unary operation (Zd = op Zn, governed by Pg) via
 * an out-of-line helper.  NULL fn indicates an unallocated encoding.
 */
static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}
 444
/* Expand a predicated unary operation for each element size,
 * dispatching to the out-of-line helpers sve_<name>_<esz>.
 */
#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

/* Floating-point abs/neg have no byte form; NULL -> unallocated.  */
static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* Byte extension requires an element size larger than a byte.  */
static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* Halfword extension requires word or doubleword elements.  */
static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* Word extension is only valid for doubleword elements.  */
static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ
 538
/*
 *** SVE Integer Reduction Group
 */

/* Reduction helpers return the scalar result in a TCGv_i64.  */
typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Reduce vector Zn under predicate Pg to a scalar, written to Vd via
 * write_fp_dreg.  NULL fn indicates an unallocated element size.
 */
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

/* Expand a reduction for each element size, dispatching to the
 * out-of-line helpers sve_<name>_<esz>.
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

/* SADDV has no doubleword form; NULL -> unallocated.  */
static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ
 606
/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

/* Copy Zn into Zd, storing zeros into inactive elements.
 * NOTE(review): no sve_access_check here — callers appear to be
 * responsible for it; confirm at call sites.
 */
static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, pg),
                       vsz, vsz, 0, fns[esz]);
}

/* Expand a predicated shift-by-immediate; a->imm is passed to the
 * helper as the simd_data field of the descriptor.
 */
static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}
 656
static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
 726
/*
 *** SVE Bitwise Shift - Predicated Group
 */

/* Shift by wide (64-bit) elements; only valid for element sizes
 * smaller than doubleword, hence only three helpers and the esz >= 3
 * rejection below.
 */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
 749
/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

/* Expand an unpredicated shift by immediate using an inline gvec
 * expander.  asr selects the saturate-to-size-minus-one behavior for
 * over-large shift counts; otherwise such shifts zero the register.
 */
static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
 795
/* Expand an unpredicated shift-by-wide-elements operation via an
 * out-of-line helper.  NULL fn indicates an unallocated encoding.
 */
static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

/* Shift by wide elements has no doubleword form: NULL -> unallocated.  */
#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)           \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW
 826
/*
 *** SVE Integer Multiply-Add Group
 */

/* Expand a predicated multiply-add (Zd, Za, Zn, Zm under Pg) via an
 * out-of-line five-operand helper.
 */
static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
 860
/*
 *** SVE Index Generation Group
 */

/* Expand INDEX (Zd gets the arithmetic series start + i * incr).
 * Doubleword elements use the 64-bit inputs directly; narrower
 * element sizes first truncate start and incr to 32 bits.
 */
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}
 895
/* INDEX (immediate start, immediate increment).  Constants created
 * here are freed; values from cpu_reg are the live general registers
 * and must not be freed.
 */
static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

/* INDEX (immediate start, register increment).  */
static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

/* INDEX (register start, immediate increment).  */
static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

/* INDEX (register start, register increment).  */
static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}
 939
/*
 *** SVE Stack Allocation Group
 */

/* ADDVL: Xd = Xn + imm * VL (vector length in bytes).  */
static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    }
    return true;
}

/* ADDPL: Xd = Xn + imm * PL (predicate length in bytes).  */
static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    }
    return true;
}

/* RDVL: Xd = imm * VL (vector length in bytes).  */
static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    }
    return true;
}
 972
 973/*
 974 *** SVE Compute Vector Address Group
 975 */
 976
 977static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
 978{
 979    if (sve_access_check(s)) {
 980        unsigned vsz = vec_full_reg_size(s);
 981        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 982                           vec_full_reg_offset(s, a->rn),
 983                           vec_full_reg_offset(s, a->rm),
 984                           vsz, vsz, a->imm, fn);
 985    }
 986    return true;
 987}
 988
/* ADR variants; each dispatches to the matching out-of-line helper
 * (see the gen_helper_sve_adr_* implementations for the per-form
 * offset handling).
 */
static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
1008
1009/*
1010 *** SVE Integer Misc - Unpredicated Group
1011 */
1012
/* FEXPA: floating-point exponential accelerator.  */
static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
{
    /* Helpers indexed by element size; there is no byte form.  */
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        /* Byte element size is unallocated for this insn.  */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
1032
/* FTSSEL: floating-point trig select coefficient.  */
static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
{
    /* Helpers indexed by element size; there is no byte form.  */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        /* Byte element size is unallocated for this insn.  */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
1053
1054/*
1055 *** SVE Predicate Logical Operations Group
1056 */
1057
/* Expand a flag-setting predicate logical operation: perform gvec_op
 * on (Pn, Pm, Pg) into Pd, then set NZCV as PTEST would for the result
 * against Pg.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1108
/* pd = (pn & pm) & pg, 64 predicate bits at a time.  */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Host-vector variant of gen_and_pg_i64.  */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* AND (predicates), with special-casing of redundant operand patterns.  */
static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* ANDS: go through the common flag-setting path.  */
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            /* All sources identical: plain predicate move.  */
            return do_mov_p(s, a->rd, a->rn);
        } else {
            /* (pn & pn) & pg == pn & pg.  */
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        /* pg equals one operand, so the extra & pg is redundant.  */
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1144
/* pd = (pn & ~pm) & pg, 64 predicate bits at a time.  */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Host-vector variant of gen_bic_pg_i64.  */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* BIC (predicates).  */
static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* BICS: go through the common flag-setting path.  */
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        /* (pn & ~pm) & pn == pn & ~pm: the & pg is redundant.  */
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1174
/* pd = (pn ^ pm) & pg, 64 predicate bits at a time.  */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Host-vector variant of gen_eor_pg_i64.  */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* EOR (predicates).  */
static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* EORS: go through the common flag-setting path.  */
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1202
/* pd = (pn & pg) | (pm & ~pg): bitwise select by the governing predicate.
 * Note pn and pm are used as scratch; the gvec expander passes temps
 * loaded from the register file, so this does not corrupt register state.
 */
static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

/* Host-vector variant of gen_sel_pg_i64; pn/pm are likewise scratch.  */
static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

/* SEL (predicates).  */
static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* There is no flag-setting form of SEL.  */
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1232
/* pd = (pn | pm) & pg, 64 predicate bits at a time.  */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Host-vector variant of gen_orr_pg_i64.  */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* ORR (predicates).  */
static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* ORRS: go through the common flag-setting path.  */
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        /* (pn | pn) & pn == pn: plain predicate move.  */
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1262
/* pd = (pn | ~pm) & pg, 64 predicate bits at a time.  */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Host-vector variant of gen_orn_pg_i64.  */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

/* ORN (predicates).  */
static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* ORNS: go through the common flag-setting path.  */
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1290
/* pd = ~(pn | pm) & pg, computed as pg & ~(pn | pm).  */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Host-vector variant of gen_nor_pg_i64.  */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

/* NOR (predicates).  */
static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* NORS: go through the common flag-setting path.  */
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1318
/* pd = ~(pn & pm) & pg, computed as pg & ~(pn & pm).  */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

/* Host-vector variant of gen_nand_pg_i64.  */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

/* NAND (predicates).  */
static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        /* NANDS: go through the common flag-setting path.  */
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1346
1347/*
1348 *** SVE Predicate Misc Group
1349 */
1350
/* PTEST: set NZCV from Pn tested against governing predicate Pg.  */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        /* Predicate register size in 64-bit words.  */
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            /* Single-word predicate: test inline from loaded temps.  */
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            /* Larger predicates go through the out-of-line helper.  */
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1374
/* See the ARM pseudocode DecodePredCount.
 * Returns the number of active elements implied by the predicate
 * constraint PATTERN for a vector of FULLSZ bytes and element size ESZ.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        /* VL1..VL8 encode the bound directly.  */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        /* VL16..VL256 are successive powers of two from 16.  */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        /* All other patterns yield zero active elements.  */
        return 0;
    }
    /* A fixed VLn bound applies only if the vector holds that many.  */
    return elements >= bound ? bound : 0;
}
1412
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* Partial final word: keep only the bits covering
             * the active elements.
             */
            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        /* The whole predicate fits in one 64-bit store.  */
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        /* No partial final word: try a vectorized dup of WORD.  */
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
    }

    /* Fall back to 64-bit stores: full words, then the partial final
     * word (if any), then zeros for the inactive remainder.
     */
    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        /* Set NZCV as a predicate test of the result would.  */
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1492
/* PTRUE, PTRUES: initialize Pd from the predicate pattern.  */
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}
1509
/* RDFFR (predicated): Pd = FFR & Pg, optionally setting flags.  */
static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a);
}

/* RDFFR (unpredicated): Pd = FFR.  */
static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

/* WRFFR: FFR = Pn.  */
static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
1531
/* Common expansion for PFIRST and PNEXT: call gen_fn on (Pd, Pn) and
 * set NZCV from the value it returns.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    /* Descriptor: predicate size in 64-bit words, with the element
     * size deposited into the data field.
     */
    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    /* t is reused: descriptor on input, flags value on output.  */
    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}
1560
/* PFIRST: set the first active element of Pd.  */
static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

/* PNEXT: advance to the next active element of Pd.  */
static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
1570
1571/*
1572 *** SVE Element Count Group
1573 */
1574
/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisions we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        /* Subtracting a positive value can only underflow.  */
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        /* Adding a positive value can only overflow.  */
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    /* Clamp the 64-bit result to the 32-bit bound.  */
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
1604
/* Similarly with 64-bit values.  As with the 32-bit version, VAL is
 * known to be positive, so each direction can saturate only one way.
 */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            /* Unsigned subtract: saturate to 0 if reg < val.  */
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            /* Unsigned add: saturate to ~0 if the sum wrapped.  */
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  Since VAL is positive, overflow
             * can only be downward, to INT64_MIN.
             */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  Since VAL is positive, overflow
             * can only be upward, to INT64_MAX.
             */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}
1651
/* Similarly with a vector and a scalar operand.  D selects subtraction,
 * which is implemented by negating VAL before calling the add helpers
 * (except for unsigned 64-bit, which has a dedicated subtract helper).
 */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        /* The byte/half helpers take the addend as a 32-bit value.  */
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                /* Unsigned 64-bit subtraction cannot be expressed as
                 * addition of a negated value; use the subtract helper.
                 */
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
1736
1737static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
1738{
1739    if (sve_access_check(s)) {
1740        unsigned fullsz = vec_full_reg_size(s);
1741        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1742        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1743    }
1744    return true;
1745}
1746
1747static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
1748{
1749    if (sve_access_check(s)) {
1750        unsigned fullsz = vec_full_reg_size(s);
1751        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1752        int inc = numelem * a->imm * (a->d ? -1 : 1);
1753        TCGv_i64 reg = cpu_reg(s, a->rd);
1754
1755        tcg_gen_addi_i64(reg, reg, inc);
1756    }
1757    return true;
1758}
1759
/* SQINC/UQINC/SQDEC/UQDEC (scalar, 32-bit result).  */
static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        /* Value unchanged, but the result is still extended
         * per the U (unsigned) flag.
         */
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}
1785
1786static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
1787{
1788    if (!sve_access_check(s)) {
1789        return true;
1790    }
1791
1792    unsigned fullsz = vec_full_reg_size(s);
1793    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1794    int inc = numelem * a->imm;
1795    TCGv_i64 reg = cpu_reg(s, a->rd);
1796
1797    if (inc != 0) {
1798        TCGv_i64 t = tcg_const_i64(inc);
1799        do_sat_addsub_64(reg, t, a->u, a->d);
1800        tcg_temp_free_i64(t);
1801    }
1802    return true;
1803}
1804
/* INC/DEC (vector): Zd = Zn +/- element count times imm.  */
static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        /* Byte element size is unallocated for the vector form.  */
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            /* D selects decrement; fold the sign into the addend.  */
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        /* Zero increment: plain move (performs its own access check).  */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
1828
/* SQINC/UQINC/SQDEC/UQDEC (vector, saturating).  */
static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
{
    if (a->esz == 0) {
        /* Byte element size is unallocated for the vector form.  */
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        /* Zero increment: plain move (performs its own access check).  */
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}
1850
1851/*
1852 *** SVE Bitwise Immediate Group
1853 */
1854
/* Common expansion for bitwise-immediate ops: decode the logical
 * immediate bitmask from the dbm field and apply gvec_fn with it.
 */
static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        /* Invalid bitmask encoding.  */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}
1870
/* AND/ORR/EOR (vector, bitmask immediate).  */
static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}
1885
/* DUPM: broadcast a decoded bitmask immediate into Zd.  */
static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        /* Invalid bitmask encoding.  */
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}
1899
1900/*
1901 *** SVE Integer Wide Immediate - Predicated Group
1902 */
1903
/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).  VAL is broadcast into
 * the active elements of Zd; inactive elements are taken from Zn.
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    /* Per-element-size out-of-line helpers.  */
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}
1932
/* FCPY: merging copy of an expanded FP immediate.  */
static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
{
    if (a->esz == 0) {
        /* No byte form of an FP immediate.  */
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
1947
/* CPY (immediate, merging).  */
static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
{
    /* Insn bit 13 is the immediate shift; presumably invalid in
     * combination with byte elements -- TODO confirm vs decode tables.
     */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
1960
/* CPY (immediate, zeroing).  */
static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
{
    /* Per-element-size out-of-line helpers.  */
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    /* Insn bit 13 is the immediate shift; presumably invalid in
     * combination with byte elements -- TODO confirm vs decode tables.
     */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}
1981
1982/*
1983 *** SVE Permute Extract Group
1984 */
1985
static bool trans_EXT(DisasContext *s, arg_EXT *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    /* An immediate at or beyond the vector length selects offset 0,
     * which reduces to a plain copy of Zn.
     */
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        /* Zd[0..n_siz) <- Zn[n_ofs..vsz), then
         * Zd[n_siz..vsz) <- Zm[0..n_ofs) when the offset is nonzero.
         */
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        /* Fall back to the out-of-line helper; n_ofs rides in simd_data.  */
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
2015
2016/*
2017 *** SVE Permute - Unpredicated Group
2018 */
2019
2020static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
2021{
2022    if (sve_access_check(s)) {
2023        unsigned vsz = vec_full_reg_size(s);
2024        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2025                             vsz, vsz, cpu_reg_sp(s, a->rn));
2026    }
2027    return true;
2028}
2029
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
{
    /* No set bit in the low 5 bits of the immediate means an
     * unallocated size encoding: undefined instruction.
     */
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        /* The lowest set bit of the immediate encodes the element size;
         * the bits above it encode the source element index.
         */
        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            /* In range: broadcast element [index] of Zn.  */
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            /* Index beyond the vector length: the result is all zeros.  */
            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
        }
    }
    return true;
}
2052
/* Common expansion for INSR: invoke the per-element-size helper with
 * Zd, Zn and the 64-bit value to insert.  VAL is not freed here; the
 * caller retains ownership.
 */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_i32(desc);
}
2074
2075static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2076{
2077    if (sve_access_check(s)) {
2078        TCGv_i64 t = tcg_temp_new_i64();
2079        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2080        do_insr_i64(s, a, t);
2081        tcg_temp_free_i64(t);
2082    }
2083    return true;
2084}
2085
2086static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
2087{
2088    if (sve_access_check(s)) {
2089        do_insr_i64(s, a, cpu_reg(s, a->rm));
2090    }
2091    return true;
2092}
2093
2094static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
2095{
2096    static gen_helper_gvec_2 * const fns[4] = {
2097        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2098        gen_helper_sve_rev_s, gen_helper_sve_rev_d
2099    };
2100
2101    if (sve_access_check(s)) {
2102        unsigned vsz = vec_full_reg_size(s);
2103        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2104                           vec_full_reg_offset(s, a->rn),
2105                           vsz, vsz, 0, fns[a->esz]);
2106    }
2107    return true;
2108}
2109
2110static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
2111{
2112    static gen_helper_gvec_3 * const fns[4] = {
2113        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2114        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2115    };
2116
2117    if (sve_access_check(s)) {
2118        unsigned vsz = vec_full_reg_size(s);
2119        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2120                           vec_full_reg_offset(s, a->rn),
2121                           vec_full_reg_offset(s, a->rm),
2122                           vsz, vsz, 0, fns[a->esz]);
2123    }
2124    return true;
2125}
2126
static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
    /* fns[esz][u]: signed (u=0, sunpk) vs unsigned (u=1, uunpk) unpack.
     * Byte-sized destination elements are invalid, hence the NULL row.
     */
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* The "high" form reads the upper half of Zn, so bias the
         * source offset by half a vector.
         */
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}
2148
2149/*
2150 *** SVE Permute - Predicates Group
2151 */
2152
/* Common expansion for the three-operand predicate permutes
 * (ZIP/UZP/TRN on Pregs).  HIGH_ODD distinguishes the "1" and "2"
 * forms of each insn and is passed to the helper in the descriptor.
 */
static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    /* Hand-built descriptor: biased predicate size in the low bits,
     * then esz and high_odd in the SIMD_DATA field.
     */
    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}
2189
/* Common expansion for the two-operand predicate permutes
 * (REV and PUNPK on Pregs); see do_perm_pred3 for the desc layout.
 */
static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}
2223
/* Thin wrappers: each predicate permute pair shares one helper,
 * distinguished by the high_odd flag (0 for the "1"/LO form,
 * 1 for the "2"/HI form).
 */
static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}
2268
2269/*
2270 *** SVE Permute - Interleaving Group
2271 */
2272
/* Common expansion for ZIP1/ZIP2 on Zregs.  */
static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* ZIP2 interleaves the high halves of Zn/Zm, so bias both
         * source offsets by half a vector.
         */
        unsigned high_ofs = high ? vsz / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
2290
2291static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2292                            gen_helper_gvec_3 *fn)
2293{
2294    if (sve_access_check(s)) {
2295        unsigned vsz = vec_full_reg_size(s);
2296        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2297                           vec_full_reg_offset(s, a->rn),
2298                           vec_full_reg_offset(s, a->rm),
2299                           vsz, vsz, data, fn);
2300    }
2301    return true;
2302}
2303
/* ZIP1/ZIP2 differ only in which vector half is interleaved.  */
static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zip(s, a, true);
}
2313
/* UZP helpers take the starting byte offset of the first selected
 * element in simd_data: 0 for UZP1, one element (1 << esz) for UZP2.
 */
static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
}

static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
}
2328
/* TRN helpers, like UZP above, receive the starting byte offset
 * (0 for TRN1, one element for TRN2) in simd_data.
 */
static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
}

static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
}
2343
2344/*
2345 *** SVE Permute Vector - Predicated Group
2346 */
2347
static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
{
    /* COMPACT is provided only for word and doubleword elements;
     * NULL marks the unsupported sizes.
     */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2355
/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned vsz = pred_full_reg_size(s);
    unsigned desc;

    /* Hand-built descriptor: biased predicate size plus esz.  */
    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}
2381
/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        /* Power-of-two vector size: wrap with a simple mask.  */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Otherwise select 0 whenever last has reached vsz.  The
         * unsigned compare also catches a negative (not found) input.
         */
        TCGv_i32 max = tcg_const_i32(vsz);
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
2400
/* If LAST < 0, set LAST to the offset of the last element in the vector.  */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        /* The negative "not found" value masks to the final element.  */
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        /* Select the offset of the final element when last is negative.  */
        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}
2416
/* Load an unsigned element of ESZ from BASE+OFS.
 * Returns a new i64 temporary owned by the caller.
 */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}
2440
/* Load an unsigned element of ESZ from RM[LAST].
 * Returns a new i64 temporary owned by the caller.
 * Note that LAST itself is clobbered on big-endian hosts.
 */
static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
                                 int rm, int esz)
{
    TCGv_ptr p = tcg_temp_new_ptr();
    TCGv_i64 r;

    /* Convert offset into vector into offset into ENV.
     * The final adjustment for the vector register base
     * is added via constant offset to the load.
     */
#ifdef HOST_WORDS_BIGENDIAN
    /* Adjust for element ordering.  See vec_reg_offset.  */
    if (esz < 3) {
        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
    }
#endif
    tcg_gen_ext_i32_ptr(p, last);
    tcg_gen_add_ptr(p, p, cpu_env);

    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
    tcg_temp_free_ptr(p);

    return r;
}
2466
/* Compute CLAST for a Zreg.  The selected element (or, with no active
 * elements, the unchanged Zn) is broadcast across the destination.
 */
static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
{
    TCGv_i32 last;
    TCGLabel *over;
    TCGv_i64 ele;
    unsigned vsz, esz = a->esz;

    if (!sve_access_check(s)) {
        return true;
    }

    /* A local temp: 'last' must survive the brcond below.  */
    last = tcg_temp_local_new_i32();
    over = gen_new_label();

    find_last_active(s, last, esz, a->pg);

    /* There is of course no movcond for a 2048-bit vector,
     * so we must branch over the actual store.
     */
    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    ele = load_last_active(s, last, a->rm, esz);
    tcg_temp_free_i32(last);

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
    tcg_temp_free_i64(ele);

    /* If this insn used MOVPRFX, we may need a second move.  */
    if (a->rd != a->rn) {
        TCGLabel *done = gen_new_label();
        tcg_gen_br(done);

        /* No active element: copy Zn to Zd unchanged.  */
        gen_set_label(over);
        do_mov_z(s, a->rd, a->rn);

        gen_set_label(done);
    } else {
        gen_set_label(over);
    }
    return true;
}
2514
/* CLASTA selects after the last active element, CLASTB the element
 * itself.
 */
static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, false);
}

static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
{
    return do_clast_vector(s, a, true);
}
2524
/* Compute CLAST for a scalar.  REG_VAL holds the prior value on entry;
 * it receives the selected element, or is left unchanged when no
 * element is active.
 */
static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
                            bool before, TCGv_i64 reg_val)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ele, cmp, zero;

    find_last_active(s, last, esz, pg);

    /* Extend the original value of last prior to incrementing.  */
    cmp = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(cmp, last);

    if (!before) {
        incr_last_active(s, last, esz);
    }

    /* The conceit here is that while last < 0 indicates not found, after
     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
     * from which we can load garbage.  We then discard the garbage with
     * a conditional move.
     */
    ele = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);

    /* Keep the loaded element only when an active element was found.  */
    zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);

    tcg_temp_free_i64(zero);
    tcg_temp_free_i64(cmp);
    tcg_temp_free_i64(ele);
}
2557
2558/* Compute CLAST for a Vreg.  */
2559static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2560{
2561    if (sve_access_check(s)) {
2562        int esz = a->esz;
2563        int ofs = vec_reg_offset(s, a->rd, 0, esz);
2564        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2565
2566        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2567        write_fp_dreg(s, a->rd, reg);
2568        tcg_temp_free_i64(reg);
2569    }
2570    return true;
2571}
2572
/* Vreg forms of CLASTA/CLASTB.  */
static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, false);
}

static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_fp(s, a, true);
}
2582
/* Compute CLAST for a Xreg.  */
static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
{
    TCGv_i64 reg;

    if (!sve_access_check(s)) {
        return true;
    }

    /* Zero-extend the existing Xd value to the element size first, so
     * that the no-active-element case yields the extended old value.
     */
    reg = cpu_reg(s, a->rd);
    switch (a->esz) {
    case 0:
        tcg_gen_ext8u_i64(reg, reg);
        break;
    case 1:
        tcg_gen_ext16u_i64(reg, reg);
        break;
    case 2:
        tcg_gen_ext32u_i64(reg, reg);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
    return true;
}
2612
/* Xreg forms of CLASTA/CLASTB.  */
static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, false);
}

static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_clast_general(s, a, true);
}
2622
/* Compute LAST for a scalar.  Unlike CLAST there is no prior value to
 * preserve: a negative (not found) index is wrapped so that some
 * element is always loaded.  Returns a new i64 owned by the caller.
 */
static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
                               int pg, int rm, bool before)
{
    TCGv_i32 last = tcg_temp_new_i32();
    TCGv_i64 ret;

    find_last_active(s, last, esz, pg);
    if (before) {
        wrap_last_active(s, last, esz);
    } else {
        incr_last_active(s, last, esz);
    }

    ret = load_last_active(s, last, rm, esz);
    tcg_temp_free_i32(last);
    return ret;
}
2641
2642/* Compute LAST for a Vreg.  */
2643static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2644{
2645    if (sve_access_check(s)) {
2646        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2647        write_fp_dreg(s, a->rd, val);
2648        tcg_temp_free_i64(val);
2649    }
2650    return true;
2651}
2652
/* Vreg forms of LASTA/LASTB.  */
static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, false);
}

static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_fp(s, a, true);
}
2662
2663/* Compute LAST for a Xreg.  */
2664static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2665{
2666    if (sve_access_check(s)) {
2667        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2668        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2669        tcg_temp_free_i64(val);
2670    }
2671    return true;
2672}
2673
/* Xreg forms of LASTA/LASTB.  */
static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, false);
}

static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
{
    return do_last_general(s, a, true);
}
2683
2684static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2685{
2686    if (sve_access_check(s)) {
2687        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2688    }
2689    return true;
2690}
2691
2692static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2693{
2694    if (sve_access_check(s)) {
2695        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2696        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2697        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2698        tcg_temp_free_i64(t);
2699    }
2700    return true;
2701}
2702
static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
{
    /* Byte reversal needs elements wider than a byte; NULL marks
     * the invalid size.
     */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_revb_h,
        gen_helper_sve_revb_s,
        gen_helper_sve_revb_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2713
static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
{
    /* Halfword reversal needs word or doubleword elements; NULL marks
     * the invalid sizes.
     */
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        NULL,
        gen_helper_sve_revh_s,
        gen_helper_sve_revh_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2724
2725static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
2726{
2727    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2728}
2729
static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
{
    /* Bit reversal within each element; valid for all element sizes.  */
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_rbit_b,
        gen_helper_sve_rbit_h,
        gen_helper_sve_rbit_s,
        gen_helper_sve_rbit_d,
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}
2740
2741static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
2742{
2743    if (sve_access_check(s)) {
2744        unsigned vsz = vec_full_reg_size(s);
2745        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2746                           vec_full_reg_offset(s, a->rn),
2747                           vec_full_reg_offset(s, a->rm),
2748                           pred_full_reg_offset(s, a->pg),
2749                           vsz, vsz, a->esz, gen_helper_sve_splice);
2750    }
2751    return true;
2752}
2753
2754/*
2755 *** SVE Integer Compare - Vectors Group
2756 */
2757
/* Expand a predicated vector-vector compare that sets NZCV.
 * T doubles as the simd_desc input and the flags result returned
 * by the helper.  A NULL GEN_FN rejects the encoding.
 */
static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
                          gen_helper_gvec_flags_4 *gen_fn)
{
    TCGv_ptr pd, zn, zm, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    zm = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, zm, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(zm);
    tcg_temp_free_ptr(pg);

    /* Copy the helper's flag result into NZCV.  */
    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
2796
/* Instantiate a trans function for each same-width compare, selecting
 * the per-element-size flag-setting helper.
 */
#define DO_PPZZ(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZZ(CMPEQ, cmpeq)
DO_PPZZ(CMPNE, cmpne)
DO_PPZZ(CMPGT, cmpgt)
DO_PPZZ(CMPGE, cmpge)
DO_PPZZ(CMPHI, cmphi)
DO_PPZZ(CMPHS, cmphs)

#undef DO_PPZZ
2815
/* As DO_PPZZ, but for the wide-element forms; there is no doubleword
 * variant, hence the NULL entry for esz == 3.
 */
#define DO_PPZW(NAME, name) \
static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
        gen_helper_sve_##name##_ppzw_s, NULL                              \
    };                                                                    \
    return do_ppzz_flags(s, a, fns[a->esz]);                              \
}

DO_PPZW(CMPEQ, cmpeq)
DO_PPZW(CMPNE, cmpne)
DO_PPZW(CMPGT, cmpgt)
DO_PPZW(CMPGE, cmpge)
DO_PPZW(CMPHI, cmphi)
DO_PPZW(CMPHS, cmphs)
DO_PPZW(CMPLT, cmplt)
DO_PPZW(CMPLE, cmple)
DO_PPZW(CMPLO, cmplo)
DO_PPZW(CMPLS, cmpls)

#undef DO_PPZW
2838
2839/*
2840 *** SVE Integer Compare - Immediate Groups
2841 */
2842
/* Expand a predicated compare-with-immediate that sets NZCV.  The
 * immediate is carried in simd_data; T doubles as the simd_desc input
 * and the flags result, as in do_ppzz_flags above.
 */
static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
                          gen_helper_gvec_flags_3 *gen_fn)
{
    TCGv_ptr pd, zn, pg;
    unsigned vsz;
    TCGv_i32 t;

    if (gen_fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
    pd = tcg_temp_new_ptr();
    zn = tcg_temp_new_ptr();
    pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));

    gen_fn(t, pd, zn, pg, t);

    tcg_temp_free_ptr(pd);
    tcg_temp_free_ptr(zn);
    tcg_temp_free_ptr(pg);

    /* Copy the helper's flag result into NZCV.  */
    do_pred_flags(t);

    tcg_temp_free_i32(t);
    return true;
}
2878
/* Instantiate a trans function for each compare-with-immediate,
 * selecting the per-element-size flag-setting helper.
 */
#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
    };                                                                    \
    return do_ppzi_flags(s, a, fns[a->esz]);                              \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
2901
2902/*
2903 *** SVE Partition Break Group
2904 */
2905
/* Expand a three-predicate BRK insn.  A->S selects the flag-setting
 * variant (FN_S), whose flags result feeds do_pred_flags.
 */
static bool do_brk3(DisasContext *s, arg_rprr_s *a,
                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr m = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* T is both the descriptor input and the flags output.  */
        fn_s(t, d, n, m, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, m, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(m);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
2940
/* Expand a two-predicate BRK insn; as do_brk3, but without Pm.  */
static bool do_brk2(DisasContext *s, arg_rpr_s *a,
                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.  */
    TCGv_ptr d = tcg_temp_new_ptr();
    TCGv_ptr n = tcg_temp_new_ptr();
    TCGv_ptr g = tcg_temp_new_ptr();
    TCGv_i32 t = tcg_const_i32(vsz - 2);

    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));

    if (a->s) {
        /* T is both the descriptor input and the flags output.  */
        fn_s(t, d, n, g, t);
        do_pred_flags(t);
    } else {
        fn(d, n, g, t);
    }
    tcg_temp_free_ptr(d);
    tcg_temp_free_ptr(n);
    tcg_temp_free_ptr(g);
    tcg_temp_free_i32(t);
    return true;
}
2972
2973static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
2974{
2975    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2976}
2977
2978static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
2979{
2980    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2981}
2982
2983static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
2984{
2985    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2986}
2987
2988static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
2989{
2990    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2991}
2992
2993static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
2994{
2995    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2996}
2997
/* BRKB, zeroing form: dispatch to the _z helpers via do_brk2.  */
static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}
3002
/* BRKN: dispatch to the brkn helpers via do_brk2.  */
static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
3007
3008/*
3009 *** SVE Predicate Count Group
3010 */
3011
/*
 * Count the active (predicated-true) elements of PN governed by PG at
 * element size ESZ, depositing the result into VAL.
 * For predicates of <= 8 bytes this is done inline with a masked popcount;
 * larger predicates go through the sve_cntp helper.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned psz = pred_full_reg_size(s);

    if (psz <= 8) {
        uint64_t psz_mask;

        /* The whole predicate fits in one i64 load.  */
        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
        if (pn != pg) {
            TCGv_i64 g = tcg_temp_new_i64();
            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
            tcg_gen_and_i64(val, val, g);
            tcg_temp_free_i64(g);
        }

        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);

        /* One predicate bit remains per active element; count them.  */
        tcg_gen_ctpop_i64(val, val);
    } else {
        TCGv_ptr t_pn = tcg_temp_new_ptr();
        TCGv_ptr t_pg = tcg_temp_new_ptr();
        unsigned desc;
        TCGv_i32 t_desc;

        /* Hand-rolled descriptor: predicate size biased by 2, plus esz.  */
        desc = psz - 2;
        desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
        t_desc = tcg_const_i32(desc);

        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
        tcg_temp_free_ptr(t_pn);
        tcg_temp_free_ptr(t_pg);
        tcg_temp_free_i32(t_desc);
    }
}
3053
3054static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
3055{
3056    if (sve_access_check(s)) {
3057        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3058    }
3059    return true;
3060}
3061
3062static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
3063{
3064    if (sve_access_check(s)) {
3065        TCGv_i64 reg = cpu_reg(s, a->rd);
3066        TCGv_i64 val = tcg_temp_new_i64();
3067
3068        do_cntp(s, val, a->esz, a->pg, a->pg);
3069        if (a->d) {
3070            tcg_gen_sub_i64(reg, reg, val);
3071        } else {
3072            tcg_gen_add_i64(reg, reg, val);
3073        }
3074        tcg_temp_free_i64(val);
3075    }
3076    return true;
3077}
3078
3079static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3080{
3081    if (a->esz == 0) {
3082        return false;
3083    }
3084    if (sve_access_check(s)) {
3085        unsigned vsz = vec_full_reg_size(s);
3086        TCGv_i64 val = tcg_temp_new_i64();
3087        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3088
3089        do_cntp(s, val, a->esz, a->pg, a->pg);
3090        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3091                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3092    }
3093    return true;
3094}
3095
3096static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3097{
3098    if (sve_access_check(s)) {
3099        TCGv_i64 reg = cpu_reg(s, a->rd);
3100        TCGv_i64 val = tcg_temp_new_i64();
3101
3102        do_cntp(s, val, a->esz, a->pg, a->pg);
3103        do_sat_addsub_32(reg, val, a->u, a->d);
3104    }
3105    return true;
3106}
3107
3108static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3109{
3110    if (sve_access_check(s)) {
3111        TCGv_i64 reg = cpu_reg(s, a->rd);
3112        TCGv_i64 val = tcg_temp_new_i64();
3113
3114        do_cntp(s, val, a->esz, a->pg, a->pg);
3115        do_sat_addsub_64(reg, val, a->u, a->d);
3116    }
3117    return true;
3118}
3119
3120static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3121{
3122    if (a->esz == 0) {
3123        return false;
3124    }
3125    if (sve_access_check(s)) {
3126        TCGv_i64 val = tcg_temp_new_i64();
3127        do_cntp(s, val, a->esz, a->pg, a->pg);
3128        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3129    }
3130    return true;
3131}
3132
3133/*
3134 *** SVE Integer Compare Scalars Group
3135 */
3136
/*
 * CTERMEQ/CTERMNE: compare Xn and Xm (32- or 64-bit per a->sf) and set
 * NZCV so that subsequent conditional branches observe the termination
 * condition.  Only NF and VF are written here; CF is left as-is and
 * folded into VF.
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    /* NF temporarily holds the boolean compare result (0 or 1).  */
    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF.  */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31.  */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3161
/*
 * WHILELT/WHILELE/WHILELO/WHILELS: construct a predicate whose first K
 * elements are true, where K is the number of loop iterations for which
 * the scalar comparison holds.  K is computed inline; the predicate
 * itself is filled in by the sve_while helper, which also returns the
 * flags result consumed by do_pred_flags.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned desc, vsz = vec_full_reg_size(s);
    TCGCond cond;

    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    if (!a->sf) {
        /* 32-bit form: widen the operands per signedness.  */
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    tcg_gen_sub_i64(t0, op1, op0);

    /* tmax is the element count: the all-true upper bound for t0.  */
    tmax = tcg_const_i64(vsz >> a->esz);
    if (a->eq) {
        /* Equality means one more iteration.  */
        tcg_gen_addi_i64(t0, t0, 1);

        /* If op1 is max (un)signed integer (and the only time the addition
         * above could overflow), then we produce an all-true predicate by
         * setting the count to the vector length.  This is because the
         * pseudocode is described as an increment + compare loop, and the
         * max integer would always compare true.
         */
        tcg_gen_movi_i64(t1, (a->sf
                              ? (a->u ? UINT64_MAX : INT64_MAX)
                              : (a->u ? UINT32_MAX : INT32_MAX)));
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum.  */
    tcg_gen_umin_i64(t0, t0, tmax);
    tcg_temp_free_i64(tmax);

    /* Set the count to zero if the condition is false.  */
    cond = (a->u
            ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
            : (a->eq ? TCG_COND_LE : TCG_COND_LT));
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type.  */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits.  */
    tcg_gen_shli_i32(t2, t2, a->esz);

    /* Hand-rolled descriptor: predicate size biased by 2, plus esz.  */
    desc = (vsz / 8) - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    t3 = tcg_const_i32(desc);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    /* T2 doubles as the bit-count input and the flags output.  */
    gen_helper_sve_while(t2, ptr, t2, t3);
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    tcg_temp_free_i32(t3);
    return true;
}
3246
3247/*
3248 *** SVE Integer Wide Immediate - Unpredicated Group
3249 */
3250
3251static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3252{
3253    if (a->esz == 0) {
3254        return false;
3255    }
3256    if (sve_access_check(s)) {
3257        unsigned vsz = vec_full_reg_size(s);
3258        int dofs = vec_full_reg_offset(s, a->rd);
3259        uint64_t imm;
3260
3261        /* Decode the VFP immediate.  */
3262        imm = vfp_expand_imm(a->esz, a->imm);
3263        imm = dup_const(a->esz, imm);
3264
3265        tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3266    }
3267    return true;
3268}
3269
3270static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3271{
3272    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3273        return false;
3274    }
3275    if (sve_access_check(s)) {
3276        unsigned vsz = vec_full_reg_size(s);
3277        int dofs = vec_full_reg_offset(s, a->rd);
3278
3279        tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3280    }
3281    return true;
3282}
3283
3284static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
3285{
3286    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3287        return false;
3288    }
3289    if (sve_access_check(s)) {
3290        unsigned vsz = vec_full_reg_size(s);
3291        tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3292                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3293    }
3294    return true;
3295}
3296
/* SUB (immediate) is ADD of the negated immediate; the value is
 * truncated to the element size during expansion, so the negation
 * is safe for all element widths.
 */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a);
}
3302
/*
 * SUBR (immediate): reversed subtract, Zd = dup(imm) - Zn.
 * scalar_first in each GVecGen2s entry makes the immediate the
 * left-hand operand of the subtraction.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
    /* Per-element-size expanders: inline i64/i32 forms, host vector
     * form, and out-of-line fallback helper.  */
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opc = INDEX_op_sub_vec,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opc = INDEX_op_sub_vec,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opc = INDEX_op_sub_vec,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opc = INDEX_op_sub_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    /* Byte elements with the shifted form (insn bit 13) are reserved.  */
    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, c, &op[a->esz]);
        tcg_temp_free_i64(c);
    }
    return true;
}
3346
3347static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
3348{
3349    if (sve_access_check(s)) {
3350        unsigned vsz = vec_full_reg_size(s);
3351        tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3352                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3353    }
3354    return true;
3355}
3356
3357static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3358{
3359    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3360        return false;
3361    }
3362    if (sve_access_check(s)) {
3363        TCGv_i64 val = tcg_const_i64(a->imm);
3364        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3365        tcg_temp_free_i64(val);
3366    }
3367    return true;
3368}
3369
/* SQADD (immediate): signed saturating add (u=false, d=false).  */
static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, false);
}
3374
/* UQADD (immediate): unsigned saturating add (u=true, d=false).  */
static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, false);
}
3379
/* SQSUB (immediate): signed saturating subtract (u=false, d=true).  */
static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, false, true);
}
3384
/* UQSUB (immediate): unsigned saturating subtract (u=true, d=true).  */
static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
{
    return do_zzi_sat(s, a, true, true);
}
3389
3390static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3391{
3392    if (sve_access_check(s)) {
3393        unsigned vsz = vec_full_reg_size(s);
3394        TCGv_i64 c = tcg_const_i64(a->imm);
3395
3396        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3397                            vec_full_reg_offset(s, a->rn),
3398                            c, vsz, vsz, 0, fn);
3399        tcg_temp_free_i64(c);
3400    }
3401    return true;
3402}
3403
/*
 * Expand SMAX/UMAX/SMIN/UMIN with an immediate operand, selecting the
 * out-of-line helper by element size via do_zzi_ool.
 */
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3420
3421static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
3422{
3423    static gen_helper_gvec_3 * const fns[2][2] = {
3424        { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3425        { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3426    };
3427
3428    if (sve_access_check(s)) {
3429        unsigned vsz = vec_full_reg_size(s);
3430        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3431                           vec_full_reg_offset(s, a->rn),
3432                           vec_full_reg_offset(s, a->rm),
3433                           vsz, vsz, 0, fns[a->u][a->sz]);
3434    }
3435    return true;
3436}
3437
3438static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
3439{
3440    static gen_helper_gvec_3 * const fns[2][2] = {
3441        { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3442        { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3443    };
3444
3445    if (sve_access_check(s)) {
3446        unsigned vsz = vec_full_reg_size(s);
3447        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3448                           vec_full_reg_offset(s, a->rn),
3449                           vec_full_reg_offset(s, a->rm),
3450                           vsz, vsz, a->index, fns[a->u][a->sz]);
3451    }
3452    return true;
3453}
3454
3455
3456/*
3457 *** SVE Floating Point Multiply-Add Indexed Group
3458 */
3459
/*
 * FMLA/FMLS (indexed): the element index and the negate-product bit
 * are packed together into the descriptor data field.
 */
static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
{
    /* Indexed by esz - 1: H, S, D.  */
    static gen_helper_gvec_4_ptr * const fns[3] = {
        gen_helper_gvec_fmla_idx_h,
        gen_helper_gvec_fmla_idx_s,
        gen_helper_gvec_fmla_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vec_full_reg_offset(s, a->ra),
                           status, vsz, vsz, (a->index << 1) | a->sub,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
3481
3482/*
3483 *** SVE Floating Point Multiply Indexed Group
3484 */
3485
/* FMUL (indexed): the element index rides in the descriptor.  */
static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
{
    /* Indexed by esz - 1: H, S, D.  */
    static gen_helper_gvec_3_ptr * const fns[3] = {
        gen_helper_gvec_fmul_idx_h,
        gen_helper_gvec_fmul_idx_s,
        gen_helper_gvec_fmul_idx_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz, a->index, fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
3505
3506/*
3507 *** SVE Floating Point Fast Reduction Group
3508 */
3509
/* Signature shared by the FP horizontal-reduction helpers.  */
typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);

/*
 * Expand an FP horizontal reduction of Zn under Pg, writing the scalar
 * result to Vd.  The descriptor carries the true vector size plus the
 * next power of two, which the helper can use for its reduction tree.
 */
static void do_reduce(DisasContext *s, arg_rpr_esz *a,
                      gen_helper_fp_reduce *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    unsigned p2vsz = pow2ceil(vsz);
    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
    TCGv_ptr t_zn, t_pg, status;
    TCGv_i64 temp;

    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    status = get_fpstatus_ptr(a->esz == MO_16);

    fn(temp, t_zn, t_pg, status, t_desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(status);
    tcg_temp_free_i32(t_desc);

    /* Zero-extend the scalar result into the full destination vector.  */
    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
}
3539
/*
 * Expand an FP horizontal reduction, selecting the helper by element
 * size (H/S/D; byte elements are reserved).
 */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
{                                                                        \
    static gen_helper_fp_reduce * const fns[3] = {                       \
        gen_helper_sve_##name##_h,                                       \
        gen_helper_sve_##name##_s,                                       \
        gen_helper_sve_##name##_d,                                       \
    };                                                                   \
    if (a->esz == 0) {                                                   \
        return false;                                                    \
    }                                                                    \
    if (sve_access_check(s)) {                                           \
        do_reduce(s, a, fns[a->esz - 1]);                                \
    }                                                                    \
    return true;                                                         \
}

DO_VPZ(FADDV, faddv)
DO_VPZ(FMINNMV, fminnmv)
DO_VPZ(FMAXNMV, fmaxnmv)
DO_VPZ(FMINV, fminv)
DO_VPZ(FMAXV, fmaxv)
3562
3563/*
3564 *** SVE Floating Point Unary Operations - Unpredicated Group
3565 */
3566
3567static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3568{
3569    unsigned vsz = vec_full_reg_size(s);
3570    TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3571
3572    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3573                       vec_full_reg_offset(s, a->rn),
3574                       status, vsz, vsz, 0, fn);
3575    tcg_temp_free_ptr(status);
3576}
3577
3578static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3579{
3580    static gen_helper_gvec_2_ptr * const fns[3] = {
3581        gen_helper_gvec_frecpe_h,
3582        gen_helper_gvec_frecpe_s,
3583        gen_helper_gvec_frecpe_d,
3584    };
3585    if (a->esz == 0) {
3586        return false;
3587    }
3588    if (sve_access_check(s)) {
3589        do_zz_fp(s, a, fns[a->esz - 1]);
3590    }
3591    return true;
3592}
3593
3594static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3595{
3596    static gen_helper_gvec_2_ptr * const fns[3] = {
3597        gen_helper_gvec_frsqrte_h,
3598        gen_helper_gvec_frsqrte_s,
3599        gen_helper_gvec_frsqrte_d,
3600    };
3601    if (a->esz == 0) {
3602        return false;
3603    }
3604    if (sve_access_check(s)) {
3605        do_zz_fp(s, a, fns[a->esz - 1]);
3606    }
3607    return true;
3608}
3609
3610/*
3611 *** SVE Floating Point Compare with Zero Group
3612 */
3613
3614static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3615                      gen_helper_gvec_3_ptr *fn)
3616{
3617    unsigned vsz = vec_full_reg_size(s);
3618    TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3619
3620    tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3621                       vec_full_reg_offset(s, a->rn),
3622                       pred_full_reg_offset(s, a->pg),
3623                       status, vsz, vsz, 0, fn);
3624    tcg_temp_free_ptr(status);
3625}
3626
/*
 * Expand an FP compare-with-zero producing a predicate, selecting the
 * helper by element size (H/S/D; byte elements are reserved).
 */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
{                                                                 \
    static gen_helper_gvec_3_ptr * const fns[3] = {               \
        gen_helper_sve_##name##_h,                                \
        gen_helper_sve_##name##_s,                                \
        gen_helper_sve_##name##_d,                                \
    };                                                            \
    if (a->esz == 0) {                                            \
        return false;                                             \
    }                                                             \
    if (sve_access_check(s)) {                                    \
        do_ppz_fp(s, a, fns[a->esz - 1]);                         \
    }                                                             \
    return true;                                                  \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
3652
3653/*
3654 *** SVE floating-point trig multiply-add coefficient
3655 */
3656
3657static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
3658{
3659    static gen_helper_gvec_3_ptr * const fns[3] = {
3660        gen_helper_sve_ftmad_h,
3661        gen_helper_sve_ftmad_s,
3662        gen_helper_sve_ftmad_d,
3663    };
3664
3665    if (a->esz == 0) {
3666        return false;
3667    }
3668    if (sve_access_check(s)) {
3669        unsigned vsz = vec_full_reg_size(s);
3670        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3671        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3672                           vec_full_reg_offset(s, a->rn),
3673                           vec_full_reg_offset(s, a->rm),
3674                           status, vsz, vsz, a->imm, fns[a->esz - 1]);
3675        tcg_temp_free_ptr(status);
3676    }
3677    return true;
3678}
3679
3680/*
3681 *** SVE Floating Point Accumulating Reduction Group
3682 */
3683
/*
 * FADDA: strictly-ordered accumulating FP add across Zm under Pg,
 * seeded from element 0 of Zn, scalar result written to Vd.
 */
static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
{
    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
                          TCGv_ptr, TCGv_ptr, TCGv_i32);
    /* Indexed by esz - 1: H, S, D.  */
    static fadda_fn * const fns[3] = {
        gen_helper_sve_fadda_h,
        gen_helper_sve_fadda_s,
        gen_helper_sve_fadda_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_rm, t_pg, t_fpst;
    TCGv_i64 t_val;
    TCGv_i32 t_desc;

    /* Byte elements are reserved.  */
    if (a->esz == 0) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    /* Load the initial accumulator from element 0 of Zn.  */
    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
    t_rm = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    t_fpst = get_fpstatus_ptr(a->esz == MO_16);
    t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    /* T_VAL is both the seed and the result of the accumulation.  */
    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_fpst);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_rm);

    write_fp_dreg(s, a->rd, t_val);
    tcg_temp_free_i64(t_val);
    return true;
}
3724
3725/*
3726 *** SVE Floating Point Arithmetic - Unpredicated Group
3727 */
3728
3729static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3730                      gen_helper_gvec_3_ptr *fn)
3731{
3732    if (fn == NULL) {
3733        return false;
3734    }
3735    if (sve_access_check(s)) {
3736        unsigned vsz = vec_full_reg_size(s);
3737        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3738        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3739                           vec_full_reg_offset(s, a->rn),
3740                           vec_full_reg_offset(s, a->rm),
3741                           status, vsz, vsz, 0, fn);
3742        tcg_temp_free_ptr(status);
3743    }
3744    return true;
3745}
3746
3747
/*
 * Expand an unpredicated FP three-operand insn; the NULL entry for
 * esz == 0 makes do_zzz_fp reject byte elements.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)           \
{                                                                   \
    static gen_helper_gvec_3_ptr * const fns[4] = {                 \
        NULL, gen_helper_gvec_##name##_h,                           \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
    };                                                              \
    return do_zzz_fp(s, a, fns[a->esz]);                            \
}

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
3766
3767/*
3768 *** SVE Floating Point Arithmetic - Predicated Group
3769 */
3770
3771static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3772                       gen_helper_gvec_4_ptr *fn)
3773{
3774    if (fn == NULL) {
3775        return false;
3776    }
3777    if (sve_access_check(s)) {
3778        unsigned vsz = vec_full_reg_size(s);
3779        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3780        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3781                           vec_full_reg_offset(s, a->rn),
3782                           vec_full_reg_offset(s, a->rm),
3783                           pred_full_reg_offset(s, a->pg),
3784                           status, vsz, vsz, 0, fn);
3785        tcg_temp_free_ptr(status);
3786    }
3787    return true;
3788}
3789
/*
 * Expand a predicated FP three-operand insn; the NULL entry for
 * esz == 0 makes do_zpzz_fp reject byte elements.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)          \
{                                                                   \
    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
        NULL, gen_helper_sve_##name##_h,                            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
    };                                                              \
    return do_zpzz_fp(s, a, fns[a->esz]);                           \
}

DO_FP3(FADD_zpzz, fadd)
DO_FP3(FSUB_zpzz, fsub)
DO_FP3(FMUL_zpzz, fmul)
DO_FP3(FMIN_zpzz, fmin)
DO_FP3(FMAX_zpzz, fmax)
DO_FP3(FMINNM_zpzz, fminnum)
DO_FP3(FMAXNM_zpzz, fmaxnum)
DO_FP3(FABD, fabd)
DO_FP3(FSCALE, fscalbn)
DO_FP3(FDIV, fdiv)
DO_FP3(FMULX, fmulx)

#undef DO_FP3
3813
/* Signature shared by the FP vector-and-scalar helpers.  */
typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
                                      TCGv_i64, TCGv_ptr, TCGv_i32);

/*
 * Expand Zd = fn(Zn, scalar) under Pg, with FP status selected for
 * half-precision when IS_FP16.
 */
static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zd, t_zn, t_pg, status;
    TCGv_i32 desc;

    t_zd = tcg_temp_new_ptr();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    status = get_fpstatus_ptr(is_fp16);
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    fn(t_zd, t_zn, t_pg, scalar, status, desc);

    tcg_temp_free_i32(desc);
    tcg_temp_free_ptr(status);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_zd);
}
3841
/* Materialize the decoded immediate IMM and expand via do_fp_scalar.  */
static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
                      gen_helper_sve_fp2scalar *fn)
{
    TCGv_i64 temp = tcg_const_i64(imm);
    do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
    tcg_temp_free_i64(temp);
}
3849
/*
 * Expand an FP arithmetic-with-immediate insn.  The one-bit a->imm
 * selects between the two architectural constants (CONST0/CONST1),
 * looked up per element size.
 */
#define DO_FP_IMM(NAME, name, const0, const1) \
static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)         \
{                                                                         \
    static gen_helper_sve_fp2scalar * const fns[3] = {                    \
        gen_helper_sve_##name##_h,                                        \
        gen_helper_sve_##name##_s,                                        \
        gen_helper_sve_##name##_d                                         \
    };                                                                    \
    static uint64_t const val[3][2] = {                                   \
        { float16_##const0, float16_##const1 },                           \
        { float32_##const0, float32_##const1 },                           \
        { float64_##const0, float64_##const1 },                           \
    };                                                                    \
    if (a->esz == 0) {                                                    \
        return false;                                                     \
    }                                                                     \
    if (sve_access_check(s)) {                                            \
        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
    }                                                                     \
    return true;                                                          \
}

/* The constant 2.0 in each format, to pair with softfloat's half/one.  */
#define float16_two  make_float16(0x4000)
#define float32_two  make_float32(0x40000000)
#define float64_two  make_float64(0x4000000000000000ULL)

DO_FP_IMM(FADD, fadds, half, one)
DO_FP_IMM(FSUB, fsubs, half, one)
DO_FP_IMM(FMUL, fmuls, half, two)
DO_FP_IMM(FSUBR, fsubrs, half, one)
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
DO_FP_IMM(FMINNM, fminnms, zero, one)
DO_FP_IMM(FMAX, fmaxs, zero, one)
DO_FP_IMM(FMIN, fmins, zero, one)

#undef DO_FP_IMM
3886
3887static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3888                      gen_helper_gvec_4_ptr *fn)
3889{
3890    if (fn == NULL) {
3891        return false;
3892    }
3893    if (sve_access_check(s)) {
3894        unsigned vsz = vec_full_reg_size(s);
3895        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3896        tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3897                           vec_full_reg_offset(s, a->rn),
3898                           vec_full_reg_offset(s, a->rm),
3899                           pred_full_reg_offset(s, a->pg),
3900                           status, vsz, vsz, 0, fn);
3901        tcg_temp_free_ptr(status);
3902    }
3903    return true;
3904}
3905
/*
 * Expand a predicated FP compare; the NULL entry for esz == 0 makes
 * do_fp_cmp reject byte elements.
 */
#define DO_FPCMP(NAME, name) \
static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)     \
{                                                                     \
    static gen_helper_gvec_4_ptr * const fns[4] = {                   \
        NULL, gen_helper_sve_##name##_h,                              \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
    };                                                                \
    return do_fp_cmp(s, a, fns[a->esz]);                              \
}

DO_FPCMP(FCMGE, fcmge)
DO_FPCMP(FCMGT, fcmgt)
DO_FPCMP(FCMEQ, fcmeq)
DO_FPCMP(FCMNE, fcmne)
DO_FPCMP(FCMUO, fcmuo)
DO_FPCMP(FACGE, facge)
DO_FPCMP(FACGT, facgt)

#undef DO_FPCMP
3925
3926static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
3927{
3928    static gen_helper_gvec_4_ptr * const fns[3] = {
3929        gen_helper_sve_fcadd_h,
3930        gen_helper_sve_fcadd_s,
3931        gen_helper_sve_fcadd_d
3932    };
3933
3934    if (a->esz == 0) {
3935        return false;
3936    }
3937    if (sve_access_check(s)) {
3938        unsigned vsz = vec_full_reg_size(s);
3939        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3940        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3941                           vec_full_reg_offset(s, a->rn),
3942                           vec_full_reg_offset(s, a->rm),
3943                           pred_full_reg_offset(s, a->pg),
3944                           status, vsz, vsz, a->rot, fns[a->esz - 1]);
3945        tcg_temp_free_ptr(status);
3946    }
3947    return true;
3948}
3949
3950typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3951
3952static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3953{
3954    if (fn == NULL) {
3955        return false;
3956    }
3957    if (!sve_access_check(s)) {
3958        return true;
3959    }
3960
3961    unsigned vsz = vec_full_reg_size(s);
3962    unsigned desc;
3963    TCGv_i32 t_desc;
3964    TCGv_ptr pg = tcg_temp_new_ptr();
3965
3966    /* We would need 7 operands to pass these arguments "properly".
3967     * So we encode all the register numbers into the descriptor.
3968     */
3969    desc = deposit32(a->rd, 5, 5, a->rn);
3970    desc = deposit32(desc, 10, 5, a->rm);
3971    desc = deposit32(desc, 15, 5, a->ra);
3972    desc = simd_desc(vsz, vsz, desc);
3973
3974    t_desc = tcg_const_i32(desc);
3975    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3976    fn(cpu_env, pg, t_desc);
3977    tcg_temp_free_i32(t_desc);
3978    tcg_temp_free_ptr(pg);
3979    return true;
3980}
3981
/* Expand one trans_* function per FMLA variant; fns[0] is NULL since
 * there is no byte form, and do_fmla rejects that case.
 */
#define DO_FMLA(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
{                                                                    \
    static gen_helper_sve_fmla * const fns[4] = {                    \
        NULL, gen_helper_sve_##name##_h,                             \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
    };                                                               \
    return do_fmla(s, a, fns[a->esz]);                               \
}

DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)

#undef DO_FMLA
3998
/* FCMLA (vectors): predicated FP complex multiply-add.  */
static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
{
    static gen_helper_sve_fmla * const fns[3] = {
        gen_helper_sve_fcmla_zpzzz_h,
        gen_helper_sve_fcmla_zpzzz_s,
        gen_helper_sve_fcmla_zpzzz_d,
    };

    /* There is no byte-element form.  */
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned desc;
        TCGv_i32 t_desc;
        TCGv_ptr pg = tcg_temp_new_ptr();

        /* We would need 7 operands to pass these arguments "properly".
         * So we encode all the register numbers into the descriptor.
         */
        desc = deposit32(a->rd, 5, 5, a->rn);
        desc = deposit32(desc, 10, 5, a->rm);
        desc = deposit32(desc, 15, 5, a->ra);
        desc = deposit32(desc, 20, 2, a->rot);
        /* Sign-extend the packed 22-bit value, presumably so that it
         * stays within the signed range accepted by the descriptor's
         * data field; the helpers must unpack accordingly.
         * NOTE(review): confirm against the simd_desc/simd_data contract.
         */
        desc = sextract32(desc, 0, 22);
        desc = simd_desc(vsz, vsz, desc);

        t_desc = tcg_const_i32(desc);
        tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
        fns[a->esz - 1](cpu_env, pg, t_desc);
        tcg_temp_free_i32(t_desc);
        tcg_temp_free_ptr(pg);
    }
    return true;
}
4034
/* FCMLA (indexed): only half and single element sizes exist, and the
 * destination must equal the addend register -- both asserted below,
 * as the decoder is expected to have enforced them.
 */
static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
{
    static gen_helper_gvec_3_ptr * const fns[2] = {
        gen_helper_gvec_fcmlah_idx,
        gen_helper_gvec_fcmlas_idx,
    };

    tcg_debug_assert(a->esz == 1 || a->esz == 2);
    tcg_debug_assert(a->rd == a->ra);
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           status, vsz, vsz,
                           /* Pack both index and rotation into the data.  */
                           a->index * 4 + a->rot,
                           fns[a->esz - 1]);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4057
4058/*
4059 *** SVE Floating Point Unary Operations Predicated Group
4060 */
4061
4062static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4063                       bool is_fp16, gen_helper_gvec_3_ptr *fn)
4064{
4065    if (sve_access_check(s)) {
4066        unsigned vsz = vec_full_reg_size(s);
4067        TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4068        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4069                           vec_full_reg_offset(s, rn),
4070                           pred_full_reg_offset(s, pg),
4071                           status, vsz, vsz, 0, fn);
4072        tcg_temp_free_ptr(status);
4073    }
4074    return true;
4075}
4076
/* FCVT: conversions between FP precisions.  The suffix names the two
 * element sizes involved (h=half, s=single, d=double).
 */
static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
}

static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
}

static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
}

static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
}

static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
}

static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
}
4106
/* FCVTZS/FCVTZU: FP to signed/unsigned integer, rounding toward zero.
 * The is_fp16 argument is true for the half-precision source forms,
 * which use the FP16 status flags.
 */
static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
}

static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
}

static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
}

static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
}

static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
}

static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
}

static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
}

static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
}

static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
}

static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
}

static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
}

static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
}

static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
}

static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
}
4176
/* Round-to-integral helpers, indexed by esz - 1 (no byte form).
 * Shared between FRINTI and the fixed-rounding-mode variants below.
 */
static gen_helper_gvec_3_ptr * const frint_fns[3] = {
    gen_helper_sve_frint_h,
    gen_helper_sve_frint_s,
    gen_helper_sve_frint_d
};

/* FRINTI: round to integral using the currently installed rounding mode.  */
static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
{
    if (a->esz == 0) {
        return false;
    }
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
                      frint_fns[a->esz - 1]);
}
4191
4192static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
4193{
4194    static gen_helper_gvec_3_ptr * const fns[3] = {
4195        gen_helper_sve_frintx_h,
4196        gen_helper_sve_frintx_s,
4197        gen_helper_sve_frintx_d
4198    };
4199    if (a->esz == 0) {
4200        return false;
4201    }
4202    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4203}
4204
/* Expand a round-to-integral operation with an explicitly selected
 * rounding mode rather than the one in the current FP status.
 */
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i32 tmode = tcg_const_i32(mode);
        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);

        /* Install the requested mode; set_rmode writes the previous
         * mode back into tmode ...
         */
        gen_helper_set_rmode(tmode, tmode, status);

        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           status, vsz, vsz, 0, frint_fns[a->esz - 1]);

        /* ... which this second call restores.  */
        gen_helper_set_rmode(tmode, tmode, status);
        tcg_temp_free_i32(tmode);
        tcg_temp_free_ptr(status);
    }
    return true;
}
4228
/* FRINTN/P/M/Z/A: round to integral with a fixed rounding mode
 * (nearest-even, toward +inf, toward -inf, toward zero, and
 * ties-away-from-zero respectively).
 */
static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_nearest_even);
}

static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_up);
}

static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_down);
}

static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_to_zero);
}

static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
{
    return do_frint_mode(s, a, float_round_ties_away);
}
4253
4254static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
4255{
4256    static gen_helper_gvec_3_ptr * const fns[3] = {
4257        gen_helper_sve_frecpx_h,
4258        gen_helper_sve_frecpx_s,
4259        gen_helper_sve_frecpx_d
4260    };
4261    if (a->esz == 0) {
4262        return false;
4263    }
4264    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4265}
4266
4267static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
4268{
4269    static gen_helper_gvec_3_ptr * const fns[3] = {
4270        gen_helper_sve_fsqrt_h,
4271        gen_helper_sve_fsqrt_s,
4272        gen_helper_sve_fsqrt_d
4273    };
4274    if (a->esz == 0) {
4275        return false;
4276    }
4277    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4278}
4279
/* SCVTF/UCVTF: signed/unsigned integer to FP conversions.  The is_fp16
 * argument is true for forms producing half-precision results, which
 * use the FP16 status flags.
 */
static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
}

static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
}

static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
}

static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
}

static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
}

static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
}

static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
}

static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
}

static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
}

static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
}

static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
}

static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
}

static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
}

static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
}
4349
4350/*
4351 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4352 */
4353
4354/* Subroutine loading a vector register at VOFS of LEN bytes.
4355 * The load should begin at the address Rn + IMM.
4356 */
4357
static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* One part per aligned 8-byte load, plus one per set bit of the
     * remainder (a 6-byte tail takes a 4-byte + 2-byte pair, below).
     */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small enough: emit the loads fully unrolled.  */
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        /* Emit a run-time loop; i is both the loop counter and the
         * byte offset into the register file.
         */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32 maps the power-of-2 size onto the MO_16/32/64 code.  */
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte load plus a 2-byte load, merged.  */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
4441
4442/* Similarly for stores.  */
static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
{
    int len_align = QEMU_ALIGN_DOWN(len, 8);
    int len_remain = len % 8;
    /* One part per aligned 8-byte store, plus one per set bit of the
     * remainder (a 6-byte tail takes a 4-byte + 2-byte pair, below).
     */
    int nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian store for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        /* Small enough: emit the stores fully unrolled.  */
        int i;

        for (i = 0; i < len_align; i += 8) {
            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
        }
    } else {
        /* Emit a run-time loop; i is both the loop counter and the
         * byte offset into the register file.
         */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr t2, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        t2 = tcg_temp_new_ptr();
        tcg_gen_add_ptr(t2, cpu_env, i);
        tcg_gen_ld_i64(t0, t2, vofs);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tcg_gen_addi_ptr(t2, i, imm);
        tcg_gen_extu_ptr_i64(addr, t2);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
        tcg_temp_free_ptr(t2);

        tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);

        tcg_gen_addi_ptr(i, i, 8);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register stores can be any multiple of 2.  */
    if (len_remain) {
        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* ctz32 maps the power-of-2 size onto the MO_16/32/64 code.  */
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6 bytes: a 4-byte store plus a 2-byte store of the
             * next 16 bits, obtained by shifting.
             */
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_shri_i64(t0, t0, 32);
            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
            break;

        default:
            g_assert_not_reached();
        }
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
4523
4524static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4525{
4526    if (sve_access_check(s)) {
4527        int size = vec_full_reg_size(s);
4528        int off = vec_full_reg_offset(s, a->rd);
4529        do_ldr(s, off, size, a->rn, a->imm * size);
4530    }
4531    return true;
4532}
4533
4534static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4535{
4536    if (sve_access_check(s)) {
4537        int size = pred_full_reg_size(s);
4538        int off = pred_full_reg_offset(s, a->rd);
4539        do_ldr(s, off, size, a->rn, a->imm * size);
4540    }
4541    return true;
4542}
4543
4544static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4545{
4546    if (sve_access_check(s)) {
4547        int size = vec_full_reg_size(s);
4548        int off = vec_full_reg_offset(s, a->rd);
4549        do_str(s, off, size, a->rn, a->imm * size);
4550    }
4551    return true;
4552}
4553
4554static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4555{
4556    if (sve_access_check(s)) {
4557        int size = pred_full_reg_size(s);
4558        int off = pred_full_reg_offset(s, a->rd);
4559        do_str(s, off, size, a->rn, a->imm * size);
4560    }
4561    return true;
4562}
4563
4564/*
4565 *** SVE Memory - Contiguous Load Group
4566 */
4567
/* The memory mode of the dtype.  Both tables below are indexed by the
 * 4-bit dtype field of the contiguous load insns.
 */
static const TCGMemOp dtype_mop[16] = {
    MO_UB, MO_UB, MO_UB, MO_UB,
    MO_SL, MO_UW, MO_UW, MO_UW,
    MO_SW, MO_SW, MO_UL, MO_UL,
    MO_SB, MO_SB, MO_SB, MO_Q
};

/* The log2 size of the memory access, extracted from the memop.  */
#define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)

/* The vector element size of dtype.  */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
4585
/* Combine the dtype's memop (with the guest's data endianness) and the
 * current mmu index into a TCGMemOpIdx for the load/store helpers.
 */
static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype)
{
    return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s));
}
4590
4591static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4592                       int dtype, gen_helper_gvec_mem *fn)
4593{
4594    unsigned vsz = vec_full_reg_size(s);
4595    TCGv_ptr t_pg;
4596    TCGv_i32 t_desc;
4597    int desc;
4598
4599    /* For e.g. LD4, there are not enough arguments to pass all 4
4600     * registers as pointers, so encode the regno into the data field.
4601     * For consistency, do this even for LD1.
4602     */
4603    desc = sve_memopidx(s, dtype);
4604    desc |= zt << MEMOPIDX_SHIFT;
4605    desc = simd_desc(vsz, vsz, desc);
4606    t_desc = tcg_const_i32(desc);
4607    t_pg = tcg_temp_new_ptr();
4608
4609    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4610    fn(cpu_env, t_pg, addr, t_desc);
4611
4612    tcg_temp_free_ptr(t_pg);
4613    tcg_temp_free_i32(t_desc);
4614}
4615
/* Expand a predicated contiguous load.  The helper table is indexed by
 * guest endianness, the 4-bit dtype, and nreg (LD1..LD4 minus one).
 */
static void do_ld_zpa(DisasContext *s, int zt, int pg,
                      TCGv_i64 addr, int dtype, int nreg)
{
    static gen_helper_gvec_mem * const fns[2][16][4] = {
        /* Little-endian */
        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
            gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
          { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
            gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
          { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
            gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },

        /* Big-endian */
        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
            gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
          { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
            gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
          { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },

          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
          { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
            gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
    };
    gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];

    /* While there are holes in the table, they are not
     * accessible via the instruction encoding.
     */
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, dtype, fn);
}
4678
4679static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
4680{
4681    if (a->rm == 31) {
4682        return false;
4683    }
4684    if (sve_access_check(s)) {
4685        TCGv_i64 addr = new_tmp_a64(s);
4686        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4687        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4688        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4689    }
4690    return true;
4691}
4692
4693static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
4694{
4695    if (sve_access_check(s)) {
4696        int vsz = vec_full_reg_size(s);
4697        int elements = vsz >> dtype_esz[a->dtype];
4698        TCGv_i64 addr = new_tmp_a64(s);
4699
4700        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4701                         (a->imm * elements * (a->nreg + 1))
4702                         << dtype_msz(a->dtype));
4703        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4704    }
4705    return true;
4706}
4707
/* LDFF1 (first-fault, scalar plus scalar): helper table indexed by
 * guest endianness and the 4-bit dtype.
 */
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
    static gen_helper_gvec_mem * const fns[2][16] = {
        /* Little-endian */
        { gen_helper_sve_ldff1bb_r,
          gen_helper_sve_ldff1bhu_r,
          gen_helper_sve_ldff1bsu_r,
          gen_helper_sve_ldff1bdu_r,

          gen_helper_sve_ldff1sds_le_r,
          gen_helper_sve_ldff1hh_le_r,
          gen_helper_sve_ldff1hsu_le_r,
          gen_helper_sve_ldff1hdu_le_r,

          gen_helper_sve_ldff1hds_le_r,
          gen_helper_sve_ldff1hss_le_r,
          gen_helper_sve_ldff1ss_le_r,
          gen_helper_sve_ldff1sdu_le_r,

          gen_helper_sve_ldff1bds_r,
          gen_helper_sve_ldff1bss_r,
          gen_helper_sve_ldff1bhs_r,
          gen_helper_sve_ldff1dd_le_r },

        /* Big-endian */
        { gen_helper_sve_ldff1bb_r,
          gen_helper_sve_ldff1bhu_r,
          gen_helper_sve_ldff1bsu_r,
          gen_helper_sve_ldff1bdu_r,

          gen_helper_sve_ldff1sds_be_r,
          gen_helper_sve_ldff1hh_be_r,
          gen_helper_sve_ldff1hsu_be_r,
          gen_helper_sve_ldff1hdu_be_r,

          gen_helper_sve_ldff1hds_be_r,
          gen_helper_sve_ldff1hss_be_r,
          gen_helper_sve_ldff1ss_be_r,
          gen_helper_sve_ldff1sdu_be_r,

          gen_helper_sve_ldff1bds_r,
          gen_helper_sve_ldff1bss_r,
          gen_helper_sve_ldff1bhs_r,
          gen_helper_sve_ldff1dd_be_r },
    };

    if (sve_access_check(s)) {
        TCGv_i64 addr = new_tmp_a64(s);
        /* Address is Rn + (Rm << msz), as for LD1.  */
        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
                   fns[s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
4763
/* LDNF1 (non-fault, scalar plus immediate): helper table indexed by
 * guest endianness and the 4-bit dtype.
 */
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
    static gen_helper_gvec_mem * const fns[2][16] = {
        /* Little-endian */
        { gen_helper_sve_ldnf1bb_r,
          gen_helper_sve_ldnf1bhu_r,
          gen_helper_sve_ldnf1bsu_r,
          gen_helper_sve_ldnf1bdu_r,

          gen_helper_sve_ldnf1sds_le_r,
          gen_helper_sve_ldnf1hh_le_r,
          gen_helper_sve_ldnf1hsu_le_r,
          gen_helper_sve_ldnf1hdu_le_r,

          gen_helper_sve_ldnf1hds_le_r,
          gen_helper_sve_ldnf1hss_le_r,
          gen_helper_sve_ldnf1ss_le_r,
          gen_helper_sve_ldnf1sdu_le_r,

          gen_helper_sve_ldnf1bds_r,
          gen_helper_sve_ldnf1bss_r,
          gen_helper_sve_ldnf1bhs_r,
          gen_helper_sve_ldnf1dd_le_r },

        /* Big-endian */
        { gen_helper_sve_ldnf1bb_r,
          gen_helper_sve_ldnf1bhu_r,
          gen_helper_sve_ldnf1bsu_r,
          gen_helper_sve_ldnf1bdu_r,

          gen_helper_sve_ldnf1sds_be_r,
          gen_helper_sve_ldnf1hh_be_r,
          gen_helper_sve_ldnf1hsu_be_r,
          gen_helper_sve_ldnf1hdu_be_r,

          gen_helper_sve_ldnf1hds_be_r,
          gen_helper_sve_ldnf1hss_be_r,
          gen_helper_sve_ldnf1ss_be_r,
          gen_helper_sve_ldnf1sdu_be_r,

          gen_helper_sve_ldnf1bds_r,
          gen_helper_sve_ldnf1bss_r,
          gen_helper_sve_ldnf1bhs_r,
          gen_helper_sve_ldnf1dd_be_r },
    };

    if (sve_access_check(s)) {
        int vsz = vec_full_reg_size(s);
        int elements = vsz >> dtype_esz[a->dtype];
        /* The immediate counts whole vector-lengths, as for LD1.  */
        int off = (a->imm * elements) << dtype_msz(a->dtype);
        TCGv_i64 addr = new_tmp_a64(s);

        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
                   fns[s->be_data == MO_BE][a->dtype]);
    }
    return true;
}
4822
/* Load one quadword at ADDR under predicate PG, then replicate it
 * across the full vector register ZT.  Used by the LD1RQ insns.
 */
static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
{
    static gen_helper_gvec_mem * const fns[2][4] = {
        { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_le_r,
          gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
        { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_be_r,
          gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_pg;
    TCGv_i32 t_desc;
    int desc, poff;

    /* Load the first quadword using the normal predicated load helpers.  */
    desc = sve_memopidx(s, msz_dtype(msz));
    desc |= zt << MEMOPIDX_SHIFT;
    /* Note oprsz/maxsz of 16: the helper operates on one quadword only.  */
    desc = simd_desc(16, 16, desc);
    t_desc = tcg_const_i32(desc);

    poff = pred_full_reg_offset(s, pg);
    if (vsz > 16) {
        /*
         * Zero-extend the first 16 bits of the predicate into a temporary.
         * This avoids triggering an assert making sure we don't have bits
         * set within a predicate beyond VQ, but we have lowered VQ to 1
         * for this load operation.
         */
        TCGv_i64 tmp = tcg_temp_new_i64();
#ifdef HOST_WORDS_BIGENDIAN
        /* The low 16 predicate bits sit at the opposite end of the
         * 8-byte unit on a big-endian host.
         */
        poff += 6;
#endif
        tcg_gen_ld16u_i64(tmp, cpu_env, poff);

        poff = offsetof(CPUARMState, vfp.preg_tmp);
        tcg_gen_st_i64(tmp, cpu_env, poff);
        tcg_temp_free_i64(tmp);
    }

    t_pg = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(t_pg, cpu_env, poff);

    fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);

    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);

    /* Replicate that first quadword.  */
    if (vsz > 16) {
        unsigned dofs = vec_full_reg_offset(s, zt);
        tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
    }
}
4875
4876static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
4877{
4878    if (a->rm == 31) {
4879        return false;
4880    }
4881    if (sve_access_check(s)) {
4882        int msz = dtype_msz(a->dtype);
4883        TCGv_i64 addr = new_tmp_a64(s);
4884        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4885        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4886        do_ldrq(s, a->rd, a->pg, addr, msz);
4887    }
4888    return true;
4889}
4890
4891static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
4892{
4893    if (sve_access_check(s)) {
4894        TCGv_i64 addr = new_tmp_a64(s);
4895        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4896        do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4897    }
4898    return true;
4899}
4900
/*
 * Load and broadcast element (LD1R): if any predicate element is active,
 * load one element from memory and replicate it to every element of zd;
 * inactive elements are then zeroed.  If no element is active, the load
 * is skipped entirely and zd is simply zeroed.
 */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned psz = pred_full_reg_size(s);
    unsigned esz = dtype_esz[a->dtype];
    unsigned msz = dtype_msz(a->dtype);
    TCGLabel *over = gen_new_label();
    TCGv_i64 temp;

    /* If the guarding predicate has no bits set, no load occurs.  */
    if (psz <= 8) {
        /* Reduce the pred_esz_masks value simply to reduce the
         * size of the code generated here.
         */
        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
        temp = tcg_temp_new_i64();
        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
        /* Branch past the load if no relevant predicate bit is set.  */
        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
        tcg_temp_free_i64(temp);
    } else {
        /* Predicate wider than 64 bits: search for an active element;
         * a negative result indicates that none are active.
         */
        TCGv_i32 t32 = tcg_temp_new_i32();
        find_last_active(s, t32, esz, a->pg);
        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
        tcg_temp_free_i32(t32);
    }

    /* Load the data.  */
    temp = tcg_temp_new_i64();
    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
    tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
                        s->be_data | dtype_mop[a->dtype]);

    /* Broadcast to *all* elements.  */
    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, temp);
    tcg_temp_free_i64(temp);

    /* Zero the inactive elements.  */
    gen_set_label(over);
    do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
    return true;
}
4949
/*
 * Translate the body of ST1..ST4: select the store helper by target
 * endianness, memory size (msz), element size (esz) and the number of
 * additional registers (nreg), then emit it via do_mem_zpa.
 */
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
                      int msz, int esz, int nreg)
{
    /* ST1 helpers, indexed by [be][msz][esz]; NULL where msz > esz.  */
    static gen_helper_gvec_mem * const fn_single[2][4][4] = {
        { { gen_helper_sve_st1bb_r,
            gen_helper_sve_st1bh_r,
            gen_helper_sve_st1bs_r,
            gen_helper_sve_st1bd_r },
          { NULL,
            gen_helper_sve_st1hh_le_r,
            gen_helper_sve_st1hs_le_r,
            gen_helper_sve_st1hd_le_r },
          { NULL, NULL,
            gen_helper_sve_st1ss_le_r,
            gen_helper_sve_st1sd_le_r },
          { NULL, NULL, NULL,
            gen_helper_sve_st1dd_le_r } },
        { { gen_helper_sve_st1bb_r,
            gen_helper_sve_st1bh_r,
            gen_helper_sve_st1bs_r,
            gen_helper_sve_st1bd_r },
          { NULL,
            gen_helper_sve_st1hh_be_r,
            gen_helper_sve_st1hs_be_r,
            gen_helper_sve_st1hd_be_r },
          { NULL, NULL,
            gen_helper_sve_st1ss_be_r,
            gen_helper_sve_st1sd_be_r },
          { NULL, NULL, NULL,
            gen_helper_sve_st1dd_be_r } },
    };
    /* ST2/ST3/ST4 helpers, indexed by [be][nreg - 1][msz].  */
    static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
        { { gen_helper_sve_st2bb_r,
            gen_helper_sve_st2hh_le_r,
            gen_helper_sve_st2ss_le_r,
            gen_helper_sve_st2dd_le_r },
          { gen_helper_sve_st3bb_r,
            gen_helper_sve_st3hh_le_r,
            gen_helper_sve_st3ss_le_r,
            gen_helper_sve_st3dd_le_r },
          { gen_helper_sve_st4bb_r,
            gen_helper_sve_st4hh_le_r,
            gen_helper_sve_st4ss_le_r,
            gen_helper_sve_st4dd_le_r } },
        { { gen_helper_sve_st2bb_r,
            gen_helper_sve_st2hh_be_r,
            gen_helper_sve_st2ss_be_r,
            gen_helper_sve_st2dd_be_r },
          { gen_helper_sve_st3bb_r,
            gen_helper_sve_st3hh_be_r,
            gen_helper_sve_st3ss_be_r,
            gen_helper_sve_st3dd_be_r },
          { gen_helper_sve_st4bb_r,
            gen_helper_sve_st4hh_be_r,
            gen_helper_sve_st4ss_be_r,
            gen_helper_sve_st4dd_be_r } },
    };
    gen_helper_gvec_mem *fn;
    int be = s->be_data == MO_BE;

    if (nreg == 0) {
        /* ST1 */
        fn = fn_single[be][msz][esz];
    } else {
        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
        assert(msz == esz);
        fn = fn_multiple[be][nreg - 1][msz];
    }
    assert(fn != NULL);
    do_mem_zpa(s, zt, pg, addr, msz_dtype(msz), fn);
}
5021
5022static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
5023{
5024    if (a->rm == 31 || a->msz > a->esz) {
5025        return false;
5026    }
5027    if (sve_access_check(s)) {
5028        TCGv_i64 addr = new_tmp_a64(s);
5029        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
5030        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5031        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5032    }
5033    return true;
5034}
5035
5036static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
5037{
5038    if (a->msz > a->esz) {
5039        return false;
5040    }
5041    if (sve_access_check(s)) {
5042        int vsz = vec_full_reg_size(s);
5043        int elements = vsz >> a->esz;
5044        TCGv_i64 addr = new_tmp_a64(s);
5045
5046        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5047                         (a->imm * elements * (a->nreg + 1)) << a->msz);
5048        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5049    }
5050    return true;
5051}
5052
5053/*
5054 *** SVE gather loads / scatter stores
5055 */
5056
/*
 * Emit a call to a gather-load / scatter-store helper FN.
 * ZT, PG and ZM are the data, predicate and index vector registers;
 * SCALAR carries the base address (or, for the vector-plus-immediate
 * forms, the immediate offset).  SCALE and MSZ are packed into the
 * simd descriptor for the helper to interpret.
 */
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
                       int scale, TCGv_i64 scalar, int msz,
                       gen_helper_gvec_mem_scatter *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zm = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_ptr t_zt = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    desc = sve_memopidx(s, msz_dtype(msz));
    desc |= scale << MEMOPIDX_SHIFT;
    desc = simd_desc(vsz, vsz, desc);
    t_desc = tcg_const_i32(desc);

    /* Build env-relative pointers to the three register operands.  */
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
    tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
    fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);

    tcg_temp_free_ptr(t_zt);
    tcg_temp_free_ptr(t_zm);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(t_desc);
}
5083
/*
 * Gather-load helpers for 32-bit elements.
 * Indexed by [be][ff][xs][u][msz]:
 *   be  - target data is big-endian
 *   ff  - first-fault variant
 *   xs  - offset extension form (helper suffix _zsu vs _zss)
 *   u   - zero-extending load (vs sign-extending)
 *   msz - log2 of the memory access size
 * NULL entries are combinations with no helper.
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_le_zsu,
            gen_helper_sve_ldss_le_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_le_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_le_zss,
            gen_helper_sve_ldss_le_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_le_zsu,
            gen_helper_sve_ldffss_le_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_le_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_le_zss,
            gen_helper_sve_ldffss_le_zss, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbss_zsu,
            gen_helper_sve_ldhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldbsu_zsu,
            gen_helper_sve_ldhsu_be_zsu,
            gen_helper_sve_ldss_be_zsu, } },
        { { gen_helper_sve_ldbss_zss,
            gen_helper_sve_ldhss_be_zss,
            NULL, },
          { gen_helper_sve_ldbsu_zss,
            gen_helper_sve_ldhsu_be_zss,
            gen_helper_sve_ldss_be_zss, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbss_zsu,
            gen_helper_sve_ldffhss_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbsu_zsu,
            gen_helper_sve_ldffhsu_be_zsu,
            gen_helper_sve_ldffss_be_zsu, } },
        { { gen_helper_sve_ldffbss_zss,
            gen_helper_sve_ldffhss_be_zss,
            NULL, },
          { gen_helper_sve_ldffbsu_zss,
            gen_helper_sve_ldffhsu_be_zss,
            gen_helper_sve_ldffss_be_zss, } } } },
};
5142
/*
 * Gather-load helpers for 64-bit elements; same [be][ff][xs][u][msz]
 * index scheme as gather_load_fn32.
 * Note that we overload xs=2 to indicate 64-bit offset.
 */
static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
    /* Little-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_le_zsu,
            gen_helper_sve_ldsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_le_zsu,
            gen_helper_sve_ldsdu_le_zsu,
            gen_helper_sve_lddd_le_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_le_zss,
            gen_helper_sve_ldsds_le_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_le_zss,
            gen_helper_sve_ldsdu_le_zss,
            gen_helper_sve_lddd_le_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_le_zd,
            gen_helper_sve_ldsds_le_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_le_zd,
            gen_helper_sve_ldsdu_le_zd,
            gen_helper_sve_lddd_le_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_le_zsu,
            gen_helper_sve_ldffsds_le_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_le_zsu,
            gen_helper_sve_ldffsdu_le_zsu,
            gen_helper_sve_ldffdd_le_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_le_zss,
            gen_helper_sve_ldffsds_le_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_le_zss,
            gen_helper_sve_ldffsdu_le_zss,
            gen_helper_sve_ldffdd_le_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_le_zd,
            gen_helper_sve_ldffsds_le_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_le_zd,
            gen_helper_sve_ldffsdu_le_zd,
            gen_helper_sve_ldffdd_le_zd, } } } },

    /* Big-endian */
    { { { { gen_helper_sve_ldbds_zsu,
            gen_helper_sve_ldhds_be_zsu,
            gen_helper_sve_ldsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldbdu_zsu,
            gen_helper_sve_ldhdu_be_zsu,
            gen_helper_sve_ldsdu_be_zsu,
            gen_helper_sve_lddd_be_zsu, } },
        { { gen_helper_sve_ldbds_zss,
            gen_helper_sve_ldhds_be_zss,
            gen_helper_sve_ldsds_be_zss,
            NULL, },
          { gen_helper_sve_ldbdu_zss,
            gen_helper_sve_ldhdu_be_zss,
            gen_helper_sve_ldsdu_be_zss,
            gen_helper_sve_lddd_be_zss, } },
        { { gen_helper_sve_ldbds_zd,
            gen_helper_sve_ldhds_be_zd,
            gen_helper_sve_ldsds_be_zd,
            NULL, },
          { gen_helper_sve_ldbdu_zd,
            gen_helper_sve_ldhdu_be_zd,
            gen_helper_sve_ldsdu_be_zd,
            gen_helper_sve_lddd_be_zd, } } },

      /* First-fault */
      { { { gen_helper_sve_ldffbds_zsu,
            gen_helper_sve_ldffhds_be_zsu,
            gen_helper_sve_ldffsds_be_zsu,
            NULL, },
          { gen_helper_sve_ldffbdu_zsu,
            gen_helper_sve_ldffhdu_be_zsu,
            gen_helper_sve_ldffsdu_be_zsu,
            gen_helper_sve_ldffdd_be_zsu, } },
        { { gen_helper_sve_ldffbds_zss,
            gen_helper_sve_ldffhds_be_zss,
            gen_helper_sve_ldffsds_be_zss,
            NULL, },
          { gen_helper_sve_ldffbdu_zss,
            gen_helper_sve_ldffhdu_be_zss,
            gen_helper_sve_ldffsdu_be_zss,
            gen_helper_sve_ldffdd_be_zss, } },
        { { gen_helper_sve_ldffbds_zd,
            gen_helper_sve_ldffhds_be_zd,
            gen_helper_sve_ldffsds_be_zd,
            NULL, },
          { gen_helper_sve_ldffbdu_zd,
            gen_helper_sve_ldffhdu_be_zd,
            gen_helper_sve_ldffsdu_be_zd,
            gen_helper_sve_ldffdd_be_zd, } } } },
};
5249
5250static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
5251{
5252    gen_helper_gvec_mem_scatter *fn = NULL;
5253    int be = s->be_data == MO_BE;
5254
5255    if (!sve_access_check(s)) {
5256        return true;
5257    }
5258
5259    switch (a->esz) {
5260    case MO_32:
5261        fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
5262        break;
5263    case MO_64:
5264        fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
5265        break;
5266    }
5267    assert(fn != NULL);
5268
5269    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5270               cpu_reg_sp(s, a->rn), a->msz, fn);
5271    return true;
5272}
5273
5274static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
5275{
5276    gen_helper_gvec_mem_scatter *fn = NULL;
5277    int be = s->be_data == MO_BE;
5278    TCGv_i64 imm;
5279
5280    if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5281        return false;
5282    }
5283    if (!sve_access_check(s)) {
5284        return true;
5285    }
5286
5287    switch (a->esz) {
5288    case MO_32:
5289        fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
5290        break;
5291    case MO_64:
5292        fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
5293        break;
5294    }
5295    assert(fn != NULL);
5296
5297    /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5298     * by loading the immediate into the scalar parameter.
5299     */
5300    imm = tcg_const_i64(a->imm << a->msz);
5301    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
5302    tcg_temp_free_i64(imm);
5303    return true;
5304}
5305
/*
 * Scatter-store helpers for 32-bit elements.
 * Indexed by [be][xs][msz]: big-endian target data, offset extension
 * form (_zsu vs _zss helper suffix), and log2 memory access size.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = {
    /* Little-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_le_zsu,
        gen_helper_sve_stss_le_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_le_zss,
        gen_helper_sve_stss_le_zss, } },
    /* Big-endian */
    { { gen_helper_sve_stbs_zsu,
        gen_helper_sve_sths_be_zsu,
        gen_helper_sve_stss_be_zsu, },
      { gen_helper_sve_stbs_zss,
        gen_helper_sve_sths_be_zss,
        gen_helper_sve_stss_be_zss, } },
};
5323
/*
 * Scatter-store helpers for 64-bit elements; same [be][xs][msz] index
 * scheme as scatter_store_fn32.
 * Note that we overload xs=2 to indicate 64-bit offset.
 */
static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
    /* Little-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_le_zsu,
        gen_helper_sve_stsd_le_zsu,
        gen_helper_sve_stdd_le_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_le_zss,
        gen_helper_sve_stsd_le_zss,
        gen_helper_sve_stdd_le_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_le_zd,
        gen_helper_sve_stsd_le_zd,
        gen_helper_sve_stdd_le_zd, } },
    /* Big-endian */
    { { gen_helper_sve_stbd_zsu,
        gen_helper_sve_sthd_be_zsu,
        gen_helper_sve_stsd_be_zsu,
        gen_helper_sve_stdd_be_zsu, },
      { gen_helper_sve_stbd_zss,
        gen_helper_sve_sthd_be_zss,
        gen_helper_sve_stsd_be_zss,
        gen_helper_sve_stdd_be_zss, },
      { gen_helper_sve_stbd_zd,
        gen_helper_sve_sthd_be_zd,
        gen_helper_sve_stsd_be_zd,
        gen_helper_sve_stdd_be_zd, } },
};
5353
5354static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
5355{
5356    gen_helper_gvec_mem_scatter *fn;
5357    int be = s->be_data == MO_BE;
5358
5359    if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5360        return false;
5361    }
5362    if (!sve_access_check(s)) {
5363        return true;
5364    }
5365    switch (a->esz) {
5366    case MO_32:
5367        fn = scatter_store_fn32[be][a->xs][a->msz];
5368        break;
5369    case MO_64:
5370        fn = scatter_store_fn64[be][a->xs][a->msz];
5371        break;
5372    default:
5373        g_assert_not_reached();
5374    }
5375    do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5376               cpu_reg_sp(s, a->rn), a->msz, fn);
5377    return true;
5378}
5379
5380static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
5381{
5382    gen_helper_gvec_mem_scatter *fn = NULL;
5383    int be = s->be_data == MO_BE;
5384    TCGv_i64 imm;
5385
5386    if (a->esz < a->msz) {
5387        return false;
5388    }
5389    if (!sve_access_check(s)) {
5390        return true;
5391    }
5392
5393    switch (a->esz) {
5394    case MO_32:
5395        fn = scatter_store_fn32[be][0][a->msz];
5396        break;
5397    case MO_64:
5398        fn = scatter_store_fn64[be][2][a->msz];
5399        break;
5400    }
5401    assert(fn != NULL);
5402
5403    /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5404     * by loading the immediate into the scalar parameter.
5405     */
5406    imm = tcg_const_i64(a->imm << a->msz);
5407    do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
5408    tcg_temp_free_i64(imm);
5409    return true;
5410}
5411
5412/*
5413 * Prefetches
5414 */
5415
5416static bool trans_PRF(DisasContext *s, arg_PRF *a)
5417{
5418    /* Prefetch is a nop within QEMU.  */
5419    (void)sve_access_check(s);
5420    return true;
5421}
5422
5423static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
5424{
5425    if (a->rm == 31) {
5426        return false;
5427    }
5428    /* Prefetch is a nop within QEMU.  */
5429    (void)sve_access_check(s);
5430    return true;
5431}
5432
5433/*
5434 * Move Prefix
5435 *
5436 * TODO: The implementation so far could handle predicated merging movprfx.
5437 * The helper functions as written take an extra source register to
5438 * use in the operation, but the result is only written when predication
5439 * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
5440 * to allow the final write back to the destination to be unconditional.
5441 * For predicated zeroing movprfx, we need to rearrange the helpers to
5442 * allow the final write back to zero inactives.
5443 *
5444 * In the meantime, just emit the moves.
5445 */
5446
/* Unpredicated movprfx: emitted as a plain vector move.  */
static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a)
{
    return do_mov_z(s, a->rd, a->rn);
}
5451
5452static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
5453{
5454    if (sve_access_check(s)) {
5455        do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5456    }
5457    return true;
5458}
5459
5460static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
5461{
5462    if (sve_access_check(s)) {
5463        do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5464    }
5465    return true;
5466}
5467