qemu/target/arm/translate-sve.c
<<
>>
Prefs
   1/*
   2 * AArch64 SVE translation
   3 *
   4 * Copyright (c) 2018 Linaro, Ltd
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "cpu.h"
  22#include "exec/exec-all.h"
  23#include "tcg-op.h"
  24#include "tcg-op-gvec.h"
  25#include "tcg-gvec-desc.h"
  26#include "qemu/log.h"
  27#include "arm_ldst.h"
  28#include "translate.h"
  29#include "internals.h"
  30#include "exec/helper-proto.h"
  31#include "exec/helper-gen.h"
  32#include "exec/log.h"
  33#include "trace-tcg.h"
  34#include "translate-a64.h"
  35#include "fpu/softfloat.h"
  36
  37
  38typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
  39                         TCGv_i64, uint32_t, uint32_t);
  40
  41typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
  42                                     TCGv_ptr, TCGv_i32);
  43typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
  44                                     TCGv_ptr, TCGv_ptr, TCGv_i32);
  45
  46typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
  47typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
  48                                         TCGv_ptr, TCGv_i64, TCGv_i32);
  49
  50/*
  51 * Helpers for extracting complex instruction fields.
  52 */
  53
  54/* See e.g. ASR (immediate, predicated).
  55 * Returns -1 for unallocated encoding; diagnose later.
  56 */
  57static int tszimm_esz(DisasContext *s, int x)
  58{
  59    x >>= 3;  /* discard imm3 */
  60    return 31 - clz32(x);
  61}
  62
  63static int tszimm_shr(DisasContext *s, int x)
  64{
  65    return (16 << tszimm_esz(s, x)) - x;
  66}
  67
  68/* See e.g. LSL (immediate, predicated).  */
  69static int tszimm_shl(DisasContext *s, int x)
  70{
  71    return x - (8 << tszimm_esz(s, x));
  72}
  73
  74static inline int plus1(DisasContext *s, int x)
  75{
  76    return x + 1;
  77}
  78
  79/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  80static inline int expand_imm_sh8s(DisasContext *s, int x)
  81{
  82    return (int8_t)x << (x & 0x100 ? 8 : 0);
  83}
  84
  85static inline int expand_imm_sh8u(DisasContext *s, int x)
  86{
  87    return (uint8_t)x << (x & 0x100 ? 8 : 0);
  88}
  89
  90/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  91 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  92 */
  93static inline int msz_dtype(DisasContext *s, int msz)
  94{
  95    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  96    return dtype[msz];
  97}
  98
  99/*
 100 * Include the generated decoder.
 101 */
 102
 103#include "decode-sve.inc.c"
 104
 105/*
 106 * Implement all of the translator functions referenced by the decoder.
 107 */
 108
 109/* Return the offset info CPUARMState of the predicate vector register Pn.
 110 * Note for this purpose, FFR is P16.
 111 */
 112static inline int pred_full_reg_offset(DisasContext *s, int regno)
 113{
 114    return offsetof(CPUARMState, vfp.pregs[regno]);
 115}
 116
 117/* Return the byte size of the whole predicate register, VL / 64.  */
 118static inline int pred_full_reg_size(DisasContext *s)
 119{
 120    return s->sve_len >> 3;
 121}
 122
 123/* Round up the size of a register to a size allowed by
 124 * the tcg vector infrastructure.  Any operation which uses this
 125 * size may assume that the bits above pred_full_reg_size are zero,
 126 * and must leave them the same way.
 127 *
 128 * Note that this is not needed for the vector registers as they
 129 * are always properly sized for tcg vectors.
 130 */
 131static int size_for_gvec(int size)
 132{
 133    if (size <= 8) {
 134        return 8;
 135    } else {
 136        return QEMU_ALIGN_UP(size, 16);
 137    }
 138}
 139
 140static int pred_gvec_reg_size(DisasContext *s)
 141{
 142    return size_for_gvec(pred_full_reg_size(s));
 143}
 144
 145/* Invoke a vector expander on two Zregs.  */
 146static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
 147                         int esz, int rd, int rn)
 148{
 149    if (sve_access_check(s)) {
 150        unsigned vsz = vec_full_reg_size(s);
 151        gvec_fn(esz, vec_full_reg_offset(s, rd),
 152                vec_full_reg_offset(s, rn), vsz, vsz);
 153    }
 154    return true;
 155}
 156
 157/* Invoke a vector expander on three Zregs.  */
 158static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
 159                         int esz, int rd, int rn, int rm)
 160{
 161    if (sve_access_check(s)) {
 162        unsigned vsz = vec_full_reg_size(s);
 163        gvec_fn(esz, vec_full_reg_offset(s, rd),
 164                vec_full_reg_offset(s, rn),
 165                vec_full_reg_offset(s, rm), vsz, vsz);
 166    }
 167    return true;
 168}
 169
 170/* Invoke a vector move on two Zregs.  */
 171static bool do_mov_z(DisasContext *s, int rd, int rn)
 172{
 173    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
 174}
 175
 176/* Initialize a Zreg with replications of a 64-bit immediate.  */
 177static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
 178{
 179    unsigned vsz = vec_full_reg_size(s);
 180    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
 181}
 182
 183/* Invoke a vector expander on two Pregs.  */
 184static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
 185                         int esz, int rd, int rn)
 186{
 187    if (sve_access_check(s)) {
 188        unsigned psz = pred_gvec_reg_size(s);
 189        gvec_fn(esz, pred_full_reg_offset(s, rd),
 190                pred_full_reg_offset(s, rn), psz, psz);
 191    }
 192    return true;
 193}
 194
 195/* Invoke a vector expander on three Pregs.  */
 196static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
 197                         int esz, int rd, int rn, int rm)
 198{
 199    if (sve_access_check(s)) {
 200        unsigned psz = pred_gvec_reg_size(s);
 201        gvec_fn(esz, pred_full_reg_offset(s, rd),
 202                pred_full_reg_offset(s, rn),
 203                pred_full_reg_offset(s, rm), psz, psz);
 204    }
 205    return true;
 206}
 207
 208/* Invoke a vector operation on four Pregs.  */
 209static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
 210                        int rd, int rn, int rm, int rg)
 211{
 212    if (sve_access_check(s)) {
 213        unsigned psz = pred_gvec_reg_size(s);
 214        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
 215                       pred_full_reg_offset(s, rn),
 216                       pred_full_reg_offset(s, rm),
 217                       pred_full_reg_offset(s, rg),
 218                       psz, psz, gvec_op);
 219    }
 220    return true;
 221}
 222
 223/* Invoke a vector move on two Pregs.  */
 224static bool do_mov_p(DisasContext *s, int rd, int rn)
 225{
 226    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
 227}
 228
 229/* Set the cpu flags as per a return from an SVE helper.  */
 230static void do_pred_flags(TCGv_i32 t)
 231{
 232    tcg_gen_mov_i32(cpu_NF, t);
 233    tcg_gen_andi_i32(cpu_ZF, t, 2);
 234    tcg_gen_andi_i32(cpu_CF, t, 1);
 235    tcg_gen_movi_i32(cpu_VF, 0);
 236}
 237
 238/* Subroutines computing the ARM PredTest psuedofunction.  */
 239static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
 240{
 241    TCGv_i32 t = tcg_temp_new_i32();
 242
 243    gen_helper_sve_predtest1(t, d, g);
 244    do_pred_flags(t);
 245    tcg_temp_free_i32(t);
 246}
 247
 248static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
 249{
 250    TCGv_ptr dptr = tcg_temp_new_ptr();
 251    TCGv_ptr gptr = tcg_temp_new_ptr();
 252    TCGv_i32 t;
 253
 254    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
 255    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
 256    t = tcg_const_i32(words);
 257
 258    gen_helper_sve_predtest(t, dptr, gptr, t);
 259    tcg_temp_free_ptr(dptr);
 260    tcg_temp_free_ptr(gptr);
 261
 262    do_pred_flags(t);
 263    tcg_temp_free_i32(t);
 264}
 265
 266/* For each element size, the bits within a predicate word that are active.  */
 267const uint64_t pred_esz_masks[4] = {
 268    0xffffffffffffffffull, 0x5555555555555555ull,
 269    0x1111111111111111ull, 0x0101010101010101ull
 270};
 271
 272/*
 273 *** SVE Logical - Unpredicated Group
 274 */
 275
 276static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)
 277{
 278    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
 279}
 280
 281static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
 282{
 283    return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
 284}
 285
 286static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
 287{
 288    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
 289}
 290
 291static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
 292{
 293    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
 294}
 295
 296/*
 297 *** SVE Integer Arithmetic - Unpredicated Group
 298 */
 299
 300static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a)
 301{
 302    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
 303}
 304
 305static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a)
 306{
 307    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
 308}
 309
 310static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a)
 311{
 312    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
 313}
 314
 315static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
 316{
 317    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
 318}
 319
 320static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a)
 321{
 322    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
 323}
 324
 325static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a)
 326{
 327    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
 328}
 329
 330/*
 331 *** SVE Integer Arithmetic - Binary Predicated Group
 332 */
 333
 334static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 335{
 336    unsigned vsz = vec_full_reg_size(s);
 337    if (fn == NULL) {
 338        return false;
 339    }
 340    if (sve_access_check(s)) {
 341        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
 342                           vec_full_reg_offset(s, a->rn),
 343                           vec_full_reg_offset(s, a->rm),
 344                           pred_full_reg_offset(s, a->pg),
 345                           vsz, vsz, 0, fn);
 346    }
 347    return true;
 348}
 349
 350/* Select active elememnts from Zn and inactive elements from Zm,
 351 * storing the result in Zd.
 352 */
 353static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
 354{
 355    static gen_helper_gvec_4 * const fns[4] = {
 356        gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
 357        gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
 358    };
 359    unsigned vsz = vec_full_reg_size(s);
 360    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
 361                       vec_full_reg_offset(s, rn),
 362                       vec_full_reg_offset(s, rm),
 363                       pred_full_reg_offset(s, pg),
 364                       vsz, vsz, 0, fns[esz]);
 365}
 366
 367#define DO_ZPZZ(NAME, name) \
 368static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a)         \
 369{                                                                         \
 370    static gen_helper_gvec_4 * const fns[4] = {                           \
 371        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
 372        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
 373    };                                                                    \
 374    return do_zpzz_ool(s, a, fns[a->esz]);                                \
 375}
 376
 377DO_ZPZZ(AND, and)
 378DO_ZPZZ(EOR, eor)
 379DO_ZPZZ(ORR, orr)
 380DO_ZPZZ(BIC, bic)
 381
 382DO_ZPZZ(ADD, add)
 383DO_ZPZZ(SUB, sub)
 384
 385DO_ZPZZ(SMAX, smax)
 386DO_ZPZZ(UMAX, umax)
 387DO_ZPZZ(SMIN, smin)
 388DO_ZPZZ(UMIN, umin)
 389DO_ZPZZ(SABD, sabd)
 390DO_ZPZZ(UABD, uabd)
 391
 392DO_ZPZZ(MUL, mul)
 393DO_ZPZZ(SMULH, smulh)
 394DO_ZPZZ(UMULH, umulh)
 395
 396DO_ZPZZ(ASR, asr)
 397DO_ZPZZ(LSR, lsr)
 398DO_ZPZZ(LSL, lsl)
 399
 400static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
 401{
 402    static gen_helper_gvec_4 * const fns[4] = {
 403        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
 404    };
 405    return do_zpzz_ool(s, a, fns[a->esz]);
 406}
 407
 408static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a)
 409{
 410    static gen_helper_gvec_4 * const fns[4] = {
 411        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
 412    };
 413    return do_zpzz_ool(s, a, fns[a->esz]);
 414}
 415
 416static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a)
 417{
 418    if (sve_access_check(s)) {
 419        do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
 420    }
 421    return true;
 422}
 423
 424#undef DO_ZPZZ
 425
 426/*
 427 *** SVE Integer Arithmetic - Unary Predicated Group
 428 */
 429
 430static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
 431{
 432    if (fn == NULL) {
 433        return false;
 434    }
 435    if (sve_access_check(s)) {
 436        unsigned vsz = vec_full_reg_size(s);
 437        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 438                           vec_full_reg_offset(s, a->rn),
 439                           pred_full_reg_offset(s, a->pg),
 440                           vsz, vsz, 0, fn);
 441    }
 442    return true;
 443}
 444
 445#define DO_ZPZ(NAME, name) \
 446static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)           \
 447{                                                                   \
 448    static gen_helper_gvec_3 * const fns[4] = {                     \
 449        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
 450        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
 451    };                                                              \
 452    return do_zpz_ool(s, a, fns[a->esz]);                           \
 453}
 454
 455DO_ZPZ(CLS, cls)
 456DO_ZPZ(CLZ, clz)
 457DO_ZPZ(CNT_zpz, cnt_zpz)
 458DO_ZPZ(CNOT, cnot)
 459DO_ZPZ(NOT_zpz, not_zpz)
 460DO_ZPZ(ABS, abs)
 461DO_ZPZ(NEG, neg)
 462
 463static bool trans_FABS(DisasContext *s, arg_rpr_esz *a)
 464{
 465    static gen_helper_gvec_3 * const fns[4] = {
 466        NULL,
 467        gen_helper_sve_fabs_h,
 468        gen_helper_sve_fabs_s,
 469        gen_helper_sve_fabs_d
 470    };
 471    return do_zpz_ool(s, a, fns[a->esz]);
 472}
 473
 474static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a)
 475{
 476    static gen_helper_gvec_3 * const fns[4] = {
 477        NULL,
 478        gen_helper_sve_fneg_h,
 479        gen_helper_sve_fneg_s,
 480        gen_helper_sve_fneg_d
 481    };
 482    return do_zpz_ool(s, a, fns[a->esz]);
 483}
 484
 485static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a)
 486{
 487    static gen_helper_gvec_3 * const fns[4] = {
 488        NULL,
 489        gen_helper_sve_sxtb_h,
 490        gen_helper_sve_sxtb_s,
 491        gen_helper_sve_sxtb_d
 492    };
 493    return do_zpz_ool(s, a, fns[a->esz]);
 494}
 495
 496static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a)
 497{
 498    static gen_helper_gvec_3 * const fns[4] = {
 499        NULL,
 500        gen_helper_sve_uxtb_h,
 501        gen_helper_sve_uxtb_s,
 502        gen_helper_sve_uxtb_d
 503    };
 504    return do_zpz_ool(s, a, fns[a->esz]);
 505}
 506
 507static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a)
 508{
 509    static gen_helper_gvec_3 * const fns[4] = {
 510        NULL, NULL,
 511        gen_helper_sve_sxth_s,
 512        gen_helper_sve_sxth_d
 513    };
 514    return do_zpz_ool(s, a, fns[a->esz]);
 515}
 516
 517static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a)
 518{
 519    static gen_helper_gvec_3 * const fns[4] = {
 520        NULL, NULL,
 521        gen_helper_sve_uxth_s,
 522        gen_helper_sve_uxth_d
 523    };
 524    return do_zpz_ool(s, a, fns[a->esz]);
 525}
 526
 527static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a)
 528{
 529    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
 530}
 531
 532static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a)
 533{
 534    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
 535}
 536
 537#undef DO_ZPZ
 538
 539/*
 540 *** SVE Integer Reduction Group
 541 */
 542
 543typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
 544static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
 545                       gen_helper_gvec_reduc *fn)
 546{
 547    unsigned vsz = vec_full_reg_size(s);
 548    TCGv_ptr t_zn, t_pg;
 549    TCGv_i32 desc;
 550    TCGv_i64 temp;
 551
 552    if (fn == NULL) {
 553        return false;
 554    }
 555    if (!sve_access_check(s)) {
 556        return true;
 557    }
 558
 559    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 560    temp = tcg_temp_new_i64();
 561    t_zn = tcg_temp_new_ptr();
 562    t_pg = tcg_temp_new_ptr();
 563
 564    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
 565    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
 566    fn(temp, t_zn, t_pg, desc);
 567    tcg_temp_free_ptr(t_zn);
 568    tcg_temp_free_ptr(t_pg);
 569    tcg_temp_free_i32(desc);
 570
 571    write_fp_dreg(s, a->rd, temp);
 572    tcg_temp_free_i64(temp);
 573    return true;
 574}
 575
 576#define DO_VPZ(NAME, name) \
 577static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
 578{                                                                        \
 579    static gen_helper_gvec_reduc * const fns[4] = {                      \
 580        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
 581        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
 582    };                                                                   \
 583    return do_vpz_ool(s, a, fns[a->esz]);                                \
 584}
 585
 586DO_VPZ(ORV, orv)
 587DO_VPZ(ANDV, andv)
 588DO_VPZ(EORV, eorv)
 589
 590DO_VPZ(UADDV, uaddv)
 591DO_VPZ(SMAXV, smaxv)
 592DO_VPZ(UMAXV, umaxv)
 593DO_VPZ(SMINV, sminv)
 594DO_VPZ(UMINV, uminv)
 595
 596static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
 597{
 598    static gen_helper_gvec_reduc * const fns[4] = {
 599        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
 600        gen_helper_sve_saddv_s, NULL
 601    };
 602    return do_vpz_ool(s, a, fns[a->esz]);
 603}
 604
 605#undef DO_VPZ
 606
 607/*
 608 *** SVE Shift by Immediate - Predicated Group
 609 */
 610
 611/* Store zero into every active element of Zd.  We will use this for two
 612 * and three-operand predicated instructions for which logic dictates a
 613 * zero result.
 614 */
 615static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
 616{
 617    static gen_helper_gvec_2 * const fns[4] = {
 618        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
 619        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
 620    };
 621    if (sve_access_check(s)) {
 622        unsigned vsz = vec_full_reg_size(s);
 623        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 624                           pred_full_reg_offset(s, pg),
 625                           vsz, vsz, 0, fns[esz]);
 626    }
 627    return true;
 628}
 629
 630/* Copy Zn into Zd, storing zeros into inactive elements.  */
 631static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
 632{
 633    static gen_helper_gvec_3 * const fns[4] = {
 634        gen_helper_sve_movz_b, gen_helper_sve_movz_h,
 635        gen_helper_sve_movz_s, gen_helper_sve_movz_d,
 636    };
 637    unsigned vsz = vec_full_reg_size(s);
 638    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 639                       vec_full_reg_offset(s, rn),
 640                       pred_full_reg_offset(s, pg),
 641                       vsz, vsz, 0, fns[esz]);
 642}
 643
 644static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
 645                        gen_helper_gvec_3 *fn)
 646{
 647    if (sve_access_check(s)) {
 648        unsigned vsz = vec_full_reg_size(s);
 649        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 650                           vec_full_reg_offset(s, a->rn),
 651                           pred_full_reg_offset(s, a->pg),
 652                           vsz, vsz, a->imm, fn);
 653    }
 654    return true;
 655}
 656
 657static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a)
 658{
 659    static gen_helper_gvec_3 * const fns[4] = {
 660        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
 661        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
 662    };
 663    if (a->esz < 0) {
 664        /* Invalid tsz encoding -- see tszimm_esz. */
 665        return false;
 666    }
 667    /* Shift by element size is architecturally valid.  For
 668       arithmetic right-shift, it's the same as by one less. */
 669    a->imm = MIN(a->imm, (8 << a->esz) - 1);
 670    return do_zpzi_ool(s, a, fns[a->esz]);
 671}
 672
 673static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
 674{
 675    static gen_helper_gvec_3 * const fns[4] = {
 676        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
 677        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
 678    };
 679    if (a->esz < 0) {
 680        return false;
 681    }
 682    /* Shift by element size is architecturally valid.
 683       For logical shifts, it is a zeroing operation.  */
 684    if (a->imm >= (8 << a->esz)) {
 685        return do_clr_zp(s, a->rd, a->pg, a->esz);
 686    } else {
 687        return do_zpzi_ool(s, a, fns[a->esz]);
 688    }
 689}
 690
 691static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
 692{
 693    static gen_helper_gvec_3 * const fns[4] = {
 694        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
 695        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
 696    };
 697    if (a->esz < 0) {
 698        return false;
 699    }
 700    /* Shift by element size is architecturally valid.
 701       For logical shifts, it is a zeroing operation.  */
 702    if (a->imm >= (8 << a->esz)) {
 703        return do_clr_zp(s, a->rd, a->pg, a->esz);
 704    } else {
 705        return do_zpzi_ool(s, a, fns[a->esz]);
 706    }
 707}
 708
 709static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
 710{
 711    static gen_helper_gvec_3 * const fns[4] = {
 712        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
 713        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
 714    };
 715    if (a->esz < 0) {
 716        return false;
 717    }
 718    /* Shift by element size is architecturally valid.  For arithmetic
 719       right shift for division, it is a zeroing operation.  */
 720    if (a->imm >= (8 << a->esz)) {
 721        return do_clr_zp(s, a->rd, a->pg, a->esz);
 722    } else {
 723        return do_zpzi_ool(s, a, fns[a->esz]);
 724    }
 725}
 726
 727/*
 728 *** SVE Bitwise Shift - Predicated Group
 729 */
 730
 731#define DO_ZPZW(NAME, name) \
 732static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a)         \
 733{                                                                         \
 734    static gen_helper_gvec_4 * const fns[3] = {                           \
 735        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
 736        gen_helper_sve_##name##_zpzw_s,                                   \
 737    };                                                                    \
 738    if (a->esz < 0 || a->esz >= 3) {                                      \
 739        return false;                                                     \
 740    }                                                                     \
 741    return do_zpzz_ool(s, a, fns[a->esz]);                                \
 742}
 743
 744DO_ZPZW(ASR, asr)
 745DO_ZPZW(LSR, lsr)
 746DO_ZPZW(LSL, lsl)
 747
 748#undef DO_ZPZW
 749
 750/*
 751 *** SVE Bitwise Shift - Unpredicated Group
 752 */
 753
 754static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
 755                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
 756                                         int64_t, uint32_t, uint32_t))
 757{
 758    if (a->esz < 0) {
 759        /* Invalid tsz encoding -- see tszimm_esz. */
 760        return false;
 761    }
 762    if (sve_access_check(s)) {
 763        unsigned vsz = vec_full_reg_size(s);
 764        /* Shift by element size is architecturally valid.  For
 765           arithmetic right-shift, it's the same as by one less.
 766           Otherwise it is a zeroing operation.  */
 767        if (a->imm >= 8 << a->esz) {
 768            if (asr) {
 769                a->imm = (8 << a->esz) - 1;
 770            } else {
 771                do_dupi_z(s, a->rd, 0);
 772                return true;
 773            }
 774        }
 775        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
 776                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
 777    }
 778    return true;
 779}
 780
 781static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a)
 782{
 783    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
 784}
 785
 786static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a)
 787{
 788    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
 789}
 790
 791static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a)
 792{
 793    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
 794}
 795
 796static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
 797{
 798    if (fn == NULL) {
 799        return false;
 800    }
 801    if (sve_access_check(s)) {
 802        unsigned vsz = vec_full_reg_size(s);
 803        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 804                           vec_full_reg_offset(s, a->rn),
 805                           vec_full_reg_offset(s, a->rm),
 806                           vsz, vsz, 0, fn);
 807    }
 808    return true;
 809}
 810
 811#define DO_ZZW(NAME, name) \
 812static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a)           \
 813{                                                                         \
 814    static gen_helper_gvec_3 * const fns[4] = {                           \
 815        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
 816        gen_helper_sve_##name##_zzw_s, NULL                               \
 817    };                                                                    \
 818    return do_zzw_ool(s, a, fns[a->esz]);                                 \
 819}
 820
 821DO_ZZW(ASR, asr)
 822DO_ZZW(LSR, lsr)
 823DO_ZZW(LSL, lsl)
 824
 825#undef DO_ZZW
 826
 827/*
 828 *** SVE Integer Multiply-Add Group
 829 */
 830
 831static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
 832                         gen_helper_gvec_5 *fn)
 833{
 834    if (sve_access_check(s)) {
 835        unsigned vsz = vec_full_reg_size(s);
 836        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
 837                           vec_full_reg_offset(s, a->ra),
 838                           vec_full_reg_offset(s, a->rn),
 839                           vec_full_reg_offset(s, a->rm),
 840                           pred_full_reg_offset(s, a->pg),
 841                           vsz, vsz, 0, fn);
 842    }
 843    return true;
 844}
 845
 846#define DO_ZPZZZ(NAME, name) \
 847static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
 848{                                                                    \
 849    static gen_helper_gvec_5 * const fns[4] = {                      \
 850        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
 851        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
 852    };                                                               \
 853    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
 854}
 855
 856DO_ZPZZZ(MLA, mla)
 857DO_ZPZZZ(MLS, mls)
 858
 859#undef DO_ZPZZZ
 860
 861/*
 862 *** SVE Index Generation Group
 863 */
 864
 865static void do_index(DisasContext *s, int esz, int rd,
 866                     TCGv_i64 start, TCGv_i64 incr)
 867{
 868    unsigned vsz = vec_full_reg_size(s);
 869    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 870    TCGv_ptr t_zd = tcg_temp_new_ptr();
 871
 872    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
 873    if (esz == 3) {
 874        gen_helper_sve_index_d(t_zd, start, incr, desc);
 875    } else {
 876        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 877        static index_fn * const fns[3] = {
 878            gen_helper_sve_index_b,
 879            gen_helper_sve_index_h,
 880            gen_helper_sve_index_s,
 881        };
 882        TCGv_i32 s32 = tcg_temp_new_i32();
 883        TCGv_i32 i32 = tcg_temp_new_i32();
 884
 885        tcg_gen_extrl_i64_i32(s32, start);
 886        tcg_gen_extrl_i64_i32(i32, incr);
 887        fns[esz](t_zd, s32, i32, desc);
 888
 889        tcg_temp_free_i32(s32);
 890        tcg_temp_free_i32(i32);
 891    }
 892    tcg_temp_free_ptr(t_zd);
 893    tcg_temp_free_i32(desc);
 894}
 895
 896static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a)
 897{
 898    if (sve_access_check(s)) {
 899        TCGv_i64 start = tcg_const_i64(a->imm1);
 900        TCGv_i64 incr = tcg_const_i64(a->imm2);
 901        do_index(s, a->esz, a->rd, start, incr);
 902        tcg_temp_free_i64(start);
 903        tcg_temp_free_i64(incr);
 904    }
 905    return true;
 906}
 907
 908static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a)
 909{
 910    if (sve_access_check(s)) {
 911        TCGv_i64 start = tcg_const_i64(a->imm);
 912        TCGv_i64 incr = cpu_reg(s, a->rm);
 913        do_index(s, a->esz, a->rd, start, incr);
 914        tcg_temp_free_i64(start);
 915    }
 916    return true;
 917}
 918
 919static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a)
 920{
 921    if (sve_access_check(s)) {
 922        TCGv_i64 start = cpu_reg(s, a->rn);
 923        TCGv_i64 incr = tcg_const_i64(a->imm);
 924        do_index(s, a->esz, a->rd, start, incr);
 925        tcg_temp_free_i64(incr);
 926    }
 927    return true;
 928}
 929
 930static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a)
 931{
 932    if (sve_access_check(s)) {
 933        TCGv_i64 start = cpu_reg(s, a->rn);
 934        TCGv_i64 incr = cpu_reg(s, a->rm);
 935        do_index(s, a->esz, a->rd, start, incr);
 936    }
 937    return true;
 938}
 939
 940/*
 941 *** SVE Stack Allocation Group
 942 */
 943
 944static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
 945{
 946    if (sve_access_check(s)) {
 947        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 948        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 949        tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
 950    }
 951    return true;
 952}
 953
 954static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
 955{
 956    if (sve_access_check(s)) {
 957        TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 958        TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 959        tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
 960    }
 961    return true;
 962}
 963
 964static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
 965{
 966    if (sve_access_check(s)) {
 967        TCGv_i64 reg = cpu_reg(s, a->rd);
 968        tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
 969    }
 970    return true;
 971}
 972
 973/*
 974 *** SVE Compute Vector Address Group
 975 */
 976
 977static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
 978{
 979    if (sve_access_check(s)) {
 980        unsigned vsz = vec_full_reg_size(s);
 981        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 982                           vec_full_reg_offset(s, a->rn),
 983                           vec_full_reg_offset(s, a->rm),
 984                           vsz, vsz, a->imm, fn);
 985    }
 986    return true;
 987}
 988
 989static bool trans_ADR_p32(DisasContext *s, arg_rrri *a)
 990{
 991    return do_adr(s, a, gen_helper_sve_adr_p32);
 992}
 993
 994static bool trans_ADR_p64(DisasContext *s, arg_rrri *a)
 995{
 996    return do_adr(s, a, gen_helper_sve_adr_p64);
 997}
 998
 999static bool trans_ADR_s32(DisasContext *s, arg_rrri *a)
1000{
1001    return do_adr(s, a, gen_helper_sve_adr_s32);
1002}
1003
1004static bool trans_ADR_u32(DisasContext *s, arg_rrri *a)
1005{
1006    return do_adr(s, a, gen_helper_sve_adr_u32);
1007}
1008
1009/*
1010 *** SVE Integer Misc - Unpredicated Group
1011 */
1012
1013static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a)
1014{
1015    static gen_helper_gvec_2 * const fns[4] = {
1016        NULL,
1017        gen_helper_sve_fexpa_h,
1018        gen_helper_sve_fexpa_s,
1019        gen_helper_sve_fexpa_d,
1020    };
1021    if (a->esz == 0) {
1022        return false;
1023    }
1024    if (sve_access_check(s)) {
1025        unsigned vsz = vec_full_reg_size(s);
1026        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1027                           vec_full_reg_offset(s, a->rn),
1028                           vsz, vsz, 0, fns[a->esz]);
1029    }
1030    return true;
1031}
1032
1033static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a)
1034{
1035    static gen_helper_gvec_3 * const fns[4] = {
1036        NULL,
1037        gen_helper_sve_ftssel_h,
1038        gen_helper_sve_ftssel_s,
1039        gen_helper_sve_ftssel_d,
1040    };
1041    if (a->esz == 0) {
1042        return false;
1043    }
1044    if (sve_access_check(s)) {
1045        unsigned vsz = vec_full_reg_size(s);
1046        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1047                           vec_full_reg_offset(s, a->rn),
1048                           vec_full_reg_offset(s, a->rm),
1049                           vsz, vsz, 0, fns[a->esz]);
1050    }
1051    return true;
1052}
1053
1054/*
1055 *** SVE Predicate Logical Operations Group
1056 */
1057
1058static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1059                          const GVecGen4 *gvec_op)
1060{
1061    if (!sve_access_check(s)) {
1062        return true;
1063    }
1064
1065    unsigned psz = pred_gvec_reg_size(s);
1066    int dofs = pred_full_reg_offset(s, a->rd);
1067    int nofs = pred_full_reg_offset(s, a->rn);
1068    int mofs = pred_full_reg_offset(s, a->rm);
1069    int gofs = pred_full_reg_offset(s, a->pg);
1070
1071    if (psz == 8) {
1072        /* Do the operation and the flags generation in temps.  */
1073        TCGv_i64 pd = tcg_temp_new_i64();
1074        TCGv_i64 pn = tcg_temp_new_i64();
1075        TCGv_i64 pm = tcg_temp_new_i64();
1076        TCGv_i64 pg = tcg_temp_new_i64();
1077
1078        tcg_gen_ld_i64(pn, cpu_env, nofs);
1079        tcg_gen_ld_i64(pm, cpu_env, mofs);
1080        tcg_gen_ld_i64(pg, cpu_env, gofs);
1081
1082        gvec_op->fni8(pd, pn, pm, pg);
1083        tcg_gen_st_i64(pd, cpu_env, dofs);
1084
1085        do_predtest1(pd, pg);
1086
1087        tcg_temp_free_i64(pd);
1088        tcg_temp_free_i64(pn);
1089        tcg_temp_free_i64(pm);
1090        tcg_temp_free_i64(pg);
1091    } else {
1092        /* The operation and flags generation is large.  The computation
1093         * of the flags depends on the original contents of the guarding
1094         * predicate.  If the destination overwrites the guarding predicate,
1095         * then the easiest way to get this right is to save a copy.
1096          */
1097        int tofs = gofs;
1098        if (a->rd == a->pg) {
1099            tofs = offsetof(CPUARMState, vfp.preg_tmp);
1100            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1101        }
1102
1103        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1104        do_predtest(s, dofs, tofs, psz / 8);
1105    }
1106    return true;
1107}
1108
1109static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1110{
1111    tcg_gen_and_i64(pd, pn, pm);
1112    tcg_gen_and_i64(pd, pd, pg);
1113}
1114
1115static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1116                           TCGv_vec pm, TCGv_vec pg)
1117{
1118    tcg_gen_and_vec(vece, pd, pn, pm);
1119    tcg_gen_and_vec(vece, pd, pd, pg);
1120}
1121
1122static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a)
1123{
1124    static const GVecGen4 op = {
1125        .fni8 = gen_and_pg_i64,
1126        .fniv = gen_and_pg_vec,
1127        .fno = gen_helper_sve_and_pppp,
1128        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1129    };
1130    if (a->s) {
1131        return do_pppp_flags(s, a, &op);
1132    } else if (a->rn == a->rm) {
1133        if (a->pg == a->rn) {
1134            return do_mov_p(s, a->rd, a->rn);
1135        } else {
1136            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1137        }
1138    } else if (a->pg == a->rn || a->pg == a->rm) {
1139        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1140    } else {
1141        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1142    }
1143}
1144
1145static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1146{
1147    tcg_gen_andc_i64(pd, pn, pm);
1148    tcg_gen_and_i64(pd, pd, pg);
1149}
1150
1151static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1152                           TCGv_vec pm, TCGv_vec pg)
1153{
1154    tcg_gen_andc_vec(vece, pd, pn, pm);
1155    tcg_gen_and_vec(vece, pd, pd, pg);
1156}
1157
1158static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a)
1159{
1160    static const GVecGen4 op = {
1161        .fni8 = gen_bic_pg_i64,
1162        .fniv = gen_bic_pg_vec,
1163        .fno = gen_helper_sve_bic_pppp,
1164        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1165    };
1166    if (a->s) {
1167        return do_pppp_flags(s, a, &op);
1168    } else if (a->pg == a->rn) {
1169        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1170    } else {
1171        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1172    }
1173}
1174
1175static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1176{
1177    tcg_gen_xor_i64(pd, pn, pm);
1178    tcg_gen_and_i64(pd, pd, pg);
1179}
1180
1181static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1182                           TCGv_vec pm, TCGv_vec pg)
1183{
1184    tcg_gen_xor_vec(vece, pd, pn, pm);
1185    tcg_gen_and_vec(vece, pd, pd, pg);
1186}
1187
1188static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a)
1189{
1190    static const GVecGen4 op = {
1191        .fni8 = gen_eor_pg_i64,
1192        .fniv = gen_eor_pg_vec,
1193        .fno = gen_helper_sve_eor_pppp,
1194        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1195    };
1196    if (a->s) {
1197        return do_pppp_flags(s, a, &op);
1198    } else {
1199        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1200    }
1201}
1202
1203static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1204{
1205    tcg_gen_and_i64(pn, pn, pg);
1206    tcg_gen_andc_i64(pm, pm, pg);
1207    tcg_gen_or_i64(pd, pn, pm);
1208}
1209
1210static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1211                           TCGv_vec pm, TCGv_vec pg)
1212{
1213    tcg_gen_and_vec(vece, pn, pn, pg);
1214    tcg_gen_andc_vec(vece, pm, pm, pg);
1215    tcg_gen_or_vec(vece, pd, pn, pm);
1216}
1217
1218static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a)
1219{
1220    static const GVecGen4 op = {
1221        .fni8 = gen_sel_pg_i64,
1222        .fniv = gen_sel_pg_vec,
1223        .fno = gen_helper_sve_sel_pppp,
1224        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1225    };
1226    if (a->s) {
1227        return false;
1228    } else {
1229        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1230    }
1231}
1232
1233static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1234{
1235    tcg_gen_or_i64(pd, pn, pm);
1236    tcg_gen_and_i64(pd, pd, pg);
1237}
1238
1239static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1240                           TCGv_vec pm, TCGv_vec pg)
1241{
1242    tcg_gen_or_vec(vece, pd, pn, pm);
1243    tcg_gen_and_vec(vece, pd, pd, pg);
1244}
1245
1246static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a)
1247{
1248    static const GVecGen4 op = {
1249        .fni8 = gen_orr_pg_i64,
1250        .fniv = gen_orr_pg_vec,
1251        .fno = gen_helper_sve_orr_pppp,
1252        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1253    };
1254    if (a->s) {
1255        return do_pppp_flags(s, a, &op);
1256    } else if (a->pg == a->rn && a->rn == a->rm) {
1257        return do_mov_p(s, a->rd, a->rn);
1258    } else {
1259        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1260    }
1261}
1262
1263static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1264{
1265    tcg_gen_orc_i64(pd, pn, pm);
1266    tcg_gen_and_i64(pd, pd, pg);
1267}
1268
1269static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1270                           TCGv_vec pm, TCGv_vec pg)
1271{
1272    tcg_gen_orc_vec(vece, pd, pn, pm);
1273    tcg_gen_and_vec(vece, pd, pd, pg);
1274}
1275
1276static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a)
1277{
1278    static const GVecGen4 op = {
1279        .fni8 = gen_orn_pg_i64,
1280        .fniv = gen_orn_pg_vec,
1281        .fno = gen_helper_sve_orn_pppp,
1282        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1283    };
1284    if (a->s) {
1285        return do_pppp_flags(s, a, &op);
1286    } else {
1287        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1288    }
1289}
1290
1291static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1292{
1293    tcg_gen_or_i64(pd, pn, pm);
1294    tcg_gen_andc_i64(pd, pg, pd);
1295}
1296
1297static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1298                           TCGv_vec pm, TCGv_vec pg)
1299{
1300    tcg_gen_or_vec(vece, pd, pn, pm);
1301    tcg_gen_andc_vec(vece, pd, pg, pd);
1302}
1303
1304static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a)
1305{
1306    static const GVecGen4 op = {
1307        .fni8 = gen_nor_pg_i64,
1308        .fniv = gen_nor_pg_vec,
1309        .fno = gen_helper_sve_nor_pppp,
1310        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1311    };
1312    if (a->s) {
1313        return do_pppp_flags(s, a, &op);
1314    } else {
1315        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1316    }
1317}
1318
1319static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1320{
1321    tcg_gen_and_i64(pd, pn, pm);
1322    tcg_gen_andc_i64(pd, pg, pd);
1323}
1324
1325static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1326                           TCGv_vec pm, TCGv_vec pg)
1327{
1328    tcg_gen_and_vec(vece, pd, pn, pm);
1329    tcg_gen_andc_vec(vece, pd, pg, pd);
1330}
1331
1332static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a)
1333{
1334    static const GVecGen4 op = {
1335        .fni8 = gen_nand_pg_i64,
1336        .fniv = gen_nand_pg_vec,
1337        .fno = gen_helper_sve_nand_pppp,
1338        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1339    };
1340    if (a->s) {
1341        return do_pppp_flags(s, a, &op);
1342    } else {
1343        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1344    }
1345}
1346
1347/*
1348 *** SVE Predicate Misc Group
1349 */
1350
1351static bool trans_PTEST(DisasContext *s, arg_PTEST *a)
1352{
1353    if (sve_access_check(s)) {
1354        int nofs = pred_full_reg_offset(s, a->rn);
1355        int gofs = pred_full_reg_offset(s, a->pg);
1356        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1357
1358        if (words == 1) {
1359            TCGv_i64 pn = tcg_temp_new_i64();
1360            TCGv_i64 pg = tcg_temp_new_i64();
1361
1362            tcg_gen_ld_i64(pn, cpu_env, nofs);
1363            tcg_gen_ld_i64(pg, cpu_env, gofs);
1364            do_predtest1(pn, pg);
1365
1366            tcg_temp_free_i64(pn);
1367            tcg_temp_free_i64(pg);
1368        } else {
1369            do_predtest(s, nofs, gofs, words);
1370        }
1371    }
1372    return true;
1373}
1374
/*
 * Translate a predicate-constraint PATTERN into an element count for a
 * vector of FULLSZ bytes with elements of size 1 << ESZ bytes.
 * See the ARM pseudocode DecodePredCount.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned nelem = fullsz >> esz;
    unsigned limit;

    if (pattern == 0x0) {
        /* POW2 */
        return pow2floor(nelem);
    } else if (pattern >= 0x1 && pattern <= 0x8) {
        /* VL1 .. VL8: the pattern value is the requested count. */
        limit = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16, VL32, VL64, VL128, VL256 */
        limit = 16 << (pattern - 9);
    } else if (pattern == 0x1d) {
        /* MUL4 */
        return nelem - nelem % 4;
    } else if (pattern == 0x1e) {
        /* MUL3 */
        return nelem - nelem % 3;
    } else if (pattern == 0x1f) {
        /* ALL */
        return nelem;
    } else {
        /* #uimm5 */
        return 0;
    }

    /* A fixed count applies only if the vector holds that many elements. */
    return nelem >= limit ? limit : 0;
}
1412
1413/* This handles all of the predicate initialization instructions,
1414 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
1415 * so that decode_pred_count returns 0.  For SETFFR, we will have
1416 * set RD == 16 == FFR.
1417 */
1418static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1419{
1420    if (!sve_access_check(s)) {
1421        return true;
1422    }
1423
1424    unsigned fullsz = vec_full_reg_size(s);
1425    unsigned ofs = pred_full_reg_offset(s, rd);
1426    unsigned numelem, setsz, i;
1427    uint64_t word, lastword;
1428    TCGv_i64 t;
1429
1430    numelem = decode_pred_count(fullsz, pat, esz);
1431
1432    /* Determine what we must store into each bit, and how many.  */
1433    if (numelem == 0) {
1434        lastword = word = 0;
1435        setsz = fullsz;
1436    } else {
1437        setsz = numelem << esz;
1438        lastword = word = pred_esz_masks[esz];
1439        if (setsz % 64) {
1440            lastword &= MAKE_64BIT_MASK(0, setsz % 64);
1441        }
1442    }
1443
1444    t = tcg_temp_new_i64();
1445    if (fullsz <= 64) {
1446        tcg_gen_movi_i64(t, lastword);
1447        tcg_gen_st_i64(t, cpu_env, ofs);
1448        goto done;
1449    }
1450
1451    if (word == lastword) {
1452        unsigned maxsz = size_for_gvec(fullsz / 8);
1453        unsigned oprsz = size_for_gvec(setsz / 8);
1454
1455        if (oprsz * 8 == setsz) {
1456            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1457            goto done;
1458        }
1459    }
1460
1461    setsz /= 8;
1462    fullsz /= 8;
1463
1464    tcg_gen_movi_i64(t, word);
1465    for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
1466        tcg_gen_st_i64(t, cpu_env, ofs + i);
1467    }
1468    if (lastword != word) {
1469        tcg_gen_movi_i64(t, lastword);
1470        tcg_gen_st_i64(t, cpu_env, ofs + i);
1471        i += 8;
1472    }
1473    if (i < fullsz) {
1474        tcg_gen_movi_i64(t, 0);
1475        for (; i < fullsz; i += 8) {
1476            tcg_gen_st_i64(t, cpu_env, ofs + i);
1477        }
1478    }
1479
1480 done:
1481    tcg_temp_free_i64(t);
1482
1483    /* PTRUES */
1484    if (setflag) {
1485        tcg_gen_movi_i32(cpu_NF, -(word != 0));
1486        tcg_gen_movi_i32(cpu_CF, word == 0);
1487        tcg_gen_movi_i32(cpu_VF, 0);
1488        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1489    }
1490    return true;
1491}
1492
1493static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a)
1494{
1495    return do_predset(s, a->esz, a->rd, a->pat, a->s);
1496}
1497
1498static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a)
1499{
1500    /* Note pat == 31 is #all, to set all elements.  */
1501    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1502}
1503
1504static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a)
1505{
1506    /* Note pat == 32 is #unimp, to set no elements.  */
1507    return do_predset(s, 0, a->rd, 32, false);
1508}
1509
1510static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
1511{
1512    /* The path through do_pppp_flags is complicated enough to want to avoid
1513     * duplication.  Frob the arguments into the form of a predicated AND.
1514     */
1515    arg_rprr_s alt_a = {
1516        .rd = a->rd, .pg = a->pg, .s = a->s,
1517        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1518    };
1519    return trans_AND_pppp(s, &alt_a);
1520}
1521
1522static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a)
1523{
1524    return do_mov_p(s, a->rd, FFR_PRED_NUM);
1525}
1526
1527static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a)
1528{
1529    return do_mov_p(s, FFR_PRED_NUM, a->rn);
1530}
1531
1532static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1533                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
1534                                           TCGv_ptr, TCGv_i32))
1535{
1536    if (!sve_access_check(s)) {
1537        return true;
1538    }
1539
1540    TCGv_ptr t_pd = tcg_temp_new_ptr();
1541    TCGv_ptr t_pg = tcg_temp_new_ptr();
1542    TCGv_i32 t;
1543    unsigned desc;
1544
1545    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1546    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1547
1548    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1549    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1550    t = tcg_const_i32(desc);
1551
1552    gen_fn(t, t_pd, t_pg, t);
1553    tcg_temp_free_ptr(t_pd);
1554    tcg_temp_free_ptr(t_pg);
1555
1556    do_pred_flags(t);
1557    tcg_temp_free_i32(t);
1558    return true;
1559}
1560
1561static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a)
1562{
1563    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1564}
1565
1566static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a)
1567{
1568    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1569}
1570
1571/*
1572 *** SVE Element Count Group
1573 */
1574
1575/* Perform an inline saturating addition of a 32-bit value within
1576 * a 64-bit register.  The second operand is known to be positive,
1577 * which halves the comparisions we must perform to bound the result.
1578 */
1579static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1580{
1581    int64_t ibound;
1582    TCGv_i64 bound;
1583    TCGCond cond;
1584
1585    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1586    if (u) {
1587        tcg_gen_ext32u_i64(reg, reg);
1588    } else {
1589        tcg_gen_ext32s_i64(reg, reg);
1590    }
1591    if (d) {
1592        tcg_gen_sub_i64(reg, reg, val);
1593        ibound = (u ? 0 : INT32_MIN);
1594        cond = TCG_COND_LT;
1595    } else {
1596        tcg_gen_add_i64(reg, reg, val);
1597        ibound = (u ? UINT32_MAX : INT32_MAX);
1598        cond = TCG_COND_GT;
1599    }
1600    bound = tcg_const_i64(ibound);
1601    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1602    tcg_temp_free_i64(bound);
1603}
1604
1605/* Similarly with 64-bit values.  */
1606static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1607{
1608    TCGv_i64 t0 = tcg_temp_new_i64();
1609    TCGv_i64 t1 = tcg_temp_new_i64();
1610    TCGv_i64 t2;
1611
1612    if (u) {
1613        if (d) {
1614            tcg_gen_sub_i64(t0, reg, val);
1615            tcg_gen_movi_i64(t1, 0);
1616            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1617        } else {
1618            tcg_gen_add_i64(t0, reg, val);
1619            tcg_gen_movi_i64(t1, -1);
1620            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1621        }
1622    } else {
1623        if (d) {
1624            /* Detect signed overflow for subtraction.  */
1625            tcg_gen_xor_i64(t0, reg, val);
1626            tcg_gen_sub_i64(t1, reg, val);
1627            tcg_gen_xor_i64(reg, reg, t1);
1628            tcg_gen_and_i64(t0, t0, reg);
1629
1630            /* Bound the result.  */
1631            tcg_gen_movi_i64(reg, INT64_MIN);
1632            t2 = tcg_const_i64(0);
1633            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1634        } else {
1635            /* Detect signed overflow for addition.  */
1636            tcg_gen_xor_i64(t0, reg, val);
1637            tcg_gen_add_i64(reg, reg, val);
1638            tcg_gen_xor_i64(t1, reg, val);
1639            tcg_gen_andc_i64(t0, t1, t0);
1640
1641            /* Bound the result.  */
1642            tcg_gen_movi_i64(t1, INT64_MAX);
1643            t2 = tcg_const_i64(0);
1644            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1645        }
1646        tcg_temp_free_i64(t2);
1647    }
1648    tcg_temp_free_i64(t0);
1649    tcg_temp_free_i64(t1);
1650}
1651
1652/* Similarly with a vector and a scalar operand.  */
1653static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1654                              TCGv_i64 val, bool u, bool d)
1655{
1656    unsigned vsz = vec_full_reg_size(s);
1657    TCGv_ptr dptr, nptr;
1658    TCGv_i32 t32, desc;
1659    TCGv_i64 t64;
1660
1661    dptr = tcg_temp_new_ptr();
1662    nptr = tcg_temp_new_ptr();
1663    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1664    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1665    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1666
1667    switch (esz) {
1668    case MO_8:
1669        t32 = tcg_temp_new_i32();
1670        tcg_gen_extrl_i64_i32(t32, val);
1671        if (d) {
1672            tcg_gen_neg_i32(t32, t32);
1673        }
1674        if (u) {
1675            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1676        } else {
1677            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1678        }
1679        tcg_temp_free_i32(t32);
1680        break;
1681
1682    case MO_16:
1683        t32 = tcg_temp_new_i32();
1684        tcg_gen_extrl_i64_i32(t32, val);
1685        if (d) {
1686            tcg_gen_neg_i32(t32, t32);
1687        }
1688        if (u) {
1689            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1690        } else {
1691            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1692        }
1693        tcg_temp_free_i32(t32);
1694        break;
1695
1696    case MO_32:
1697        t64 = tcg_temp_new_i64();
1698        if (d) {
1699            tcg_gen_neg_i64(t64, val);
1700        } else {
1701            tcg_gen_mov_i64(t64, val);
1702        }
1703        if (u) {
1704            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1705        } else {
1706            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1707        }
1708        tcg_temp_free_i64(t64);
1709        break;
1710
1711    case MO_64:
1712        if (u) {
1713            if (d) {
1714                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1715            } else {
1716                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1717            }
1718        } else if (d) {
1719            t64 = tcg_temp_new_i64();
1720            tcg_gen_neg_i64(t64, val);
1721            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1722            tcg_temp_free_i64(t64);
1723        } else {
1724            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1725        }
1726        break;
1727
1728    default:
1729        g_assert_not_reached();
1730    }
1731
1732    tcg_temp_free_ptr(dptr);
1733    tcg_temp_free_ptr(nptr);
1734    tcg_temp_free_i32(desc);
1735}
1736
1737static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a)
1738{
1739    if (sve_access_check(s)) {
1740        unsigned fullsz = vec_full_reg_size(s);
1741        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1742        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1743    }
1744    return true;
1745}
1746
1747static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a)
1748{
1749    if (sve_access_check(s)) {
1750        unsigned fullsz = vec_full_reg_size(s);
1751        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1752        int inc = numelem * a->imm * (a->d ? -1 : 1);
1753        TCGv_i64 reg = cpu_reg(s, a->rd);
1754
1755        tcg_gen_addi_i64(reg, reg, inc);
1756    }
1757    return true;
1758}
1759
1760static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a)
1761{
1762    if (!sve_access_check(s)) {
1763        return true;
1764    }
1765
1766    unsigned fullsz = vec_full_reg_size(s);
1767    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1768    int inc = numelem * a->imm;
1769    TCGv_i64 reg = cpu_reg(s, a->rd);
1770
1771    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1772    if (inc == 0) {
1773        if (a->u) {
1774            tcg_gen_ext32u_i64(reg, reg);
1775        } else {
1776            tcg_gen_ext32s_i64(reg, reg);
1777        }
1778    } else {
1779        TCGv_i64 t = tcg_const_i64(inc);
1780        do_sat_addsub_32(reg, t, a->u, a->d);
1781        tcg_temp_free_i64(t);
1782    }
1783    return true;
1784}
1785
1786static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a)
1787{
1788    if (!sve_access_check(s)) {
1789        return true;
1790    }
1791
1792    unsigned fullsz = vec_full_reg_size(s);
1793    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1794    int inc = numelem * a->imm;
1795    TCGv_i64 reg = cpu_reg(s, a->rd);
1796
1797    if (inc != 0) {
1798        TCGv_i64 t = tcg_const_i64(inc);
1799        do_sat_addsub_64(reg, t, a->u, a->d);
1800        tcg_temp_free_i64(t);
1801    }
1802    return true;
1803}
1804
1805static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1806{
1807    if (a->esz == 0) {
1808        return false;
1809    }
1810
1811    unsigned fullsz = vec_full_reg_size(s);
1812    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1813    int inc = numelem * a->imm;
1814
1815    if (inc != 0) {
1816        if (sve_access_check(s)) {
1817            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1818            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1819                              vec_full_reg_offset(s, a->rn),
1820                              t, fullsz, fullsz);
1821            tcg_temp_free_i64(t);
1822        }
1823    } else {
1824        do_mov_z(s, a->rd, a->rn);
1825    }
1826    return true;
1827}
1828
1829static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a)
1830{
1831    if (a->esz == 0) {
1832        return false;
1833    }
1834
1835    unsigned fullsz = vec_full_reg_size(s);
1836    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1837    int inc = numelem * a->imm;
1838
1839    if (inc != 0) {
1840        if (sve_access_check(s)) {
1841            TCGv_i64 t = tcg_const_i64(inc);
1842            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1843            tcg_temp_free_i64(t);
1844        }
1845    } else {
1846        do_mov_z(s, a->rd, a->rn);
1847    }
1848    return true;
1849}
1850
1851/*
1852 *** SVE Bitwise Immediate Group
1853 */
1854
1855static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1856{
1857    uint64_t imm;
1858    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1859                                extract32(a->dbm, 0, 6),
1860                                extract32(a->dbm, 6, 6))) {
1861        return false;
1862    }
1863    if (sve_access_check(s)) {
1864        unsigned vsz = vec_full_reg_size(s);
1865        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1866                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1867    }
1868    return true;
1869}
1870
1871static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a)
1872{
1873    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1874}
1875
1876static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a)
1877{
1878    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1879}
1880
1881static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a)
1882{
1883    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1884}
1885
1886static bool trans_DUPM(DisasContext *s, arg_DUPM *a)
1887{
1888    uint64_t imm;
1889    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1890                                extract32(a->dbm, 0, 6),
1891                                extract32(a->dbm, 6, 6))) {
1892        return false;
1893    }
1894    if (sve_access_check(s)) {
1895        do_dupi_z(s, a->rd, imm);
1896    }
1897    return true;
1898}
1899
1900/*
1901 *** SVE Integer Wide Immediate - Predicated Group
1902 */
1903
1904/* Implement all merging copies.  This is used for CPY (immediate),
1905 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1906 */
1907static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1908                     TCGv_i64 val)
1909{
1910    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1911    static gen_cpy * const fns[4] = {
1912        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1913        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1914    };
1915    unsigned vsz = vec_full_reg_size(s);
1916    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1917    TCGv_ptr t_zd = tcg_temp_new_ptr();
1918    TCGv_ptr t_zn = tcg_temp_new_ptr();
1919    TCGv_ptr t_pg = tcg_temp_new_ptr();
1920
1921    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1922    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1923    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1924
1925    fns[esz](t_zd, t_zn, t_pg, val, desc);
1926
1927    tcg_temp_free_ptr(t_zd);
1928    tcg_temp_free_ptr(t_zn);
1929    tcg_temp_free_ptr(t_pg);
1930    tcg_temp_free_i32(desc);
1931}
1932
1933static bool trans_FCPY(DisasContext *s, arg_FCPY *a)
1934{
1935    if (a->esz == 0) {
1936        return false;
1937    }
1938    if (sve_access_check(s)) {
1939        /* Decode the VFP immediate.  */
1940        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1941        TCGv_i64 t_imm = tcg_const_i64(imm);
1942        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1943        tcg_temp_free_i64(t_imm);
1944    }
1945    return true;
1946}
1947
1948static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a)
1949{
1950    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1951        return false;
1952    }
1953    if (sve_access_check(s)) {
1954        TCGv_i64 t_imm = tcg_const_i64(a->imm);
1955        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1956        tcg_temp_free_i64(t_imm);
1957    }
1958    return true;
1959}
1960
1961static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a)
1962{
1963    static gen_helper_gvec_2i * const fns[4] = {
1964        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1965        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1966    };
1967
1968    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
1969        return false;
1970    }
1971    if (sve_access_check(s)) {
1972        unsigned vsz = vec_full_reg_size(s);
1973        TCGv_i64 t_imm = tcg_const_i64(a->imm);
1974        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1975                            pred_full_reg_offset(s, a->pg),
1976                            t_imm, vsz, vsz, 0, fns[a->esz]);
1977        tcg_temp_free_i64(t_imm);
1978    }
1979    return true;
1980}
1981
1982/*
1983 *** SVE Permute Extract Group
1984 */
1985
1986static bool trans_EXT(DisasContext *s, arg_EXT *a)
1987{
1988    if (!sve_access_check(s)) {
1989        return true;
1990    }
1991
1992    unsigned vsz = vec_full_reg_size(s);
1993    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1994    unsigned n_siz = vsz - n_ofs;
1995    unsigned d = vec_full_reg_offset(s, a->rd);
1996    unsigned n = vec_full_reg_offset(s, a->rn);
1997    unsigned m = vec_full_reg_offset(s, a->rm);
1998
1999    /* Use host vector move insns if we have appropriate sizes
2000     * and no unfortunate overlap.
2001     */
2002    if (m != d
2003        && n_ofs == size_for_gvec(n_ofs)
2004        && n_siz == size_for_gvec(n_siz)
2005        && (d != n || n_siz <= n_ofs)) {
2006        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2007        if (n_ofs != 0) {
2008            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2009        }
2010    } else {
2011        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2012    }
2013    return true;
2014}
2015
2016/*
2017 *** SVE Permute - Unpredicated Group
2018 */
2019
2020static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a)
2021{
2022    if (sve_access_check(s)) {
2023        unsigned vsz = vec_full_reg_size(s);
2024        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2025                             vsz, vsz, cpu_reg_sp(s, a->rn));
2026    }
2027    return true;
2028}
2029
2030static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
2031{
2032    if ((a->imm & 0x1f) == 0) {
2033        return false;
2034    }
2035    if (sve_access_check(s)) {
2036        unsigned vsz = vec_full_reg_size(s);
2037        unsigned dofs = vec_full_reg_offset(s, a->rd);
2038        unsigned esz, index;
2039
2040        esz = ctz32(a->imm);
2041        index = a->imm >> (esz + 1);
2042
2043        if ((index << esz) < vsz) {
2044            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2045            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2046        } else {
2047            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2048        }
2049    }
2050    return true;
2051}
2052
2053static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2054{
2055    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2056    static gen_insr * const fns[4] = {
2057        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2058        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2059    };
2060    unsigned vsz = vec_full_reg_size(s);
2061    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2062    TCGv_ptr t_zd = tcg_temp_new_ptr();
2063    TCGv_ptr t_zn = tcg_temp_new_ptr();
2064
2065    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2066    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2067
2068    fns[a->esz](t_zd, t_zn, val, desc);
2069
2070    tcg_temp_free_ptr(t_zd);
2071    tcg_temp_free_ptr(t_zn);
2072    tcg_temp_free_i32(desc);
2073}
2074
2075static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a)
2076{
2077    if (sve_access_check(s)) {
2078        TCGv_i64 t = tcg_temp_new_i64();
2079        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2080        do_insr_i64(s, a, t);
2081        tcg_temp_free_i64(t);
2082    }
2083    return true;
2084}
2085
2086static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a)
2087{
2088    if (sve_access_check(s)) {
2089        do_insr_i64(s, a, cpu_reg(s, a->rm));
2090    }
2091    return true;
2092}
2093
2094static bool trans_REV_v(DisasContext *s, arg_rr_esz *a)
2095{
2096    static gen_helper_gvec_2 * const fns[4] = {
2097        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2098        gen_helper_sve_rev_s, gen_helper_sve_rev_d
2099    };
2100
2101    if (sve_access_check(s)) {
2102        unsigned vsz = vec_full_reg_size(s);
2103        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2104                           vec_full_reg_offset(s, a->rn),
2105                           vsz, vsz, 0, fns[a->esz]);
2106    }
2107    return true;
2108}
2109
2110static bool trans_TBL(DisasContext *s, arg_rrr_esz *a)
2111{
2112    static gen_helper_gvec_3 * const fns[4] = {
2113        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2114        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2115    };
2116
2117    if (sve_access_check(s)) {
2118        unsigned vsz = vec_full_reg_size(s);
2119        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2120                           vec_full_reg_offset(s, a->rn),
2121                           vec_full_reg_offset(s, a->rm),
2122                           vsz, vsz, 0, fns[a->esz]);
2123    }
2124    return true;
2125}
2126
2127static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
2128{
2129    static gen_helper_gvec_2 * const fns[4][2] = {
2130        { NULL, NULL },
2131        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2132        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2133        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2134    };
2135
2136    if (a->esz == 0) {
2137        return false;
2138    }
2139    if (sve_access_check(s)) {
2140        unsigned vsz = vec_full_reg_size(s);
2141        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2142                           vec_full_reg_offset(s, a->rn)
2143                           + (a->h ? vsz / 2 : 0),
2144                           vsz, vsz, 0, fns[a->esz][a->u]);
2145    }
2146    return true;
2147}
2148
2149/*
2150 *** SVE Permute - Predicates Group
2151 */
2152
2153static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2154                          gen_helper_gvec_3 *fn)
2155{
2156    if (!sve_access_check(s)) {
2157        return true;
2158    }
2159
2160    unsigned vsz = pred_full_reg_size(s);
2161
2162    /* Predicate sizes may be smaller and cannot use simd_desc.
2163       We cannot round up, as we do elsewhere, because we need
2164       the exact size for ZIP2 and REV.  We retain the style for
2165       the other helpers for consistency.  */
2166    TCGv_ptr t_d = tcg_temp_new_ptr();
2167    TCGv_ptr t_n = tcg_temp_new_ptr();
2168    TCGv_ptr t_m = tcg_temp_new_ptr();
2169    TCGv_i32 t_desc;
2170    int desc;
2171
2172    desc = vsz - 2;
2173    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2174    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2175
2176    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2177    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2178    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2179    t_desc = tcg_const_i32(desc);
2180
2181    fn(t_d, t_n, t_m, t_desc);
2182
2183    tcg_temp_free_ptr(t_d);
2184    tcg_temp_free_ptr(t_n);
2185    tcg_temp_free_ptr(t_m);
2186    tcg_temp_free_i32(t_desc);
2187    return true;
2188}
2189
2190static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2191                          gen_helper_gvec_2 *fn)
2192{
2193    if (!sve_access_check(s)) {
2194        return true;
2195    }
2196
2197    unsigned vsz = pred_full_reg_size(s);
2198    TCGv_ptr t_d = tcg_temp_new_ptr();
2199    TCGv_ptr t_n = tcg_temp_new_ptr();
2200    TCGv_i32 t_desc;
2201    int desc;
2202
2203    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2204    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2205
2206    /* Predicate sizes may be smaller and cannot use simd_desc.
2207       We cannot round up, as we do elsewhere, because we need
2208       the exact size for ZIP2 and REV.  We retain the style for
2209       the other helpers for consistency.  */
2210
2211    desc = vsz - 2;
2212    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2213    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2214    t_desc = tcg_const_i32(desc);
2215
2216    fn(t_d, t_n, t_desc);
2217
2218    tcg_temp_free_i32(t_desc);
2219    tcg_temp_free_ptr(t_d);
2220    tcg_temp_free_ptr(t_n);
2221    return true;
2222}
2223
2224static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a)
2225{
2226    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2227}
2228
2229static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a)
2230{
2231    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2232}
2233
2234static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a)
2235{
2236    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2237}
2238
2239static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a)
2240{
2241    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2242}
2243
2244static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a)
2245{
2246    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2247}
2248
2249static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a)
2250{
2251    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2252}
2253
2254static bool trans_REV_p(DisasContext *s, arg_rr_esz *a)
2255{
2256    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2257}
2258
2259static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a)
2260{
2261    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2262}
2263
2264static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a)
2265{
2266    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2267}
2268
2269/*
2270 *** SVE Permute - Interleaving Group
2271 */
2272
2273static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2274{
2275    static gen_helper_gvec_3 * const fns[4] = {
2276        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2277        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2278    };
2279
2280    if (sve_access_check(s)) {
2281        unsigned vsz = vec_full_reg_size(s);
2282        unsigned high_ofs = high ? vsz / 2 : 0;
2283        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2284                           vec_full_reg_offset(s, a->rn) + high_ofs,
2285                           vec_full_reg_offset(s, a->rm) + high_ofs,
2286                           vsz, vsz, 0, fns[a->esz]);
2287    }
2288    return true;
2289}
2290
2291static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2292                            gen_helper_gvec_3 *fn)
2293{
2294    if (sve_access_check(s)) {
2295        unsigned vsz = vec_full_reg_size(s);
2296        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2297                           vec_full_reg_offset(s, a->rn),
2298                           vec_full_reg_offset(s, a->rm),
2299                           vsz, vsz, data, fn);
2300    }
2301    return true;
2302}
2303
2304static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a)
2305{
2306    return do_zip(s, a, false);
2307}
2308
2309static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a)
2310{
2311    return do_zip(s, a, true);
2312}
2313
2314static gen_helper_gvec_3 * const uzp_fns[4] = {
2315    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2316    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2317};
2318
2319static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a)
2320{
2321    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2322}
2323
2324static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a)
2325{
2326    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2327}
2328
2329static gen_helper_gvec_3 * const trn_fns[4] = {
2330    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2331    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2332};
2333
2334static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a)
2335{
2336    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2337}
2338
2339static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a)
2340{
2341    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2342}
2343
2344/*
2345 *** SVE Permute Vector - Predicated Group
2346 */
2347
2348static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a)
2349{
2350    static gen_helper_gvec_3 * const fns[4] = {
2351        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2352    };
2353    return do_zpz_ool(s, a, fns[a->esz]);
2354}
2355
2356/* Call the helper that computes the ARM LastActiveElement pseudocode
2357 * function, scaled by the element size.  This includes the not found
2358 * indication; e.g. not found for esz=3 is -8.
2359 */
2360static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2361{
2362    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
2363     * round up, as we do elsewhere, because we need the exact size.
2364     */
2365    TCGv_ptr t_p = tcg_temp_new_ptr();
2366    TCGv_i32 t_desc;
2367    unsigned vsz = pred_full_reg_size(s);
2368    unsigned desc;
2369
2370    desc = vsz - 2;
2371    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2372
2373    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2374    t_desc = tcg_const_i32(desc);
2375
2376    gen_helper_sve_last_active_element(ret, t_p, t_desc);
2377
2378    tcg_temp_free_i32(t_desc);
2379    tcg_temp_free_ptr(t_p);
2380}
2381
2382/* Increment LAST to the offset of the next element in the vector,
2383 * wrapping around to 0.
2384 */
2385static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2386{
2387    unsigned vsz = vec_full_reg_size(s);
2388
2389    tcg_gen_addi_i32(last, last, 1 << esz);
2390    if (is_power_of_2(vsz)) {
2391        tcg_gen_andi_i32(last, last, vsz - 1);
2392    } else {
2393        TCGv_i32 max = tcg_const_i32(vsz);
2394        TCGv_i32 zero = tcg_const_i32(0);
2395        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2396        tcg_temp_free_i32(max);
2397        tcg_temp_free_i32(zero);
2398    }
2399}
2400
2401/* If LAST < 0, set LAST to the offset of the last element in the vector.  */
2402static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2403{
2404    unsigned vsz = vec_full_reg_size(s);
2405
2406    if (is_power_of_2(vsz)) {
2407        tcg_gen_andi_i32(last, last, vsz - 1);
2408    } else {
2409        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2410        TCGv_i32 zero = tcg_const_i32(0);
2411        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2412        tcg_temp_free_i32(max);
2413        tcg_temp_free_i32(zero);
2414    }
2415}
2416
2417/* Load an unsigned element of ESZ from BASE+OFS.  */
2418static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2419{
2420    TCGv_i64 r = tcg_temp_new_i64();
2421
2422    switch (esz) {
2423    case 0:
2424        tcg_gen_ld8u_i64(r, base, ofs);
2425        break;
2426    case 1:
2427        tcg_gen_ld16u_i64(r, base, ofs);
2428        break;
2429    case 2:
2430        tcg_gen_ld32u_i64(r, base, ofs);
2431        break;
2432    case 3:
2433        tcg_gen_ld_i64(r, base, ofs);
2434        break;
2435    default:
2436        g_assert_not_reached();
2437    }
2438    return r;
2439}
2440
2441/* Load an unsigned element of ESZ from RM[LAST].  */
2442static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2443                                 int rm, int esz)
2444{
2445    TCGv_ptr p = tcg_temp_new_ptr();
2446    TCGv_i64 r;
2447
2448    /* Convert offset into vector into offset into ENV.
2449     * The final adjustment for the vector register base
2450     * is added via constant offset to the load.
2451     */
2452#ifdef HOST_WORDS_BIGENDIAN
2453    /* Adjust for element ordering.  See vec_reg_offset.  */
2454    if (esz < 3) {
2455        tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2456    }
2457#endif
2458    tcg_gen_ext_i32_ptr(p, last);
2459    tcg_gen_add_ptr(p, p, cpu_env);
2460
2461    r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2462    tcg_temp_free_ptr(p);
2463
2464    return r;
2465}
2466
2467/* Compute CLAST for a Zreg.  */
2468static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2469{
2470    TCGv_i32 last;
2471    TCGLabel *over;
2472    TCGv_i64 ele;
2473    unsigned vsz, esz = a->esz;
2474
2475    if (!sve_access_check(s)) {
2476        return true;
2477    }
2478
2479    last = tcg_temp_local_new_i32();
2480    over = gen_new_label();
2481
2482    find_last_active(s, last, esz, a->pg);
2483
2484    /* There is of course no movcond for a 2048-bit vector,
2485     * so we must branch over the actual store.
2486     */
2487    tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2488
2489    if (!before) {
2490        incr_last_active(s, last, esz);
2491    }
2492
2493    ele = load_last_active(s, last, a->rm, esz);
2494    tcg_temp_free_i32(last);
2495
2496    vsz = vec_full_reg_size(s);
2497    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2498    tcg_temp_free_i64(ele);
2499
2500    /* If this insn used MOVPRFX, we may need a second move.  */
2501    if (a->rd != a->rn) {
2502        TCGLabel *done = gen_new_label();
2503        tcg_gen_br(done);
2504
2505        gen_set_label(over);
2506        do_mov_z(s, a->rd, a->rn);
2507
2508        gen_set_label(done);
2509    } else {
2510        gen_set_label(over);
2511    }
2512    return true;
2513}
2514
2515static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a)
2516{
2517    return do_clast_vector(s, a, false);
2518}
2519
2520static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a)
2521{
2522    return do_clast_vector(s, a, true);
2523}
2524
2525/* Compute CLAST for a scalar.  */
2526static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2527                            bool before, TCGv_i64 reg_val)
2528{
2529    TCGv_i32 last = tcg_temp_new_i32();
2530    TCGv_i64 ele, cmp, zero;
2531
2532    find_last_active(s, last, esz, pg);
2533
2534    /* Extend the original value of last prior to incrementing.  */
2535    cmp = tcg_temp_new_i64();
2536    tcg_gen_ext_i32_i64(cmp, last);
2537
2538    if (!before) {
2539        incr_last_active(s, last, esz);
2540    }
2541
2542    /* The conceit here is that while last < 0 indicates not found, after
2543     * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2544     * from which we can load garbage.  We then discard the garbage with
2545     * a conditional move.
2546     */
2547    ele = load_last_active(s, last, rm, esz);
2548    tcg_temp_free_i32(last);
2549
2550    zero = tcg_const_i64(0);
2551    tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2552
2553    tcg_temp_free_i64(zero);
2554    tcg_temp_free_i64(cmp);
2555    tcg_temp_free_i64(ele);
2556}
2557
2558/* Compute CLAST for a Vreg.  */
2559static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2560{
2561    if (sve_access_check(s)) {
2562        int esz = a->esz;
2563        int ofs = vec_reg_offset(s, a->rd, 0, esz);
2564        TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2565
2566        do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2567        write_fp_dreg(s, a->rd, reg);
2568        tcg_temp_free_i64(reg);
2569    }
2570    return true;
2571}
2572
2573static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a)
2574{
2575    return do_clast_fp(s, a, false);
2576}
2577
2578static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a)
2579{
2580    return do_clast_fp(s, a, true);
2581}
2582
2583/* Compute CLAST for a Xreg.  */
2584static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2585{
2586    TCGv_i64 reg;
2587
2588    if (!sve_access_check(s)) {
2589        return true;
2590    }
2591
2592    reg = cpu_reg(s, a->rd);
2593    switch (a->esz) {
2594    case 0:
2595        tcg_gen_ext8u_i64(reg, reg);
2596        break;
2597    case 1:
2598        tcg_gen_ext16u_i64(reg, reg);
2599        break;
2600    case 2:
2601        tcg_gen_ext32u_i64(reg, reg);
2602        break;
2603    case 3:
2604        break;
2605    default:
2606        g_assert_not_reached();
2607    }
2608
2609    do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2610    return true;
2611}
2612
2613static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a)
2614{
2615    return do_clast_general(s, a, false);
2616}
2617
2618static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a)
2619{
2620    return do_clast_general(s, a, true);
2621}
2622
2623/* Compute LAST for a scalar.  */
2624static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2625                               int pg, int rm, bool before)
2626{
2627    TCGv_i32 last = tcg_temp_new_i32();
2628    TCGv_i64 ret;
2629
2630    find_last_active(s, last, esz, pg);
2631    if (before) {
2632        wrap_last_active(s, last, esz);
2633    } else {
2634        incr_last_active(s, last, esz);
2635    }
2636
2637    ret = load_last_active(s, last, rm, esz);
2638    tcg_temp_free_i32(last);
2639    return ret;
2640}
2641
2642/* Compute LAST for a Vreg.  */
2643static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2644{
2645    if (sve_access_check(s)) {
2646        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2647        write_fp_dreg(s, a->rd, val);
2648        tcg_temp_free_i64(val);
2649    }
2650    return true;
2651}
2652
2653static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a)
2654{
2655    return do_last_fp(s, a, false);
2656}
2657
2658static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a)
2659{
2660    return do_last_fp(s, a, true);
2661}
2662
2663/* Compute LAST for a Xreg.  */
2664static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2665{
2666    if (sve_access_check(s)) {
2667        TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2668        tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2669        tcg_temp_free_i64(val);
2670    }
2671    return true;
2672}
2673
2674static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a)
2675{
2676    return do_last_general(s, a, false);
2677}
2678
2679static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a)
2680{
2681    return do_last_general(s, a, true);
2682}
2683
2684static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a)
2685{
2686    if (sve_access_check(s)) {
2687        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2688    }
2689    return true;
2690}
2691
2692static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a)
2693{
2694    if (sve_access_check(s)) {
2695        int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2696        TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2697        do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2698        tcg_temp_free_i64(t);
2699    }
2700    return true;
2701}
2702
2703static bool trans_REVB(DisasContext *s, arg_rpr_esz *a)
2704{
2705    static gen_helper_gvec_3 * const fns[4] = {
2706        NULL,
2707        gen_helper_sve_revb_h,
2708        gen_helper_sve_revb_s,
2709        gen_helper_sve_revb_d,
2710    };
2711    return do_zpz_ool(s, a, fns[a->esz]);
2712}
2713
2714static bool trans_REVH(DisasContext *s, arg_rpr_esz *a)
2715{
2716    static gen_helper_gvec_3 * const fns[4] = {
2717        NULL,
2718        NULL,
2719        gen_helper_sve_revh_s,
2720        gen_helper_sve_revh_d,
2721    };
2722    return do_zpz_ool(s, a, fns[a->esz]);
2723}
2724
2725static bool trans_REVW(DisasContext *s, arg_rpr_esz *a)
2726{
2727    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2728}
2729
2730static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a)
2731{
2732    static gen_helper_gvec_3 * const fns[4] = {
2733        gen_helper_sve_rbit_b,
2734        gen_helper_sve_rbit_h,
2735        gen_helper_sve_rbit_s,
2736        gen_helper_sve_rbit_d,
2737    };
2738    return do_zpz_ool(s, a, fns[a->esz]);
2739}
2740
2741static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a)
2742{
2743    if (sve_access_check(s)) {
2744        unsigned vsz = vec_full_reg_size(s);
2745        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2746                           vec_full_reg_offset(s, a->rn),
2747                           vec_full_reg_offset(s, a->rm),
2748                           pred_full_reg_offset(s, a->pg),
2749                           vsz, vsz, a->esz, gen_helper_sve_splice);
2750    }
2751    return true;
2752}
2753
2754/*
2755 *** SVE Integer Compare - Vectors Group
2756 */
2757
2758static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2759                          gen_helper_gvec_flags_4 *gen_fn)
2760{
2761    TCGv_ptr pd, zn, zm, pg;
2762    unsigned vsz;
2763    TCGv_i32 t;
2764
2765    if (gen_fn == NULL) {
2766        return false;
2767    }
2768    if (!sve_access_check(s)) {
2769        return true;
2770    }
2771
2772    vsz = vec_full_reg_size(s);
2773    t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2774    pd = tcg_temp_new_ptr();
2775    zn = tcg_temp_new_ptr();
2776    zm = tcg_temp_new_ptr();
2777    pg = tcg_temp_new_ptr();
2778
2779    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2780    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2781    tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2782    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2783
2784    gen_fn(t, pd, zn, zm, pg, t);
2785
2786    tcg_temp_free_ptr(pd);
2787    tcg_temp_free_ptr(zn);
2788    tcg_temp_free_ptr(zm);
2789    tcg_temp_free_ptr(pg);
2790
2791    do_pred_flags(t);
2792
2793    tcg_temp_free_i32(t);
2794    return true;
2795}
2796
2797#define DO_PPZZ(NAME, name) \
2798static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)         \
2799{                                                                         \
2800    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2801        gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
2802        gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
2803    };                                                                    \
2804    return do_ppzz_flags(s, a, fns[a->esz]);                              \
2805}
2806
2807DO_PPZZ(CMPEQ, cmpeq)
2808DO_PPZZ(CMPNE, cmpne)
2809DO_PPZZ(CMPGT, cmpgt)
2810DO_PPZZ(CMPGE, cmpge)
2811DO_PPZZ(CMPHI, cmphi)
2812DO_PPZZ(CMPHS, cmphs)
2813
2814#undef DO_PPZZ
2815
2816#define DO_PPZW(NAME, name) \
2817static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a)         \
2818{                                                                         \
2819    static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2820        gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
2821        gen_helper_sve_##name##_ppzw_s, NULL                              \
2822    };                                                                    \
2823    return do_ppzz_flags(s, a, fns[a->esz]);                              \
2824}
2825
2826DO_PPZW(CMPEQ, cmpeq)
2827DO_PPZW(CMPNE, cmpne)
2828DO_PPZW(CMPGT, cmpgt)
2829DO_PPZW(CMPGE, cmpge)
2830DO_PPZW(CMPHI, cmphi)
2831DO_PPZW(CMPHS, cmphs)
2832DO_PPZW(CMPLT, cmplt)
2833DO_PPZW(CMPLE, cmple)
2834DO_PPZW(CMPLO, cmplo)
2835DO_PPZW(CMPLS, cmpls)
2836
2837#undef DO_PPZW
2838
2839/*
2840 *** SVE Integer Compare - Immediate Groups
2841 */
2842
2843static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2844                          gen_helper_gvec_flags_3 *gen_fn)
2845{
2846    TCGv_ptr pd, zn, pg;
2847    unsigned vsz;
2848    TCGv_i32 t;
2849
2850    if (gen_fn == NULL) {
2851        return false;
2852    }
2853    if (!sve_access_check(s)) {
2854        return true;
2855    }
2856
2857    vsz = vec_full_reg_size(s);
2858    t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2859    pd = tcg_temp_new_ptr();
2860    zn = tcg_temp_new_ptr();
2861    pg = tcg_temp_new_ptr();
2862
2863    tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2864    tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2865    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2866
2867    gen_fn(t, pd, zn, pg, t);
2868
2869    tcg_temp_free_ptr(pd);
2870    tcg_temp_free_ptr(zn);
2871    tcg_temp_free_ptr(pg);
2872
2873    do_pred_flags(t);
2874
2875    tcg_temp_free_i32(t);
2876    return true;
2877}
2878
2879#define DO_PPZI(NAME, name) \
2880static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a)         \
2881{                                                                         \
2882    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
2883        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
2884        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
2885    };                                                                    \
2886    return do_ppzi_flags(s, a, fns[a->esz]);                              \
2887}
2888
2889DO_PPZI(CMPEQ, cmpeq)
2890DO_PPZI(CMPNE, cmpne)
2891DO_PPZI(CMPGT, cmpgt)
2892DO_PPZI(CMPGE, cmpge)
2893DO_PPZI(CMPHI, cmphi)
2894DO_PPZI(CMPHS, cmphs)
2895DO_PPZI(CMPLT, cmplt)
2896DO_PPZI(CMPLE, cmple)
2897DO_PPZI(CMPLO, cmplo)
2898DO_PPZI(CMPLS, cmpls)
2899
2900#undef DO_PPZI
2901
2902/*
2903 *** SVE Partition Break Group
2904 */
2905
2906static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2907                    gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2908{
2909    if (!sve_access_check(s)) {
2910        return true;
2911    }
2912
2913    unsigned vsz = pred_full_reg_size(s);
2914
2915    /* Predicate sizes may be smaller and cannot use simd_desc.  */
2916    TCGv_ptr d = tcg_temp_new_ptr();
2917    TCGv_ptr n = tcg_temp_new_ptr();
2918    TCGv_ptr m = tcg_temp_new_ptr();
2919    TCGv_ptr g = tcg_temp_new_ptr();
2920    TCGv_i32 t = tcg_const_i32(vsz - 2);
2921
2922    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2923    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2924    tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2925    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2926
2927    if (a->s) {
2928        fn_s(t, d, n, m, g, t);
2929        do_pred_flags(t);
2930    } else {
2931        fn(d, n, m, g, t);
2932    }
2933    tcg_temp_free_ptr(d);
2934    tcg_temp_free_ptr(n);
2935    tcg_temp_free_ptr(m);
2936    tcg_temp_free_ptr(g);
2937    tcg_temp_free_i32(t);
2938    return true;
2939}
2940
2941static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2942                    gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2943{
2944    if (!sve_access_check(s)) {
2945        return true;
2946    }
2947
2948    unsigned vsz = pred_full_reg_size(s);
2949
2950    /* Predicate sizes may be smaller and cannot use simd_desc.  */
2951    TCGv_ptr d = tcg_temp_new_ptr();
2952    TCGv_ptr n = tcg_temp_new_ptr();
2953    TCGv_ptr g = tcg_temp_new_ptr();
2954    TCGv_i32 t = tcg_const_i32(vsz - 2);
2955
2956    tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2957    tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2958    tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2959
2960    if (a->s) {
2961        fn_s(t, d, n, g, t);
2962        do_pred_flags(t);
2963    } else {
2964        fn(d, n, g, t);
2965    }
2966    tcg_temp_free_ptr(d);
2967    tcg_temp_free_ptr(n);
2968    tcg_temp_free_ptr(g);
2969    tcg_temp_free_i32(t);
2970    return true;
2971}
2972
2973static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a)
2974{
2975    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2976}
2977
2978static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a)
2979{
2980    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2981}
2982
2983static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a)
2984{
2985    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2986}
2987
2988static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a)
2989{
2990    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2991}
2992
2993static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a)
2994{
2995    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2996}
2997
2998static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a)
2999{
3000    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3001}
3002
3003static bool trans_BRKN(DisasContext *s, arg_rpr_s *a)
3004{
3005    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3006}
3007
3008/*
3009 *** SVE Predicate Count Group
3010 */
3011
3012static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3013{
3014    unsigned psz = pred_full_reg_size(s);
3015
3016    if (psz <= 8) {
3017        uint64_t psz_mask;
3018
3019        tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3020        if (pn != pg) {
3021            TCGv_i64 g = tcg_temp_new_i64();
3022            tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3023            tcg_gen_and_i64(val, val, g);
3024            tcg_temp_free_i64(g);
3025        }
3026
3027        /* Reduce the pred_esz_masks value simply to reduce the
3028         * size of the code generated here.
3029         */
3030        psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3031        tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3032
3033        tcg_gen_ctpop_i64(val, val);
3034    } else {
3035        TCGv_ptr t_pn = tcg_temp_new_ptr();
3036        TCGv_ptr t_pg = tcg_temp_new_ptr();
3037        unsigned desc;
3038        TCGv_i32 t_desc;
3039
3040        desc = psz - 2;
3041        desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3042
3043        tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3044        tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3045        t_desc = tcg_const_i32(desc);
3046
3047        gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3048        tcg_temp_free_ptr(t_pn);
3049        tcg_temp_free_ptr(t_pg);
3050        tcg_temp_free_i32(t_desc);
3051    }
3052}
3053
3054static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
3055{
3056    if (sve_access_check(s)) {
3057        do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3058    }
3059    return true;
3060}
3061
3062static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
3063{
3064    if (sve_access_check(s)) {
3065        TCGv_i64 reg = cpu_reg(s, a->rd);
3066        TCGv_i64 val = tcg_temp_new_i64();
3067
3068        do_cntp(s, val, a->esz, a->pg, a->pg);
3069        if (a->d) {
3070            tcg_gen_sub_i64(reg, reg, val);
3071        } else {
3072            tcg_gen_add_i64(reg, reg, val);
3073        }
3074        tcg_temp_free_i64(val);
3075    }
3076    return true;
3077}
3078
3079static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3080{
3081    if (a->esz == 0) {
3082        return false;
3083    }
3084    if (sve_access_check(s)) {
3085        unsigned vsz = vec_full_reg_size(s);
3086        TCGv_i64 val = tcg_temp_new_i64();
3087        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3088
3089        do_cntp(s, val, a->esz, a->pg, a->pg);
3090        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3091                vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3092    }
3093    return true;
3094}
3095
3096static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a)
3097{
3098    if (sve_access_check(s)) {
3099        TCGv_i64 reg = cpu_reg(s, a->rd);
3100        TCGv_i64 val = tcg_temp_new_i64();
3101
3102        do_cntp(s, val, a->esz, a->pg, a->pg);
3103        do_sat_addsub_32(reg, val, a->u, a->d);
3104    }
3105    return true;
3106}
3107
3108static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a)
3109{
3110    if (sve_access_check(s)) {
3111        TCGv_i64 reg = cpu_reg(s, a->rd);
3112        TCGv_i64 val = tcg_temp_new_i64();
3113
3114        do_cntp(s, val, a->esz, a->pg, a->pg);
3115        do_sat_addsub_64(reg, val, a->u, a->d);
3116    }
3117    return true;
3118}
3119
3120static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a)
3121{
3122    if (a->esz == 0) {
3123        return false;
3124    }
3125    if (sve_access_check(s)) {
3126        TCGv_i64 val = tcg_temp_new_i64();
3127        do_cntp(s, val, a->esz, a->pg, a->pg);
3128        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3129    }
3130    return true;
3131}
3132
3133/*
3134 *** SVE Integer Compare Scalars Group
3135 */
3136
3137static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
3138{
3139    if (!sve_access_check(s)) {
3140        return true;
3141    }
3142
3143    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3144    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3145    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3146    TCGv_i64 cmp = tcg_temp_new_i64();
3147
3148    tcg_gen_setcond_i64(cond, cmp, rn, rm);
3149    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3150    tcg_temp_free_i64(cmp);
3151
3152    /* VF = !NF & !CF.  */
3153    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3154    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3155
3156    /* Both NF and VF actually look at bit 31.  */
3157    tcg_gen_neg_i32(cpu_NF, cpu_NF);
3158    tcg_gen_neg_i32(cpu_VF, cpu_VF);
3159    return true;
3160}
3161
3162static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
3163{
3164    TCGv_i64 op0, op1, t0, t1, tmax;
3165    TCGv_i32 t2, t3;
3166    TCGv_ptr ptr;
3167    unsigned desc, vsz = vec_full_reg_size(s);
3168    TCGCond cond;
3169
3170    if (!sve_access_check(s)) {
3171        return true;
3172    }
3173
3174    op0 = read_cpu_reg(s, a->rn, 1);
3175    op1 = read_cpu_reg(s, a->rm, 1);
3176
3177    if (!a->sf) {
3178        if (a->u) {
3179            tcg_gen_ext32u_i64(op0, op0);
3180            tcg_gen_ext32u_i64(op1, op1);
3181        } else {
3182            tcg_gen_ext32s_i64(op0, op0);
3183            tcg_gen_ext32s_i64(op1, op1);
3184        }
3185    }
3186
3187    /* For the helper, compress the different conditions into a computation
3188     * of how many iterations for which the condition is true.
3189     */
3190    t0 = tcg_temp_new_i64();
3191    t1 = tcg_temp_new_i64();
3192    tcg_gen_sub_i64(t0, op1, op0);
3193
3194    tmax = tcg_const_i64(vsz >> a->esz);
3195    if (a->eq) {
3196        /* Equality means one more iteration.  */
3197        tcg_gen_addi_i64(t0, t0, 1);
3198
3199        /* If op1 is max (un)signed integer (and the only time the addition
3200         * above could overflow), then we produce an all-true predicate by
3201         * setting the count to the vector length.  This is because the
3202         * pseudocode is described as an increment + compare loop, and the
3203         * max integer would always compare true.
3204         */
3205        tcg_gen_movi_i64(t1, (a->sf
3206                              ? (a->u ? UINT64_MAX : INT64_MAX)
3207                              : (a->u ? UINT32_MAX : INT32_MAX)));
3208        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
3209    }
3210
3211    /* Bound to the maximum.  */
3212    tcg_gen_umin_i64(t0, t0, tmax);
3213    tcg_temp_free_i64(tmax);
3214
3215    /* Set the count to zero if the condition is false.  */
3216    cond = (a->u
3217            ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3218            : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3219    tcg_gen_movi_i64(t1, 0);
3220    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3221    tcg_temp_free_i64(t1);
3222
3223    /* Since we're bounded, pass as a 32-bit type.  */
3224    t2 = tcg_temp_new_i32();
3225    tcg_gen_extrl_i64_i32(t2, t0);
3226    tcg_temp_free_i64(t0);
3227
3228    /* Scale elements to bits.  */
3229    tcg_gen_shli_i32(t2, t2, a->esz);
3230
3231    desc = (vsz / 8) - 2;
3232    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3233    t3 = tcg_const_i32(desc);
3234
3235    ptr = tcg_temp_new_ptr();
3236    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3237
3238    gen_helper_sve_while(t2, ptr, t2, t3);
3239    do_pred_flags(t2);
3240
3241    tcg_temp_free_ptr(ptr);
3242    tcg_temp_free_i32(t2);
3243    tcg_temp_free_i32(t3);
3244    return true;
3245}
3246
3247/*
3248 *** SVE Integer Wide Immediate - Unpredicated Group
3249 */
3250
3251static bool trans_FDUP(DisasContext *s, arg_FDUP *a)
3252{
3253    if (a->esz == 0) {
3254        return false;
3255    }
3256    if (sve_access_check(s)) {
3257        unsigned vsz = vec_full_reg_size(s);
3258        int dofs = vec_full_reg_offset(s, a->rd);
3259        uint64_t imm;
3260
3261        /* Decode the VFP immediate.  */
3262        imm = vfp_expand_imm(a->esz, a->imm);
3263        imm = dup_const(a->esz, imm);
3264
3265        tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3266    }
3267    return true;
3268}
3269
3270static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a)
3271{
3272    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3273        return false;
3274    }
3275    if (sve_access_check(s)) {
3276        unsigned vsz = vec_full_reg_size(s);
3277        int dofs = vec_full_reg_offset(s, a->rd);
3278
3279        tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3280    }
3281    return true;
3282}
3283
3284static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a)
3285{
3286    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3287        return false;
3288    }
3289    if (sve_access_check(s)) {
3290        unsigned vsz = vec_full_reg_size(s);
3291        tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3292                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3293    }
3294    return true;
3295}
3296
3297static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
3298{
3299    a->imm = -a->imm;
3300    return trans_ADD_zzi(s, a);
3301}
3302
3303static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
3304{
3305    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
3306    static const GVecGen2s op[4] = {
3307        { .fni8 = tcg_gen_vec_sub8_i64,
3308          .fniv = tcg_gen_sub_vec,
3309          .fno = gen_helper_sve_subri_b,
3310          .opt_opc = vecop_list,
3311          .vece = MO_8,
3312          .scalar_first = true },
3313        { .fni8 = tcg_gen_vec_sub16_i64,
3314          .fniv = tcg_gen_sub_vec,
3315          .fno = gen_helper_sve_subri_h,
3316          .opt_opc = vecop_list,
3317          .vece = MO_16,
3318          .scalar_first = true },
3319        { .fni4 = tcg_gen_sub_i32,
3320          .fniv = tcg_gen_sub_vec,
3321          .fno = gen_helper_sve_subri_s,
3322          .opt_opc = vecop_list,
3323          .vece = MO_32,
3324          .scalar_first = true },
3325        { .fni8 = tcg_gen_sub_i64,
3326          .fniv = tcg_gen_sub_vec,
3327          .fno = gen_helper_sve_subri_d,
3328          .opt_opc = vecop_list,
3329          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3330          .vece = MO_64,
3331          .scalar_first = true }
3332    };
3333
3334    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3335        return false;
3336    }
3337    if (sve_access_check(s)) {
3338        unsigned vsz = vec_full_reg_size(s);
3339        TCGv_i64 c = tcg_const_i64(a->imm);
3340        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3341                        vec_full_reg_offset(s, a->rn),
3342                        vsz, vsz, c, &op[a->esz]);
3343        tcg_temp_free_i64(c);
3344    }
3345    return true;
3346}
3347
3348static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a)
3349{
3350    if (sve_access_check(s)) {
3351        unsigned vsz = vec_full_reg_size(s);
3352        tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3353                          vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3354    }
3355    return true;
3356}
3357
3358static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, bool u, bool d)
3359{
3360    if (a->esz == 0 && extract32(s->insn, 13, 1)) {
3361        return false;
3362    }
3363    if (sve_access_check(s)) {
3364        TCGv_i64 val = tcg_const_i64(a->imm);
3365        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3366        tcg_temp_free_i64(val);
3367    }
3368    return true;
3369}
3370
3371static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a)
3372{
3373    return do_zzi_sat(s, a, false, false);
3374}
3375
3376static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a)
3377{
3378    return do_zzi_sat(s, a, true, false);
3379}
3380
3381static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3382{
3383    return do_zzi_sat(s, a, false, true);
3384}
3385
3386static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a)
3387{
3388    return do_zzi_sat(s, a, true, true);
3389}
3390
3391static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3392{
3393    if (sve_access_check(s)) {
3394        unsigned vsz = vec_full_reg_size(s);
3395        TCGv_i64 c = tcg_const_i64(a->imm);
3396
3397        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3398                            vec_full_reg_offset(s, a->rn),
3399                            c, vsz, vsz, 0, fn);
3400        tcg_temp_free_i64(c);
3401    }
3402    return true;
3403}
3404
3405#define DO_ZZI(NAME, name) \
3406static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a)         \
3407{                                                                       \
3408    static gen_helper_gvec_2i * const fns[4] = {                        \
3409        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
3410        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
3411    };                                                                  \
3412    return do_zzi_ool(s, a, fns[a->esz]);                               \
3413}
3414
3415DO_ZZI(SMAX, smax)
3416DO_ZZI(UMAX, umax)
3417DO_ZZI(SMIN, smin)
3418DO_ZZI(UMIN, umin)
3419
3420#undef DO_ZZI
3421
3422static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a)
3423{
3424    static gen_helper_gvec_3 * const fns[2][2] = {
3425        { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3426        { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3427    };
3428
3429    if (sve_access_check(s)) {
3430        unsigned vsz = vec_full_reg_size(s);
3431        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3432                           vec_full_reg_offset(s, a->rn),
3433                           vec_full_reg_offset(s, a->rm),
3434                           vsz, vsz, 0, fns[a->u][a->sz]);
3435    }
3436    return true;
3437}
3438
3439static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a)
3440{
3441    static gen_helper_gvec_3 * const fns[2][2] = {
3442        { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3443        { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3444    };
3445
3446    if (sve_access_check(s)) {
3447        unsigned vsz = vec_full_reg_size(s);
3448        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3449                           vec_full_reg_offset(s, a->rn),
3450                           vec_full_reg_offset(s, a->rm),
3451                           vsz, vsz, a->index, fns[a->u][a->sz]);
3452    }
3453    return true;
3454}
3455
3456
3457/*
3458 *** SVE Floating Point Multiply-Add Indexed Group
3459 */
3460
3461static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a)
3462{
3463    static gen_helper_gvec_4_ptr * const fns[3] = {
3464        gen_helper_gvec_fmla_idx_h,
3465        gen_helper_gvec_fmla_idx_s,
3466        gen_helper_gvec_fmla_idx_d,
3467    };
3468
3469    if (sve_access_check(s)) {
3470        unsigned vsz = vec_full_reg_size(s);
3471        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3472        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3473                           vec_full_reg_offset(s, a->rn),
3474                           vec_full_reg_offset(s, a->rm),
3475                           vec_full_reg_offset(s, a->ra),
3476                           status, vsz, vsz, (a->index << 1) | a->sub,
3477                           fns[a->esz - 1]);
3478        tcg_temp_free_ptr(status);
3479    }
3480    return true;
3481}
3482
3483/*
3484 *** SVE Floating Point Multiply Indexed Group
3485 */
3486
3487static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a)
3488{
3489    static gen_helper_gvec_3_ptr * const fns[3] = {
3490        gen_helper_gvec_fmul_idx_h,
3491        gen_helper_gvec_fmul_idx_s,
3492        gen_helper_gvec_fmul_idx_d,
3493    };
3494
3495    if (sve_access_check(s)) {
3496        unsigned vsz = vec_full_reg_size(s);
3497        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3498        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3499                           vec_full_reg_offset(s, a->rn),
3500                           vec_full_reg_offset(s, a->rm),
3501                           status, vsz, vsz, a->index, fns[a->esz - 1]);
3502        tcg_temp_free_ptr(status);
3503    }
3504    return true;
3505}
3506
3507/*
3508 *** SVE Floating Point Fast Reduction Group
3509 */
3510
3511typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3512                                  TCGv_ptr, TCGv_i32);
3513
3514static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3515                      gen_helper_fp_reduce *fn)
3516{
3517    unsigned vsz = vec_full_reg_size(s);
3518    unsigned p2vsz = pow2ceil(vsz);
3519    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3520    TCGv_ptr t_zn, t_pg, status;
3521    TCGv_i64 temp;
3522
3523    temp = tcg_temp_new_i64();
3524    t_zn = tcg_temp_new_ptr();
3525    t_pg = tcg_temp_new_ptr();
3526
3527    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3528    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3529    status = get_fpstatus_ptr(a->esz == MO_16);
3530
3531    fn(temp, t_zn, t_pg, status, t_desc);
3532    tcg_temp_free_ptr(t_zn);
3533    tcg_temp_free_ptr(t_pg);
3534    tcg_temp_free_ptr(status);
3535    tcg_temp_free_i32(t_desc);
3536
3537    write_fp_dreg(s, a->rd, temp);
3538    tcg_temp_free_i64(temp);
3539}
3540
3541#define DO_VPZ(NAME, name) \
3542static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)                \
3543{                                                                        \
3544    static gen_helper_fp_reduce * const fns[3] = {                       \
3545        gen_helper_sve_##name##_h,                                       \
3546        gen_helper_sve_##name##_s,                                       \
3547        gen_helper_sve_##name##_d,                                       \
3548    };                                                                   \
3549    if (a->esz == 0) {                                                   \
3550        return false;                                                    \
3551    }                                                                    \
3552    if (sve_access_check(s)) {                                           \
3553        do_reduce(s, a, fns[a->esz - 1]);                                \
3554    }                                                                    \
3555    return true;                                                         \
3556}
3557
3558DO_VPZ(FADDV, faddv)
3559DO_VPZ(FMINNMV, fminnmv)
3560DO_VPZ(FMAXNMV, fmaxnmv)
3561DO_VPZ(FMINV, fminv)
3562DO_VPZ(FMAXV, fmaxv)
3563
3564/*
3565 *** SVE Floating Point Unary Operations - Unpredicated Group
3566 */
3567
3568static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3569{
3570    unsigned vsz = vec_full_reg_size(s);
3571    TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3572
3573    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3574                       vec_full_reg_offset(s, a->rn),
3575                       status, vsz, vsz, 0, fn);
3576    tcg_temp_free_ptr(status);
3577}
3578
3579static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a)
3580{
3581    static gen_helper_gvec_2_ptr * const fns[3] = {
3582        gen_helper_gvec_frecpe_h,
3583        gen_helper_gvec_frecpe_s,
3584        gen_helper_gvec_frecpe_d,
3585    };
3586    if (a->esz == 0) {
3587        return false;
3588    }
3589    if (sve_access_check(s)) {
3590        do_zz_fp(s, a, fns[a->esz - 1]);
3591    }
3592    return true;
3593}
3594
3595static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a)
3596{
3597    static gen_helper_gvec_2_ptr * const fns[3] = {
3598        gen_helper_gvec_frsqrte_h,
3599        gen_helper_gvec_frsqrte_s,
3600        gen_helper_gvec_frsqrte_d,
3601    };
3602    if (a->esz == 0) {
3603        return false;
3604    }
3605    if (sve_access_check(s)) {
3606        do_zz_fp(s, a, fns[a->esz - 1]);
3607    }
3608    return true;
3609}
3610
3611/*
3612 *** SVE Floating Point Compare with Zero Group
3613 */
3614
3615static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3616                      gen_helper_gvec_3_ptr *fn)
3617{
3618    unsigned vsz = vec_full_reg_size(s);
3619    TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3620
3621    tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3622                       vec_full_reg_offset(s, a->rn),
3623                       pred_full_reg_offset(s, a->pg),
3624                       status, vsz, vsz, 0, fn);
3625    tcg_temp_free_ptr(status);
3626}
3627
3628#define DO_PPZ(NAME, name) \
3629static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a)         \
3630{                                                                 \
3631    static gen_helper_gvec_3_ptr * const fns[3] = {               \
3632        gen_helper_sve_##name##_h,                                \
3633        gen_helper_sve_##name##_s,                                \
3634        gen_helper_sve_##name##_d,                                \
3635    };                                                            \
3636    if (a->esz == 0) {                                            \
3637        return false;                                             \
3638    }                                                             \
3639    if (sve_access_check(s)) {                                    \
3640        do_ppz_fp(s, a, fns[a->esz - 1]);                         \
3641    }                                                             \
3642    return true;                                                  \
3643}
3644
3645DO_PPZ(FCMGE_ppz0, fcmge0)
3646DO_PPZ(FCMGT_ppz0, fcmgt0)
3647DO_PPZ(FCMLE_ppz0, fcmle0)
3648DO_PPZ(FCMLT_ppz0, fcmlt0)
3649DO_PPZ(FCMEQ_ppz0, fcmeq0)
3650DO_PPZ(FCMNE_ppz0, fcmne0)
3651
3652#undef DO_PPZ
3653
3654/*
3655 *** SVE floating-point trig multiply-add coefficient
3656 */
3657
3658static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a)
3659{
3660    static gen_helper_gvec_3_ptr * const fns[3] = {
3661        gen_helper_sve_ftmad_h,
3662        gen_helper_sve_ftmad_s,
3663        gen_helper_sve_ftmad_d,
3664    };
3665
3666    if (a->esz == 0) {
3667        return false;
3668    }
3669    if (sve_access_check(s)) {
3670        unsigned vsz = vec_full_reg_size(s);
3671        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3672        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3673                           vec_full_reg_offset(s, a->rn),
3674                           vec_full_reg_offset(s, a->rm),
3675                           status, vsz, vsz, a->imm, fns[a->esz - 1]);
3676        tcg_temp_free_ptr(status);
3677    }
3678    return true;
3679}
3680
3681/*
3682 *** SVE Floating Point Accumulating Reduction Group
3683 */
3684
3685static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
3686{
3687    typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3688                          TCGv_ptr, TCGv_ptr, TCGv_i32);
3689    static fadda_fn * const fns[3] = {
3690        gen_helper_sve_fadda_h,
3691        gen_helper_sve_fadda_s,
3692        gen_helper_sve_fadda_d,
3693    };
3694    unsigned vsz = vec_full_reg_size(s);
3695    TCGv_ptr t_rm, t_pg, t_fpst;
3696    TCGv_i64 t_val;
3697    TCGv_i32 t_desc;
3698
3699    if (a->esz == 0) {
3700        return false;
3701    }
3702    if (!sve_access_check(s)) {
3703        return true;
3704    }
3705
3706    t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3707    t_rm = tcg_temp_new_ptr();
3708    t_pg = tcg_temp_new_ptr();
3709    tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3710    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3711    t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3712    t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3713
3714    fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3715
3716    tcg_temp_free_i32(t_desc);
3717    tcg_temp_free_ptr(t_fpst);
3718    tcg_temp_free_ptr(t_pg);
3719    tcg_temp_free_ptr(t_rm);
3720
3721    write_fp_dreg(s, a->rd, t_val);
3722    tcg_temp_free_i64(t_val);
3723    return true;
3724}
3725
3726/*
3727 *** SVE Floating Point Arithmetic - Unpredicated Group
3728 */
3729
3730static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3731                      gen_helper_gvec_3_ptr *fn)
3732{
3733    if (fn == NULL) {
3734        return false;
3735    }
3736    if (sve_access_check(s)) {
3737        unsigned vsz = vec_full_reg_size(s);
3738        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3739        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3740                           vec_full_reg_offset(s, a->rn),
3741                           vec_full_reg_offset(s, a->rm),
3742                           status, vsz, vsz, 0, fn);
3743        tcg_temp_free_ptr(status);
3744    }
3745    return true;
3746}
3747
3748
3749#define DO_FP3(NAME, name) \
3750static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a)           \
3751{                                                                   \
3752    static gen_helper_gvec_3_ptr * const fns[4] = {                 \
3753        NULL, gen_helper_gvec_##name##_h,                           \
3754        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
3755    };                                                              \
3756    return do_zzz_fp(s, a, fns[a->esz]);                            \
3757}
3758
3759DO_FP3(FADD_zzz, fadd)
3760DO_FP3(FSUB_zzz, fsub)
3761DO_FP3(FMUL_zzz, fmul)
3762DO_FP3(FTSMUL, ftsmul)
3763DO_FP3(FRECPS, recps)
3764DO_FP3(FRSQRTS, rsqrts)
3765
3766#undef DO_FP3
3767
3768/*
3769 *** SVE Floating Point Arithmetic - Predicated Group
3770 */
3771
3772static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3773                       gen_helper_gvec_4_ptr *fn)
3774{
3775    if (fn == NULL) {
3776        return false;
3777    }
3778    if (sve_access_check(s)) {
3779        unsigned vsz = vec_full_reg_size(s);
3780        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3781        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3782                           vec_full_reg_offset(s, a->rn),
3783                           vec_full_reg_offset(s, a->rm),
3784                           pred_full_reg_offset(s, a->pg),
3785                           status, vsz, vsz, 0, fn);
3786        tcg_temp_free_ptr(status);
3787    }
3788    return true;
3789}
3790
3791#define DO_FP3(NAME, name) \
3792static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a)          \
3793{                                                                   \
3794    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
3795        NULL, gen_helper_sve_##name##_h,                            \
3796        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
3797    };                                                              \
3798    return do_zpzz_fp(s, a, fns[a->esz]);                           \
3799}
3800
3801DO_FP3(FADD_zpzz, fadd)
3802DO_FP3(FSUB_zpzz, fsub)
3803DO_FP3(FMUL_zpzz, fmul)
3804DO_FP3(FMIN_zpzz, fmin)
3805DO_FP3(FMAX_zpzz, fmax)
3806DO_FP3(FMINNM_zpzz, fminnum)
3807DO_FP3(FMAXNM_zpzz, fmaxnum)
3808DO_FP3(FABD, fabd)
3809DO_FP3(FSCALE, fscalbn)
3810DO_FP3(FDIV, fdiv)
3811DO_FP3(FMULX, fmulx)
3812
3813#undef DO_FP3
3814
3815typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3816                                      TCGv_i64, TCGv_ptr, TCGv_i32);
3817
3818static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3819                         TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3820{
3821    unsigned vsz = vec_full_reg_size(s);
3822    TCGv_ptr t_zd, t_zn, t_pg, status;
3823    TCGv_i32 desc;
3824
3825    t_zd = tcg_temp_new_ptr();
3826    t_zn = tcg_temp_new_ptr();
3827    t_pg = tcg_temp_new_ptr();
3828    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3829    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3830    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3831
3832    status = get_fpstatus_ptr(is_fp16);
3833    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3834    fn(t_zd, t_zn, t_pg, scalar, status, desc);
3835
3836    tcg_temp_free_i32(desc);
3837    tcg_temp_free_ptr(status);
3838    tcg_temp_free_ptr(t_pg);
3839    tcg_temp_free_ptr(t_zn);
3840    tcg_temp_free_ptr(t_zd);
3841}
3842
3843static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3844                      gen_helper_sve_fp2scalar *fn)
3845{
3846    TCGv_i64 temp = tcg_const_i64(imm);
3847    do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3848    tcg_temp_free_i64(temp);
3849}
3850
3851#define DO_FP_IMM(NAME, name, const0, const1) \
3852static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a)         \
3853{                                                                         \
3854    static gen_helper_sve_fp2scalar * const fns[3] = {                    \
3855        gen_helper_sve_##name##_h,                                        \
3856        gen_helper_sve_##name##_s,                                        \
3857        gen_helper_sve_##name##_d                                         \
3858    };                                                                    \
3859    static uint64_t const val[3][2] = {                                   \
3860        { float16_##const0, float16_##const1 },                           \
3861        { float32_##const0, float32_##const1 },                           \
3862        { float64_##const0, float64_##const1 },                           \
3863    };                                                                    \
3864    if (a->esz == 0) {                                                    \
3865        return false;                                                     \
3866    }                                                                     \
3867    if (sve_access_check(s)) {                                            \
3868        do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
3869    }                                                                     \
3870    return true;                                                          \
3871}
3872
3873#define float16_two  make_float16(0x4000)
3874#define float32_two  make_float32(0x40000000)
3875#define float64_two  make_float64(0x4000000000000000ULL)
3876
3877DO_FP_IMM(FADD, fadds, half, one)
3878DO_FP_IMM(FSUB, fsubs, half, one)
3879DO_FP_IMM(FMUL, fmuls, half, two)
3880DO_FP_IMM(FSUBR, fsubrs, half, one)
3881DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3882DO_FP_IMM(FMINNM, fminnms, zero, one)
3883DO_FP_IMM(FMAX, fmaxs, zero, one)
3884DO_FP_IMM(FMIN, fmins, zero, one)
3885
3886#undef DO_FP_IMM
3887
3888static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3889                      gen_helper_gvec_4_ptr *fn)
3890{
3891    if (fn == NULL) {
3892        return false;
3893    }
3894    if (sve_access_check(s)) {
3895        unsigned vsz = vec_full_reg_size(s);
3896        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3897        tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3898                           vec_full_reg_offset(s, a->rn),
3899                           vec_full_reg_offset(s, a->rm),
3900                           pred_full_reg_offset(s, a->pg),
3901                           status, vsz, vsz, 0, fn);
3902        tcg_temp_free_ptr(status);
3903    }
3904    return true;
3905}
3906
3907#define DO_FPCMP(NAME, name) \
3908static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a)     \
3909{                                                                     \
3910    static gen_helper_gvec_4_ptr * const fns[4] = {                   \
3911        NULL, gen_helper_sve_##name##_h,                              \
3912        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
3913    };                                                                \
3914    return do_fp_cmp(s, a, fns[a->esz]);                              \
3915}
3916
3917DO_FPCMP(FCMGE, fcmge)
3918DO_FPCMP(FCMGT, fcmgt)
3919DO_FPCMP(FCMEQ, fcmeq)
3920DO_FPCMP(FCMNE, fcmne)
3921DO_FPCMP(FCMUO, fcmuo)
3922DO_FPCMP(FACGE, facge)
3923DO_FPCMP(FACGT, facgt)
3924
3925#undef DO_FPCMP
3926
3927static bool trans_FCADD(DisasContext *s, arg_FCADD *a)
3928{
3929    static gen_helper_gvec_4_ptr * const fns[3] = {
3930        gen_helper_sve_fcadd_h,
3931        gen_helper_sve_fcadd_s,
3932        gen_helper_sve_fcadd_d
3933    };
3934
3935    if (a->esz == 0) {
3936        return false;
3937    }
3938    if (sve_access_check(s)) {
3939        unsigned vsz = vec_full_reg_size(s);
3940        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3941        tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3942                           vec_full_reg_offset(s, a->rn),
3943                           vec_full_reg_offset(s, a->rm),
3944                           pred_full_reg_offset(s, a->pg),
3945                           status, vsz, vsz, a->rot, fns[a->esz - 1]);
3946        tcg_temp_free_ptr(status);
3947    }
3948    return true;
3949}
3950
3951typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3952
3953static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3954{
3955    if (fn == NULL) {
3956        return false;
3957    }
3958    if (!sve_access_check(s)) {
3959        return true;
3960    }
3961
3962    unsigned vsz = vec_full_reg_size(s);
3963    unsigned desc;
3964    TCGv_i32 t_desc;
3965    TCGv_ptr pg = tcg_temp_new_ptr();
3966
3967    /* We would need 7 operands to pass these arguments "properly".
3968     * So we encode all the register numbers into the descriptor.
3969     */
3970    desc = deposit32(a->rd, 5, 5, a->rn);
3971    desc = deposit32(desc, 10, 5, a->rm);
3972    desc = deposit32(desc, 15, 5, a->ra);
3973    desc = simd_desc(vsz, vsz, desc);
3974
3975    t_desc = tcg_const_i32(desc);
3976    tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3977    fn(cpu_env, pg, t_desc);
3978    tcg_temp_free_i32(t_desc);
3979    tcg_temp_free_ptr(pg);
3980    return true;
3981}
3982
3983#define DO_FMLA(NAME, name) \
3984static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a)          \
3985{                                                                    \
3986    static gen_helper_sve_fmla * const fns[4] = {                    \
3987        NULL, gen_helper_sve_##name##_h,                             \
3988        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
3989    };                                                               \
3990    return do_fmla(s, a, fns[a->esz]);                               \
3991}
3992
3993DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3994DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3995DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3996DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3997
3998#undef DO_FMLA
3999
4000static bool trans_FCMLA_zpzzz(DisasContext *s, arg_FCMLA_zpzzz *a)
4001{
4002    static gen_helper_sve_fmla * const fns[3] = {
4003        gen_helper_sve_fcmla_zpzzz_h,
4004        gen_helper_sve_fcmla_zpzzz_s,
4005        gen_helper_sve_fcmla_zpzzz_d,
4006    };
4007
4008    if (a->esz == 0) {
4009        return false;
4010    }
4011    if (sve_access_check(s)) {
4012        unsigned vsz = vec_full_reg_size(s);
4013        unsigned desc;
4014        TCGv_i32 t_desc;
4015        TCGv_ptr pg = tcg_temp_new_ptr();
4016
4017        /* We would need 7 operands to pass these arguments "properly".
4018         * So we encode all the register numbers into the descriptor.
4019         */
4020        desc = deposit32(a->rd, 5, 5, a->rn);
4021        desc = deposit32(desc, 10, 5, a->rm);
4022        desc = deposit32(desc, 15, 5, a->ra);
4023        desc = deposit32(desc, 20, 2, a->rot);
4024        desc = sextract32(desc, 0, 22);
4025        desc = simd_desc(vsz, vsz, desc);
4026
4027        t_desc = tcg_const_i32(desc);
4028        tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
4029        fns[a->esz - 1](cpu_env, pg, t_desc);
4030        tcg_temp_free_i32(t_desc);
4031        tcg_temp_free_ptr(pg);
4032    }
4033    return true;
4034}
4035
4036static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a)
4037{
4038    static gen_helper_gvec_3_ptr * const fns[2] = {
4039        gen_helper_gvec_fcmlah_idx,
4040        gen_helper_gvec_fcmlas_idx,
4041    };
4042
4043    tcg_debug_assert(a->esz == 1 || a->esz == 2);
4044    tcg_debug_assert(a->rd == a->ra);
4045    if (sve_access_check(s)) {
4046        unsigned vsz = vec_full_reg_size(s);
4047        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4048        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4049                           vec_full_reg_offset(s, a->rn),
4050                           vec_full_reg_offset(s, a->rm),
4051                           status, vsz, vsz,
4052                           a->index * 4 + a->rot,
4053                           fns[a->esz - 1]);
4054        tcg_temp_free_ptr(status);
4055    }
4056    return true;
4057}
4058
4059/*
4060 *** SVE Floating Point Unary Operations Predicated Group
4061 */
4062
4063static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4064                       bool is_fp16, gen_helper_gvec_3_ptr *fn)
4065{
4066    if (sve_access_check(s)) {
4067        unsigned vsz = vec_full_reg_size(s);
4068        TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4069        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4070                           vec_full_reg_offset(s, rn),
4071                           pred_full_reg_offset(s, pg),
4072                           status, vsz, vsz, 0, fn);
4073        tcg_temp_free_ptr(status);
4074    }
4075    return true;
4076}
4077
4078static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a)
4079{
4080    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
4081}
4082
4083static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a)
4084{
4085    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4086}
4087
4088static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a)
4089{
4090    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
4091}
4092
4093static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a)
4094{
4095    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4096}
4097
4098static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a)
4099{
4100    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4101}
4102
4103static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a)
4104{
4105    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4106}
4107
4108static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a)
4109{
4110    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4111}
4112
4113static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a)
4114{
4115    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4116}
4117
4118static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a)
4119{
4120    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4121}
4122
4123static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a)
4124{
4125    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4126}
4127
4128static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a)
4129{
4130    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4131}
4132
4133static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a)
4134{
4135    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4136}
4137
4138static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a)
4139{
4140    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4141}
4142
4143static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a)
4144{
4145    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4146}
4147
4148static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a)
4149{
4150    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4151}
4152
4153static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a)
4154{
4155    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4156}
4157
4158static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a)
4159{
4160    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4161}
4162
4163static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a)
4164{
4165    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4166}
4167
4168static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a)
4169{
4170    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4171}
4172
4173static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a)
4174{
4175    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4176}
4177
4178static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4179    gen_helper_sve_frint_h,
4180    gen_helper_sve_frint_s,
4181    gen_helper_sve_frint_d
4182};
4183
4184static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a)
4185{
4186    if (a->esz == 0) {
4187        return false;
4188    }
4189    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4190                      frint_fns[a->esz - 1]);
4191}
4192
4193static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a)
4194{
4195    static gen_helper_gvec_3_ptr * const fns[3] = {
4196        gen_helper_sve_frintx_h,
4197        gen_helper_sve_frintx_s,
4198        gen_helper_sve_frintx_d
4199    };
4200    if (a->esz == 0) {
4201        return false;
4202    }
4203    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4204}
4205
4206static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4207{
4208    if (a->esz == 0) {
4209        return false;
4210    }
4211    if (sve_access_check(s)) {
4212        unsigned vsz = vec_full_reg_size(s);
4213        TCGv_i32 tmode = tcg_const_i32(mode);
4214        TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4215
4216        gen_helper_set_rmode(tmode, tmode, status);
4217
4218        tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4219                           vec_full_reg_offset(s, a->rn),
4220                           pred_full_reg_offset(s, a->pg),
4221                           status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4222
4223        gen_helper_set_rmode(tmode, tmode, status);
4224        tcg_temp_free_i32(tmode);
4225        tcg_temp_free_ptr(status);
4226    }
4227    return true;
4228}
4229
4230static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a)
4231{
4232    return do_frint_mode(s, a, float_round_nearest_even);
4233}
4234
4235static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a)
4236{
4237    return do_frint_mode(s, a, float_round_up);
4238}
4239
4240static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a)
4241{
4242    return do_frint_mode(s, a, float_round_down);
4243}
4244
4245static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a)
4246{
4247    return do_frint_mode(s, a, float_round_to_zero);
4248}
4249
4250static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a)
4251{
4252    return do_frint_mode(s, a, float_round_ties_away);
4253}
4254
4255static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a)
4256{
4257    static gen_helper_gvec_3_ptr * const fns[3] = {
4258        gen_helper_sve_frecpx_h,
4259        gen_helper_sve_frecpx_s,
4260        gen_helper_sve_frecpx_d
4261    };
4262    if (a->esz == 0) {
4263        return false;
4264    }
4265    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4266}
4267
4268static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a)
4269{
4270    static gen_helper_gvec_3_ptr * const fns[3] = {
4271        gen_helper_sve_fsqrt_h,
4272        gen_helper_sve_fsqrt_s,
4273        gen_helper_sve_fsqrt_d
4274    };
4275    if (a->esz == 0) {
4276        return false;
4277    }
4278    return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4279}
4280
4281static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4282{
4283    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4284}
4285
4286static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4287{
4288    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4289}
4290
4291static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4292{
4293    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4294}
4295
4296static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4297{
4298    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4299}
4300
4301static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4302{
4303    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4304}
4305
4306static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4307{
4308    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4309}
4310
4311static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4312{
4313    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4314}
4315
4316static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a)
4317{
4318    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4319}
4320
4321static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a)
4322{
4323    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4324}
4325
4326static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a)
4327{
4328    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4329}
4330
4331static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a)
4332{
4333    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4334}
4335
4336static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a)
4337{
4338    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4339}
4340
4341static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a)
4342{
4343    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4344}
4345
4346static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a)
4347{
4348    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4349}
4350
4351/*
4352 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4353 */
4354
4355/* Subroutine loading a vector register at VOFS of LEN bytes.
4356 * The load should begin at the address Rn + IMM.
4357 */
4358
4359static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4360{
4361    int len_align = QEMU_ALIGN_DOWN(len, 8);
4362    int len_remain = len % 8;
4363    int nparts = len / 8 + ctpop8(len_remain);
4364    int midx = get_mem_index(s);
4365    TCGv_i64 addr, t0, t1;
4366
4367    addr = tcg_temp_new_i64();
4368    t0 = tcg_temp_new_i64();
4369
4370    /* Note that unpredicated load/store of vector/predicate registers
4371     * are defined as a stream of bytes, which equates to little-endian
4372     * operations on larger quantities.  There is no nice way to force
4373     * a little-endian load for aarch64_be-linux-user out of line.
4374     *
4375     * Attempt to keep code expansion to a minimum by limiting the
4376     * amount of unrolling done.
4377     */
4378    if (nparts <= 4) {
4379        int i;
4380
4381        for (i = 0; i < len_align; i += 8) {
4382            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4383            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4384            tcg_gen_st_i64(t0, cpu_env, vofs + i);
4385        }
4386    } else {
4387        TCGLabel *loop = gen_new_label();
4388        TCGv_ptr tp, i = tcg_const_local_ptr(0);
4389
4390        gen_set_label(loop);
4391
4392        /* Minimize the number of local temps that must be re-read from
4393         * the stack each iteration.  Instead, re-compute values other
4394         * than the loop counter.
4395         */
4396        tp = tcg_temp_new_ptr();
4397        tcg_gen_addi_ptr(tp, i, imm);
4398        tcg_gen_extu_ptr_i64(addr, tp);
4399        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4400
4401        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4402
4403        tcg_gen_add_ptr(tp, cpu_env, i);
4404        tcg_gen_addi_ptr(i, i, 8);
4405        tcg_gen_st_i64(t0, tp, vofs);
4406        tcg_temp_free_ptr(tp);
4407
4408        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4409        tcg_temp_free_ptr(i);
4410    }
4411
4412    /* Predicate register loads can be any multiple of 2.
4413     * Note that we still store the entire 64-bit unit into cpu_env.
4414     */
4415    if (len_remain) {
4416        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4417
4418        switch (len_remain) {
4419        case 2:
4420        case 4:
4421        case 8:
4422            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4423            break;
4424
4425        case 6:
4426            t1 = tcg_temp_new_i64();
4427            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
4428            tcg_gen_addi_i64(addr, addr, 4);
4429            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
4430            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4431            tcg_temp_free_i64(t1);
4432            break;
4433
4434        default:
4435            g_assert_not_reached();
4436        }
4437        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4438    }
4439    tcg_temp_free_i64(addr);
4440    tcg_temp_free_i64(t0);
4441}
4442
4443/* Similarly for stores.  */
4444static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4445{
4446    int len_align = QEMU_ALIGN_DOWN(len, 8);
4447    int len_remain = len % 8;
4448    int nparts = len / 8 + ctpop8(len_remain);
4449    int midx = get_mem_index(s);
4450    TCGv_i64 addr, t0;
4451
4452    addr = tcg_temp_new_i64();
4453    t0 = tcg_temp_new_i64();
4454
4455    /* Note that unpredicated load/store of vector/predicate registers
4456     * are defined as a stream of bytes, which equates to little-endian
4457     * operations on larger quantities.  There is no nice way to force
4458     * a little-endian store for aarch64_be-linux-user out of line.
4459     *
4460     * Attempt to keep code expansion to a minimum by limiting the
4461     * amount of unrolling done.
4462     */
4463    if (nparts <= 4) {
4464        int i;
4465
4466        for (i = 0; i < len_align; i += 8) {
4467            tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4468            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4469            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4470        }
4471    } else {
4472        TCGLabel *loop = gen_new_label();
4473        TCGv_ptr t2, i = tcg_const_local_ptr(0);
4474
4475        gen_set_label(loop);
4476
4477        t2 = tcg_temp_new_ptr();
4478        tcg_gen_add_ptr(t2, cpu_env, i);
4479        tcg_gen_ld_i64(t0, t2, vofs);
4480
4481        /* Minimize the number of local temps that must be re-read from
4482         * the stack each iteration.  Instead, re-compute values other
4483         * than the loop counter.
4484         */
4485        tcg_gen_addi_ptr(t2, i, imm);
4486        tcg_gen_extu_ptr_i64(addr, t2);
4487        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4488        tcg_temp_free_ptr(t2);
4489
4490        tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4491
4492        tcg_gen_addi_ptr(i, i, 8);
4493
4494        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4495        tcg_temp_free_ptr(i);
4496    }
4497
4498    /* Predicate register stores can be any multiple of 2.  */
4499    if (len_remain) {
4500        tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4501        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4502
4503        switch (len_remain) {
4504        case 2:
4505        case 4:
4506        case 8:
4507            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4508            break;
4509
4510        case 6:
4511            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
4512            tcg_gen_addi_i64(addr, addr, 4);
4513            tcg_gen_shri_i64(t0, t0, 32);
4514            tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
4515            break;
4516
4517        default:
4518            g_assert_not_reached();
4519        }
4520    }
4521    tcg_temp_free_i64(addr);
4522    tcg_temp_free_i64(t0);
4523}
4524
4525static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
4526{
4527    if (sve_access_check(s)) {
4528        int size = vec_full_reg_size(s);
4529        int off = vec_full_reg_offset(s, a->rd);
4530        do_ldr(s, off, size, a->rn, a->imm * size);
4531    }
4532    return true;
4533}
4534
4535static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
4536{
4537    if (sve_access_check(s)) {
4538        int size = pred_full_reg_size(s);
4539        int off = pred_full_reg_offset(s, a->rd);
4540        do_ldr(s, off, size, a->rn, a->imm * size);
4541    }
4542    return true;
4543}
4544
4545static bool trans_STR_zri(DisasContext *s, arg_rri *a)
4546{
4547    if (sve_access_check(s)) {
4548        int size = vec_full_reg_size(s);
4549        int off = vec_full_reg_offset(s, a->rd);
4550        do_str(s, off, size, a->rn, a->imm * size);
4551    }
4552    return true;
4553}
4554
4555static bool trans_STR_pri(DisasContext *s, arg_rri *a)
4556{
4557    if (sve_access_check(s)) {
4558        int size = pred_full_reg_size(s);
4559        int off = pred_full_reg_offset(s, a->rd);
4560        do_str(s, off, size, a->rn, a->imm * size);
4561    }
4562    return true;
4563}
4564
4565/*
4566 *** SVE Memory - Contiguous Load Group
4567 */
4568
4569/* The memory mode of the dtype.  */
4570static const TCGMemOp dtype_mop[16] = {
4571    MO_UB, MO_UB, MO_UB, MO_UB,
4572    MO_SL, MO_UW, MO_UW, MO_UW,
4573    MO_SW, MO_SW, MO_UL, MO_UL,
4574    MO_SB, MO_SB, MO_SB, MO_Q
4575};
4576
4577#define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
4578
/* The vector element size of dtype, indexed by the 4-bit dtype value.  */
static const uint8_t dtype_esz[16] = {
    [0]  = 0,
    [1]  = 1,
    [2]  = 2,
    [3]  = 3,

    [4]  = 3,
    [5]  = 1,
    [6]  = 2,
    [7]  = 3,

    [8]  = 3,
    [9]  = 2,
    [10] = 2,
    [11] = 3,

    [12] = 3,
    [13] = 2,
    [14] = 1,
    [15] = 3,
};
4586
4587static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype)
4588{
4589    return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s));
4590}
4591
4592static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4593                       int dtype, gen_helper_gvec_mem *fn)
4594{
4595    unsigned vsz = vec_full_reg_size(s);
4596    TCGv_ptr t_pg;
4597    TCGv_i32 t_desc;
4598    int desc;
4599
4600    /* For e.g. LD4, there are not enough arguments to pass all 4
4601     * registers as pointers, so encode the regno into the data field.
4602     * For consistency, do this even for LD1.
4603     */
4604    desc = sve_memopidx(s, dtype);
4605    desc |= zt << MEMOPIDX_SHIFT;
4606    desc = simd_desc(vsz, vsz, desc);
4607    t_desc = tcg_const_i32(desc);
4608    t_pg = tcg_temp_new_ptr();
4609
4610    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4611    fn(cpu_env, t_pg, addr, t_desc);
4612
4613    tcg_temp_free_ptr(t_pg);
4614    tcg_temp_free_i32(t_desc);
4615}
4616
4617static void do_ld_zpa(DisasContext *s, int zt, int pg,
4618                      TCGv_i64 addr, int dtype, int nreg)
4619{
4620    static gen_helper_gvec_mem * const fns[2][16][4] = {
4621        /* Little-endian */
4622        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4623            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4624          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4625          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4626          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4627
4628          { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4629          { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4630            gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4631          { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4632          { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4633
4634          { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4635          { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4636          { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4637            gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4638          { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4639
4640          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4641          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4642          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4643          { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4644            gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4645
4646        /* Big-endian */
4647        { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4648            gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4649          { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4650          { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4651          { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4652
4653          { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4654          { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4655            gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4656          { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4657          { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4658
4659          { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4660          { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4661          { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4662            gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4663          { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4664
4665          { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4666          { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4667          { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4668          { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4669            gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
4670    };
4671    gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];
4672
4673    /* While there are holes in the table, they are not
4674     * accessible via the instruction encoding.
4675     */
4676    assert(fn != NULL);
4677    do_mem_zpa(s, zt, pg, addr, dtype, fn);
4678}
4679
4680static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
4681{
4682    if (a->rm == 31) {
4683        return false;
4684    }
4685    if (sve_access_check(s)) {
4686        TCGv_i64 addr = new_tmp_a64(s);
4687        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4688        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4689        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4690    }
4691    return true;
4692}
4693
4694static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
4695{
4696    if (sve_access_check(s)) {
4697        int vsz = vec_full_reg_size(s);
4698        int elements = vsz >> dtype_esz[a->dtype];
4699        TCGv_i64 addr = new_tmp_a64(s);
4700
4701        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4702                         (a->imm * elements * (a->nreg + 1))
4703                         << dtype_msz(a->dtype));
4704        do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4705    }
4706    return true;
4707}
4708
4709static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
4710{
4711    static gen_helper_gvec_mem * const fns[2][16] = {
4712        /* Little-endian */
4713        { gen_helper_sve_ldff1bb_r,
4714          gen_helper_sve_ldff1bhu_r,
4715          gen_helper_sve_ldff1bsu_r,
4716          gen_helper_sve_ldff1bdu_r,
4717
4718          gen_helper_sve_ldff1sds_le_r,
4719          gen_helper_sve_ldff1hh_le_r,
4720          gen_helper_sve_ldff1hsu_le_r,
4721          gen_helper_sve_ldff1hdu_le_r,
4722
4723          gen_helper_sve_ldff1hds_le_r,
4724          gen_helper_sve_ldff1hss_le_r,
4725          gen_helper_sve_ldff1ss_le_r,
4726          gen_helper_sve_ldff1sdu_le_r,
4727
4728          gen_helper_sve_ldff1bds_r,
4729          gen_helper_sve_ldff1bss_r,
4730          gen_helper_sve_ldff1bhs_r,
4731          gen_helper_sve_ldff1dd_le_r },
4732
4733        /* Big-endian */
4734        { gen_helper_sve_ldff1bb_r,
4735          gen_helper_sve_ldff1bhu_r,
4736          gen_helper_sve_ldff1bsu_r,
4737          gen_helper_sve_ldff1bdu_r,
4738
4739          gen_helper_sve_ldff1sds_be_r,
4740          gen_helper_sve_ldff1hh_be_r,
4741          gen_helper_sve_ldff1hsu_be_r,
4742          gen_helper_sve_ldff1hdu_be_r,
4743
4744          gen_helper_sve_ldff1hds_be_r,
4745          gen_helper_sve_ldff1hss_be_r,
4746          gen_helper_sve_ldff1ss_be_r,
4747          gen_helper_sve_ldff1sdu_be_r,
4748
4749          gen_helper_sve_ldff1bds_r,
4750          gen_helper_sve_ldff1bss_r,
4751          gen_helper_sve_ldff1bhs_r,
4752          gen_helper_sve_ldff1dd_be_r },
4753    };
4754
4755    if (sve_access_check(s)) {
4756        TCGv_i64 addr = new_tmp_a64(s);
4757        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4758        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4759        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
4760                   fns[s->be_data == MO_BE][a->dtype]);
4761    }
4762    return true;
4763}
4764
4765static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
4766{
4767    static gen_helper_gvec_mem * const fns[2][16] = {
4768        /* Little-endian */
4769        { gen_helper_sve_ldnf1bb_r,
4770          gen_helper_sve_ldnf1bhu_r,
4771          gen_helper_sve_ldnf1bsu_r,
4772          gen_helper_sve_ldnf1bdu_r,
4773
4774          gen_helper_sve_ldnf1sds_le_r,
4775          gen_helper_sve_ldnf1hh_le_r,
4776          gen_helper_sve_ldnf1hsu_le_r,
4777          gen_helper_sve_ldnf1hdu_le_r,
4778
4779          gen_helper_sve_ldnf1hds_le_r,
4780          gen_helper_sve_ldnf1hss_le_r,
4781          gen_helper_sve_ldnf1ss_le_r,
4782          gen_helper_sve_ldnf1sdu_le_r,
4783
4784          gen_helper_sve_ldnf1bds_r,
4785          gen_helper_sve_ldnf1bss_r,
4786          gen_helper_sve_ldnf1bhs_r,
4787          gen_helper_sve_ldnf1dd_le_r },
4788
4789        /* Big-endian */
4790        { gen_helper_sve_ldnf1bb_r,
4791          gen_helper_sve_ldnf1bhu_r,
4792          gen_helper_sve_ldnf1bsu_r,
4793          gen_helper_sve_ldnf1bdu_r,
4794
4795          gen_helper_sve_ldnf1sds_be_r,
4796          gen_helper_sve_ldnf1hh_be_r,
4797          gen_helper_sve_ldnf1hsu_be_r,
4798          gen_helper_sve_ldnf1hdu_be_r,
4799
4800          gen_helper_sve_ldnf1hds_be_r,
4801          gen_helper_sve_ldnf1hss_be_r,
4802          gen_helper_sve_ldnf1ss_be_r,
4803          gen_helper_sve_ldnf1sdu_be_r,
4804
4805          gen_helper_sve_ldnf1bds_r,
4806          gen_helper_sve_ldnf1bss_r,
4807          gen_helper_sve_ldnf1bhs_r,
4808          gen_helper_sve_ldnf1dd_be_r },
4809    };
4810
4811    if (sve_access_check(s)) {
4812        int vsz = vec_full_reg_size(s);
4813        int elements = vsz >> dtype_esz[a->dtype];
4814        int off = (a->imm * elements) << dtype_msz(a->dtype);
4815        TCGv_i64 addr = new_tmp_a64(s);
4816
4817        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4818        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
4819                   fns[s->be_data == MO_BE][a->dtype]);
4820    }
4821    return true;
4822}
4823
4824static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4825{
4826    static gen_helper_gvec_mem * const fns[2][4] = {
4827        { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_le_r,
4828          gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
4829        { gen_helper_sve_ld1bb_r,    gen_helper_sve_ld1hh_be_r,
4830          gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
4831    };
4832    unsigned vsz = vec_full_reg_size(s);
4833    TCGv_ptr t_pg;
4834    TCGv_i32 t_desc;
4835    int desc, poff;
4836
4837    /* Load the first quadword using the normal predicated load helpers.  */
4838    desc = sve_memopidx(s, msz_dtype(s, msz));
4839    desc |= zt << MEMOPIDX_SHIFT;
4840    desc = simd_desc(16, 16, desc);
4841    t_desc = tcg_const_i32(desc);
4842
4843    poff = pred_full_reg_offset(s, pg);
4844    if (vsz > 16) {
4845        /*
4846         * Zero-extend the first 16 bits of the predicate into a temporary.
4847         * This avoids triggering an assert making sure we don't have bits
4848         * set within a predicate beyond VQ, but we have lowered VQ to 1
4849         * for this load operation.
4850         */
4851        TCGv_i64 tmp = tcg_temp_new_i64();
4852#ifdef HOST_WORDS_BIGENDIAN
4853        poff += 6;
4854#endif
4855        tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4856
4857        poff = offsetof(CPUARMState, vfp.preg_tmp);
4858        tcg_gen_st_i64(tmp, cpu_env, poff);
4859        tcg_temp_free_i64(tmp);
4860    }
4861
4862    t_pg = tcg_temp_new_ptr();
4863    tcg_gen_addi_ptr(t_pg, cpu_env, poff);
4864
4865    fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
4866
4867    tcg_temp_free_ptr(t_pg);
4868    tcg_temp_free_i32(t_desc);
4869
4870    /* Replicate that first quadword.  */
4871    if (vsz > 16) {
4872        unsigned dofs = vec_full_reg_offset(s, zt);
4873        tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4874    }
4875}
4876
4877static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a)
4878{
4879    if (a->rm == 31) {
4880        return false;
4881    }
4882    if (sve_access_check(s)) {
4883        int msz = dtype_msz(a->dtype);
4884        TCGv_i64 addr = new_tmp_a64(s);
4885        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4886        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4887        do_ldrq(s, a->rd, a->pg, addr, msz);
4888    }
4889    return true;
4890}
4891
4892static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
4893{
4894    if (sve_access_check(s)) {
4895        TCGv_i64 addr = new_tmp_a64(s);
4896        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4897        do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4898    }
4899    return true;
4900}
4901
4902/* Load and broadcast element.  */
4903static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
4904{
4905    if (!sve_access_check(s)) {
4906        return true;
4907    }
4908
4909    unsigned vsz = vec_full_reg_size(s);
4910    unsigned psz = pred_full_reg_size(s);
4911    unsigned esz = dtype_esz[a->dtype];
4912    unsigned msz = dtype_msz(a->dtype);
4913    TCGLabel *over = gen_new_label();
4914    TCGv_i64 temp;
4915
4916    /* If the guarding predicate has no bits set, no load occurs.  */
4917    if (psz <= 8) {
4918        /* Reduce the pred_esz_masks value simply to reduce the
4919         * size of the code generated here.
4920         */
4921        uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4922        temp = tcg_temp_new_i64();
4923        tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4924        tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4925        tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4926        tcg_temp_free_i64(temp);
4927    } else {
4928        TCGv_i32 t32 = tcg_temp_new_i32();
4929        find_last_active(s, t32, esz, a->pg);
4930        tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4931        tcg_temp_free_i32(t32);
4932    }
4933
4934    /* Load the data.  */
4935    temp = tcg_temp_new_i64();
4936    tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4937    tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4938                        s->be_data | dtype_mop[a->dtype]);
4939
4940    /* Broadcast to *all* elements.  */
4941    tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4942                         vsz, vsz, temp);
4943    tcg_temp_free_i64(temp);
4944
4945    /* Zero the inactive elements.  */
4946    gen_set_label(over);
4947    do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4948    return true;
4949}
4950
4951static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4952                      int msz, int esz, int nreg)
4953{
4954    static gen_helper_gvec_mem * const fn_single[2][4][4] = {
4955        { { gen_helper_sve_st1bb_r,
4956            gen_helper_sve_st1bh_r,
4957            gen_helper_sve_st1bs_r,
4958            gen_helper_sve_st1bd_r },
4959          { NULL,
4960            gen_helper_sve_st1hh_le_r,
4961            gen_helper_sve_st1hs_le_r,
4962            gen_helper_sve_st1hd_le_r },
4963          { NULL, NULL,
4964            gen_helper_sve_st1ss_le_r,
4965            gen_helper_sve_st1sd_le_r },
4966          { NULL, NULL, NULL,
4967            gen_helper_sve_st1dd_le_r } },
4968        { { gen_helper_sve_st1bb_r,
4969            gen_helper_sve_st1bh_r,
4970            gen_helper_sve_st1bs_r,
4971            gen_helper_sve_st1bd_r },
4972          { NULL,
4973            gen_helper_sve_st1hh_be_r,
4974            gen_helper_sve_st1hs_be_r,
4975            gen_helper_sve_st1hd_be_r },
4976          { NULL, NULL,
4977            gen_helper_sve_st1ss_be_r,
4978            gen_helper_sve_st1sd_be_r },
4979          { NULL, NULL, NULL,
4980            gen_helper_sve_st1dd_be_r } },
4981    };
4982    static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
4983        { { gen_helper_sve_st2bb_r,
4984            gen_helper_sve_st2hh_le_r,
4985            gen_helper_sve_st2ss_le_r,
4986            gen_helper_sve_st2dd_le_r },
4987          { gen_helper_sve_st3bb_r,
4988            gen_helper_sve_st3hh_le_r,
4989            gen_helper_sve_st3ss_le_r,
4990            gen_helper_sve_st3dd_le_r },
4991          { gen_helper_sve_st4bb_r,
4992            gen_helper_sve_st4hh_le_r,
4993            gen_helper_sve_st4ss_le_r,
4994            gen_helper_sve_st4dd_le_r } },
4995        { { gen_helper_sve_st2bb_r,
4996            gen_helper_sve_st2hh_be_r,
4997            gen_helper_sve_st2ss_be_r,
4998            gen_helper_sve_st2dd_be_r },
4999          { gen_helper_sve_st3bb_r,
5000            gen_helper_sve_st3hh_be_r,
5001            gen_helper_sve_st3ss_be_r,
5002            gen_helper_sve_st3dd_be_r },
5003          { gen_helper_sve_st4bb_r,
5004            gen_helper_sve_st4hh_be_r,
5005            gen_helper_sve_st4ss_be_r,
5006            gen_helper_sve_st4dd_be_r } },
5007    };
5008    gen_helper_gvec_mem *fn;
5009    int be = s->be_data == MO_BE;
5010
5011    if (nreg == 0) {
5012        /* ST1 */
5013        fn = fn_single[be][msz][esz];
5014    } else {
5015        /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
5016        assert(msz == esz);
5017        fn = fn_multiple[be][nreg - 1][msz];
5018    }
5019    assert(fn != NULL);
5020    do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), fn);
5021}
5022
5023static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
5024{
5025    if (a->rm == 31 || a->msz > a->esz) {
5026        return false;
5027    }
5028    if (sve_access_check(s)) {
5029        TCGv_i64 addr = new_tmp_a64(s);
5030        tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
5031        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5032        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5033    }
5034    return true;
5035}
5036
5037static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
5038{
5039    if (a->msz > a->esz) {
5040        return false;
5041    }
5042    if (sve_access_check(s)) {
5043        int vsz = vec_full_reg_size(s);
5044        int elements = vsz >> a->esz;
5045        TCGv_i64 addr = new_tmp_a64(s);
5046
5047        tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5048                         (a->imm * elements * (a->nreg + 1)) << a->msz);
5049        do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5050    }
5051    return true;
5052}
5053
5054/*
5055 *** SVE gather loads / scatter stores
5056 */
5057
5058static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
5059                       int scale, TCGv_i64 scalar, int msz,
5060                       gen_helper_gvec_mem_scatter *fn)
5061{
5062    unsigned vsz = vec_full_reg_size(s);
5063    TCGv_ptr t_zm = tcg_temp_new_ptr();
5064    TCGv_ptr t_pg = tcg_temp_new_ptr();
5065    TCGv_ptr t_zt = tcg_temp_new_ptr();
5066    TCGv_i32 t_desc;
5067    int desc;
5068
5069    desc = sve_memopidx(s, msz_dtype(s, msz));
5070    desc |= scale << MEMOPIDX_SHIFT;
5071    desc = simd_desc(vsz, vsz, desc);
5072    t_desc = tcg_const_i32(desc);
5073
5074    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5075    tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5076    tcg_gen_addi_ptr(t_zt, cpu_env,