/* qemu/target/arm/translate-a64.c */
   1/*
   2 *  AArch64 translation
   3 *
   4 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "qemu/osdep.h"
  20
  21#include "cpu.h"
  22#include "exec/exec-all.h"
  23#include "tcg/tcg-op.h"
  24#include "tcg/tcg-op-gvec.h"
  25#include "qemu/log.h"
  26#include "arm_ldst.h"
  27#include "translate.h"
  28#include "internals.h"
  29#include "qemu/host-utils.h"
  30
  31#include "semihosting/semihost.h"
  32#include "exec/gen-icount.h"
  33
  34#include "exec/helper-proto.h"
  35#include "exec/helper-gen.h"
  36#include "exec/log.h"
  37
  38#include "translate-a64.h"
  39#include "qemu/atomic128.h"
  40
/*
 * TCG globals caching AArch64 CPU state: the program counter and the
 * 32 general-purpose registers.  cpu_X[31] maps to SP; the zero
 * register is handled in the cpu_reg() accessors instead.
 */
static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling: mirrors CPUARMState.exclusive_high */
static TCGv_i64 cpu_exclusive_high;

/* Debug names used when registering the globals; index 30 is "lr",
 * index 31 is "sp".
 */
static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};
  53
/* Shift/rotate kinds applied to data-processing operands. */
enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};
  60
/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

/*
 * One decode-table entry; presumably an insn is dispatched to disas_fn
 * when (insn & mask) == pattern — confirm against the table lookup
 * helper elsewhere in this file.
 */
typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;
  71
/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    /* Program counter */
    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    /* X0-X30 plus SP at index 31, backed by CPUARMState.xregs[] */
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}
  89
  90/*
  91 * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
  92 */
  93static int get_a64_user_mem_index(DisasContext *s)
  94{
  95    /*
  96     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
  97     * which is the usual mmu_idx for this cpu state.
  98     */
  99    ARMMMUIdx useridx = s->mmu_idx;
 100
 101    if (s->unpriv) {
 102        /*
 103         * We have pre-computed the condition for AccType_UNPRIV.
 104         * Therefore we should never get here with a mmu_idx for
 105         * which we do not know the corresponding user mmu_idx.
 106         */
 107        switch (useridx) {
 108        case ARMMMUIdx_E10_1:
 109        case ARMMMUIdx_E10_1_PAN:
 110            useridx = ARMMMUIdx_E10_0;
 111            break;
 112        case ARMMMUIdx_E20_2:
 113        case ARMMMUIdx_E20_2_PAN:
 114            useridx = ARMMMUIdx_E20_0;
 115            break;
 116        case ARMMMUIdx_SE10_1:
 117        case ARMMMUIdx_SE10_1_PAN:
 118            useridx = ARMMMUIdx_SE10_0;
 119            break;
 120        case ARMMMUIdx_SE20_2:
 121        case ARMMMUIdx_SE20_2_PAN:
 122            useridx = ARMMMUIdx_SE20_0;
 123            break;
 124        default:
 125            g_assert_not_reached();
 126        }
 127    }
 128    return arm_to_core_mmu_idx(useridx);
 129}
 130
 131static void reset_btype(DisasContext *s)
 132{
 133    if (s->btype != 0) {
 134        TCGv_i32 zero = tcg_const_i32(0);
 135        tcg_gen_st_i32(zero, cpu_env, offsetof(CPUARMState, btype));
 136        tcg_temp_free_i32(zero);
 137        s->btype = 0;
 138    }
 139}
 140
/* Store a non-zero BTYPE value into CPUARMState.btype. */
static void set_btype(DisasContext *s, int val)
{
    TCGv_i32 tcg_val;

    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);

    tcg_val = tcg_const_i32(val);
    tcg_gen_st_i32(tcg_val, cpu_env, offsetof(CPUARMState, btype));
    tcg_temp_free_i32(tcg_val);
    /*
     * NOTE(review): -1 appears to mark the cached btype as "set to some
     * non-zero value" (vs. 0 meaning known-clear) — confirm against the
     * consumers of s->btype.
     */
    s->btype = -1;
}
 153
/* Set the emulated PC to a known immediate value. */
void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}
 158
/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here We have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55.  */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}
 202
/* Write a dynamically-computed value to the PC, applying instruction TBI. */
static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
}
 211
/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register.  But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = new_tmp_a64(s);
#ifdef CONFIG_USER_ONLY
    /* No TLB in user-only: strip the tag byte here per s->tbid. */
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    /* System mode: leave the address untouched; the TLB handles TBI. */
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}
 234
/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    /* Clear the 4-bit allocation tag field, bits [59:56]. */
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}
 240
 241static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
 242                             MMUAccessType acc, int log2_size)
 243{
 244    TCGv_i32 t_acc = tcg_const_i32(acc);
 245    TCGv_i32 t_idx = tcg_const_i32(get_mem_index(s));
 246    TCGv_i32 t_size = tcg_const_i32(1 << log2_size);
 247
 248    gen_helper_probe_access(cpu_env, ptr, t_acc, t_idx, t_size);
 249    tcg_temp_free_i32(t_acc);
 250    tcg_temp_free_i32(t_idx);
 251    tcg_temp_free_i32(t_size);
 252}
 253
/*
 * For MTE, check a single logical or atomic access.  This probes a single
 * address, the exact one specified.  The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      int log2_size, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i32 tcg_desc;
        TCGv_i64 ret;
        int desc = 0;

        /* Pack the check parameters into an MTEDESC descriptor word. */
        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << log2_size) - 1);
        tcg_desc = tcg_const_i32(desc);

        /* The helper returns the (possibly cleaned) address to use. */
        ret = new_tmp_a64(s);
        gen_helper_mte_check(ret, cpu_env, tcg_desc, addr);
        tcg_temp_free_i32(tcg_desc);

        return ret;
    }
    /* No tag check required: fall back to plain TBI cleaning. */
    return clean_data_tbi(s, addr);
}
 285
/* As gen_mte_check1_mmuidx, for a non-unprivileged access with the
 * current memory index.
 */
TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int log2_size)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
                                 false, get_mem_index(s));
}
 292
/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int size)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i32 tcg_desc;
        TCGv_i64 ret;
        int desc = 0;

        /* Pack the check parameters into an MTEDESC descriptor word;
         * unlike gen_mte_check1, size is a byte count, not log2.
         */
        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, size - 1);
        tcg_desc = tcg_const_i32(desc);

        ret = new_tmp_a64(s);
        gen_helper_mte_check(ret, cpu_env, tcg_desc, addr);
        tcg_temp_free_i32(tcg_desc);

        return ret;
    }
    /* No tag check required: fall back to plain TBI cleaning. */
    return clean_data_tbi(s, addr);
}
 319
/* A condition test expressed as a TCG condition over a 64-bit value. */
typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;
 324
/* Build a 64-bit DisasCompare for condition code cc; free with a64_free_cc. */
static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);

    arm_free_cc(&c32);
}
 339
/* Release the temporary allocated by a64_test_cc(). */
static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}
 344
 345static void gen_exception_internal(int excp)
 346{
 347    TCGv_i32 tcg_excp = tcg_const_i32(excp);
 348
 349    assert(excp_is_internal(excp));
 350    gen_helper_exception_internal(cpu_env, tcg_excp);
 351    tcg_temp_free_i32(tcg_excp);
 352}
 353
/* Raise an internal exception at the given pc; the TB ends here. */
static void gen_exception_internal_insn(DisasContext *s, uint64_t pc, int excp)
{
    gen_a64_set_pc_im(pc);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}
 360
/* Raise a breakpoint exception with the given syndrome; the TB ends here. */
static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    TCGv_i32 tcg_syn;

    /* Report the address of the current (not next) instruction. */
    gen_a64_set_pc_im(s->pc_curr);
    tcg_syn = tcg_const_i32(syndrome);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
    tcg_temp_free_i32(tcg_syn);
    s->base.is_jmp = DISAS_NORETURN;
}
 371
static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}
 387
 388static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
 389{
 390    if (s->ss_active) {
 391        return false;
 392    }
 393    return translator_use_goto_tb(&s->base, dest);
 394}
 395
/* End the TB with a jump to dest, chaining TBs directly when allowed. */
static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    if (use_goto_tb(s, dest)) {
        /* Direct chaining: exit via slot n of this TB. */
        tcg_gen_goto_tb(n);
        gen_a64_set_pc_im(dest);
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_set_pc_im(dest);
        if (s->ss_active) {
            /* Architectural single step: raise the swstep exception. */
            gen_step_complete_exception(s);
        } else if (s->base.singlestep_enabled) {
            /* gdbstub single step: stop with a debug exception. */
            gen_exception_internal(EXCP_DEBUG);
        } else {
            /* Indirect jump via the TB lookup cache. */
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}
 415
/* Reset the per-insn temporary tracking; zero the array under
 * CONFIG_DEBUG_TCG so stale pointers are easier to catch.
 */
static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
#endif
    s->tmp_a64_count = 0;
}
 423
 424static void free_tmp_a64(DisasContext *s)
 425{
 426    int i;
 427    for (i = 0; i < s->tmp_a64_count; i++) {
 428        tcg_temp_free_i64(s->tmp_a64[i]);
 429    }
 430    init_tmp_a64_array(s);
 431}
 432
/* Allocate an auto-freed i64 temporary, tracked in s->tmp_a64. */
TCGv_i64 new_tmp_a64(DisasContext *s)
{
    assert(s->tmp_a64_count < TMP_A64_MAX);
    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
}
 438
/* As new_tmp_a64, but using a local temporary (survives branches). */
TCGv_i64 new_tmp_a64_local(DisasContext *s)
{
    assert(s->tmp_a64_count < TMP_A64_MAX);
    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_local_new_i64();
}
 444
 445TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 446{
 447    TCGv_i64 t = new_tmp_a64(s);
 448    tcg_gen_movi_i64(t, 0);
 449    return t;
 450}
 451
 452/*
 453 * Register access functions
 454 *
 455 * These functions are used for directly accessing a register in where
 456 * changes to the final register value are likely to be made. If you
 457 * need to use a register for temporary calculation (e.g. index type
 458 * operations) use the read_* form.
 459 *
 460 * B1.2.1 Register mappings
 461 *
 462 * In instruction register encoding 31 can refer to ZR (zero register) or
 463 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 464 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 465 * This is the point of the _sp forms.
 466 */
 467TCGv_i64 cpu_reg(DisasContext *s, int reg)
 468{
 469    if (reg == 31) {
 470        return new_tmp_a64_zero(s);
 471    } else {
 472        return cpu_X[reg];
 473    }
 474}
 475
/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    /* cpu_X[31] is the stack pointer, so no special case is needed. */
    return cpu_X[reg];
}
 481
 482/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 483 * representing the register contents. This TCGv is an auto-freed
 484 * temporary so it need not be explicitly freed, and may be modified.
 485 */
 486TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 487{
 488    TCGv_i64 v = new_tmp_a64(s);
 489    if (reg != 31) {
 490        if (sf) {
 491            tcg_gen_mov_i64(v, cpu_X[reg]);
 492        } else {
 493            tcg_gen_ext32u_i64(v, cpu_X[reg]);
 494        }
 495    } else {
 496        tcg_gen_movi_i64(v, 0);
 497    }
 498    return v;
 499}
 500
 501TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 502{
 503    TCGv_i64 v = new_tmp_a64(s);
 504    if (sf) {
 505        tcg_gen_mov_i64(v, cpu_X[reg]);
 506    } else {
 507        tcg_gen_ext32u_i64(v, cpu_X[reg]);
 508    }
 509    return v;
 510}
 511
/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}
 521
/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}
 527
/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}
 541
/* Read single-precision register Sn; caller frees the returned temp. */
static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}
 549
/* Read half-precision register Hn, zero-extended into an i32;
 * caller frees the returned temp.
 */
static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
    return v;
}
 557
/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}
 569
/* Write v to Dn and zero the upper 64+ bits of Qn, per the architecture. */
void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, cpu_env, ofs);
    clear_vec_high(s, false, reg);
}
 577
/* Write v to Sn, zero-extending and clearing the rest of Qn. */
static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
    tcg_temp_free_i64(tmp);
}
 586
/* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    /* Operation size is 16 bytes for Q, 8 for D registers. */
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}
 594
/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}
 604
/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}
 612
/* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}
 621
/* Expand a 2-operand operation using an out-of-line helper.  */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}
 630
/* Expand a 3-operand operation using an out-of-line helper.  */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}
 640
/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    /* Half-precision ops use the FP16 variant of the FP status flags. */
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    tcg_temp_free_ptr(fpst);
}
 655
/* Expand a 3-operand + qc + operation using an out-of-line helper.  */
static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
                            int rm, gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    /* Pass a pointer to the saturation (QC) flag as the extra operand. */
    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), qc_ptr,
                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
    tcg_temp_free_ptr(qc_ptr);
}
 669
/* Expand a 4-operand operation using an out-of-line helper.  */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}
 680
/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    /* Half-precision ops use the FP16 variant of the FP status flags. */
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    tcg_temp_free_ptr(fpst);
}
 697
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    /* NF = high half of result; ZF = OR of both halves (0 iff result==0). */
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
 706
/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        /* 32-bit: NF and ZF both come from the low word of the result. */
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}
 719
/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        /* 128-bit add of (t0 + t1): the high word is the carry out. */
        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        /* Overflow iff operands had equal signs and result differs:
         * V = (result ^ t0) & ~(t0 ^ t1), taken from the sign bit.
         */
        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        /* NF holds the 32-bit result, CF the carry out. */
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF)
;
        /* V = (result ^ t0) & ~(t0 ^ t1) */
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        /* dest is the zero-extended 32-bit result. */
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}
 766
/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        /* C is set when there is no borrow, i.e. t0 >= t1 unsigned. */
        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        /* V = (result ^ t0) & (t0 ^ t1), taken from the sign bit. */
        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        /* NF holds the 32-bit result. */
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        /* C is set when there is no borrow. */
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        /* dest is the zero-extended 32-bit result. */
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
 813
/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);
    tcg_temp_free_i64(flag);

    if (!sf) {
        /* 32-bit form: truncate the result to the low word. */
        tcg_gen_ext32u_i64(dest, dest);
    }
}
 827
/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64();
        cf_64 = tcg_temp_new_i64();
        vf_64 = tcg_temp_new_i64();
        tmp = tcg_const_i64(0);

        /* Two double-width adds: t0 + CF, then + t1, accumulating carry. */
        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        /* V = (result ^ t0) & ~(t0 ^ t1), taken from the sign bit. */
        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32();
        t1_32 = tcg_temp_new_i32();
        tmp = tcg_const_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        /* NF accumulates the result, CF the carry, across two adds. */
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        /* V = (result ^ t0) & ~(t0 ^ t1) */
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}
 877
 878/*
 879 * Load/Store generators
 880 */
 881
/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    memop = finalize_memop(s, memop);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    /* Optionally record an ISS (instruction-specific syndrome) so a
     * data abort on this store can be reported with full syndrome info.
     */
    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}
 907
/* As do_gpr_st_memidx, using the current memory index. */
static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}
 917
 918/*
 919 * Load from memory to GPR register
 920 */
 921static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
 922                             MemOp memop, bool extend, int memidx,
 923                             bool iss_valid, unsigned int iss_srt,
 924                             bool iss_sf, bool iss_ar)
 925{
 926    memop = finalize_memop(s, memop);
 927    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 928
 929    if (extend && (memop & MO_SIGN)) {
 930        g_assert((memop & MO_SIZE) <= MO_32);
 931        tcg_gen_ext32u_i64(dest, dest);
 932    }
 933
 934    if (iss_valid) {
 935        uint32_t syn;
 936
 937        syn = syn_data_abort_with_iss(0,
 938                                      (memop & MO_SIZE),
 939                                      (memop & MO_SIGN) != 0,
 940                                      iss_srt,
 941                                      iss_sf,
 942                                      iss_ar,
 943                                      0, 0, 0, 0, 0, false);
 944        disas_set_insn_syndrome(s, syn);
 945    }
 946}
 947
/*
 * Load from memory to GPR, using the mmu index of the current
 * translation regime.  Thin wrapper around do_gpr_ld_memidx(); see
 * there for the meaning of "extend" and the ISS (syndrome) arguments.
 */
static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}
 956
 957/*
 958 * Store from FP register to memory
 959 */
 960static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 961{
 962    /* This writes the bottom N bits of a 128 bit wide vector to memory */
 963    TCGv_i64 tmplo = tcg_temp_new_i64();
 964    MemOp mop;
 965
 966    tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));
 967
 968    if (size < 4) {
 969        mop = finalize_memop(s, size);
 970        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
 971    } else {
 972        bool be = s->be_data == MO_BE;
 973        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
 974        TCGv_i64 tmphi = tcg_temp_new_i64();
 975
 976        tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));
 977
 978        mop = s->be_data | MO_Q;
 979        tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
 980                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
 981        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 982        tcg_gen_qemu_st_i64(be ? tmplo : tmphi, tcg_hiaddr,
 983                            get_mem_index(s), mop);
 984
 985        tcg_temp_free_i64(tcg_hiaddr);
 986        tcg_temp_free_i64(tmphi);
 987    }
 988
 989    tcg_temp_free_i64(tmplo);
 990}
 991
 992/*
 993 * Load from memory to FP register
 994 */
 995static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 996{
 997    /* This always zero-extends and writes to a full 128 bit wide vector */
 998    TCGv_i64 tmplo = tcg_temp_new_i64();
 999    TCGv_i64 tmphi = NULL;
1000    MemOp mop;
1001
1002    if (size < 4) {
1003        mop = finalize_memop(s, size);
1004        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
1005    } else {
1006        bool be = s->be_data == MO_BE;
1007        TCGv_i64 tcg_hiaddr;
1008
1009        tmphi = tcg_temp_new_i64();
1010        tcg_hiaddr = tcg_temp_new_i64();
1011
1012        mop = s->be_data | MO_Q;
1013        tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
1014                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
1015        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
1016        tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr,
1017                            get_mem_index(s), mop);
1018        tcg_temp_free_i64(tcg_hiaddr);
1019    }
1020
1021    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
1022    tcg_temp_free_i64(tmplo);
1023
1024    if (tmphi) {
1025        tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
1026        tcg_temp_free_i64(tmphi);
1027    }
1028    clear_vec_high(s, tmphi != NULL, destidx);
1029}
1030
1031/*
1032 * Vector load/store helpers.
1033 *
1034 * The principal difference between this and a FP load is that we don't
1035 * zero extend as we are filling a partial chunk of the vector register.
1036 * These functions don't support 128 bit loads/stores, which would be
1037 * normal load/store operations.
1038 *
1039 * The _i32 versions are useful when operating on 32 bit quantities
1040 * (eg for floating point single or using Neon helper functions).
1041 */
1042
1043/* Get value of an element within a vector register */
1044static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1045                             int element, MemOp memop)
1046{
1047    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1048    switch (memop) {
1049    case MO_8:
1050        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
1051        break;
1052    case MO_16:
1053        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
1054        break;
1055    case MO_32:
1056        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
1057        break;
1058    case MO_8|MO_SIGN:
1059        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
1060        break;
1061    case MO_16|MO_SIGN:
1062        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
1063        break;
1064    case MO_32|MO_SIGN:
1065        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
1066        break;
1067    case MO_64:
1068    case MO_64|MO_SIGN:
1069        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
1070        break;
1071    default:
1072        g_assert_not_reached();
1073    }
1074}
1075
1076static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1077                                 int element, MemOp memop)
1078{
1079    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1080    switch (memop) {
1081    case MO_8:
1082        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
1083        break;
1084    case MO_16:
1085        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
1086        break;
1087    case MO_8|MO_SIGN:
1088        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
1089        break;
1090    case MO_16|MO_SIGN:
1091        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
1092        break;
1093    case MO_32:
1094    case MO_32|MO_SIGN:
1095        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
1096        break;
1097    default:
1098        g_assert_not_reached();
1099    }
1100}
1101
1102/* Set value of an element within a vector register */
1103static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1104                              int element, MemOp memop)
1105{
1106    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1107    switch (memop) {
1108    case MO_8:
1109        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
1110        break;
1111    case MO_16:
1112        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
1113        break;
1114    case MO_32:
1115        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
1116        break;
1117    case MO_64:
1118        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
1119        break;
1120    default:
1121        g_assert_not_reached();
1122    }
1123}
1124
1125static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1126                                  int destidx, int element, MemOp memop)
1127{
1128    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1129    switch (memop) {
1130    case MO_8:
1131        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
1132        break;
1133    case MO_16:
1134        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
1135        break;
1136    case MO_32:
1137        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
1138        break;
1139    default:
1140        g_assert_not_reached();
1141    }
1142}
1143
1144/* Store from vector register to memory */
1145static void do_vec_st(DisasContext *s, int srcidx, int element,
1146                      TCGv_i64 tcg_addr, MemOp mop)
1147{
1148    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1149
1150    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
1151    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1152
1153    tcg_temp_free_i64(tcg_tmp);
1154}
1155
1156/* Load from memory to vector register */
1157static void do_vec_ld(DisasContext *s, int destidx, int element,
1158                      TCGv_i64 tcg_addr, MemOp mop)
1159{
1160    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1161
1162    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
1163    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
1164
1165    tcg_temp_free_i64(tcg_tmp);
1166}
1167
1168/* Check that FP/Neon access is enabled. If it is, return
1169 * true. If not, emit code to generate an appropriate exception,
1170 * and return false; the caller should not emit any code for
1171 * the instruction. Note that this check must happen after all
1172 * unallocated-encoding checks (otherwise the syndrome information
1173 * for the resulting exception will be incorrect).
1174 */
1175static bool fp_access_check(DisasContext *s)
1176{
1177    if (s->fp_excp_el) {
1178        assert(!s->fp_access_checked);
1179        s->fp_access_checked = true;
1180
1181        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
1182                           syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
1183        return false;
1184    }
1185    s->fp_access_checked = true;
1186    return true;
1187}
1188
1189/* Check that SVE access is enabled.  If it is, return true.
1190 * If not, emit code to generate an appropriate exception and return false.
1191 */
1192bool sve_access_check(DisasContext *s)
1193{
1194    if (s->sve_excp_el) {
1195        assert(!s->sve_access_checked);
1196        s->sve_access_checked = true;
1197
1198        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
1199                           syn_sve_access_trap(), s->sve_excp_el);
1200        return false;
1201    }
1202    s->sve_access_checked = true;
1203    return fp_access_check(s);
1204}
1205
1206/*
1207 * This utility function is for doing register extension with an
1208 * optional shift. You will likely want to pass a temporary for the
1209 * destination register. See DecodeRegExtend() in the ARM ARM.
1210 */
1211static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1212                              int option, unsigned int shift)
1213{
1214    int extsize = extract32(option, 0, 2);
1215    bool is_signed = extract32(option, 2, 1);
1216
1217    if (is_signed) {
1218        switch (extsize) {
1219        case 0:
1220            tcg_gen_ext8s_i64(tcg_out, tcg_in);
1221            break;
1222        case 1:
1223            tcg_gen_ext16s_i64(tcg_out, tcg_in);
1224            break;
1225        case 2:
1226            tcg_gen_ext32s_i64(tcg_out, tcg_in);
1227            break;
1228        case 3:
1229            tcg_gen_mov_i64(tcg_out, tcg_in);
1230            break;
1231        }
1232    } else {
1233        switch (extsize) {
1234        case 0:
1235            tcg_gen_ext8u_i64(tcg_out, tcg_in);
1236            break;
1237        case 1:
1238            tcg_gen_ext16u_i64(tcg_out, tcg_in);
1239            break;
1240        case 2:
1241            tcg_gen_ext32u_i64(tcg_out, tcg_in);
1242            break;
1243        case 3:
1244            tcg_gen_mov_i64(tcg_out, tcg_in);
1245            break;
1246        }
1247    }
1248
1249    if (shift) {
1250        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1251    }
1252}
1253
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}
1266
1267/*
1268 * This provides a simple table based table lookup decoder. It is
1269 * intended to be used when the relevant bits for decode are too
1270 * awkwardly placed and switch/if based logic would be confusing and
1271 * deeply nested. Since it's a linear search through the table, tables
1272 * should be kept small.
1273 *
1274 * It returns the first handler where insn & mask == pattern, or
1275 * NULL if there is no match.
1276 * The table is terminated by an empty mask (i.e. 0)
1277 */
1278static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1279                                               uint32_t insn)
1280{
1281    const AArch64DecodeTable *tptr = table;
1282
1283    while (tptr->mask) {
1284        if ((insn & tptr->mask) == tptr->pattern) {
1285            return tptr->disas_fn;
1286        }
1287        tptr++;
1288    }
1289    return NULL;
1290}
1291
1292/*
1293 * The instruction disassembly implemented here matches
1294 * the instruction encoding classifications in chapter C4
1295 * of the ARM Architecture Reference Manual (DDI0487B_a);
1296 * classification names and decode diagrams here should generally
1297 * match up with those in the manual.
1298 */
1299
1300/* Unconditional branch (immediate)
1301 *   31  30       26 25                                  0
1302 * +----+-----------+-------------------------------------+
1303 * | op | 0 0 1 0 1 |                 imm26               |
1304 * +----+-----------+-------------------------------------+
1305 */
1306static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1307{
1308    uint64_t addr = s->pc_curr + sextract32(insn, 0, 26) * 4;
1309
1310    if (insn & (1U << 31)) {
1311        /* BL Branch with link */
1312        tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
1313    }
1314
1315    /* B Branch / BL Branch with link */
1316    reset_btype(s);
1317    gen_goto_tb(s, 0, addr);
1318}
1319
1320/* Compare and branch (immediate)
1321 *   31  30         25  24  23                  5 4      0
1322 * +----+-------------+----+---------------------+--------+
1323 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1324 * +----+-------------+----+---------------------+--------+
1325 */
1326static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1327{
1328    unsigned int sf, op, rt;
1329    uint64_t addr;
1330    TCGLabel *label_match;
1331    TCGv_i64 tcg_cmp;
1332
1333    sf = extract32(insn, 31, 1);
1334    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1335    rt = extract32(insn, 0, 5);
1336    addr = s->pc_curr + sextract32(insn, 5, 19) * 4;
1337
1338    tcg_cmp = read_cpu_reg(s, rt, sf);
1339    label_match = gen_new_label();
1340
1341    reset_btype(s);
1342    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1343                        tcg_cmp, 0, label_match);
1344
1345    gen_goto_tb(s, 0, s->base.pc_next);
1346    gen_set_label(label_match);
1347    gen_goto_tb(s, 1, addr);
1348}
1349
1350/* Test and branch (immediate)
1351 *   31  30         25  24  23   19 18          5 4    0
1352 * +----+-------------+----+-------+-------------+------+
1353 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1354 * +----+-------------+----+-------+-------------+------+
1355 */
1356static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1357{
1358    unsigned int bit_pos, op, rt;
1359    uint64_t addr;
1360    TCGLabel *label_match;
1361    TCGv_i64 tcg_cmp;
1362
1363    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1364    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1365    addr = s->pc_curr + sextract32(insn, 5, 14) * 4;
1366    rt = extract32(insn, 0, 5);
1367
1368    tcg_cmp = tcg_temp_new_i64();
1369    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1370    label_match = gen_new_label();
1371
1372    reset_btype(s);
1373    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1374                        tcg_cmp, 0, label_match);
1375    tcg_temp_free_i64(tcg_cmp);
1376    gen_goto_tb(s, 0, s->base.pc_next);
1377    gen_set_label(label_match);
1378    gen_goto_tb(s, 1, addr);
1379}
1380
1381/* Conditional branch (immediate)
1382 *  31           25  24  23                  5   4  3    0
1383 * +---------------+----+---------------------+----+------+
1384 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1385 * +---------------+----+---------------------+----+------+
1386 */
1387static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1388{
1389    unsigned int cond;
1390    uint64_t addr;
1391
1392    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1393        unallocated_encoding(s);
1394        return;
1395    }
1396    addr = s->pc_curr + sextract32(insn, 5, 19) * 4;
1397    cond = extract32(insn, 0, 4);
1398
1399    reset_btype(s);
1400    if (cond < 0x0e) {
1401        /* genuinely conditional branches */
1402        TCGLabel *label_match = gen_new_label();
1403        arm_gen_test_cc(cond, label_match);
1404        gen_goto_tb(s, 0, s->base.pc_next);
1405        gen_set_label(label_match);
1406        gen_goto_tb(s, 1, addr);
1407    } else {
1408        /* 0xe and 0xf are both "always" conditions */
1409        gen_goto_tb(s, 0, addr);
1410    }
1411}
1412
1413/* HINT instruction group, including various allocated HINTs */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    /* The hint number is CRm:op2 (5 bits). */
    unsigned int selector = crm << 3 | op2;

    /* Hints live in the op1 == 3 space of the system-instruction group. */
    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0b00000: /* NOP */
        break;
    case 0b00011: /* WFI */
        s->base.is_jmp = DISAS_WFI;
        break;
    case 0b00001: /* YIELD */
        /* When running in MTTCG we don't generate jumps to the yield and
         * WFE helpers as it won't affect the scheduling of other vCPUs.
         * If we wanted to more completely model WFE/SEV so we don't busy
         * spin unnecessarily we would need to do something more involved.
         */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_YIELD;
        }
        break;
    case 0b00010: /* WFE */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_WFE;
        }
        break;
    case 0b00100: /* SEV */
    case 0b00101: /* SEVL */
        /* we treat all as NOP at least for now */
        break;
    /*
     * The pointer-authentication hints below execute as NOPs when
     * pauth is not active, as the code guards show.
     */
    case 0b00111: /* XPACLRI */
        if (s->pauth_active) {
            gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
        }
        break;
    case 0b01000: /* PACIA1716 */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01010: /* PACIB1716 */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01100: /* AUTIA1716 */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01110: /* AUTIB1716 */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b11000: /* PACIAZ */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
                                new_tmp_a64_zero(s));
        }
        break;
    case 0b11001: /* PACIASP */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11010: /* PACIBZ */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
                                new_tmp_a64_zero(s));
        }
        break;
    case 0b11011: /* PACIBSP */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11100: /* AUTIAZ */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
                              new_tmp_a64_zero(s));
        }
        break;
    case 0b11101: /* AUTIASP */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11110: /* AUTIBZ */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
                              new_tmp_a64_zero(s));
        }
        break;
    case 0b11111: /* AUTIBSP */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    default:
        /* default specified as NOP equivalent */
        break;
    }
}
1523
/* CLREX: clear the local exclusive monitor. */
static void gen_clrex(DisasContext *s, uint32_t insn)
{
    /*
     * Invalidate the recorded exclusive address; -1 appears to serve
     * as the "no reservation" sentinel checked by the store-exclusive
     * path (defined elsewhere in this file) -- confirm there.
     */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
1528
1529/* CLREX, DSB, DMB, ISB */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGBar bar;

    /* These insns live in the op1 == 3 space of the system group. */
    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
        /*
         * CRm<1:0> selects the required ordering; the shareability
         * domain in CRm<3:2> is ignored since TCG barriers are global.
         */
        switch (crm & 3) {
        case 1: /* MBReqTypes_Reads */
            bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
            break;
        case 2: /* MBReqTypes_Writes */
            bar = TCG_BAR_SC | TCG_MO_ST_ST;
            break;
        default: /* MBReqTypes_All */
            bar = TCG_BAR_SC | TCG_MO_ALL;
            break;
        }
        tcg_gen_mb(bar);
        return;
    case 6: /* ISB */
        /* We need to break the TB after this insn to execute
         * a self-modified code correctly and also to take
         * any pending interrupts immediately.
         */
        reset_btype(s);
        gen_goto_tb(s, 0, s->base.pc_next);
        return;

    case 7: /* SB */
        if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
            goto do_unallocated;
        }
        /*
         * TODO: There is no speculation barrier opcode for TCG;
         * MB and end the TB instead.
         */
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
        gen_goto_tb(s, 0, s->base.pc_next);
        return;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }
}
1586
/*
 * XAFlag: convert the flags from the "external" (float-compare style)
 * encoding into the ARM NZCV encoding.  Called from handle_msr_i(),
 * gated on aa64_condm_5 (FEAT_FlagM2) -- see ARM ARM for the mapping.
 * Note the ordering: cpu_CF is read by every step and only rewritten
 * at the very end.
 */
static void gen_xaflag(void)
{
    /* z = (ZF == 0), i.e. the current Z flag as a 0/1 value. */
    TCGv_i32 z = tcg_temp_new_i32();

    tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);

    /*
     * (!C & !Z) << 31
     * (!(C | Z)) << 31
     * ~((C | Z) << 31)
     * ~-(C | Z)
     * (C | Z) - 1
     */
    tcg_gen_or_i32(cpu_NF, cpu_CF, z);
    tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);

    /* !(Z & C) */
    tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
    tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);

    /* (!C & Z) << 31 -> -(Z & ~C) */
    tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);

    /* C | Z */
    tcg_gen_or_i32(cpu_CF, cpu_CF, z);

    tcg_temp_free_i32(z);
}
1616
/*
 * AXFlag: convert the flags from the ARM NZCV encoding into the
 * "external" (float-compare style) encoding.  Called from
 * handle_msr_i(), gated on aa64_condm_5 (FEAT_FlagM2).
 * Note the ordering: cpu_VF is consumed before it is zeroed.
 */
static void gen_axflag(void)
{
    tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
    tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */

    /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
    tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);

    tcg_gen_movi_i32(cpu_NF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}
1628
1629/* MSR (immediate) - move immediate to processor state field */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGv_i32 t1;
    /* The PSTATE field is selected by op1:op2; crm carries the value. */
    int op = op1 << 3 | op2;

    /* End the TB by default, chaining is ok.  */
    s->base.is_jmp = DISAS_TOO_MANY;

    switch (op) {
    case 0x00: /* CFINV */
        if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
            goto do_unallocated;
        }
        /* Invert the carry flag only; no need to end the TB. */
        tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x01: /* XAFlag */
        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
            goto do_unallocated;
        }
        gen_xaflag();
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x02: /* AXFlag */
        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
            goto do_unallocated;
        }
        gen_axflag();
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x03: /* UAO */
        if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
            goto do_unallocated;
        }
        /* crm bit 0 is the new value of the PSTATE bit. */
        if (crm & 1) {
            set_pstate_bits(PSTATE_UAO);
        } else {
            clear_pstate_bits(PSTATE_UAO);
        }
        /* UAO is cached in the hflags; rebuild them. */
        t1 = tcg_const_i32(s->current_el);
        gen_helper_rebuild_hflags_a64(cpu_env, t1);
        tcg_temp_free_i32(t1);
        break;

    case 0x04: /* PAN */
        if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_PAN);
        } else {
            clear_pstate_bits(PSTATE_PAN);
        }
        /* PAN is cached in the hflags; rebuild them. */
        t1 = tcg_const_i32(s->current_el);
        gen_helper_rebuild_hflags_a64(cpu_env, t1);
        tcg_temp_free_i32(t1);
        break;

    case 0x05: /* SPSel */
        if (s->current_el == 0) {
            goto do_unallocated;
        }
        t1 = tcg_const_i32(crm & PSTATE_SP);
        gen_helper_msr_i_spsel(cpu_env, t1);
        tcg_temp_free_i32(t1);
        break;

    case 0x19: /* SSBS */
        if (!dc_isar_feature(aa64_ssbs, s)) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_SSBS);
        } else {
            clear_pstate_bits(PSTATE_SSBS);
        }
        /* Don't need to rebuild hflags since SSBS is a nop */
        break;

    case 0x1a: /* DIT */
        if (!dc_isar_feature(aa64_dit, s)) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_DIT);
        } else {
            clear_pstate_bits(PSTATE_DIT);
        }
        /* There's no need to rebuild hflags because DIT is a nop */
        break;

    case 0x1e: /* DAIFSet */
        t1 = tcg_const_i32(crm);
        gen_helper_msr_i_daifset(cpu_env, t1);
        tcg_temp_free_i32(t1);
        break;

    case 0x1f: /* DAIFClear */
        t1 = tcg_const_i32(crm);
        gen_helper_msr_i_daifclear(cpu_env, t1);
        tcg_temp_free_i32(t1);
        /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
        s->base.is_jmp = DISAS_UPDATE_EXIT;
        break;

    case 0x1c: /* TCO */
        if (dc_isar_feature(aa64_mte, s)) {
            /* Full MTE is enabled -- set the TCO bit as directed. */
            if (crm & 1) {
                set_pstate_bits(PSTATE_TCO);
            } else {
                clear_pstate_bits(PSTATE_TCO);
            }
            t1 = tcg_const_i32(s->current_el);
            gen_helper_rebuild_hflags_a64(cpu_env, t1);
            tcg_temp_free_i32(t1);
            /* Many factors, including TCO, go into MTE_ACTIVE. */
            s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
        } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
            /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI.  */
            s->base.is_jmp = DISAS_NEXT;
        } else {
            goto do_unallocated;
        }
        break;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }
}
1766
1767static void gen_get_nzcv(TCGv_i64 tcg_rt)
1768{
1769    TCGv_i32 tmp = tcg_temp_new_i32();
1770    TCGv_i32 nzcv = tcg_temp_new_i32();
1771
1772    /* build bit 31, N */
1773    tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1774    /* build bit 30, Z */
1775    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1776    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1777    /* build bit 29, C */
1778    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1779    /* build bit 28, V */
1780    tcg_gen_shri_i32(tmp, cpu_VF, 31);
1781    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1782    /* generate result */
1783    tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1784
1785    tcg_temp_free_i32(nzcv);
1786    tcg_temp_free_i32(tmp);
1787}
1788
1789static void gen_set_nzcv(TCGv_i64 tcg_rt)
1790{
1791    TCGv_i32 nzcv = tcg_temp_new_i32();
1792
1793    /* take NZCV from R[t] */
1794    tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1795
1796    /* bit 31, N */
1797    tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1798    /* bit 30, Z */
1799    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1800    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1801    /* bit 29, C */
1802    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1803    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1804    /* bit 28, V */
1805    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1806    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1807    tcg_temp_free_i32(nzcv);
1808}
1809
/* MRS - move from system register
 * MSR (register) - move to system register
 * SYS
 * SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 *
 * isread is true for the register-read forms (MRS/SYSL), false for the
 * write forms (MSR/SYS); op0/op1/op2/crn/crm are the encoding fields
 * identifying the system register, and rt the GPR transferred to/from.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    const ARMCPRegInfo *ri;
    TCGv_i64 tcg_rt;

    /* Look up the register by its encoded (op0, op1, crn, crm, op2) key */
    ri = get_arm_cp_reginfo(s->cp_regs,
                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                               crn, crm, op0, op1, op2));

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions (static, translation-time check) */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if (ri->accessfn) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        TCGv_ptr tmpptr;
        TCGv_i32 tcg_syn, tcg_isread;
        uint32_t syndrome;

        /* Sync the PC so a trap taken by the helper is reported correctly */
        gen_a64_set_pc_im(s->pc_curr);
        tmpptr = tcg_const_ptr(ri);
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        tcg_syn = tcg_const_i32(syndrome);
        tcg_isread = tcg_const_i32(isread);
        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
        tcg_temp_free_ptr(tmpptr);
        tcg_temp_free_i32(tcg_syn);
        tcg_temp_free_i32(tcg_isread);
    } else if (ri->type & ARM_CP_RAISES_EXC) {
        /*
         * The readfn or writefn might raise an exception;
         * synchronize the CPU state in case it does.
         */
        gen_a64_set_pc_im(s->pc_curr);
    }

    /* Handle special cases first */
    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        /* NZCV lives in the TCG flag variables, not at a fieldoffset */
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        return;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        if (s->mte_active[0]) {
            /* With MTE enabled, check the tags first; the helper returns
             * the checked (cleaned) address for the actual zeroing.
             */
            TCGv_i32 t_desc;
            int desc = 0;

            desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
            desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
            desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
            t_desc = tcg_const_i32(desc);

            tcg_rt = new_tmp_a64(s);
            gen_helper_mte_check_zva(tcg_rt, cpu_env, t_desc, cpu_reg(s, rt));
            tcg_temp_free_i32(t_desc);
        } else {
            tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
        }
        gen_helper_dc_zva(cpu_env, tcg_rt);
        return;
    case ARM_CP_DC_GVA:
        {
            TCGv_i64 clean_addr, tag;

            /*
             * DC_GVA, like DC_ZVA, requires that we supply the original
             * pointer for an invalid page.  Probe that address first.
             */
            tcg_rt = cpu_reg(s, rt);
            clean_addr = clean_data_tbi(s, tcg_rt);
            gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);

            if (s->ata) {
                /* Extract the tag from the register to match STZGM.  */
                tag = tcg_temp_new_i64();
                tcg_gen_shri_i64(tag, tcg_rt, 56);
                gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
                tcg_temp_free_i64(tag);
            }
        }
        return;
    case ARM_CP_DC_GZVA:
        {
            TCGv_i64 clean_addr, tag;

            /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
            tcg_rt = cpu_reg(s, rt);
            clean_addr = clean_data_tbi(s, tcg_rt);
            gen_helper_dc_zva(cpu_env, clean_addr);

            if (s->ata) {
                /* Extract the tag from the register to match STZGM.  */
                tag = tcg_temp_new_i64();
                tcg_gen_shri_i64(tag, tcg_rt, 56);
                gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
                tcg_temp_free_i64(tag);
            }
        }
        return;
    default:
        break;
    }
    /* Registers marked FPU/SVE need the corresponding access check,
     * which may itself emit an exception and end translation.
     */
    if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
        return;
    } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
        return;
    }

    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            /* Register has a read hook; go via the runtime helper */
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
            tcg_temp_free_ptr(tmpptr);
        } else {
            /* Plain register state: load directly from CPUARMState */
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            /* Register has a write hook; go via the runtime helper */
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
            tcg_temp_free_ptr(tmpptr);
        } else {
            /* Plain register state: store directly into CPUARMState */
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        /* I/O operations must end the TB here (whether read or write) */
        s->base.is_jmp = DISAS_UPDATE_EXIT;
    }
    if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /*
         * A write to any coprocessor register that ends a TB
         * must rebuild the hflags for the next TB.
         */
        TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
        gen_helper_rebuild_hflags_a64(cpu_env, tcg_el);
        tcg_temp_free_i32(tcg_el);
        /*
         * We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        s->base.is_jmp = DISAS_UPDATE_EXIT;
    }
}
2007
2008/* System
2009 *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
2010 * +---------------------+---+-----+-----+-------+-------+-----+------+
2011 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
2012 * +---------------------+---+-----+-----+-------+-------+-----+------+
2013 */
2014static void disas_system(DisasContext *s, uint32_t insn)
2015{
2016    unsigned int l, op0, op1, crn, crm, op2, rt;
2017    l = extract32(insn, 21, 1);
2018    op0 = extract32(insn, 19, 2);
2019    op1 = extract32(insn, 16, 3);
2020    crn = extract32(insn, 12, 4);
2021    crm = extract32(insn, 8, 4);
2022    op2 = extract32(insn, 5, 3);
2023    rt = extract32(insn, 0, 5);
2024
2025    if (op0 == 0) {
2026        if (l || rt != 31) {
2027            unallocated_encoding(s);
2028            return;
2029        }
2030        switch (crn) {
2031        case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
2032            handle_hint(s, insn, op1, op2, crm);
2033            break;
2034        case 3: /* CLREX, DSB, DMB, ISB */
2035            handle_sync(s, insn, op1, op2, crm);
2036            break;
2037        case 4: /* MSR (immediate) */
2038            handle_msr_i(s, insn, op1, op2, crm);
2039            break;
2040        default:
2041            unallocated_encoding(s);
2042            break;
2043        }
2044        return;
2045    }
2046    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
2047}
2048
/* Exception generation
 *
 *  31             24 23 21 20                     5 4   2 1  0
 * +-----------------+-----+------------------------+-----+----+
 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
 * +-----------------------+------------------------+----------+
 *
 * Covers SVC/HVC/SMC (opc 0), BRK (opc 1), HLT (opc 2) and
 * DCPS1/2/3 (opc 5); op2:LL selects the variant within each opc.
 */
static void disas_exc(DisasContext *s, uint32_t insn)
{
    int opc = extract32(insn, 21, 3);
    int op2_ll = extract32(insn, 0, 5);
    int imm16 = extract32(insn, 5, 16);
    TCGv_i32 tmp;

    switch (opc) {
    case 0:
        /* For SVC, HVC and SMC we advance the single-step state
         * machine before taking the exception. This is architecturally
         * mandated, to ensure that single-stepping a system call
         * instruction works properly.
         */
        switch (op2_ll) {
        case 1:                                                     /* SVC */
            gen_ss_advance(s);
            gen_exception_insn(s, s->base.pc_next, EXCP_SWI,
                               syn_aa64_svc(imm16), default_exception_el(s));
            break;
        case 2:                                                     /* HVC */
            if (s->current_el == 0) {
                /* HVC is UNDEFINED at EL0 */
                unallocated_encoding(s);
                break;
            }
            /* The pre HVC helper handles cases when HVC gets trapped
             * as an undefined insn by runtime configuration.
             */
            gen_a64_set_pc_im(s->pc_curr);
            gen_helper_pre_hvc(cpu_env);
            gen_ss_advance(s);
            gen_exception_insn(s, s->base.pc_next, EXCP_HVC,
                               syn_aa64_hvc(imm16), 2);
            break;
        case 3:                                                     /* SMC */
            if (s->current_el == 0) {
                /* SMC is UNDEFINED at EL0 */
                unallocated_encoding(s);
                break;
            }
            /* The pre-SMC helper may also trap/UNDEF depending on
             * runtime configuration; sync the PC first.
             */
            gen_a64_set_pc_im(s->pc_curr);
            tmp = tcg_const_i32(syn_aa64_smc(imm16));
            gen_helper_pre_smc(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            gen_ss_advance(s);
            gen_exception_insn(s, s->base.pc_next, EXCP_SMC,
                               syn_aa64_smc(imm16), 3);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        break;
    case 1:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* BRK */
        gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16));
        break;
    case 2:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* HLT. This has two purposes.
         * Architecturally, it is an external halting debug instruction.
         * Since QEMU doesn't implement external debug, we treat this as
         * it is required for halting debug disabled: it will UNDEF.
         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
         */
        if (semihosting_enabled() && imm16 == 0xf000) {
#ifndef CONFIG_USER_ONLY
            /* In system mode, don't allow userspace access to semihosting,
             * to provide some semblance of security (and for consistency
             * with our 32-bit semihosting).
             */
            if (s->current_el == 0) {
                unsupported_encoding(s, insn);
                break;
            }
#endif
            gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
        } else {
            unsupported_encoding(s, insn);
        }
        break;
    case 5:
        if (op2_ll < 1 || op2_ll > 3) {
            unallocated_encoding(s);
            break;
        }
        /* DCPS1, DCPS2, DCPS3 */
        unsupported_encoding(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}
2156
/* Unconditional branch (register)
 *  31           25 24   21 20   16 15   10 9    5 4     0
 * +---------------+-------+-------+-------+------+-------+
 * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
 * +---------------+-------+-------+-------+------+-------+
 *
 * Covers BR/BLR/RET (and their pointer-authenticated variants),
 * ERET/ERETAA/ERETAB and DRPS.
 */
static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
{
    unsigned int opc, op2, op3, rn, op4;
    unsigned btype_mod = 2;   /* 0: BR, 1: BLR, 2: other */
    TCGv_i64 dst;
    TCGv_i64 modifier;

    opc = extract32(insn, 21, 4);
    op2 = extract32(insn, 16, 5);
    op3 = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    op4 = extract32(insn, 0, 5);

    if (op2 != 0x1f) {
        goto do_unallocated;
    }

    switch (opc) {
    case 0: /* BR */
    case 1: /* BLR */
    case 2: /* RET */
        btype_mod = opc;
        switch (op3) {
        case 0:
            /* BR, BLR, RET */
            if (op4 != 0) {
                goto do_unallocated;
            }
            dst = cpu_reg(s, rn);
            break;

        case 2:
        case 3:
            /* Pointer-authenticated variants (op3 == 2 -> A key,
             * op3 == 3 -> B key).
             */
            if (!dc_isar_feature(aa64_pauth, s)) {
                goto do_unallocated;
            }
            if (opc == 2) {
                /* RETAA, RETAB */
                if (rn != 0x1f || op4 != 0x1f) {
                    goto do_unallocated;
                }
                /* Return address is in LR (x30), modifier is SP */
                rn = 30;
                modifier = cpu_X[31];
            } else {
                /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */
                if (op4 != 0x1f) {
                    goto do_unallocated;
                }
                /* "Z" forms use a zero modifier */
                modifier = new_tmp_a64_zero(s);
            }
            if (s->pauth_active) {
                dst = new_tmp_a64(s);
                if (op3 == 2) {
                    gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
                } else {
                    gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
                }
            } else {
                /* Authentication disabled: branch to the raw address */
                dst = cpu_reg(s, rn);
            }
            break;

        default:
            goto do_unallocated;
        }
        gen_a64_set_pc(s, dst);
        /* BLR also needs to load return address */
        if (opc == 1) {
            tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
        }
        break;

    case 8: /* BRAA */
    case 9: /* BLRAA */
        if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        if ((op3 & ~1) != 2) {
            goto do_unallocated;
        }
        btype_mod = opc & 1;
        if (s->pauth_active) {
            /* Modifier register (may be SP) is encoded in op4 */
            dst = new_tmp_a64(s);
            modifier = cpu_reg_sp(s, op4);
            if (op3 == 2) {
                gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
            } else {
                gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
            }
        } else {
            dst = cpu_reg(s, rn);
        }
        gen_a64_set_pc(s, dst);
        /* BLRAA also needs to load return address */
        if (opc == 9) {
            tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
        }
        break;

    case 4: /* ERET */
        if (s->current_el == 0) {
            goto do_unallocated;
        }
        switch (op3) {
        case 0: /* ERET */
            if (op4 != 0) {
                goto do_unallocated;
            }
            /* Return address comes from ELR of the current EL */
            dst = tcg_temp_new_i64();
            tcg_gen_ld_i64(dst, cpu_env,
                           offsetof(CPUARMState, elr_el[s->current_el]));
            break;

        case 2: /* ERETAA */
        case 3: /* ERETAB */
            if (!dc_isar_feature(aa64_pauth, s)) {
                goto do_unallocated;
            }
            if (rn != 0x1f || op4 != 0x1f) {
                goto do_unallocated;
            }
            dst = tcg_temp_new_i64();
            tcg_gen_ld_i64(dst, cpu_env,
                           offsetof(CPUARMState, elr_el[s->current_el]));
            if (s->pauth_active) {
                /* Authenticate ELR with SP as the modifier */
                modifier = cpu_X[31];
                if (op3 == 2) {
                    gen_helper_autia(dst, cpu_env, dst, modifier);
                } else {
                    gen_helper_autib(dst, cpu_env, dst, modifier);
                }
            }
            break;

        default:
            goto do_unallocated;
        }
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }

        gen_helper_exception_return(cpu_env, dst);
        tcg_temp_free_i64(dst);
        /* Must exit loop to check un-masked IRQs */
        s->base.is_jmp = DISAS_EXIT;
        return;

    case 5: /* DRPS */
        if (op3 != 0 || op4 != 0 || rn != 0x1f) {
            goto do_unallocated;
        } else {
            unsupported_encoding(s, insn);
        }
        return;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }

    /* For BTI, record the branch type for the *next* instruction */
    switch (btype_mod) {
    case 0: /* BR */
        if (dc_isar_feature(aa64_bti, s)) {
            /* BR to {x16,x17} or !guard -> 1, else 3.  */
            set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
        }
        break;

    case 1: /* BLR */
        if (dc_isar_feature(aa64_bti, s)) {
            /* BLR sets BTYPE to 2, regardless of source guarded page.  */
            set_btype(s, 2);
        }
        break;

    default: /* RET or none of the above.  */
        /* BTYPE will be set to 0 by normal end-of-insn processing.  */
        break;
    }

    s->base.is_jmp = DISAS_JUMP;
}
2346
2347/* Branches, exception generating and system instructions */
2348static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
2349{
2350    switch (extract32(insn, 25, 7)) {
2351    case 0x0a: case 0x0b:
2352    case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
2353        disas_uncond_b_imm(s, insn);
2354        break;
2355    case 0x1a: case 0x5a: /* Compare & branch (immediate) */
2356        disas_comp_b_imm(s, insn);
2357        break;
2358    case 0x1b: case 0x5b: /* Test & branch (immediate) */
2359        disas_test_b_imm(s, insn);
2360        break;
2361    case 0x2a: /* Conditional branch (immediate) */
2362        disas_cond_b_imm(s, insn);
2363        break;
2364    case 0x6a: /* Exception generation / System */
2365        if (insn & (1 << 24)) {
2366            if (extract32(insn, 22, 2) == 0) {
2367                disas_system(s, insn);
2368            } else {
2369                unallocated_encoding(s);
2370            }
2371        } else {
2372            disas_exc(s, insn);
2373        }
2374        break;
2375    case 0x6b: /* Unconditional branch (register) */
2376        disas_uncond_b_reg(s, insn);
2377        break;
2378    default:
2379        unallocated_encoding(s);
2380        break;
2381    }
2382}
2383
2384/*
2385 * Load/Store exclusive instructions are implemented by remembering
2386 * the value/address loaded, and seeing if these are the same
2387 * when the store is performed. This is not actually the architecturally
2388 * mandated semantics, but it works for typical guest code sequences
2389 * and avoids having to monitor regular stores.
2390 *
2391 * The store exclusive uses the atomic cmpxchg primitives to avoid
2392 * races in multi-threaded linux-user and when MTTCG softmmu is
2393 * enabled.
2394 */
/* Emit code for LDXR/LDAXR (and pair forms LDXP/LDAXP): load from
 * addr into X[rt] (and X[rt2] for pairs), recording the address and
 * loaded value(s) in the exclusive-monitor state for a later
 * store-exclusive to compare against.
 *
 * size is log2 of the access size (<= 3); for pairs size >= 2
 * (32x2 or 64x2 bit).
 */
static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
                               TCGv_i64 addr, int size, bool is_pair)
{
    int idx = get_mem_index(s);
    MemOp memop = s->be_data;

    g_assert(size <= 3);
    if (is_pair) {
        g_assert(size >= 2);
        if (size == 2) {
            /* The pair must be single-copy atomic for the doubleword.  */
            memop |= MO_64 | MO_ALIGN;
            tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
            /* Split the 64-bit value into the two 32-bit registers,
             * low/high halves according to endianness.
             */
            if (s->be_data == MO_LE) {
                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
            } else {
                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
            }
        } else {
            /* The pair must be single-copy atomic for *each* doubleword, not
               the entire quadword, however it must be quadword aligned.  */
            memop |= MO_64;
            tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
                                memop | MO_ALIGN_16);

            TCGv_i64 addr2 = tcg_temp_new_i64();
            tcg_gen_addi_i64(addr2, addr, 8);
            tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
            tcg_temp_free_i64(addr2);

            tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
            tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
        }
    } else {
        /* Single register: aligned load of the requested size */
        memop |= size | MO_ALIGN;
        tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
        tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
    }
    /* Arm the monitor: remember which address is now exclusive */
    tcg_gen_mov_i64(cpu_exclusive_addr, addr);
}
2437
/* Emit code for STXR/STLXR (and pair forms): conditionally store
 * X[rt] (and X[rt2]) to addr if the exclusive monitor still matches,
 * writing the 0 (success) / 1 (fail) status into X[rd].
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 addr, int size, int is_pair)
{
    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
     *     [addr] = {Rt};
     *     if (is_pair) {
     *         [addr + datasize] = {Rt2};
     *     }
     *     {Rd} = 0;
     * } else {
     *     {Rd} = 1;
     * }
     * env->exclusive_addr = -1;
     */
    TCGLabel *fail_label = gen_new_label();
    TCGLabel *done_label = gen_new_label();
    TCGv_i64 tmp;

    /* Fast fail if the monitor is armed for a different address */
    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);

    tmp = tcg_temp_new_i64();
    if (is_pair) {
        if (size == 2) {
            /* 32x2: pack both registers into one 64-bit cmpxchg */
            if (s->be_data == MO_LE) {
                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
            } else {
                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
            }
            tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
                                       cpu_exclusive_val, tmp,
                                       get_mem_index(s),
                                       MO_64 | MO_ALIGN | s->be_data);
            /* tmp holds the old memory value; success iff it matched */
            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
        } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
            /* 64x2 with other CPUs running: needs a 128-bit cmpxchg */
            if (!HAVE_CMPXCHG128) {
                gen_helper_exit_atomic(cpu_env);
                s->base.is_jmp = DISAS_NORETURN;
            } else if (s->be_data == MO_LE) {
                gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
                                                        cpu_exclusive_addr,
                                                        cpu_reg(s, rt),
                                                        cpu_reg(s, rt2));
            } else {
                gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
                                                        cpu_exclusive_addr,
                                                        cpu_reg(s, rt),
                                                        cpu_reg(s, rt2));
            }
        } else if (s->be_data == MO_LE) {
            gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
                                           cpu_reg(s, rt), cpu_reg(s, rt2));
        } else {
            gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
                                           cpu_reg(s, rt), cpu_reg(s, rt2));
        }
    } else {
        /* Single register: cmpxchg against the remembered value */
        tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
                                   cpu_reg(s, rt), get_mem_index(s),
                                   size | MO_ALIGN | s->be_data);
        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
    }
    tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
    tcg_temp_free_i64(tmp);
    tcg_gen_br(done_label);

    gen_set_label(fail_label);
    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
    gen_set_label(done_label);
    /* Disarm the monitor in all cases */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
2509
2510static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2511                                 int rn, int size)
2512{
2513    TCGv_i64 tcg_rs = cpu_reg(s, rs);
2514    TCGv_i64 tcg_rt = cpu_reg(s, rt);
2515    int memidx = get_mem_index(s);
2516    TCGv_i64 clean_addr;
2517
2518    if (rn == 31) {
2519        gen_check_sp_alignment(s);
2520    }
2521    clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size);
2522    tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
2523                               size | MO_ALIGN | s->be_data);
2524}
2525
/* Emit code for CASP: compare-and-swap of the register pair
 * {X[rs], X[rs+1]} against memory at Xn/SP, storing {X[rt], X[rt+1]}
 * on a match; the old memory values are written back to the rs pair.
 */
static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
                                      int rn, int size)
{
    TCGv_i64 s1 = cpu_reg(s, rs);
    TCGv_i64 s2 = cpu_reg(s, rs + 1);
    TCGv_i64 t1 = cpu_reg(s, rt);
    TCGv_i64 t2 = cpu_reg(s, rt + 1);
    TCGv_i64 clean_addr;
    int memidx = get_mem_index(s);

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    /* This is a single atomic access, despite the "pair". */
    clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1);

    if (size == 2) {
        /* 32x2: pack each pair into one 64-bit value and cmpxchg */
        TCGv_i64 cmp = tcg_temp_new_i64();
        TCGv_i64 val = tcg_temp_new_i64();

        if (s->be_data == MO_LE) {
            tcg_gen_concat32_i64(val, t1, t2);
            tcg_gen_concat32_i64(cmp, s1, s2);
        } else {
            tcg_gen_concat32_i64(val, t2, t1);
            tcg_gen_concat32_i64(cmp, s2, s1);
        }

        tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
                                   MO_64 | MO_ALIGN | s->be_data);
        tcg_temp_free_i64(val);

        /* Unpack the old memory value back into the rs pair */
        if (s->be_data == MO_LE) {
            tcg_gen_extr32_i64(s1, s2, cmp);
        } else {
            tcg_gen_extr32_i64(s2, s1, cmp);
        }
        tcg_temp_free_i64(cmp);
    } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
        /* 64x2 with other CPUs running: must use a 128-bit cmpxchg,
         * or fall back to serialized execution if unavailable.
         */
        if (HAVE_CMPXCHG128) {
            TCGv_i32 tcg_rs = tcg_const_i32(rs);
            if (s->be_data == MO_LE) {
                gen_helper_casp_le_parallel(cpu_env, tcg_rs,
                                            clean_addr, t1, t2);
            } else {
                gen_helper_casp_be_parallel(cpu_env, tcg_rs,
                                            clean_addr, t1, t2);
            }
            tcg_temp_free_i32(tcg_rs);
        } else {
            gen_helper_exit_atomic(cpu_env);
            s->base.is_jmp = DISAS_NORETURN;
        }
    } else {
        /* Non-parallel 64x2: open-code load/compare/conditional store */
        TCGv_i64 d1 = tcg_temp_new_i64();
        TCGv_i64 d2 = tcg_temp_new_i64();
        TCGv_i64 a2 = tcg_temp_new_i64();
        TCGv_i64 c1 = tcg_temp_new_i64();
        TCGv_i64 c2 = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_const_i64(0);

        /* Load the two words, in memory order.  */
        tcg_gen_qemu_ld_i64(d1, clean_addr, memidx,
                            MO_64 | MO_ALIGN_16 | s->be_data);
        tcg_gen_addi_i64(a2, clean_addr, 8);
        tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data);

        /* Compare the two words, also in memory order.  */
        tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
        tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
        tcg_gen_and_i64(c2, c2, c1);

        /* If compare equal, write back new data, else write back old data.  */
        tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
        tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
        tcg_gen_qemu_st_i64(c1, clean_addr, memidx, MO_64 | s->be_data);
        tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
        tcg_temp_free_i64(a2);
        tcg_temp_free_i64(c1);
        tcg_temp_free_i64(c2);
        tcg_temp_free_i64(zero);

        /* Write back the data from memory to Rs.  */
        tcg_gen_mov_i64(s1, d1);
        tcg_gen_mov_i64(s2, d2);
        tcg_temp_free_i64(d1);
        tcg_temp_free_i64(d2);
    }
}
2616
/* Update the Sixty-Four bit (SF) registersize. This logic is derived
 * from the ARMv8 specs for LDR (Shared decode for all encodings).
 * Returns true when the destination register size is 64 bits.
 */
static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
{
    bool sf;

    if (is_signed) {
        /* Sign-extending loads: bit 0 of opc clear means a 64-bit
         * destination, set means 32-bit.
         */
        sf = (opc & 1) == 0;
    } else {
        /* Zero-extending loads: only the 8-byte access is 64-bit */
        sf = (size == 3);
    }
    return sf;
}
2632
2633/* Load/store exclusive
2634 *
2635 *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
2636 * +-----+-------------+----+---+----+------+----+-------+------+------+
2637 * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
2638 * +-----+-------------+----+---+----+------+----+-------+------+------+
2639 *
2640 *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2641 *   L: 0 -> store, 1 -> load
2642 *  o2: 0 -> exclusive, 1 -> not
2643 *  o1: 0 -> single register, 1 -> register pair
2644 *  o0: 1 -> load-acquire/store-release, 0 -> not
2645 */
static void disas_ldst_excl(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rt2 = extract32(insn, 10, 5);
    int rs = extract32(insn, 16, 5);
    int is_lasr = extract32(insn, 15, 1);
    /* Fold the o2:L:o1 field together with o0 into one 4-bit selector.  */
    int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
    int size = extract32(insn, 30, 2);
    TCGv_i64 clean_addr;

    switch (o2_L_o1_o0) {
    case 0x0: /* STXR */
    case 0x1: /* STLXR */
        if (rn == 31) {
            gen_check_sp_alignment(s);
        }
        if (is_lasr) {
            /* Store-release: barrier emitted before the store itself.  */
            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
        }
        clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
                                    true, rn != 31, size);
        gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
        return;

    case 0x4: /* LDXR */
    case 0x5: /* LDAXR */
        if (rn == 31) {
            gen_check_sp_alignment(s);
        }
        clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
                                    false, rn != 31, size);
        s->is_ldex = true;
        gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
        if (is_lasr) {
            /* Load-acquire: barrier emitted after the load itself.  */
            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
        }
        return;

    case 0x8: /* STLLR */
        if (!dc_isar_feature(aa64_lor, s)) {
            break;
        }
        /* StoreLORelease is the same as Store-Release for QEMU.  */
        /* fall through */
    case 0x9: /* STLR */
        /* Generate ISS for non-exclusive accesses including LASR.  */
        if (rn == 31) {
            gen_check_sp_alignment(s);
        }
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
        clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
                                    true, rn != 31, size);
        /* TODO: ARMv8.4-LSE SCTLR.nAA */
        do_gpr_st(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, true, rt,
                  disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
        return;

    case 0xc: /* LDLAR */
        if (!dc_isar_feature(aa64_lor, s)) {
            break;
        }
        /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
        /* fall through */
    case 0xd: /* LDAR */
        /* Generate ISS for non-exclusive accesses including LASR.  */
        if (rn == 31) {
            gen_check_sp_alignment(s);
        }
        clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
                                    false, rn != 31, size);
        /* TODO: ARMv8.4-LSE SCTLR.nAA */
        do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, false, true,
                  rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
        return;

    case 0x2: case 0x3: /* CASP / STXP */
        if (size & 2) { /* STXP / STLXP */
            if (rn == 31) {
                gen_check_sp_alignment(s);
            }
            if (is_lasr) {
                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
            }
            clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
                                        true, rn != 31, size);
            gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
            return;
        }
        /* CASP requires an even Rs/Rt register pair and Rt2 == 31.  */
        if (rt2 == 31
            && ((rt | rs) & 1) == 0
            && dc_isar_feature(aa64_atomics, s)) {
            /* CASP / CASPL */
            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
            return;
        }
        break;

    case 0x6: case 0x7: /* CASPA / LDXP */
        if (size & 2) { /* LDXP / LDAXP */
            if (rn == 31) {
                gen_check_sp_alignment(s);
            }
            clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
                                        false, rn != 31, size);
            s->is_ldex = true;
            gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
            if (is_lasr) {
                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
            }
            return;
        }
        /* CASPA requires an even Rs/Rt register pair and Rt2 == 31.  */
        if (rt2 == 31
            && ((rt | rs) & 1) == 0
            && dc_isar_feature(aa64_atomics, s)) {
            /* CASPA / CASPAL */
            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
            return;
        }
        break;

    case 0xa: /* CAS */
    case 0xb: /* CASL */
    case 0xe: /* CASA */
    case 0xf: /* CASAL */
        if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
            gen_compare_and_swap(s, rs, rt, rn, size);
            return;
        }
        break;
    }
    /* Anything that falls through to here is unallocated.  */
    unallocated_encoding(s);
}
2780
2781/*
2782 * Load register (literal)
2783 *
2784 *  31 30 29   27  26 25 24 23                5 4     0
2785 * +-----+-------+---+-----+-------------------+-------+
2786 * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2787 * +-----+-------+---+-----+-------------------+-------+
2788 *
2789 * V: 1 -> vector (simd/fp)
2790 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2791 *                   10-> 32 bit signed, 11 -> prefetch
2792 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2793 */
static void disas_ld_lit(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    /* imm19 is a word offset: scale by 4 for the byte offset from PC.  */
    int64_t imm = sextract32(insn, 5, 19) << 2;
    bool is_vector = extract32(insn, 26, 1);
    int opc = extract32(insn, 30, 2);
    bool is_signed = false;
    int size = 2;
    TCGv_i64 tcg_rt, clean_addr;

    if (is_vector) {
        if (opc == 3) {
            unallocated_encoding(s);
            return;
        }
        size = 2 + opc;
        if (!fp_access_check(s)) {
            return;
        }
    } else {
        if (opc == 3) {
            /* PRFM (literal) : prefetch */
            return;
        }
        size = 2 + extract32(opc, 0, 1);
        is_signed = extract32(opc, 1, 1);
    }

    tcg_rt = cpu_reg(s, rt);

    /* The address is built directly from the PC plus a constant, so
     * no TBI/MTE address cleaning is applied here.
     */
    clean_addr = tcg_const_i64(s->pc_curr + imm);
    if (is_vector) {
        do_fp_ld(s, rt, clean_addr, size);
    } else {
        /* Only unsigned 32bit loads target 32bit registers.  */
        bool iss_sf = opc != 0;

        do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
                  false, true, rt, iss_sf, false);
    }
    tcg_temp_free_i64(clean_addr);
}
2836
2837/*
2838 * LDNP (Load Pair - non-temporal hint)
2839 * LDP (Load Pair - non vector)
2840 * LDPSW (Load Pair Signed Word - non vector)
2841 * STNP (Store Pair - non-temporal hint)
2842 * STP (Store Pair - non vector)
2843 * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2844 * LDP (Load Pair of SIMD&FP)
2845 * STNP (Store Pair of SIMD&FP - non-temporal hint)
2846 * STP (Store Pair of SIMD&FP)
2847 *
2848 *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2849 * +-----+-------+---+---+-------+---+-----------------------------+
2850 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2851 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2852 *
2853 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2854 *      LDPSW/STGP               01
2855 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2856 *   V: 0 -> GPR, 1 -> Vector
2857 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2858 *      10 -> signed offset, 11 -> pre-index
2859 *   L: 0 -> Store 1 -> Load
2860 *
2861 * Rt, Rt2 = GPR or SIMD registers to be stored
2862 * Rn = general purpose register containing address
2863 * imm7 = signed offset (multiple of 4 or 8 depending on size)
2864 */
static void disas_ldst_pair(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rt2 = extract32(insn, 10, 5);
    uint64_t offset = sextract64(insn, 15, 7);
    int index = extract32(insn, 23, 2);
    bool is_vector = extract32(insn, 26, 1);
    bool is_load = extract32(insn, 22, 1);
    int opc = extract32(insn, 30, 2);

    bool is_signed = false;
    bool postindex = false;
    bool wback = false;
    bool set_tag = false;       /* true only for STGP (MTE tag store) */

    TCGv_i64 clean_addr, dirty_addr;

    int size;

    if (opc == 3) {
        unallocated_encoding(s);
        return;
    }

    if (is_vector) {
        size = 2 + opc;
    } else if (opc == 1 && !is_load) {
        /* STGP */
        if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) {
            unallocated_encoding(s);
            return;
        }
        size = 3;
        set_tag = true;
    } else {
        size = 2 + extract32(opc, 1, 1);
        is_signed = extract32(opc, 0, 1);
        if (!is_load && is_signed) {
            unallocated_encoding(s);
            return;
        }
    }

    switch (index) {
    case 1: /* post-index */
        postindex = true;
        wback = true;
        break;
    case 0:
        /* signed offset with "non-temporal" hint. Since we don't emulate
         * caches we don't care about hints to the cache system about
         * data access patterns, and handle this identically to plain
         * signed offset.
         */
        if (is_signed) {
            /* There is no non-temporal-hint version of LDPSW */
            unallocated_encoding(s);
            return;
        }
        postindex = false;
        break;
    case 2: /* signed offset, rn not updated */
        postindex = false;
        break;
    case 3: /* pre-index */
        postindex = false;
        wback = true;
        break;
    }

    if (is_vector && !fp_access_check(s)) {
        return;
    }

    /* imm7 is scaled by the access size; STGP scales by the tag granule.  */
    offset <<= (set_tag ? LOG2_TAG_GRANULE : size);

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    dirty_addr = read_cpu_reg_sp(s, rn, 1);
    if (!postindex) {
        tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
    }

    if (set_tag) {
        if (!s->ata) {
            /*
             * TODO: We could rely on the stores below, at least for
             * system mode, if we arrange to add MO_ALIGN_16.
             */
            gen_helper_stg_stub(cpu_env, dirty_addr);
        } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
            gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr);
        } else {
            gen_helper_stg(cpu_env, dirty_addr, dirty_addr);
        }
    }

    /* Check both halves of the pair (2 << size bytes) in one go.  */
    clean_addr = gen_mte_checkN(s, dirty_addr, !is_load,
                                (wback || rn != 31) && !set_tag, 2 << size);

    if (is_vector) {
        if (is_load) {
            do_fp_ld(s, rt, clean_addr, size);
        } else {
            do_fp_st(s, rt, clean_addr, size);
        }
        tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
        if (is_load) {
            do_fp_ld(s, rt2, clean_addr, size);
        } else {
            do_fp_st(s, rt2, clean_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);

        if (is_load) {
            TCGv_i64 tmp = tcg_temp_new_i64();

            /* Do not modify tcg_rt before recognizing any exception
             * from the second load.
             */
            do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN,
                      false, false, 0, false, false);
            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
            do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN,
                      false, false, 0, false, false);

            tcg_gen_mov_i64(tcg_rt, tmp);
            tcg_temp_free_i64(tmp);
        } else {
            do_gpr_st(s, tcg_rt, clean_addr, size,
                      false, 0, false, false);
            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
            do_gpr_st(s, tcg_rt2, clean_addr, size,
                      false, 0, false, false);
        }
    }

    if (wback) {
        if (postindex) {
            tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
        }
        /* Writeback uses the untagged ("dirty") address.  */
        tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
    }
}
3014
3015/*
3016 * Load/store (immediate post-indexed)
3017 * Load/store (immediate pre-indexed)
3018 * Load/store (unscaled immediate)
3019 *
3020 * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
3021 * +----+-------+---+-----+-----+---+--------+-----+------+------+
3022 * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
3023 * +----+-------+---+-----+-----+---+--------+-----+------+------+
3024 *
3025 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
3026         10 -> unprivileged
3027 * V = 0 -> non-vector
3028 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
3029 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3030 */
static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
                                int opc,
                                int size,
                                int rt,
                                bool is_vector)
{
    int rn = extract32(insn, 5, 5);
    int imm9 = sextract32(insn, 12, 9);
    int idx = extract32(insn, 10, 2);
    bool is_signed = false;
    bool is_store = false;
    bool is_extended = false;
    bool is_unpriv = (idx == 2);        /* LDTR/STTR family */
    bool iss_valid = !is_vector;
    bool post_index;
    bool writeback;
    int memidx;

    TCGv_i64 clean_addr, dirty_addr;

    if (is_vector) {
        /* For SIMD&FP, opc<1> extends size; size 4 means 128-bit.  */
        size |= (opc & 2) << 1;
        if (size > 4 || is_unpriv) {
            unallocated_encoding(s);
            return;
        }
        is_store = ((opc & 1) == 0);
        if (!fp_access_check(s)) {
            return;
        }
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch */
            if (idx != 0) {
                unallocated_encoding(s);
                return;
            }
            return;
        }
        if (opc == 3 && size > 1) {
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = extract32(opc, 1, 1);
        is_extended = (size < 3) && extract32(opc, 0, 1);
    }

    switch (idx) {
    case 0:
    case 2:
        post_index = false;
        writeback = false;
        break;
    case 1:
        post_index = true;
        writeback = true;
        break;
    case 3:
        post_index = false;
        writeback = true;
        break;
    default:
        g_assert_not_reached();
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    dirty_addr = read_cpu_reg_sp(s, rn, 1);
    if (!post_index) {
        tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
    }

    /* Unprivileged accesses use the EL0 mmu index regardless of EL.  */
    memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
    clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store,
                                       writeback || rn != 31,
                                       size, is_unpriv, memidx);

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, clean_addr, size);
        } else {
            do_fp_ld(s, rt, clean_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);

        if (is_store) {
            do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
                             iss_valid, rt, iss_sf, false);
        } else {
            do_gpr_ld_memidx(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
                             is_extended, memidx,
                             iss_valid, rt, iss_sf, false);
        }
    }

    if (writeback) {
        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
        if (post_index) {
            tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
        }
        tcg_gen_mov_i64(tcg_rn, dirty_addr);
    }
}
3139
3140/*
3141 * Load/store (register offset)
3142 *
3143 * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
3144 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3145 * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
3146 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
3147 *
3148 * For non-vector:
3149 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
3150 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3151 * For vector:
3152 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3153 *   opc<0>: 0 -> store, 1 -> load
3154 * V: 1 -> vector/simd
3155 * opt: extend encoding (see DecodeRegExtend)
3156 * S: if S=1 then scale (essentially index by sizeof(size))
3157 * Rt: register to transfer into/out of
3158 * Rn: address register or SP for base
3159 * Rm: offset register or ZR for offset
3160 */
static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
                                   int opc,
                                   int size,
                                   int rt,
                                   bool is_vector)
{
    int rn = extract32(insn, 5, 5);
    int shift = extract32(insn, 12, 1);
    int rm = extract32(insn, 16, 5);
    int opt = extract32(insn, 13, 3);
    bool is_signed = false;
    bool is_store = false;
    bool is_extended = false;

    TCGv_i64 tcg_rm, clean_addr, dirty_addr;

    /* opt<1> == 0 extend types are reserved for this encoding.  */
    if (extract32(opt, 1, 1) == 0) {
        unallocated_encoding(s);
        return;
    }

    if (is_vector) {
        /* For SIMD&FP, opc<1> extends size; size 4 means 128-bit.  */
        size |= (opc & 2) << 1;
        if (size > 4) {
            unallocated_encoding(s);
            return;
        }
        is_store = !extract32(opc, 0, 1);
        if (!fp_access_check(s)) {
            return;
        }
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch */
            return;
        }
        if (opc == 3 && size > 1) {
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = extract32(opc, 1, 1);
        is_extended = (size < 3) && extract32(opc, 0, 1);
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    dirty_addr = read_cpu_reg_sp(s, rn, 1);

    /* When S is set the offset register is scaled by the access size.  */
    tcg_rm = read_cpu_reg(s, rm, 1);
    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);

    tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
    clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size);

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, clean_addr, size);
        } else {
            do_fp_ld(s, rt, clean_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
        if (is_store) {
            do_gpr_st(s, tcg_rt, clean_addr, size,
                      true, rt, iss_sf, false);
        } else {
            do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
                      is_extended, true, rt, iss_sf, false);
        }
    }
}
3235
3236/*
3237 * Load/store (unsigned immediate)
3238 *
3239 * 31 30 29   27  26 25 24 23 22 21        10 9     5
3240 * +----+-------+---+-----+-----+------------+-------+------+
3241 * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
3242 * +----+-------+---+-----+-----+------------+-------+------+
3243 *
3244 * For non-vector:
3245 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
3246 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3247 * For vector:
3248 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3249 *   opc<0>: 0 -> store, 1 -> load
3250 * Rn: base address register (inc SP)
3251 * Rt: target register
3252 */
static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
                                        int opc,
                                        int size,
                                        int rt,
                                        bool is_vector)
{
    int rn = extract32(insn, 5, 5);
    unsigned int imm12 = extract32(insn, 10, 12);
    unsigned int offset;

    TCGv_i64 clean_addr, dirty_addr;

    bool is_store;
    bool is_signed = false;
    bool is_extended = false;

    if (is_vector) {
        /* For SIMD&FP, opc<1> extends size; size 4 means 128-bit.  */
        size |= (opc & 2) << 1;
        if (size > 4) {
            unallocated_encoding(s);
            return;
        }
        is_store = !extract32(opc, 0, 1);
        if (!fp_access_check(s)) {
            return;
        }
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch */
            return;
        }
        if (opc == 3 && size > 1) {
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = extract32(opc, 1, 1);
        is_extended = (size < 3) && extract32(opc, 0, 1);
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    dirty_addr = read_cpu_reg_sp(s, rn, 1);
    /* imm12 is an unsigned offset scaled by the access size.  */
    offset = imm12 << size;
    tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
    clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size);

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, clean_addr, size);
        } else {
            do_fp_ld(s, rt, clean_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
        if (is_store) {
            do_gpr_st(s, tcg_rt, clean_addr, size,
                      true, rt, iss_sf, false);
        } else {
            do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
                      is_extended, true, rt, iss_sf, false);
        }
    }
}
3319
3320/* Atomic memory operations
3321 *
3322 *  31  30      27  26    24    22  21   16   15    12    10    5     0
3323 * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3324 * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn |  Rt |
3325 * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+
3326 *
3327 * Rt: the result register
3328 * Rn: base address or SP
3329 * Rs: the source register for the operation
3330 * V: vector flag (always 0 as of v8.3)
3331 * A: acquire flag
3332 * R: release flag
3333 */
static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
                              int size, int rt, bool is_vector)
{
    int rs = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int o3_opc = extract32(insn, 12, 4);
    bool r = extract32(insn, 22, 1);
    bool a = extract32(insn, 23, 1);
    TCGv_i64 tcg_rs, tcg_rt, clean_addr;
    AtomicThreeOpFn *fn = NULL;
    MemOp mop = s->be_data | size | MO_ALIGN;

    if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
        unallocated_encoding(s);
        return;
    }
    /* Note: the case labels below are octal, mirroring the o3:opc split.  */
    switch (o3_opc) {
    case 000: /* LDADD */
        fn = tcg_gen_atomic_fetch_add_i64;
        break;
    case 001: /* LDCLR */
        fn = tcg_gen_atomic_fetch_and_i64;
        break;
    case 002: /* LDEOR */
        fn = tcg_gen_atomic_fetch_xor_i64;
        break;
    case 003: /* LDSET */
        fn = tcg_gen_atomic_fetch_or_i64;
        break;
    case 004: /* LDSMAX */
        fn = tcg_gen_atomic_fetch_smax_i64;
        mop |= MO_SIGN;
        break;
    case 005: /* LDSMIN */
        fn = tcg_gen_atomic_fetch_smin_i64;
        mop |= MO_SIGN;
        break;
    case 006: /* LDUMAX */
        fn = tcg_gen_atomic_fetch_umax_i64;
        break;
    case 007: /* LDUMIN */
        fn = tcg_gen_atomic_fetch_umin_i64;
        break;
    case 010: /* SWP */
        fn = tcg_gen_atomic_xchg_i64;
        break;
    case 014: /* LDAPR, LDAPRH, LDAPRB */
        if (!dc_isar_feature(aa64_rcpc_8_3, s) ||
            rs != 31 || a != 1 || r != 0) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size);

    if (o3_opc == 014) {
        /*
         * LDAPR* are a special case because they are a simple load, not a
         * fetch-and-do-something op.
         * The architectural consistency requirements here are weaker than
         * full load-acquire (we only need "load-acquire processor consistent"),
         * but we choose to implement them as full LDAQ.
         */
        do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false,
                  true, rt, disas_ldst_compute_iss_sf(size, false, 0), true);
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
        return;
    }

    tcg_rs = read_cpu_reg(s, rs, true);
    tcg_rt = cpu_reg(s, rt);

    /* LDCLR clears the bits set in Rs, i.e. an AND with ~Rs.  */
    if (o3_opc == 1) { /* LDCLR */
        tcg_gen_not_i64(tcg_rs, tcg_rs);
    }

    /* The tcg atomic primitives are all full barriers.  Therefore we
     * can ignore the Acquire and Release bits of this instruction.
     */
    fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);

    /* A sign-extending fetch still writes a zero-extended 32-bit result.  */
    if ((mop & MO_SIGN) && size != MO_64) {
        tcg_gen_ext32u_i64(tcg_rt, tcg_rt);
    }
}
3427
3428/*
3429 * PAC memory operations
3430 *
3431 *  31  30      27  26    24    22  21       12  11  10    5     0
3432 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3433 * | size | 1 1 1 | V | 0 0 | M S | 1 |  imm9  | W | 1 | Rn |  Rt |
3434 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3435 *
3436 * Rt: the result register
3437 * Rn: base address or SP
3438 * V: vector flag (always 0 as of v8.3)
3439 * M: clear for key DA, set for key DB
3440 * W: pre-indexing flag
3441 * S: sign for imm9.
3442 */
static void disas_ldst_pac(DisasContext *s, uint32_t insn,
                           int size, int rt, bool is_vector)
{
    int rn = extract32(insn, 5, 5);
    bool is_wback = extract32(insn, 11, 1);
    bool use_key_a = !extract32(insn, 23, 1);
    int offset;
    TCGv_i64 clean_addr, dirty_addr, tcg_rt;

    /* LDRAA/LDRAB are 64-bit GPR loads only.  */
    if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
        unallocated_encoding(s);
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    dirty_addr = read_cpu_reg_sp(s, rn, 1);

    /* Authenticate the base address with key DA or DB (modifier zero).  */
    if (s->pauth_active) {
        if (use_key_a) {
            gen_helper_autda(dirty_addr, cpu_env, dirty_addr,
                             new_tmp_a64_zero(s));
        } else {
            gen_helper_autdb(dirty_addr, cpu_env, dirty_addr,
                             new_tmp_a64_zero(s));
        }
    }

    /* Form the 10-bit signed, scaled offset.  */
    offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
    offset = sextract32(offset << size, 0, 10 + size);
    tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);

    /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
    clean_addr = gen_mte_check1(s, dirty_addr, false,
                                is_wback || rn != 31, size);

    tcg_rt = cpu_reg(s, rt);
    do_gpr_ld(s, tcg_rt, clean_addr, size,
              /* extend */ false, /* iss_valid */ !is_wback,
              /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);

    if (is_wback) {
        tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
    }
}
3490
3491/*
3492 * LDAPR/STLR (unscaled immediate)
3493 *
3494 *  31  30            24    22  21       12    10    5     0
3495 * +------+-------------+-----+---+--------+-----+----+-----+
3496 * | size | 0 1 1 0 0 1 | opc | 0 |  imm9  | 0 0 | Rn |  Rt |
3497 * +------+-------------+-----+---+--------+-----+----+-----+
3498 *
3499 * Rt: source or destination register
3500 * Rn: base register
3501 * imm9: unscaled immediate offset
3502 * opc: 00: STLUR*, 01/10/11: various LDAPUR*
3503 * size: size of load/store
3504 */
static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int offset = sextract32(insn, 12, 9);
    int opc = extract32(insn, 22, 2);
    int size = extract32(insn, 30, 2);
    TCGv_i64 clean_addr, dirty_addr;
    bool is_store = false;
    bool extend = false;
    bool iss_sf;
    MemOp mop;

    if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
        unallocated_encoding(s);
        return;
    }

    /* TODO: ARMv8.4-LSE SCTLR.nAA */
    mop = size | MO_ALIGN;

    switch (opc) {
    case 0: /* STLURB */
        is_store = true;
        break;
    case 1: /* LDAPUR* */
        break;
    case 2: /* LDAPURS* 64-bit variant */
        if (size == 3) {
            /* A 64-bit sign-extending load to a 64-bit register
             * is meaningless: the encoding is reserved.
             */
            unallocated_encoding(s);
            return;
        }
        mop |= MO_SIGN;
        break;
    case 3: /* LDAPURS* 32-bit variant */
        if (size > 1) {
            unallocated_encoding(s);
            return;
        }
        mop |= MO_SIGN;
        extend = true; /* zero-extend 32->64 after signed load */
        break;
    default:
        g_assert_not_reached();
    }

    iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc);

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    dirty_addr = read_cpu_reg_sp(s, rn, 1);
    tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
    clean_addr = clean_data_tbi(s, dirty_addr);

    if (is_store) {
        /* Store-Release semantics */
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
        do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true);
    } else {
        /*
         * Load-AcquirePC semantics; we implement as the slightly more
         * restrictive Load-Acquire.
         */
        do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop,
                  extend, true, rt, iss_sf, true);
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
    }
}
3575
3576/* Load/store register (all forms) */
3577static void disas_ldst_reg(DisasContext *s, uint32_t insn)
3578{
3579    int rt = extract32(insn, 0, 5);
3580    int opc = extract32(insn, 22, 2);
3581    bool is_vector = extract32(insn, 26, 1);
3582    int size = extract32(insn, 30, 2);
3583
3584    switch (extract32(insn, 24, 2)) {
3585    case 0:
3586        if (extract32(insn, 21, 1) == 0) {
3587            /* Load/store register (unscaled immediate)
3588             * Load/store immediate pre/post-indexed
3589             * Load/store register unprivileged
3590             */
3591            disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
3592            return;
3593        }
3594        switch (extract32(insn, 10, 2)) {
3595        case 0:
3596            disas_ldst_atomic(s, insn, size, rt, is_vector);
3597            return;
3598        case 2:
3599            disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
3600            return;
3601        default:
3602            disas_ldst_pac(s, insn, size, rt, is_vector);
3603            return;
3604        }
3605        break;
3606    case 1:
3607        disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
3608        return;
3609    }
3610    unallocated_encoding(s);
3611}
3612
3613/* AdvSIMD load/store multiple structures
3614 *
3615 *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
3616 * +---+---+---------------+---+-------------+--------+------+------+------+
3617 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
3618 * +---+---+---------------+---+-------------+--------+------+------+------+
3619 *
3620 * AdvSIMD load/store multiple structures (post-indexed)
3621 *
3622 *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
3623 * +---+---+---------------+---+---+---------+--------+------+------+------+
3624 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
3625 * +---+---+---------------+---+---+---------+--------+------+------+------+
3626 *
3627 * Rt: first (or only) SIMD&FP register to be transferred
3628 * Rn: base address or SP
3629 * Rm (post-index only): post-index register (when !31) or size dependent #imm
3630 */
static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 10, 2);
    int opcode = extract32(insn, 12, 4);
    bool is_store = !extract32(insn, 22, 1);
    bool is_postidx = extract32(insn, 23, 1);
    bool is_q = extract32(insn, 30, 1);
    TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
    MemOp endian, align, mop;

    int total;    /* total bytes */
    int elements; /* elements per vector */
    int rpt;    /* num iterations */
    int selem;  /* structure elements */
    int r;

    if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
        unallocated_encoding(s);
        return;
    }

    /* In the non-post-indexed form the Rm field must be all-zeroes */
    if (!is_postidx && rm != 0) {
        unallocated_encoding(s);
        return;
    }

    /* From the shared decode logic */
    switch (opcode) {
    case 0x0:
        /* LD4/ST4: one pass over four interleaved registers */
        rpt = 1;
        selem = 4;
        break;
    case 0x2:
        /* LD1/ST1, four consecutive registers */
        rpt = 4;
        selem = 1;
        break;
    case 0x4:
        /* LD3/ST3: one pass over three interleaved registers */
        rpt = 1;
        selem = 3;
        break;
    case 0x6:
        /* LD1/ST1, three consecutive registers */
        rpt = 3;
        selem = 1;
        break;
    case 0x7:
        /* LD1/ST1, single register */
        rpt = 1;
        selem = 1;
        break;
    case 0x8:
        /* LD2/ST2: one pass over two interleaved registers */
        rpt = 1;
        selem = 2;
        break;
    case 0xa:
        /* LD1/ST1, two consecutive registers */
        rpt = 2;
        selem = 1;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == 3 && !is_q && selem != 1) {
        /* reserved */
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    /* For our purposes, bytes are always little-endian.  */
    endian = s->be_data;
    if (size == 0) {
        endian = MO_LE;
    }

    total = rpt * selem * (is_q ? 16 : 8);
    tcg_rn = cpu_reg_sp(s, rn);

    /*
     * Issue the MTE check vs the logical repeat count, before we
     * promote consecutive little-endian elements below.
     */
    clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31,
                                total);

    /*
     * Consecutive little-endian elements from a single register
     * can be promoted to a larger little-endian operation.
     */
    align = MO_ALIGN;
    if (selem == 1 && endian == MO_LE) {
        align = pow2_align(size);
        size = 3;
    }
    if (!s->align_mem) {
        align = 0;
    }
    mop = endian | size | align;

    elements = (is_q ? 16 : 8) >> size;
    tcg_ebytes = tcg_const_i64(1 << size);
    for (r = 0; r < rpt; r++) {
        int e;
        for (e = 0; e < elements; e++) {
            int xs;
            for (xs = 0; xs < selem; xs++) {
                /* Register numbers wrap modulo 32 (v31 -> v0) */
                int tt = (rt + r + xs) % 32;
                if (is_store) {
                    do_vec_st(s, tt, e, clean_addr, mop);
                } else {
                    do_vec_ld(s, tt, e, clean_addr, mop);
                }
                tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
            }
        }
    }
    tcg_temp_free_i64(tcg_ebytes);

    if (!is_store) {
        /* For non-quad operations, setting a slice of the low
         * 64 bits of the register clears the high 64 bits (in
         * the ARM ARM pseudocode this is implicit in the fact
         * that 'rval' is a 64 bit wide variable).
         * For quad operations, we might still need to zero the
         * high bits of SVE.
         */
        for (r = 0; r < rpt * selem; r++) {
            int tt = (rt + r) % 32;
            clear_vec_high(s, is_q, tt);
        }
    }

    if (is_postidx) {
        if (rm == 31) {
            /* Rm == 31 selects the size-dependent immediate post-index */
            tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
        } else {
            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
        }
    }
}
3780
3781/* AdvSIMD load/store single structure
3782 *
3783 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3784 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3785 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
3786 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3787 *
3788 * AdvSIMD load/store single structure (post-indexed)
3789 *
3790 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3791 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3792 * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
3793 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3794 *
3795 * Rt: first (or only) SIMD&FP register to be transferred
3796 * Rn: base address or SP
3797 * Rm (post-index only): post-index register (when !31) or size dependent #imm
3798 * index = encoded in Q:S:size dependent on size
3799 *
3800 * lane_size = encoded in R, opc
3801 * transfer width = encoded in opc, S, size
3802 */
static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 10, 2);
    int S = extract32(insn, 12, 1);
    int opc = extract32(insn, 13, 3);
    int R = extract32(insn, 21, 1);
    int is_load = extract32(insn, 22, 1);
    int is_postidx = extract32(insn, 23, 1);
    int is_q = extract32(insn, 30, 1);

    int scale = extract32(opc, 1, 2);       /* log2 of the lane size */
    int selem = (extract32(opc, 0, 1) << 1 | R) + 1;  /* 1..4 structure regs */
    bool replicate = false;
    int index = is_q << 3 | S << 2 | size;  /* lane index, narrowed below */
    int xs, total;
    TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
    MemOp mop;

    if (extract32(insn, 31, 1)) {
        unallocated_encoding(s);
        return;
    }
    /* In the non-post-indexed form the Rm field must be all-zeroes */
    if (!is_postidx && rm != 0) {
        unallocated_encoding(s);
        return;
    }

    switch (scale) {
    case 3:
        /* Load and replicate forms (LD1R..LD4R): load only, S must be 0;
         * the lane size comes from the size field instead.
         */
        if (!is_load || S) {
            unallocated_encoding(s);
            return;
        }
        scale = size;
        replicate = true;
        break;
    case 0:
        /* 8-bit lanes: index is Q:S:size as-is */
        break;
    case 1:
        /* 16-bit lanes: size<0> must be 0; index is Q:S:size<1> */
        if (extract32(size, 0, 1)) {
            unallocated_encoding(s);
            return;
        }
        index >>= 1;
        break;
    case 2:
        /* 32-bit lanes (size == 00) or 64-bit lanes (size == 01, S == 0) */
        if (extract32(size, 1, 1)) {
            unallocated_encoding(s);
            return;
        }
        if (!extract32(size, 0, 1)) {
            index >>= 2;
        } else {
            if (S) {
                unallocated_encoding(s);
                return;
            }
            index >>= 3;
            scale = 3;
        }
        break;
    default:
        g_assert_not_reached();
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    /* total bytes transferred by the whole instruction */
    total = selem << scale;
    tcg_rn = cpu_reg_sp(s, rn);

    clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31,
                                total);
    mop = finalize_memop(s, scale);

    tcg_ebytes = tcg_const_i64(1 << scale);
    for (xs = 0; xs < selem; xs++) {
        if (replicate) {
            /* Load and replicate to all elements */
            TCGv_i64 tcg_tmp = tcg_temp_new_i64();

            tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop);
            tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
                                 (is_q + 1) * 8, vec_full_reg_size(s),
                                 tcg_tmp);
            tcg_temp_free_i64(tcg_tmp);
        } else {
            /* Load/store one element per register */
            if (is_load) {
                do_vec_ld(s, rt, index, clean_addr, mop);
            } else {
                do_vec_st(s, rt, index, clean_addr, mop);
            }
        }
        tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
        rt = (rt + 1) % 32;   /* register numbers wrap (v31 -> v0) */
    }
    tcg_temp_free_i64(tcg_ebytes);

    if (is_postidx) {
        if (rm == 31) {
            /* Rm == 31 selects the size-dependent immediate post-index */
            tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
        } else {
            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
        }
    }
}
3918
3919/*
3920 * Load/Store memory tags
3921 *
3922 *  31 30 29         24     22  21     12    10      5      0
3923 * +-----+-------------+-----+---+------+-----+------+------+
3924 * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 |  Rn  |  Rt  |
3925 * +-----+-------------+-----+---+------+-----+------+------+
3926 */
/*
 * Decode and translate the MTE tag load/store group:
 * STG/STZG/ST2G/STZ2G (single/pair tag stores, optionally zeroing data),
 * LDG (tag load), and the bulk forms STGM/STZGM/LDGM (EL1+ only).
 */
static void disas_ldst_tag(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
    int op2 = extract32(insn, 10, 2);
    int op1 = extract32(insn, 22, 2);
    bool is_load = false, is_pair = false, is_zero = false, is_mult = false;
    int index = 0;
    TCGv_i64 addr, clean_addr, tcg_rt;

    /* We checked insn bits [29:24,21] in the caller.  */
    if (extract32(insn, 30, 2) != 3) {
        goto do_unallocated;
    }

    /*
     * @index is a tri-state variable which has 3 states:
     * < 0 : post-index, writeback
     * = 0 : signed offset
     * > 0 : pre-index, writeback
     */
    switch (op1) {
    case 0:
        if (op2 != 0) {
            /* STG */
            index = op2 - 2;
        } else {
            /* STZGM */
            if (s->current_el == 0 || offset != 0) {
                goto do_unallocated;
            }
            is_mult = is_zero = true;
        }
        break;
    case 1:
        if (op2 != 0) {
            /* STZG */
            is_zero = true;
            index = op2 - 2;
        } else {
            /* LDG */
            is_load = true;
        }
        break;
    case 2:
        if (op2 != 0) {
            /* ST2G */
            is_pair = true;
            index = op2 - 2;
        } else {
            /* STGM */
            if (s->current_el == 0 || offset != 0) {
                goto do_unallocated;
            }
            is_mult = true;
        }
        break;
    case 3:
        if (op2 != 0) {
            /* STZ2G */
            is_pair = is_zero = true;
            index = op2 - 2;
        } else {
            /* LDGM */
            if (s->current_el == 0 || offset != 0) {
                goto do_unallocated;
            }
            is_mult = is_load = true;
        }
        break;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }

    /*
     * The bulk (multi-tag) forms require full MTE; the single-tag forms
     * only require the insn-only subset of the feature.
     */
    if (is_mult
        ? !dc_isar_feature(aa64_mte, s)
        : !dc_isar_feature(aa64_mte_insn_reg, s)) {
        goto do_unallocated;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    addr = read_cpu_reg_sp(s, rn, true);
    if (index >= 0) {
        /* pre-index or signed offset */
        tcg_gen_addi_i64(addr, addr, offset);
    }

    if (is_mult) {
        tcg_rt = cpu_reg(s, rt);

        if (is_zero) {
            int size = 4 << s->dcz_blocksize;

            if (s->ata) {
                gen_helper_stzgm_tags(cpu_env, addr, tcg_rt);
            }
            /*
             * The non-tags portion of STZGM is mostly like DC_ZVA,
             * except the alignment happens before the access.
             */
            clean_addr = clean_data_tbi(s, addr);
            tcg_gen_andi_i64(clean_addr, clean_addr, -size);
            gen_helper_dc_zva(cpu_env, clean_addr);
        } else if (s->ata) {
            if (is_load) {
                gen_helper_ldgm(tcg_rt, cpu_env, addr);
            } else {
                gen_helper_stgm(cpu_env, addr, tcg_rt);
            }
        } else {
            /* Tag access disabled: just probe the aligned block */
            MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
            int size = 4 << GMID_EL1_BS;

            clean_addr = clean_data_tbi(s, addr);
            tcg_gen_andi_i64(clean_addr, clean_addr, -size);
            gen_probe_access(s, clean_addr, acc, size);

            if (is_load) {
                /* The result tags are zeros.  */
                tcg_gen_movi_i64(tcg_rt, 0);
            }
        }
        return;
    }

    if (is_load) {
        /* LDG reads the tag of the granule containing the address */
        tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
        tcg_rt = cpu_reg(s, rt);
        if (s->ata) {
            gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt);
        } else {
            clean_addr = clean_data_tbi(s, addr);
            gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
            gen_address_with_allocation_tag0(tcg_rt, addr);
        }
    } else {
        tcg_rt = cpu_reg_sp(s, rt);
        if (!s->ata) {
            /*
             * For STG and ST2G, we need to check alignment and probe memory.
             * TODO: For STZG and STZ2G, we could rely on the stores below,
             * at least for system mode; user-only won't enforce alignment.
             */
            if (is_pair) {
                gen_helper_st2g_stub(cpu_env, addr);
            } else {
                gen_helper_stg_stub(cpu_env, addr);
            }
        } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
            if (is_pair) {
                gen_helper_st2g_parallel(cpu_env, addr, tcg_rt);
            } else {
                gen_helper_stg_parallel(cpu_env, addr, tcg_rt);
            }
        } else {
            if (is_pair) {
                gen_helper_st2g(cpu_env, addr, tcg_rt);
            } else {
                gen_helper_stg(cpu_env, addr, tcg_rt);
            }
        }
    }

    if (is_zero) {
        /* Zero the data granule(s) covered by STZG/STZ2G, 8 bytes at a time */
        TCGv_i64 clean_addr = clean_data_tbi(s, addr);
        TCGv_i64 tcg_zero = tcg_const_i64(0);
        int mem_index = get_mem_index(s);
        int i, n = (1 + is_pair) << LOG2_TAG_GRANULE;

        /* The first store carries the alignment check for the whole range */
        tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index,
                            MO_Q | MO_ALIGN_16);
        for (i = 8; i < n; i += 8) {
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
            tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_Q);
        }
        tcg_temp_free_i64(tcg_zero);
    }

    if (index != 0) {
        /* pre-index or post-index */
        if (index < 0) {
            /* post-index */
            tcg_gen_addi_i64(addr, addr, offset);
        }
        tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr);
    }
}
4121
4122/* Loads and stores */
4123static void disas_ldst(DisasContext *s, uint32_t insn)
4124{
4125    switch (extract32(insn, 24, 6)) {
4126    case 0x08: /* Load/store exclusive */
4127        disas_ldst_excl(s, insn);
4128        break;
4129    case 0x18: case 0x1c: /* Load register (literal) */
4130        disas_ld_lit(s, insn);
4131        break;
4132    case 0x28: case 0x29:
4133    case 0x2c: case 0x2d: /* Load/store pair (all forms) */
4134        disas_ldst_pair(s, insn);
4135        break;
4136    case 0x38: case 0x39:
4137    case 0x3c: case 0x3d: /* Load/store register (all forms) */
4138        disas_ldst_reg(s, insn);
4139        break;
4140    case 0x0c: /* AdvSIMD load/store multiple structures */
4141        disas_ldst_multiple_struct(s, insn);
4142        break;
4143    case 0x0d: /* AdvSIMD load/store single structure */
4144        disas_ldst_single_struct(s, insn);
4145        break;
4146    case 0x19:
4147        if (extract32(insn, 21, 1) != 0) {
4148            disas_ldst_tag(s, insn);
4149        } else if (extract32(insn, 10, 2) == 0) {
4150            disas_ldst_ldapr_stlr(s, insn);
4151        } else {
4152            unallocated_encoding(s);
4153        }
4154        break;
4155    default:
4156        unallocated_encoding(s);
4157        break;
4158    }
4159}
4160
4161/* PC-rel. addressing
4162 *   31  30   29 28       24 23                5 4    0
4163 * +----+-------+-----------+-------------------+------+
4164 * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
4165 * +----+-------+-----------+-------------------+------+
4166 */
4167static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
4168{
4169    unsigned int page, rd;
4170    uint64_t base;
4171    uint64_t offset;
4172
4173    page = extract32(insn, 31, 1);
4174    /* SignExtend(immhi:immlo) -> offset */
4175    offset = sextract64(insn, 5, 19);
4176    offset = offset << 2 | extract32(insn, 29, 2);
4177    rd = extract32(insn, 0, 5);
4178    base = s->pc_curr;
4179
4180    if (page) {
4181        /* ADRP (page based) */
4182        base &= ~0xfff;
4183        offset <<= 12;
4184    }
4185
4186    tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
4187}
4188
4189/*
4190 * Add/subtract (immediate)
4191 *
4192 *  31 30 29 28         23 22 21         10 9   5 4   0
4193 * +--+--+--+-------------+--+-------------+-----+-----+
4194 * |sf|op| S| 1 0 0 0 1 0 |sh|    imm12    |  Rn | Rd  |
4195 * +--+--+--+-------------+--+-------------+-----+-----+
4196 *
4197 *    sf: 0 -> 32bit, 1 -> 64bit
4198 *    op: 0 -> add  , 1 -> sub
4199 *     S: 1 -> set flags
4200 *    sh: 1 -> LSL imm by 12
4201 */
4202static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
4203{
4204    int rd = extract32(insn, 0, 5);
4205    int rn = extract32(insn, 5, 5);
4206    uint64_t imm = extract32(insn, 10, 12);
4207    bool shift = extract32(insn, 22, 1);
4208    bool setflags = extract32(insn, 29, 1);
4209    bool sub_op = extract32(insn, 30, 1);
4210    bool is_64bit = extract32(insn, 31, 1);
4211
4212    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
4213    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
4214    TCGv_i64 tcg_result;
4215
4216    if (shift) {
4217        imm <<= 12;
4218    }
4219
4220    tcg_result = tcg_temp_new_i64();
4221    if (!setflags) {
4222        if (sub_op) {
4223            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
4224        } else {
4225            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
4226        }
4227    } else {
4228        TCGv_i64 tcg_imm = tcg_const_i64(imm);
4229        if (sub_op) {
4230            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
4231        } else {
4232            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
4233        }
4234        tcg_temp_free_i64(tcg_imm);
4235    }
4236
4237    if (is_64bit) {
4238        tcg_gen_mov_i64(tcg_rd, tcg_result);
4239    } else {
4240        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4241    }
4242
4243    tcg_temp_free_i64(tcg_result);
4244}
4245
4246/*
4247 * Add/subtract (immediate, with tags)
4248 *
4249 *  31 30 29 28         23 22 21     16 14      10 9   5 4   0
4250 * +--+--+--+-------------+--+---------+--+-------+-----+-----+
4251 * |sf|op| S| 1 0 0 0 1 1 |o2|  uimm6  |o3| uimm4 |  Rn | Rd  |
4252 * +--+--+--+-------------+--+---------+--+-------+-----+-----+
4253 *
4254 *    op: 0 -> add, 1 -> sub
4255 */
4256static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn)
4257{
4258    int rd = extract32(insn, 0, 5);
4259    int rn = extract32(insn, 5, 5);
4260    int uimm4 = extract32(insn, 10, 4);
4261    int uimm6 = extract32(insn, 16, 6);
4262    bool sub_op = extract32(insn, 30, 1);
4263    TCGv_i64 tcg_rn, tcg_rd;
4264    int imm;
4265
4266    /* Test all of sf=1, S=0, o2=0, o3=0.  */
4267    if ((insn & 0xa040c000u) != 0x80000000u ||
4268        !dc_isar_feature(aa64_mte_insn_reg, s)) {
4269        unallocated_encoding(s);
4270        return;
4271    }
4272
4273    imm = uimm6 << LOG2_TAG_GRANULE;
4274    if (sub_op) {
4275        imm = -imm;
4276    }
4277
4278    tcg_rn = cpu_reg_sp(s, rn);
4279    tcg_rd = cpu_reg_sp(s, rd);
4280
4281    if (s->ata) {
4282        TCGv_i32 offset = tcg_const_i32(imm);
4283        TCGv_i32 tag_offset = tcg_const_i32(uimm4);
4284
4285        gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn, offset, tag_offset);
4286        tcg_temp_free_i32(tag_offset);
4287        tcg_temp_free_i32(offset);
4288    } else {
4289        tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4290        gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4291    }
4292}
4293
4294/* The input should be a value in the bottom e bits (with higher
4295 * bits zero); returns that value replicated into every element
4296 * of size e in a 64 bit integer.
4297 */
/* The input should be a value in the bottom e bits (with higher
 * bits zero); returns that value replicated into every element
 * of size e in a 64 bit integer.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    assert(e != 0);
    /* Double the populated width until the full 64 bits are covered */
    for (; e < 64; e *= 2) {
        mask |= mask << e;
    }
    return mask;
}
4307
4308/* Return a value with the bottom len bits set (where 0 < len <= 64) */
4309static inline uint64_t bitmask64(unsigned int length)
4310{
4311    assert(length > 0 && length <= 64);
4312    return ~0ULL >> (64 - length);
4313}
4314
4315/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
4316 * only require the wmask. Returns false if the imms/immr/immn are a reserved
4317 * value (ie should cause a guest UNDEF exception), and true if they are
4318 * valid, in which case the decoded bit pattern is written to result.
4319 */
4320bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4321                            unsigned int imms, unsigned int immr)
4322{
4323    uint64_t mask;
4324    unsigned e, levels, s, r;
4325    int len;
4326
4327    assert(immn < 2 && imms < 64 && immr < 64);
4328
4329    /* The bit patterns we create here are 64 bit patterns which
4330     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4331     * 64 bits each. Each element contains the same value: a run
4332     * of between 1 and e-1 non-zero bits, rotated within the
4333     * element by between 0 and e-1 bits.
4334     *
4335     * The element size and run length are encoded into immn (1 bit)
4336     * and imms (6 bits) as follows:
4337     * 64 bit elements: immn = 1, imms = <length of run - 1>
4338     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4339     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4340     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4341     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4342     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4343     * Notice that immn = 0, imms = 11111x is the only combination
4344     * not covered by one of the above options; this is reserved.
4345     * Further, <length of run - 1> all-ones is a reserved pattern.
4346     *
4347     * In all cases the rotation is by immr % e (and immr is 6 bits).
4348     */
4349
4350    /* First determine the element size */
4351    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4352    if (len < 1) {
4353        /* This is the immn == 0, imms == 0x11111x case */
4354        return false;
4355    }
4356    e = 1 << len;
4357
4358    levels = e - 1;
4359    s = imms & levels;
4360    r = immr & levels;
4361
4362    if (s == levels) {
4363        /* <length of run - 1> mustn't be all-ones. */
4364        return false;
4365    }
4366
4367    /* Create the value of one element: s+1 set bits rotated
4368     * by r within the element (which is e bits wide)...
4369     */
4370    mask = bitmask64(s + 1);
4371    if (r) {
4372        mask = (mask >> r) | (mask << (e - r));
4373        mask &= bitmask64(e);
4374    }
4375    /* ...then replicate the element over the whole 64 bit value */
4376    mask = bitfield_replicate(mask, e);
4377    *result = mask;
4378    return true;
4379}
4380
4381/* Logical (immediate)
4382 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
4383 * +----+-----+-------------+---+------+------+------+------+
4384 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
4385 * +----+-----+-------------+---+------+------+------+------+
4386 */
4387static void disas_logic_imm(DisasContext *s, uint32_t insn)
4388{
4389    unsigned int sf, opc, is_n, immr, imms, rn, rd;
4390    TCGv_i64 tcg_rd, tcg_rn;
4391    uint64_t wmask;
4392    bool is_and = false;
4393
4394    sf = extract32(insn, 31, 1);
4395    opc = extract32(insn, 29, 2);
4396    is_n = extract32(insn, 22, 1);
4397    immr = extract32(insn, 16, 6);
4398    imms = extract32(insn, 10, 6);
4399    rn = extract32(insn, 5, 5);
4400    rd = extract32(insn, 0, 5);
4401
4402    if (!sf && is_n) {
4403        unallocated_encoding(s);
4404        return;
4405    }
4406
4407    if (opc == 0x3) { /* ANDS */
4408        tcg_rd = cpu_reg(s, rd);
4409    } else {
4410        tcg_rd = cpu_reg_sp(s, rd);
4411    }
4412    tcg_rn = cpu_reg(s, rn);
4413
4414    if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
4415        /* some immediate field values are reserved */
4416        unallocated_encoding(s);
4417        return;
4418    }
4419
4420    if (!sf) {
4421        wmask &= 0xffffffff;
4422    }
4423
4424    switch (opc) {
4425    case 0x3: /* ANDS */
4426    case 0x0: /* AND */
4427        tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
4428        is_and = true;
4429        break;
4430    case 0x1: /* ORR */
4431        tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
4432        break;
4433    case 0x2: /* EOR */
4434        tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
4435        break;
4436    default:
4437        assert(FALSE); /* must handle all above */
4438        break;
4439    }
4440
4441    if (!sf && !is_and) {
4442        /* zero extend final result; we know we can skip this for AND
4443         * since the immediate had the high 32 bits clear.
4444         */
4445        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4446    }
4447
4448    if (opc == 3) { /* ANDS */
4449        gen_logic_CC(sf, tcg_rd);
4450    }
4451}
4452
4453/*
4454 * Move wide (immediate)
4455 *
4456 *  31 30 29 28         23 22 21 20             5 4    0
4457 * +--+-----+-------------+-----+----------------+------+
4458 * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
4459 * +--+-----+-------------+-----+----------------+------+
4460 *
4461 * sf: 0 -> 32 bit, 1 -> 64 bit
4462 * opc: 00 -> N, 10 -> Z, 11 -> K
4463 * hw: shift/16 (0,16, and sf only 32, 48)
4464 */
4465static void disas_movw_imm(DisasContext *s, uint32_t insn)
4466{
4467    int rd = extract32(insn, 0, 5);
4468    uint64_t imm = extract32(insn, 5, 16);
4469    int sf = extract32(insn, 31, 1);
4470    int opc = extract32(insn, 29, 2);
4471    int pos = extract32(insn, 21, 2) << 4;
4472    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4473    TCGv_i64 tcg_imm;
4474
4475    if (!sf && (pos >= 32)) {
4476        unallocated_encoding(s);
4477        return;
4478    }
4479
4480    switch (opc) {
4481    case 0: /* MOVN */
4482    case 2: /* MOVZ */
4483        imm <<= pos;
4484        if (opc == 0) {
4485            imm = ~imm;
4486        }
4487        if (!sf) {
4488            imm &= 0xffffffffu;
4489        }
4490        tcg_gen_movi_i64(tcg_rd, imm);
4491        break;
4492    case 3: /* MOVK */
4493        tcg_imm = tcg_const_i64(imm);
4494        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
4495        tcg_temp_free_i64(tcg_imm);
4496        if (!sf) {
4497            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4498        }
4499        break;
4500    default:
4501        unallocated_encoding(s);
4502        break;
4503    }
4504}
4505
4506/* Bitfield
4507 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
4508 * +----+-----+-------------+---+------+------+------+------+
4509 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
4510 * +----+-----+-------------+---+------+------+------+------+
4511 */
/* opc: 0 -> SBFM, 1 -> BFM, 2 -> UBFM; 3 is unallocated */
static void disas_bitfield(DisasContext *s, uint32_t insn)
{
    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
    TCGv_i64 tcg_rd, tcg_tmp;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);
    n = extract32(insn, 22, 1);
    ri = extract32(insn, 16, 6);   /* immr */
    si = extract32(insn, 10, 6);   /* imms */
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);
    bitsize = sf ? 64 : 32;

    /* N must match sf, and immr/imms must fit the register width */
    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
        unallocated_encoding(s);
        return;
    }

    tcg_rd = cpu_reg(s, rd);

    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
       to be smaller than bitsize, we'll never reference data outside the
       low 32-bits anyway.  */
    tcg_tmp = read_cpu_reg(s, rn, 1);

    /* Recognize simple(r) extractions.  */
    if (si >= ri) {
        /* Wd<s-r:0> = Wn<s:r> */
        len = (si - ri) + 1;
        if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
            tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
            goto done;
        } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
            /* extract already zeroes the rest, no zero-extend needed */
            tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
            return;
        }
        /* opc == 1, BFXIL fall through to deposit */
        tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
        pos = 0;
    } else {
        /* Handle the ri > si case with a deposit
         * Wd<32+s-r,32-r> = Wn<s:0>
         */
        len = si + 1;
        /* deposit position, modulo the register width */
        pos = (bitsize - ri) & (bitsize - 1);
    }

    if (opc == 0 && len < ri) {
        /* SBFM: sign extend the destination field from len to fill
           the balance of the word.  Let the deposit below insert all
           of those sign bits.  */
        tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
        len = ri;
    }

    if (opc == 1) { /* BFM, BFXIL */
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
    } else {
        /* SBFM or UBFM: We start with zero, and we haven't modified
           any bits outside bitsize, therefore the zero-extension
           below is unneeded.  */
        tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
        return;
    }

 done:
    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}
4583
/* Extract
 *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
 * +----+------+-------------+---+----+------+--------+------+------+
 * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
 * +----+------+-------------+---+----+------+--------+------+------+
 */
static void disas_extract(DisasContext *s, uint32_t insn)
{
    unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;

    sf = extract32(insn, 31, 1);
    n = extract32(insn, 22, 1);
    rm = extract32(insn, 16, 5);
    imm = extract32(insn, 10, 6);   /* imms: LSB position in Rm */
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);
    op21 = extract32(insn, 29, 2);
    op0 = extract32(insn, 21, 1);
    bitsize = sf ? 64 : 32;

    /* Only EXTR is allocated: N == sf, op21 == op0 == 0, imm in range */
    if (sf != n || op21 || op0 || imm >= bitsize) {
        unallocated_encoding(s);
    } else {
        TCGv_i64 tcg_rd, tcg_rm, tcg_rn;

        tcg_rd = cpu_reg(s, rd);

        if (unlikely(imm == 0)) {
            /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
             * so an extract from bit 0 is a special case.
             */
            if (sf) {
                tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
            } else {
                tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
            }
        } else {
            tcg_rm = cpu_reg(s, rm);
            tcg_rn = cpu_reg(s, rn);

            if (sf) {
                /* Specialization to ROR happens in EXTRACT2.  */
                tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, imm);
            } else {
                TCGv_i32 t0 = tcg_temp_new_i32();

                tcg_gen_extrl_i64_i32(t0, tcg_rm);
                if (rm == rn) {
                    /* Rm == Rn is the canonical ROR (immediate) alias */
                    tcg_gen_rotri_i32(t0, t0, imm);
                } else {
                    TCGv_i32 t1 = tcg_temp_new_i32();
                    tcg_gen_extrl_i64_i32(t1, tcg_rn);
                    tcg_gen_extract2_i32(t0, t0, t1, imm);
                    tcg_temp_free_i32(t1);
                }
                /* Result is zero-extended into the 64-bit destination */
                tcg_gen_extu_i32_i64(tcg_rd, t0);
                tcg_temp_free_i32(t0);
            }
        }
    }
}
4645
4646/* Data processing - immediate */
4647static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
4648{
4649    switch (extract32(insn, 23, 6)) {
4650    case 0x20: case 0x21: /* PC-rel. addressing */
4651        disas_pc_rel_adr(s, insn);
4652        break;
4653    case 0x22: /* Add/subtract (immediate) */
4654        disas_add_sub_imm(s, insn);
4655        break;
4656    case 0x23: /* Add/subtract (immediate, with tags) */
4657        disas_add_sub_imm_with_tags(s, insn);
4658        break;
4659    case 0x24: /* Logical (immediate) */
4660        disas_logic_imm(s, insn);
4661        break;
4662    case 0x25: /* Move wide (immediate) */
4663        disas_movw_imm(s, insn);
4664        break;
4665    case 0x26: /* Bitfield */
4666        disas_bitfield(s, insn);
4667        break;
4668    case 0x27: /* Extract */
4669        disas_extract(s, insn);
4670        break;
4671    default:
4672        unallocated_encoding(s);
4673        break;
4674    }
4675}
4676
4677/* Shift a TCGv src by TCGv shift_amount, put result in dst.
4678 * Note that it is the caller's responsibility to ensure that the
4679 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
4680 * mandated semantics for out of range shifts.
4681 */
4682static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4683                      enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4684{
4685    switch (shift_type) {
4686    case A64_SHIFT_TYPE_LSL:
4687        tcg_gen_shl_i64(dst, src, shift_amount);
4688        break;
4689    case A64_SHIFT_TYPE_LSR:
4690        tcg_gen_shr_i64(dst, src, shift_amount);
4691        break;
4692    case A64_SHIFT_TYPE_ASR:
4693        if (!sf) {
4694            tcg_gen_ext32s_i64(dst, src);
4695        }
4696        tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4697        break;
4698    case A64_SHIFT_TYPE_ROR:
4699        if (sf) {
4700            tcg_gen_rotr_i64(dst, src, shift_amount);
4701        } else {
4702            TCGv_i32 t0, t1;
4703            t0 = tcg_temp_new_i32();
4704            t1 = tcg_temp_new_i32();
4705            tcg_gen_extrl_i64_i32(t0, src);
4706            tcg_gen_extrl_i64_i32(t1, shift_amount);
4707            tcg_gen_rotr_i32(t0, t0, t1);
4708            tcg_gen_extu_i32_i64(dst, t0);
4709            tcg_temp_free_i32(t0);
4710            tcg_temp_free_i32(t1);
4711        }
4712        break;
4713    default:
4714        assert(FALSE); /* all shift types should be handled */
4715        break;
4716    }
4717
4718    if (!sf) { /* zero extend final result */
4719        tcg_gen_ext32u_i64(dst, dst);
4720    }
4721}
4722
4723/* Shift a TCGv src by immediate, put result in dst.
4724 * The shift amount must be in range (this should always be true as the
4725 * relevant instructions will UNDEF on bad shift immediates).
4726 */
4727static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4728                          enum a64_shift_type shift_type, unsigned int shift_i)
4729{
4730    assert(shift_i < (sf ? 64 : 32));
4731
4732    if (shift_i == 0) {
4733        tcg_gen_mov_i64(dst, src);
4734    } else {
4735        TCGv_i64 shift_const;
4736
4737        shift_const = tcg_const_i64(shift_i);
4738        shift_reg(dst, src, sf, shift_type, shift_const);
4739        tcg_temp_free_i64(shift_const);
4740    }
4741}
4742
4743/* Logical (shifted register)
4744 *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
4745 * +----+-----+-----------+-------+---+------+--------+------+------+
4746 * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
4747 * +----+-----+-----------+-------+---+------+--------+------+------+
4748 */
4749static void disas_logic_reg(DisasContext *s, uint32_t insn)
4750{
4751    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4752    unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4753
4754    sf = extract32(insn, 31, 1);
4755    opc = extract32(insn, 29, 2);
4756    shift_type = extract32(insn, 22, 2);
4757    invert = extract32(insn, 21, 1);
4758    rm = extract32(insn, 16, 5);
4759    shift_amount = extract32(insn, 10, 6);
4760    rn = extract32(insn, 5, 5);
4761    rd = extract32(insn, 0, 5);
4762
4763    if (!sf && (shift_amount & (1 << 5))) {
4764        unallocated_encoding(s);
4765        return;
4766    }
4767
4768    tcg_rd = cpu_reg(s, rd);
4769
4770    if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4771        /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
4772         * register-register MOV and MVN, so it is worth special casing.
4773         */
4774        tcg_rm = cpu_reg(s, rm);
4775        if (invert) {
4776            tcg_gen_not_i64(tcg_rd, tcg_rm);
4777            if (!sf) {
4778                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4779            }
4780        } else {
4781            if (sf) {
4782                tcg_gen_mov_i64(tcg_rd, tcg_rm);
4783            } else {
4784                tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4785            }
4786        }
4787        return;
4788    }
4789
4790    tcg_rm = read_cpu_reg(s, rm, sf);
4791
4792    if (shift_amount) {
4793        shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4794    }
4795
4796    tcg_rn = cpu_reg(s, rn);
4797
4798    switch (opc | (invert << 2)) {
4799    case 0: /* AND */
4800    case 3: /* ANDS */
4801        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4802        break;
4803    case 1: /* ORR */
4804        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4805        break;
4806    case 2: /* EOR */
4807        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4808        break;
4809    case 4: /* BIC */
4810    case 7: /* BICS */
4811        tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4812        break;
4813    case 5: /* ORN */
4814        tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4815        break;
4816    case 6: /* EON */
4817        tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4818        break;
4819    default:
4820        assert(FALSE);
4821        break;
4822    }
4823
4824    if (!sf) {
4825        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4826    }
4827
4828    if (opc == 3) {
4829        gen_logic_CC(sf, tcg_rd);
4830    }
4831}
4832
4833/*
4834 * Add/subtract (extended register)
4835 *
4836 *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
4837 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4838 * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
4839 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4840 *
4841 *  sf: 0 -> 32bit, 1 -> 64bit
4842 *  op: 0 -> add  , 1 -> sub
4843 *   S: 1 -> set flags
4844 * opt: 00
4845 * option: extension type (see DecodeRegExtend)
4846 * imm3: optional shift to Rm
4847 *
4848 * Rd = Rn + LSL(extend(Rm), amount)
4849 */
4850static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4851{
4852    int rd = extract32(insn, 0, 5);
4853    int rn = extract32(insn, 5, 5);
4854    int imm3 = extract32(insn, 10, 3);
4855    int option = extract32(insn, 13, 3);
4856    int rm = extract32(insn, 16, 5);
4857    int opt = extract32(insn, 22, 2);
4858    bool setflags = extract32(insn, 29, 1);
4859    bool sub_op = extract32(insn, 30, 1);
4860    bool sf = extract32(insn, 31, 1);
4861
4862    TCGv_i64 tcg_rm, tcg_rn; /* temps */
4863    TCGv_i64 tcg_rd;
4864    TCGv_i64 tcg_result;
4865
4866    if (imm3 > 4 || opt != 0) {
4867        unallocated_encoding(s);
4868        return;
4869    }
4870
4871    /* non-flag setting ops may use SP */
4872    if (!setflags) {
4873        tcg_rd = cpu_reg_sp(s, rd);
4874    } else {
4875        tcg_rd = cpu_reg(s, rd);
4876    }
4877    tcg_rn = read_cpu_reg_sp(s, rn, sf);
4878
4879    tcg_rm = read_cpu_reg(s, rm, sf);
4880    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4881
4882    tcg_result = tcg_temp_new_i64();
4883
4884    if (!setflags) {
4885        if (sub_op) {
4886            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4887        } else {
4888            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4889        }
4890    } else {
4891        if (sub_op) {
4892            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4893        } else {
4894            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4895        }
4896    }
4897
4898    if (sf) {
4899        tcg_gen_mov_i64(tcg_rd, tcg_result);
4900    } else {
4901        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4902    }
4903
4904    tcg_temp_free_i64(tcg_result);
4905}
4906
4907/*
4908 * Add/subtract (shifted register)
4909 *
4910 *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4911 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4912 * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4913 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4914 *
4915 *    sf: 0 -> 32bit, 1 -> 64bit
4916 *    op: 0 -> add  , 1 -> sub
4917 *     S: 1 -> set flags
4918 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4919 *  imm6: Shift amount to apply to Rm before the add/sub
4920 */
4921static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4922{
4923    int rd = extract32(insn, 0, 5);
4924    int rn = extract32(insn, 5, 5);
4925    int imm6 = extract32(insn, 10, 6);
4926    int rm = extract32(insn, 16, 5);
4927    int shift_type = extract32(insn, 22, 2);
4928    bool setflags = extract32(insn, 29, 1);
4929    bool sub_op = extract32(insn, 30, 1);
4930    bool sf = extract32(insn, 31, 1);
4931
4932    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4933    TCGv_i64 tcg_rn, tcg_rm;
4934    TCGv_i64 tcg_result;
4935
4936    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4937        unallocated_encoding(s);
4938        return;
4939    }
4940
4941    tcg_rn = read_cpu_reg(s, rn, sf);
4942    tcg_rm = read_cpu_reg(s, rm, sf);
4943
4944    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4945
4946    tcg_result = tcg_temp_new_i64();
4947
4948    if (!setflags) {
4949        if (sub_op) {
4950            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4951        } else {
4952            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4953        }
4954    } else {
4955        if (sub_op) {
4956            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4957        } else {
4958            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4959        }
4960    }
4961
4962    if (sf) {
4963        tcg_gen_mov_i64(tcg_rd, tcg_result);
4964    } else {
4965        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4966    }
4967
4968    tcg_temp_free_i64(tcg_result);
4969}
4970
/* Data-processing (3 source)
 *
 *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
 *  +--+------+-----------+------+------+----+------+------+------+
 *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
 *  +--+------+-----------+------+------+----+------+------+------+
 */
static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int ra = extract32(insn, 10, 5);
    int rm = extract32(insn, 16, 5);
    /* Pack sf:op54:op31:o0 into a single value for the switch below */
    int op_id = (extract32(insn, 29, 3) << 4) |
        (extract32(insn, 21, 3) << 1) |
        extract32(insn, 15, 1);
    bool sf = extract32(insn, 31, 1);
    bool is_sub = extract32(op_id, 0, 1);   /* o0: accumulate vs subtract */
    bool is_high = extract32(op_id, 2, 1);  /* SMULH/UMULH */
    bool is_signed = false;
    TCGv_i64 tcg_op1;
    TCGv_i64 tcg_op2;
    TCGv_i64 tcg_tmp;

    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
    switch (op_id) {
    case 0x42: /* SMADDL */
    case 0x43: /* SMSUBL */
    case 0x44: /* SMULH */
        is_signed = true;
        break;
    case 0x0: /* MADD (32bit) */
    case 0x1: /* MSUB (32bit) */
    case 0x40: /* MADD (64bit) */
    case 0x41: /* MSUB (64bit) */
    case 0x4a: /* UMADDL */
    case 0x4b: /* UMSUBL */
    case 0x4c: /* UMULH */
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (is_high) {
        /* SMULH/UMULH: only the upper 64 bits of the product are kept */
        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
        TCGv_i64 tcg_rd = cpu_reg(s, rd);
        TCGv_i64 tcg_rn = cpu_reg(s, rn);
        TCGv_i64 tcg_rm = cpu_reg(s, rm);

        if (is_signed) {
            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        } else {
            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        }

        tcg_temp_free_i64(low_bits);
        return;
    }

    tcg_op1 = tcg_temp_new_i64();
    tcg_op2 = tcg_temp_new_i64();
    tcg_tmp = tcg_temp_new_i64();

    if (op_id < 0x42) {
        /* MADD/MSUB: operands used at their native width */
        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
    } else {
        /* Widening multiplies: extend the 32-bit sources to 64 bits */
        if (is_signed) {
            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
        } else {
            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
        }
    }

    if (ra == 31 && !is_sub) {
        /* Special-case MADD with rA == XZR; it is the standard MUL alias */
        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
    } else {
        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
        if (is_sub) {
            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        } else {
            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        }
    }

    if (!sf) {
        /* 32-bit forms write a zero-extended result */
        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
    }

    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_tmp);
}
5068
5069/* Add/subtract (with carry)
5070 *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
5071 * +--+--+--+------------------------+------+-------------+------+-----+
5072 * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
5073 * +--+--+--+------------------------+------+-------------+------+-----+
5074 */
5075
5076static void disas_adc_sbc(DisasContext *s, uint32_t insn)
5077{
5078    unsigned int sf, op, setflags, rm, rn, rd;
5079    TCGv_i64 tcg_y, tcg_rn, tcg_rd;
5080
5081    sf = extract32(insn, 31, 1);
5082    op = extract32(insn, 30, 1);
5083    setflags = extract32(insn, 29, 1);
5084    rm = extract32(insn, 16, 5);
5085    rn = extract32(insn, 5, 5);
5086    rd = extract32(insn, 0, 5);
5087
5088    tcg_rd = cpu_reg(s, rd);
5089    tcg_rn = cpu_reg(s, rn);
5090
5091    if (op) {
5092        tcg_y = new_tmp_a64(s);
5093        tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
5094    } else {
5095        tcg_y = cpu_reg(s, rm);
5096    }
5097
5098    if (setflags) {
5099        gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
5100    } else {
5101        gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
5102    }
5103}
5104
5105/*
5106 * Rotate right into flags
5107 *  31 30 29                21       15          10      5  4      0
5108 * +--+--+--+-----------------+--------+-----------+------+--+------+
5109 * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
5110 * +--+--+--+-----------------+--------+-----------+------+--+------+
5111 */
5112static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
5113{
5114    int mask = extract32(insn, 0, 4);
5115    int o2 = extract32(insn, 4, 1);
5116    int rn = extract32(insn, 5, 5);
5117    int imm6 = extract32(insn, 15, 6);
5118    int sf_op_s = extract32(insn, 29, 3);
5119    TCGv_i64 tcg_rn;
5120    TCGv_i32 nzcv;
5121
5122    if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
5123        unallocated_encoding(s);
5124        return;
5125    }
5126
5127    tcg_rn = read_cpu_reg(s, rn, 1);
5128    tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
5129
5130    nzcv = tcg_temp_new_i32();
5131    tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
5132
5133    if (mask & 8) { /* N */
5134        tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
5135    }
5136    if (mask & 4) { /* Z */
5137        tcg_gen_not_i32(cpu_ZF, nzcv);
5138        tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
5139    }
5140    if (mask & 2) { /* C */
5141        tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
5142    }
5143    if (mask & 1) { /* V */
5144        tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
5145    }
5146
5147    tcg_temp_free_i32(nzcv);
5148}
5149
5150/*
5151 * Evaluate into flags
5152 *  31 30 29                21        15   14        10      5  4      0
5153 * +--+--+--+-----------------+---------+----+---------+------+--+------+
5154 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
5155 * +--+--+--+-----------------+---------+----+---------+------+--+------+
5156 */
5157static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
5158{
5159    int o3_mask = extract32(insn, 0, 5);
5160    int rn = extract32(insn, 5, 5);
5161    int o2 = extract32(insn, 15, 6);
5162    int sz = extract32(insn, 14, 1);
5163    int sf_op_s = extract32(insn, 29, 3);
5164    TCGv_i32 tmp;
5165    int shift;
5166
5167    if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
5168        !dc_isar_feature(aa64_condm_4, s)) {
5169        unallocated_encoding(s);
5170        return;
5171    }
5172    shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
5173
5174    tmp = tcg_temp_new_i32();
5175    tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
5176    tcg_gen_shli_i32(cpu_NF, tmp, shift);
5177    tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
5178    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
5179    tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
5180    tcg_temp_free_i32(tmp);
5181}
5182
/* Conditional compare (immediate / register)
 *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
 * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
 *        [1]                             y                [0]       [0]
 */
static void disas_cc(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
    DisasCompare c;

    /* S must be 1 */
    if (!extract32(insn, 29, 1)) {
        unallocated_encoding(s);
        return;
    }
    /* o2 and o3 must both be 0 */
    if (insn & (1 << 10 | 1 << 4)) {
        unallocated_encoding(s);
        return;
    }
    sf = extract32(insn, 31, 1);
    op = extract32(insn, 30, 1);
    is_imm = extract32(insn, 11, 1);
    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    nzcv = extract32(insn, 0, 4);

    /* Set T0 = !COND.  */
    tcg_t0 = tcg_temp_new_i32();
    arm_test_cc(&c, cond);
    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
    arm_free_cc(&c);

    /* Load the arguments for the new comparison.  */
    if (is_imm) {
        tcg_y = new_tmp_a64(s);
        tcg_gen_movi_i64(tcg_y, y);
    } else {
        tcg_y = cpu_reg(s, y);
    }
    tcg_rn = cpu_reg(s, rn);

    /* Set the flags for the new comparison.  The comparison result
     * itself (tcg_tmp) is discarded; only NZCV matters.
     */
    tcg_tmp = tcg_temp_new_i64();
    if (op) {
        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    } else {
        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    }
    tcg_temp_free_i64(tcg_tmp);

    /* If COND was false, force the flags to #nzcv.  Compute two masks
     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
     * For tcg hosts that support ANDC, we can make do with just T1.
     * In either case, allow the tcg optimizer to delete any unused mask.
     */
    tcg_t1 = tcg_temp_new_i32();
    tcg_t2 = tcg_temp_new_i32();
    tcg_gen_neg_i32(tcg_t1, tcg_t0);
    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);

    /* For each flag: OR in the "force to 1" mask when the nzcv bit is
     * set, otherwise AND with the "force to 0" mask.  The bit values
     * used reflect QEMU's internal flag representation (NF/VF in the
     * sign bit, ZF zero-vs-nonzero, CF in bit 0).
     */
    if (nzcv & 8) { /* N */
        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
        }
    }
    if (nzcv & 4) { /* Z */
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
        }
    } else {
        /* ZF == 0 means "Z set", so OR with 1 clears the Z flag */
        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
    }
    if (nzcv & 2) { /* C */
        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
        }
    }
    if (nzcv & 1) { /* V */
        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
        }
    }
    tcg_temp_free_i32(tcg_t0);
    tcg_temp_free_i32(tcg_t1);
    tcg_temp_free_i32(tcg_t2);
}
5287
5288/* Conditional select
5289 *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
5290 * +----+----+---+-----------------+------+------+-----+------+------+
5291 * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
5292 * +----+----+---+-----------------+------+------+-----+------+------+
5293 */
5294static void disas_cond_select(DisasContext *s, uint32_t insn)
5295{
5296    unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
5297    TCGv_i64 tcg_rd, zero;
5298    DisasCompare64 c;
5299
5300    if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
5301        /* S == 1 or op2<1> == 1 */
5302        unallocated_encoding(s);
5303        return;
5304    }
5305    sf = extract32(insn, 31, 1);
5306    else_inv = extract32(insn, 30, 1);
5307    rm = extract32(insn, 16, 5);
5308    cond = extract32(insn, 12, 4);
5309    else_inc = extract32(insn, 10, 1);
5310    rn = extract32(insn, 5, 5);
5311    rd = extract32(insn, 0, 5);
5312
5313    tcg_rd = cpu_reg(s, rd);
5314
5315    a64_test_cc(&c, cond);
5316    zero = tcg_const_i64(0);
5317
5318    if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
5319        /* CSET & CSETM.  */
5320        tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
5321        if (else_inv) {
5322            tcg_gen_neg_i64(tcg_rd, tcg_rd);
5323        }
5324    } else {
5325        TCGv_i64 t_true = cpu_reg(s, rn);
5326        TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
5327        if (else_inv && else_inc) {
5328            tcg_gen_neg_i64(t_false, t_false);
5329        } else if (else_inv) {
5330            tcg_gen_not_i64(t_false, t_false);
5331        } else if (else_inc) {
5332            tcg_gen_addi_i64(t_false, t_false, 1);
5333        }
5334        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
5335    }
5336
5337    tcg_temp_free_i64(zero);
5338    a64_free_cc(&c);
5339
5340    if (!sf) {
5341        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5342    }
5343}
5344
5345static void handle_clz(DisasContext *s, unsigned int sf,
5346                       unsigned int rn, unsigned int rd)
5347{
5348    TCGv_i64 tcg_rd, tcg_rn;
5349    tcg_rd = cpu_reg(s, rd);
5350    tcg_rn = cpu_reg(s, rn);
5351
5352    if (sf) {
5353        tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
5354    } else {
5355        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5356        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5357        tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
5358        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5359        tcg_temp_free_i32(tcg_tmp32);
5360    }
5361}
5362
5363static void handle_cls(DisasContext *s, unsigned int sf,
5364                       unsigned int rn, unsigned int rd)
5365{
5366    TCGv_i64 tcg_rd, tcg_rn;
5367    tcg_rd = cpu_reg(s, rd);
5368    tcg_rn = cpu_reg(s, rn);
5369
5370    if (sf) {
5371        tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
5372    } else {
5373        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5374        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5375        tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
5376        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5377        tcg_temp_free_i32(tcg_tmp32);
5378    }
5379}
5380
5381static void handle_rbit(DisasContext *s, unsigned int sf,
5382                        unsigned int rn, unsigned int rd)
5383{
5384    TCGv_i64 tcg_rd, tcg_rn;
5385    tcg_rd = cpu_reg(s, rd);
5386    tcg_rn = cpu_reg(s, rn);
5387
5388    if (sf) {
5389        gen_helper_rbit64(tcg_rd, tcg_rn);
5390    } else {
5391        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
5392        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
5393        gen_helper_rbit(tcg_tmp32, tcg_tmp32);
5394        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
5395        tcg_temp_free_i32(tcg_tmp32);
5396    }
5397}
5398
5399/* REV with sf==1, opcode==3 ("REV64") */
5400static void handle_rev64(DisasContext *s, unsigned int sf,
5401                         unsigned int rn, unsigned int rd)
5402{
5403    if (!sf) {
5404        unallocated_encoding(s);
5405        return;
5406    }
5407    tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
5408}
5409
5410/* REV with sf==0, opcode==2
5411 * REV32 (sf==1, opcode==2)
5412 */
5413static void handle_rev32(DisasContext *s, unsigned int sf,
5414                         unsigned int rn, unsigned int rd)
5415{
5416    TCGv_i64 tcg_rd = cpu_reg(s, rd);
5417    TCGv_i64 tcg_rn = cpu_reg(s, rn);
5418
5419    if (sf) {
5420        tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
5421        tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
5422    } else {
5423        tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
5424    }
5425}
5426
5427/* REV16 (opcode==1) */
5428static void handle_rev16(DisasContext *s, unsigned int sf,
5429                         unsigned int rn, unsigned int rd)
5430{
5431    TCGv_i64 tcg_rd = cpu_reg(s, rd);
5432    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5433    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5434    TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
5435
5436    tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
5437    tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
5438    tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
5439    tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
5440    tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
5441
5442    tcg_temp_free_i64(mask);
5443    tcg_temp_free_i64(tcg_tmp);
5444}
5445
5446/* Data-processing (1 source)
5447 *   31  30  29  28             21 20     16 15    10 9    5 4    0
5448 * +----+---+---+-----------------+---------+--------+------+------+
5449 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
5450 * +----+---+---+-----------------+---------+--------+------+------+
5451 */
static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
{
    unsigned int sf, opcode, opcode2, rn, rd;
    TCGv_i64 tcg_rd;

    /* The S bit (bit 29) must be 0 for every op in this group. */
    if (extract32(insn, 29, 1)) {
        unallocated_encoding(s);
        return;
    }

    sf = extract32(insn, 31, 1);
    opcode = extract32(insn, 10, 6);
    opcode2 = extract32(insn, 16, 5);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    /*
     * Pack the three decode fields into a single switch value:
     * bit 0 = sf, bits 6:1 = opcode, bits 11:7 = opcode2.
     */
#define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))

    switch (MAP(sf, opcode2, opcode)) {
    case MAP(0, 0x00, 0x00): /* RBIT */
    case MAP(1, 0x00, 0x00):
        handle_rbit(s, sf, rn, rd);
        break;
    case MAP(0, 0x00, 0x01): /* REV16 */
    case MAP(1, 0x00, 0x01):
        handle_rev16(s, sf, rn, rd);
        break;
    case MAP(0, 0x00, 0x02): /* REV/REV32 */
    case MAP(1, 0x00, 0x02):
        handle_rev32(s, sf, rn, rd);
        break;
    case MAP(1, 0x00, 0x03): /* REV64 */
        handle_rev64(s, sf, rn, rd);
        break;
    case MAP(0, 0x00, 0x04): /* CLZ */
    case MAP(1, 0x00, 0x04):
        handle_clz(s, sf, rn, rd);
        break;
    case MAP(0, 0x00, 0x05): /* CLS */
    case MAP(1, 0x00, 0x05):
        handle_cls(s, sf, rn, rd);
        break;
    /*
     * Pointer-authentication ops: when the feature is implemented but
     * not enabled (!pauth_active) they execute as NOPs; when it is not
     * implemented at all they are UNDEFINED.  Rn supplies the modifier
     * and may be SP, hence cpu_reg_sp().
     */
    case MAP(1, 0x01, 0x00): /* PACIA */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x01): /* PACIB */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x02): /* PACDA */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x03): /* PACDB */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x04): /* AUTIA */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x05): /* AUTIB */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x06): /* AUTDA */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x07): /* AUTDB */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    /*
     * The "Z" forms use a zero modifier instead of Rn and require the
     * Rn field to be 31; any other Rn is UNDEFINED regardless of
     * pauth_active.
     */
    case MAP(1, 0x01, 0x08): /* PACIZA */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
        }
        break;
    case MAP(1, 0x01, 0x09): /* PACIZB */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
        }
        break;
    case MAP(1, 0x01, 0x0a): /* PACDZA */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
        }
        break;
    case MAP(1, 0x01, 0x0b): /* PACDZB */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
        }
        break;
    case MAP(1, 0x01, 0x0c): /* AUTIZA */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
        }
        break;
    case MAP(1, 0x01, 0x0d): /* AUTIZB */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
        }
        break;
    case MAP(1, 0x01, 0x0e): /* AUTDZA */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
        }
        break;
    case MAP(1, 0x01, 0x0f): /* AUTDZB */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
        }
        break;
    /* XPAC* strip the authentication code from Rd; Rn must be 31. */
    case MAP(1, 0x01, 0x10): /* XPACI */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
        }
        break;
    case MAP(1, 0x01, 0x11): /* XPACD */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
        }
        break;
    default:
    do_unallocated:
        unallocated_encoding(s);
        break;
    }

#undef MAP
}
5646
5647static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5648                       unsigned int rm, unsigned int rn, unsigned int rd)
5649{
5650    TCGv_i64 tcg_n, tcg_m, tcg_rd;
5651    tcg_rd = cpu_reg(s, rd);
5652
5653    if (!sf && is_signed) {
5654        tcg_n = new_tmp_a64(s);
5655        tcg_m = new_tmp_a64(s);
5656        tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5657        tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5658    } else {
5659        tcg_n = read_cpu_reg(s, rn, sf);
5660        tcg_m = read_cpu_reg(s, rm, sf);
5661    }
5662
5663    if (is_signed) {
5664        gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5665    } else {
5666        gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5667    }
5668
5669    if (!sf) { /* zero extend final result */
5670        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5671    }
5672}
5673
5674/* LSLV, LSRV, ASRV, RORV */
5675static void handle_shift_reg(DisasContext *s,
5676                             enum a64_shift_type shift_type, unsigned int sf,
5677                             unsigned int rm, unsigned int rn, unsigned int rd)
5678{
5679    TCGv_i64 tcg_shift = tcg_temp_new_i64();
5680    TCGv_i64 tcg_rd = cpu_reg(s, rd);
5681    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5682
5683    tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5684    shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5685    tcg_temp_free_i64(tcg_shift);
5686}
5687
5688/* CRC32[BHWX], CRC32C[BHWX] */
5689static void handle_crc32(DisasContext *s,
5690                         unsigned int sf, unsigned int sz, bool crc32c,
5691                         unsigned int rm, unsigned int rn, unsigned int rd)
5692{
5693    TCGv_i64 tcg_acc, tcg_val;
5694    TCGv_i32 tcg_bytes;
5695
5696    if (!dc_isar_feature(aa64_crc32, s)
5697        || (sf == 1 && sz != 3)
5698        || (sf == 0 && sz == 3)) {
5699        unallocated_encoding(s);
5700        return;
5701    }
5702
5703    if (sz == 3) {
5704        tcg_val = cpu_reg(s, rm);
5705    } else {
5706        uint64_t mask;
5707        switch (sz) {
5708        case 0:
5709            mask = 0xFF;
5710            break;
5711        case 1:
5712            mask = 0xFFFF;
5713            break;
5714        case 2:
5715            mask = 0xFFFFFFFF;
5716            break;
5717        default:
5718            g_assert_not_reached();
5719        }
5720        tcg_val = new_tmp_a64(s);
5721        tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5722    }
5723
5724    tcg_acc = cpu_reg(s, rn);
5725    tcg_bytes = tcg_const_i32(1 << sz);
5726
5727    if (crc32c) {
5728        gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5729    } else {
5730        gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5731    }
5732
5733    tcg_temp_free_i32(tcg_bytes);
5734}
5735
/* Data-processing (2 source)
 *   31   30  29 28             21 20  16 15    10 9    5 4    0
 * +----+---+---+-----------------+------+--------+------+------+
 * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
 * +----+---+---+-----------------+------+--------+------+------+
 */
static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
{
    unsigned int sf, rm, opcode, rn, rd, setflag;
    sf = extract32(insn, 31, 1);
    setflag = extract32(insn, 29, 1);
    rm = extract32(insn, 16, 5);
    opcode = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    /* The S bit is only valid for SUBPS (opcode 0). */
    if (setflag && opcode != 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0: /* SUBP(S) */
        if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
            goto do_unallocated;
        } else {
            TCGv_i64 tcg_n, tcg_m, tcg_d;

            /*
             * Subtract pointers: both operands are treated as 56-bit
             * sign-extended addresses (the tag byte is ignored).
             * Either operand may be SP.
             */
            tcg_n = read_cpu_reg_sp(s, rn, true);
            tcg_m = read_cpu_reg_sp(s, rm, true);
            tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
            tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
            tcg_d = cpu_reg(s, rd);

            if (setflag) {
                gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
            } else {
                tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
            }
        }
        break;
    case 2: /* UDIV */
        handle_div(s, false, sf, rm, rn, rd);
        break;
    case 3: /* SDIV */
        handle_div(s, true, sf, rm, rn, rd);
        break;
    case 4: /* IRG */
        if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
            goto do_unallocated;
        }
        if (s->ata) {
            /* Allocation tags enabled: insert a random tag via helper. */
            gen_helper_irg(cpu_reg_sp(s, rd), cpu_env,
                           cpu_reg_sp(s, rn), cpu_reg(s, rm));
        } else {
            /* Tags disabled: produce the address with allocation tag 0. */
            gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
                                             cpu_reg_sp(s, rn));
        }
        break;
    case 5: /* GMI */
        if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
            goto do_unallocated;
        } else {
            /*
             * Xd = Xm | (1 << allocation_tag(Xn)); the tag lives in
             * bits [59:56] of the address.
             */
            TCGv_i64 t1 = tcg_const_i64(1);
            TCGv_i64 t2 = tcg_temp_new_i64();

            tcg_gen_extract_i64(t2, cpu_reg_sp(s, rn), 56, 4);
            tcg_gen_shl_i64(t1, t1, t2);
            tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t1);

            tcg_temp_free_i64(t1);
            tcg_temp_free_i64(t2);
        }
        break;
    case 8: /* LSLV */
        handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
        break;
    case 9: /* LSRV */
        handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
        break;
    case 10: /* ASRV */
        handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
        break;
    case 11: /* RORV */
        handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
        break;
    case 12: /* PACGA */
        if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        gen_helper_pacga(cpu_reg(s, rd), cpu_env,
                         cpu_reg(s, rn), cpu_reg_sp(s, rm));
        break;
    case 16:
    case 17:
    case 18:
    case 19:
    case 20:
    case 21:
    case 22:
    case 23: /* CRC32 */
    {
        /* opcode[1:0] = size, opcode[2] selects the CRC32C polynomial. */
        int sz = extract32(opcode, 0, 2);
        bool crc32c = extract32(opcode, 2, 1);
        handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
        break;
    }
    default:
    do_unallocated:
        unallocated_encoding(s);
        break;
    }
}
5849
/*
 * Data processing - register
 *  31  30 29  28      25    21  20  16      10         0
 * +--+---+--+---+-------+-----+-------+-------+---------+
 * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
 * +--+---+--+---+-------+-----+-------+-------+---------+
 *
 * Top-level decode for the data-processing (register) group; routes
 * to the per-subgroup disassembly functions on op1/op2/op3.
 */
static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
{
    int op0 = extract32(insn, 30, 1);
    int op1 = extract32(insn, 28, 1);
    int op2 = extract32(insn, 21, 4);
    int op3 = extract32(insn, 10, 6);

    if (!op1) {
        /* op1 == 0: logical and add/sub register forms. */
        if (op2 & 8) {
            if (op2 & 1) {
                /* Add/sub (extended register) */
                disas_add_sub_ext_reg(s, insn);
            } else {
                /* Add/sub (shifted register) */
                disas_add_sub_reg(s, insn);
            }
        } else {
            /* Logical (shifted register) */
            disas_logic_reg(s, insn);
        }
        return;
    }

    switch (op2) {
    case 0x0:
        switch (op3) {
        case 0x00: /* Add/subtract (with carry) */
            disas_adc_sbc(s, insn);
            break;

        case 0x01: /* Rotate right into flags */
        case 0x21:
            disas_rotate_right_into_flags(s, insn);
            break;

        case 0x02: /* Evaluate into flags */
        case 0x12:
        case 0x22:
        case 0x32:
            disas_evaluate_into_flags(s, insn);
            break;

        default:
            goto do_unallocated;
        }
        break;

    case 0x2: /* Conditional compare */
        disas_cc(s, insn); /* both imm and reg forms */
        break;

    case 0x4: /* Conditional select */
        disas_cond_select(s, insn);
        break;

    case 0x6: /* Data-processing */
        if (op0) {    /* (1 source) */
            disas_data_proc_1src(s, insn);
        } else {      /* (2 source) */
            disas_data_proc_2src(s, insn);
        }
        break;
    case 0x8 ... 0xf: /* (3 source) */
        disas_data_proc_3src(s, insn);
        break;

    default:
    do_unallocated:
        unallocated_encoding(s);
        break;
    }
}
5929
/*
 * Generate code for an FP comparison of Vn against Vm (or literal
 * zero when cmp_with_zero), writing the result into the NZCV flags.
 * size selects half (MO_16), single (MO_32) or double (MO_64)
 * precision; signal_all_nans selects the signalling (FCMPE-style)
 * helper variants, which raise Invalid Operation on any NaN input.
 */
static void handle_fp_compare(DisasContext *s, int size,
                              unsigned int rn, unsigned int rm,
                              bool cmp_with_zero, bool signal_all_nans)
{
    TCGv_i64 tcg_flags = tcg_temp_new_i64();
    /* Half precision uses its own FP status (FZ16 etc.). */
    TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    if (size == MO_64) {
        TCGv_i64 tcg_vn, tcg_vm;

        tcg_vn = read_fp_dreg(s, rn);
        if (cmp_with_zero) {
            tcg_vm = tcg_const_i64(0);
        } else {
            tcg_vm = read_fp_dreg(s, rm);
        }
        if (signal_all_nans) {
            gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        } else {
            gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        }
        tcg_temp_free_i64(tcg_vn);
        tcg_temp_free_i64(tcg_vm);
    } else {
        /* Half/single: operands are read from vector element 0. */
        TCGv_i32 tcg_vn = tcg_temp_new_i32();
        TCGv_i32 tcg_vm = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_vn, rn, 0, size);
        if (cmp_with_zero) {
            tcg_gen_movi_i32(tcg_vm, 0);
        } else {
            read_vec_element_i32(s, tcg_vm, rm, 0, size);
        }

        switch (size) {
        case MO_32:
            if (signal_all_nans) {
                gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
            } else {
                gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
            }
            break;
        case MO_16:
            if (signal_all_nans) {
                gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
            } else {
                gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
            }
            break;
        default:
            g_assert_not_reached();
        }

        tcg_temp_free_i32(tcg_vn);
        tcg_temp_free_i32(tcg_vm);
    }

    tcg_temp_free_ptr(fpst);

    gen_set_nzcv(tcg_flags);

    tcg_temp_free_i64(tcg_flags);
}
5993
5994/* Floating point compare
5995 *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
5996 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5997 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
5998 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5999 */
6000static void disas_fp_compare(DisasContext *s, uint32_t insn)
6001{
6002    unsigned int mos, type, rm, op, rn, opc, op2r;
6003    int size;
6004
6005    mos = extract32(insn, 29, 3);
6006    type = extract32(insn, 22, 2);
6007    rm = extract32(insn, 16, 5);
6008    op = extract32(insn, 14, 2);
6009    rn = extract32(insn, 5, 5);
6010    opc = extract32(insn, 3, 2);
6011    op2r = extract32(insn, 0, 3);
6012
6013    if (mos || op || op2r) {
6014        unallocated_encoding(s);
6015        return;
6016    }
6017
6018    switch (type) {
6019    case 0:
6020        size = MO_32;
6021        break;
6022    case 1:
6023        size = MO_64;
6024        break;
6025    case 3:
6026        size = MO_16;
6027        if (dc_isar_feature(aa64_fp16, s)) {
6028            break;
6029        }
6030        /* fallthru */
6031    default:
6032        unallocated_encoding(s);
6033        return;
6034    }
6035
6036    if (!fp_access_check(s)) {
6037        return;
6038    }
6039
6040    handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
6041}
6042
/* Floating point conditional compare
 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
 *
 * FCCMP/FCCMPE: if cond holds, compare Vn with Vm and set NZCV from
 * the comparison; otherwise set NZCV directly from the immediate
 * nzcv field.  op == 1 selects the signalling form (FCCMPE).
 */
static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, cond, rn, op, nzcv;
    TCGv_i64 tcg_flags;
    TCGLabel *label_continue = NULL;
    int size;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2);
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    op = extract32(insn, 4, 1);
    nzcv = extract32(insn, 0, 4);

    if (mos) {
        unallocated_encoding(s);
        return;
    }

    switch (type) {
    case 0:
        size = MO_32;
        break;
    case 1:
        size = MO_64;
        break;
    case 3:
        size = MO_16;
        if (dc_isar_feature(aa64_fp16, s)) {
            break;
        }
        /* fallthru */
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (cond < 0x0e) { /* not always */
        TCGLabel *label_match = gen_new_label();
        label_continue = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        /* nomatch: load NZCV from the immediate field and skip the compare */
        tcg_flags = tcg_const_i64(nzcv << 28);
        gen_set_nzcv(tcg_flags);
        tcg_temp_free_i64(tcg_flags);
        tcg_gen_br(label_continue);
        gen_set_label(label_match);
    }

    handle_fp_compare(s, size, rn, rm, false, op);

    if (cond < 0x0e) {
        gen_set_label(label_continue);
    }
}
6109
/* Floating point conditional select
 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
 * +---+---+---+-----------+------+---+------+------+-----+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+------+-----+------+------+
 *
 * FCSEL: Vd = cond ? Vn : Vm, for half/single/double precision.
 */
static void disas_fp_csel(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, cond, rn, rd;
    TCGv_i64 t_true, t_false, t_zero;
    DisasCompare64 c;
    MemOp sz;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2);
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    if (mos) {
        unallocated_encoding(s);
        return;
    }

    switch (type) {
    case 0:
        sz = MO_32;
        break;
    case 1:
        sz = MO_64;
        break;
    case 3:
        sz = MO_16;
        if (dc_isar_feature(aa64_fp16, s)) {
            break;
        }
        /* fallthru */
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* Zero extend sreg & hreg inputs to 64 bits now.  */
    t_true = tcg_temp_new_i64();
    t_false = tcg_temp_new_i64();
    read_vec_element(s, t_true, rn, 0, sz);
    read_vec_element(s, t_false, rm, 0, sz);

    /* Select with a movcond against zero on the materialized condition. */
    a64_test_cc(&c, cond);
    t_zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
    tcg_temp_free_i64(t_zero);
    tcg_temp_free_i64(t_false);
    a64_free_cc(&c);

    /* Note that sregs & hregs write back zeros to the high bits,
       and we've already done the zero-extension.  */
    write_fp_dreg(s, rd, t_true);
    tcg_temp_free_i64(t_true);
}
6175
6176/* Floating-point data-processing (1 source) - half precision */
6177static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
6178{
6179    TCGv_ptr fpst = NULL;
6180    TCGv_i32 tcg_op = read_fp_hreg(s, rn);
6181    TCGv_i32 tcg_res = tcg_temp_new_i32();
6182
6183    switch (opcode) {
6184    case 0x0: /* FMOV */
6185        tcg_gen_mov_i32(tcg_res, tcg_op);
6186        break;
6187    case 0x1: /* FABS */
6188        tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
6189        break;
6190    case 0x2: /* FNEG */
6191        tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
6192        break;
6193    case 0x3: /* FSQRT */
6194        fpst = fpstatus_ptr(FPST_FPCR_F16);
6195        gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
6196        break;
6197    case 0x8: /* FRINTN */
6198    case 0x9: /* FRINTP */
6199    case 0xa: /* FRINTM */
6200    case 0xb: /* FRINTZ */
6201    case 0xc: /* FRINTA */
6202    {
6203        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
6204        fpst = fpstatus_ptr(FPST_FPCR_F16);
6205
6206        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
6207        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
6208
6209        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
6210        tcg_temp_free_i32(tcg_rmode);
6211        break;
6212    }
6213    case 0xe: /* FRINTX */
6214        fpst = fpstatus_ptr(FPST_FPCR_F16);
6215        gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
6216        break;
6217    case 0xf: /* FRINTI */
6218        fpst = fpstatus_ptr(FPST_FPCR_F16);
6219        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
6220        break;
6221    default:
6222        abort();
6223    }
6224
6225    write_fp_sreg(s, rd, tcg_res);
6226
6227    if (fpst) {
6228        tcg_temp_free_ptr(fpst);
6229    }
6230    tcg_temp_free_i32(tcg_op);
6231    tcg_temp_free_i32(tcg_res);
6232}
6233
/*
 * Floating-point data-processing (1 source) - single precision.
 * The simple ops (FMOV/FABS/FNEG/FSQRT) are emitted directly and
 * jump to 'done' without creating an FP status; the remaining ops
 * select a helper via gen_fpst, optionally bracketed by a rounding
 * mode change when rmode >= 0.
 */
static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
{
    void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
    TCGv_i32 tcg_op, tcg_res;
    TCGv_ptr fpst;
    int rmode = -1;

    tcg_op = read_fp_sreg(s, rn);
    tcg_res = tcg_temp_new_i32();

    switch (opcode) {
    case 0x0: /* FMOV */
        tcg_gen_mov_i32(tcg_res, tcg_op);
        goto done;
    case 0x1: /* FABS */
        gen_helper_vfp_abss(tcg_res, tcg_op);
        goto done;
    case 0x2: /* FNEG */
        gen_helper_vfp_negs(tcg_res, tcg_op);
        goto done;
    case 0x3: /* FSQRT */
        gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
        goto done;
    case 0x6: /* BFCVT */
        gen_fpst = gen_helper_bfcvt;
        break;
    case 0x8: /* FRINTN */
    case 0x9: /* FRINTP */
    case 0xa: /* FRINTM */
    case 0xb: /* FRINTZ */
    case 0xc: /* FRINTA */
        /* Directed rounding: the mode is encoded in opcode[2:0]. */
        rmode = arm_rmode_to_sf(opcode & 7);
        gen_fpst = gen_helper_rints;
        break;
    case 0xe: /* FRINTX */
        gen_fpst = gen_helper_rints_exact;
        break;
    case 0xf: /* FRINTI */
        gen_fpst = gen_helper_rints;
        break;
    case 0x10: /* FRINT32Z */
        rmode = float_round_to_zero;
        gen_fpst = gen_helper_frint32_s;
        break;
    case 0x11: /* FRINT32X */
        gen_fpst = gen_helper_frint32_s;
        break;
    case 0x12: /* FRINT64Z */
        rmode = float_round_to_zero;
        gen_fpst = gen_helper_frint64_s;
        break;
    case 0x13: /* FRINT64X */
        gen_fpst = gen_helper_frint64_s;
        break;
    default:
        g_assert_not_reached();
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    if (rmode >= 0) {
        /*
         * set_rmode returns the previous rounding mode in tcg_rmode,
         * so the second call restores the original mode.
         */
        TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
        gen_fpst(tcg_res, tcg_op, fpst);
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
        tcg_temp_free_i32(tcg_rmode);
    } else {
        gen_fpst(tcg_res, tcg_op, fpst);
    }
    tcg_temp_free_ptr(fpst);

 done:
    write_fp_sreg(s, rd, tcg_res);
    tcg_temp_free_i32(tcg_op);
    tcg_temp_free_i32(tcg_res);
}
6310
/*
 * Floating-point data-processing (1 source) - double precision.
 * Mirrors handle_fp_1src_single: simple ops go straight to 'done',
 * the rest dispatch through gen_fpst with an optional temporary
 * rounding-mode override.  FMOV is special-cased up front as a
 * whole-vector move.
 */
static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
{
    void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
    TCGv_i64 tcg_op, tcg_res;
    TCGv_ptr fpst;
    int rmode = -1;

    switch (opcode) {
    case 0x0: /* FMOV */
        /* Copy the low 64 bits as a vector move; no FP status needed. */
        gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
        return;
    }

    tcg_op = read_fp_dreg(s, rn);
    tcg_res = tcg_temp_new_i64();

    switch (opcode) {
    case 0x1: /* FABS */
        gen_helper_vfp_absd(tcg_res, tcg_op);
        goto done;
    case 0x2: /* FNEG */
        gen_helper_vfp_negd(tcg_res, tcg_op);
        goto done;
    case 0x3: /* FSQRT */
        gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
        goto done;
    case 0x8: /* FRINTN */
    case 0x9: /* FRINTP */
    case 0xa: /* FRINTM */
    case 0xb: /* FRINTZ */
    case 0xc: /* FRINTA */
        /* Directed rounding: the mode is encoded in opcode[2:0]. */
        rmode = arm_rmode_to_sf(opcode & 7);
        gen_fpst = gen_helper_rintd;
        break;
    case 0xe: /* FRINTX */
        gen_fpst = gen_helper_rintd_exact;
        break;
    case 0xf: /* FRINTI */
        gen_fpst = gen_helper_rintd;
        break;
    case 0x10: /* FRINT32Z */
        rmode = float_round_to_zero;
        gen_fpst = gen_helper_frint32_d;
        break;
    case 0x11: /* FRINT32X */
        gen_fpst = gen_helper_frint32_d;
        break;
    case 0x12: /* FRINT64Z */
        rmode = float_round_to_zero;
        gen_fpst = gen_helper_frint64_d;
        break;
    case 0x13: /* FRINT64X */
        gen_fpst = gen_helper_frint64_d;
        break;
    default:
        g_assert_not_reached();
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    if (rmode >= 0) {
        /*
         * set_rmode returns the previous rounding mode in tcg_rmode,
         * so the second call restores the original mode.
         */
        TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
        gen_fpst(tcg_res, tcg_op, fpst);
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
        tcg_temp_free_i32(tcg_rmode);
    } else {
        gen_fpst(tcg_res, tcg_op, fpst);
    }
    tcg_temp_free_ptr(fpst);

 done:
    write_fp_dreg(s, rd, tcg_res);
    tcg_temp_free_i64(tcg_op);
    tcg_temp_free_i64(tcg_res);
}
6387
6388static void handle_fp_fcvt(DisasContext *s, int opcode,
6389                           int rd, int rn, int dtype, int ntype)
6390{
6391    switch (ntype) {
6392    case 0x0:
6393    {
6394        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6395        if (dtype == 1) {
6396            /* Single to double */
6397            TCGv_i64 tcg_rd = tcg_temp_new_i64();
6398            gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
6399            write_fp_dreg(s, rd, tcg_rd);
6400            tcg_temp_free_i64(tcg_rd);
6401        } else {
6402            /* Single to half */
6403            TCGv_i32 tcg_rd = tcg_temp_new_i32();
6404            TCGv_i32 ahp = get_ahp_flag();
6405            TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6406
6407            gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6408            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6409            write_fp_sreg(s, rd, tcg_rd);
6410            tcg_temp_free_i32(tcg_rd);
6411            tcg_temp_free_i32(ahp);
6412            tcg_temp_free_ptr(fpst);
6413        }
6414        tcg_temp_free_i32(tcg_rn);
6415        break;
6416    }
6417    case 0x1:
6418    {
6419        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
6420        TCGv_i32 tcg_rd = tcg_temp_new_i32();
6421        if (dtype == 0) {
6422            /* Double to single */
6423            gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
6424        } else {
6425            TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6426            TCGv_i32 ahp = get_ahp_flag();
6427            /* Double to half */
6428            gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
6429            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
6430            tcg_temp_free_ptr(fpst);
6431            tcg_temp_free_i32(ahp);
6432        }
6433        write_fp_sreg(s, rd, tcg_rd);
6434        tcg_temp_free_i32(tcg_rd);
6435        tcg_temp_free_i64(tcg_rn);
6436        break;
6437    }
6438    case 0x3:
6439    {
6440        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
6441        TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
6442        TCGv_i32 tcg_ahp = get_ahp_flag();
6443        tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
6444        if (dtype == 0) {
6445            /* Half to single */
6446            TCGv_i32 tcg_rd = tcg_temp_new_i32();
6447            gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6448            write_fp_sreg(s, rd, tcg_rd);
6449            tcg_temp_free_i32(tcg_rd);
6450        } else {
6451            /* Half to double */
6452            TCGv_i64 tcg_rd = tcg_temp_new_i64();
6453            gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
6454            write_fp_dreg(s, rd, tcg_rd);
6455            tcg_temp_free_i64(tcg_rd);
6456        }
6457        tcg_temp_free_i32(tcg_rn);
6458        tcg_temp_free_ptr(tcg_fpst);
6459        tcg_temp_free_i32(tcg_ahp);
6460        break;
6461    }
6462    default:
6463        abort();
6464    }
6465}
6466
/* Floating point data-processing (1 source)
 *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
 * +---+---+---+-----------+------+---+--------+-----------+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+--------+-----------+------+------+
 */
static void disas_fp_1src(DisasContext *s, uint32_t insn)
{
    int mos = extract32(insn, 29, 3);   /* insn[31:29] == {M, 0, S} */
    int type = extract32(insn, 22, 2);
    int opcode = extract32(insn, 15, 6);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    /* M, bit 30 and S must all be zero for an allocated encoding */
    if (mos) {
        goto do_unallocated;
    }

    switch (opcode) {
    case 0x4: case 0x5: case 0x7:
    {
        /* FCVT between half, single and double precision */
        int dtype = extract32(opcode, 0, 2);
        /* type == 2 is unallocated; converting to the source size is too */
        if (type == 2 || dtype == type) {
            goto do_unallocated;
        }
        if (!fp_access_check(s)) {
            return;
        }

        handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
        break;
    }

    case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
        /* Only single/double, and only with FEAT_FRINTTS present */
        if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
            goto do_unallocated;
        }
        /* fall through */
    case 0x0 ... 0x3:
    case 0x8 ... 0xc:
    case 0xe ... 0xf:
        /* 32-to-32 and 64-to-64 ops */
        switch (type) {
        case 0:
            /* single precision */
            if (!fp_access_check(s)) {
                return;
            }
            handle_fp_1src_single(s, opcode, rd, rn);
            break;
        case 1:
            /* double precision */
            if (!fp_access_check(s)) {
                return;
            }
            handle_fp_1src_double(s, opcode, rd, rn);
            break;
        case 3:
            /* half precision: requires the FP16 extension */
            if (!dc_isar_feature(aa64_fp16, s)) {
                goto do_unallocated;
            }

            if (!fp_access_check(s)) {
                return;
            }
            handle_fp_1src_half(s, opcode, rd, rn);
            break;
        default:
            goto do_unallocated;
        }
        break;

    case 0x6:
        switch (type) {
        case 1: /* BFCVT */
            if (!dc_isar_feature(aa64_bf16, s)) {
                goto do_unallocated;
            }
            if (!fp_access_check(s)) {
                return;
            }
            /* BFCVT reuses the single-precision 1-source path */
            handle_fp_1src_single(s, opcode, rd, rn);
            break;
        default:
            goto do_unallocated;
        }
        break;

    default:
    do_unallocated:
        unallocated_encoding(s);
        break;
    }
}
6560
6561/* Floating-point data-processing (2 source) - single precision */
6562static void handle_fp_2src_single(DisasContext *s, int opcode,
6563                                  int rd, int rn, int rm)
6564{
6565    TCGv_i32 tcg_op1;
6566    TCGv_i32 tcg_op2;
6567    TCGv_i32 tcg_res;
6568    TCGv_ptr fpst;
6569
6570    tcg_res = tcg_temp_new_i32();
6571    fpst = fpstatus_ptr(FPST_FPCR);
6572    tcg_op1 = read_fp_sreg(s, rn);
6573    tcg_op2 = read_fp_sreg(s, rm);
6574
6575    switch (opcode) {
6576    case 0x0: /* FMUL */
6577        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6578        break;
6579    case 0x1: /* FDIV */
6580        gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
6581        break;
6582    case 0x2: /* FADD */
6583        gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6584        break;
6585    case 0x3: /* FSUB */
6586        gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
6587        break;
6588    case 0x4: /* FMAX */
6589        gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6590        break;
6591    case 0x5: /* FMIN */
6592        gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6593        break;
6594    case 0x6: /* FMAXNM */
6595        gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6596        break;
6597    case 0x7: /* FMINNM */
6598        gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6599        break;
6600    case 0x8: /* FNMUL */
6601        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6602        gen_helper_vfp_negs(tcg_res, tcg_res);
6603        break;
6604    }
6605
6606    write_fp_sreg(s, rd, tcg_res);
6607
6608    tcg_temp_free_ptr(fpst);
6609    tcg_temp_free_i32(tcg_op1);
6610    tcg_temp_free_i32(tcg_op2);
6611    tcg_temp_free_i32(tcg_res);
6612}
6613
6614/* Floating-point data-processing (2 source) - double precision */
6615static void handle_fp_2src_double(DisasContext *s, int opcode,
6616                                  int rd, int rn, int rm)
6617{
6618    TCGv_i64 tcg_op1;
6619    TCGv_i64 tcg_op2;
6620    TCGv_i64 tcg_res;
6621    TCGv_ptr fpst;
6622
6623    tcg_res = tcg_temp_new_i64();
6624    fpst = fpstatus_ptr(FPST_FPCR);
6625    tcg_op1 = read_fp_dreg(s, rn);
6626    tcg_op2 = read_fp_dreg(s, rm);
6627
6628    switch (opcode) {
6629    case 0x0: /* FMUL */
6630        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6631        break;
6632    case 0x1: /* FDIV */
6633        gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
6634        break;
6635    case 0x2: /* FADD */
6636        gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6637        break;
6638    case 0x3: /* FSUB */
6639        gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
6640        break;
6641    case 0x4: /* FMAX */
6642        gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6643        break;
6644    case 0x5: /* FMIN */
6645        gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6646        break;
6647    case 0x6: /* FMAXNM */
6648        gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6649        break;
6650    case 0x7: /* FMINNM */
6651        gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6652        break;
6653    case 0x8: /* FNMUL */
6654        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6655        gen_helper_vfp_negd(tcg_res, tcg_res);
6656        break;
6657    }
6658
6659    write_fp_dreg(s, rd, tcg_res);
6660
6661    tcg_temp_free_ptr(fpst);
6662    tcg_temp_free_i64(tcg_op1);
6663    tcg_temp_free_i64(tcg_op2);
6664    tcg_temp_free_i64(tcg_res);
6665}
6666
6667/* Floating-point data-processing (2 source) - half precision */
6668static void handle_fp_2src_half(DisasContext *s, int opcode,
6669                                int rd, int rn, int rm)
6670{
6671    TCGv_i32 tcg_op1;
6672    TCGv_i32 tcg_op2;
6673    TCGv_i32 tcg_res;
6674    TCGv_ptr fpst;
6675
6676    tcg_res = tcg_temp_new_i32();
6677    fpst = fpstatus_ptr(FPST_FPCR_F16);
6678    tcg_op1 = read_fp_hreg(s, rn);
6679    tcg_op2 = read_fp_hreg(s, rm);
6680
6681    switch (opcode) {
6682    case 0x0: /* FMUL */
6683        gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6684        break;
6685    case 0x1: /* FDIV */
6686        gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
6687        break;
6688    case 0x2: /* FADD */
6689        gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6690        break;
6691    case 0x3: /* FSUB */
6692        gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
6693        break;
6694    case 0x4: /* FMAX */
6695        gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6696        break;
6697    case 0x5: /* FMIN */
6698        gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6699        break;
6700    case 0x6: /* FMAXNM */
6701        gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6702        break;
6703    case 0x7: /* FMINNM */
6704        gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6705        break;
6706    case 0x8: /* FNMUL */
6707        gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6708        tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6709        break;
6710    default:
6711        g_assert_not_reached();
6712    }
6713
6714    write_fp_sreg(s, rd, tcg_res);
6715
6716    tcg_temp_free_ptr(fpst);
6717    tcg_temp_free_i32(tcg_op1);
6718    tcg_temp_free_i32(tcg_op2);
6719    tcg_temp_free_i32(tcg_res);
6720}
6721
6722/* Floating point data-processing (2 source)
6723 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6724 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6725 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
6726 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6727 */
6728static void disas_fp_2src(DisasContext *s, uint32_t insn)
6729{
6730    int mos = extract32(insn, 29, 3);
6731    int type = extract32(insn, 22, 2);
6732    int rd = extract32(insn, 0, 5);
6733    int rn = extract32(insn, 5, 5);
6734    int rm = extract32(insn, 16, 5);
6735    int opcode = extract32(insn, 12, 4);
6736
6737    if (opcode > 8 || mos) {
6738        unallocated_encoding(s);
6739        return;
6740    }
6741
6742    switch (type) {
6743    case 0:
6744        if (!fp_access_check(s)) {
6745            return;
6746        }
6747        handle_fp_2src_single(s, opcode, rd, rn, rm);
6748        break;
6749    case 1:
6750        if (!fp_access_check(s)) {
6751            return;
6752        }
6753        handle_fp_2src_double(s, opcode, rd, rn, rm);
6754        break;
6755    case 3:
6756        if (!dc_isar_feature(aa64_fp16, s)) {
6757            unallocated_encoding(s);
6758            return;
6759        }
6760        if (!fp_access_check(s)) {
6761            return;
6762        }
6763        handle_fp_2src_half(s, opcode, rd, rn, rm);
6764        break;
6765    default:
6766        unallocated_encoding(s);
6767    }
6768}
6769
6770/* Floating-point data-processing (3 source) - single precision */
6771static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6772                                  int rd, int rn, int rm, int ra)
6773{
6774    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6775    TCGv_i32 tcg_res = tcg_temp_new_i32();
6776    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6777
6778    tcg_op1 = read_fp_sreg(s, rn);
6779    tcg_op2 = read_fp_sreg(s, rm);
6780    tcg_op3 = read_fp_sreg(s, ra);
6781
6782    /* These are fused multiply-add, and must be done as one
6783     * floating point operation with no rounding between the
6784     * multiplication and addition steps.
6785     * NB that doing the negations here as separate steps is
6786     * correct : an input NaN should come out with its sign bit
6787     * flipped if it is a negated-input.
6788     */
6789    if (o1 == true) {
6790        gen_helper_vfp_negs(tcg_op3, tcg_op3);
6791    }
6792
6793    if (o0 != o1) {
6794        gen_helper_vfp_negs(tcg_op1, tcg_op1);
6795    }
6796
6797    gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6798
6799    write_fp_sreg(s, rd, tcg_res);
6800
6801    tcg_temp_free_ptr(fpst);
6802    tcg_temp_free_i32(tcg_op1);
6803    tcg_temp_free_i32(tcg_op2);
6804    tcg_temp_free_i32(tcg_op3);
6805    tcg_temp_free_i32(tcg_res);
6806}
6807
6808/* Floating-point data-processing (3 source) - double precision */
6809static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6810                                  int rd, int rn, int rm, int ra)
6811{
6812    TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6813    TCGv_i64 tcg_res = tcg_temp_new_i64();
6814    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
6815
6816    tcg_op1 = read_fp_dreg(s, rn);
6817    tcg_op2 = read_fp_dreg(s, rm);
6818    tcg_op3 = read_fp_dreg(s, ra);
6819
6820    /* These are fused multiply-add, and must be done as one
6821     * floating point operation with no rounding between the
6822     * multiplication and addition steps.
6823     * NB that doing the negations here as separate steps is
6824     * correct : an input NaN should come out with its sign bit
6825     * flipped if it is a negated-input.
6826     */
6827    if (o1 == true) {
6828        gen_helper_vfp_negd(tcg_op3, tcg_op3);
6829    }
6830
6831    if (o0 != o1) {
6832        gen_helper_vfp_negd(tcg_op1, tcg_op1);
6833    }
6834
6835    gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6836
6837    write_fp_dreg(s, rd, tcg_res);
6838
6839    tcg_temp_free_ptr(fpst);
6840    tcg_temp_free_i64(tcg_op1);
6841    tcg_temp_free_i64(tcg_op2);
6842    tcg_temp_free_i64(tcg_op3);
6843    tcg_temp_free_i64(tcg_res);
6844}
6845
6846/* Floating-point data-processing (3 source) - half precision */
6847static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6848                                int rd, int rn, int rm, int ra)
6849{
6850    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6851    TCGv_i32 tcg_res = tcg_temp_new_i32();
6852    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);
6853
6854    tcg_op1 = read_fp_hreg(s, rn);
6855    tcg_op2 = read_fp_hreg(s, rm);
6856    tcg_op3 = read_fp_hreg(s, ra);
6857
6858    /* These are fused multiply-add, and must be done as one
6859     * floating point operation with no rounding between the
6860     * multiplication and addition steps.
6861     * NB that doing the negations here as separate steps is
6862     * correct : an input NaN should come out with its sign bit
6863     * flipped if it is a negated-input.
6864     */
6865    if (o1 == true) {
6866        tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
6867    }
6868
6869    if (o0 != o1) {
6870        tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6871    }
6872
6873    gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6874
6875    write_fp_sreg(s, rd, tcg_res);
6876
6877    tcg_temp_free_ptr(fpst);
6878    tcg_temp_free_i32(tcg_op1);
6879    tcg_temp_free_i32(tcg_op2);
6880    tcg_temp_free_i32(tcg_op3);
6881    tcg_temp_free_i32(tcg_res);
6882}
6883
6884/* Floating point data-processing (3 source)
6885 *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
6886 * +---+---+---+-----------+------+----+------+----+------+------+------+
6887 * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
6888 * +---+---+---+-----------+------+----+------+----+------+------+------+
6889 */
6890static void disas_fp_3src(DisasContext *s, uint32_t insn)
6891{
6892    int mos = extract32(insn, 29, 3);
6893    int type = extract32(insn, 22, 2);
6894    int rd = extract32(insn, 0, 5);
6895    int rn = extract32(insn, 5, 5);
6896    int ra = extract32(insn, 10, 5);
6897    int rm = extract32(insn, 16, 5);
6898    bool o0 = extract32(insn, 15, 1);
6899    bool o1 = extract32(insn, 21, 1);
6900
6901    if (mos) {
6902        unallocated_encoding(s);
6903        return;
6904    }
6905
6906    switch (type) {
6907    case 0:
6908        if (!fp_access_check(s)) {
6909            return;
6910        }
6911        handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6912        break;
6913    case 1:
6914        if (!fp_access_check(s)) {
6915            return;
6916        }
6917        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6918        break;
6919    case 3:
6920        if (!dc_isar_feature(aa64_fp16, s)) {
6921            unallocated_encoding(s);
6922            return;
6923        }
6924        if (!fp_access_check(s)) {
6925            return;
6926        }
6927        handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6928        break;
6929    default:
6930        unallocated_encoding(s);
6931    }
6932}
6933
6934/* Floating point immediate
6935 *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
6936 * +---+---+---+-----------+------+---+------------+-------+------+------+
6937 * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
6938 * +---+---+---+-----------+------+---+------------+-------+------+------+
6939 */
6940static void disas_fp_imm(DisasContext *s, uint32_t insn)
6941{
6942    int rd = extract32(insn, 0, 5);
6943    int imm5 = extract32(insn, 5, 5);
6944    int imm8 = extract32(insn, 13, 8);
6945    int type = extract32(insn, 22, 2);
6946    int mos = extract32(insn, 29, 3);
6947    uint64_t imm;
6948    TCGv_i64 tcg_res;
6949    MemOp sz;
6950
6951    if (mos || imm5) {
6952        unallocated_encoding(s);
6953        return;
6954    }
6955
6956    switch (type) {
6957    case 0:
6958        sz = MO_32;
6959        break;
6960    case 1:
6961        sz = MO_64;
6962        break;
6963    case 3:
6964        sz = MO_16;
6965        if (dc_isar_feature(aa64_fp16, s)) {
6966            break;
6967        }
6968        /* fallthru */
6969    default:
6970        unallocated_encoding(s);
6971        return;
6972    }
6973
6974    if (!fp_access_check(s)) {
6975        return;
6976    }
6977
6978    imm = vfp_expand_imm(sz, imm8);
6979
6980    tcg_res = tcg_const_i64(imm);
6981    write_fp_dreg(s, rd, tcg_res);
6982    tcg_temp_free_i64(tcg_res);
6983}
6984
/* Handle floating point <=> fixed point conversions. Note that we can
 * also deal with fp <=> integer conversions as a special case (scale == 64)
 * OPTME: consider handling that special case specially or at least skipping
 * the call to scalbn in the helpers for zero shifts.
 *
 * itof:  true for integer/fixed -> FP, false for FP -> integer/fixed
 * rmode: FP rounding mode (ignored for itof; may be overridden for FCVTA*)
 * scale: fixed-point scale; helpers take the shift as (64 - scale)
 * sf:    64-bit (1) or 32-bit (0) general register
 * type:  FP size: 0 = single, 1 = double, 3 = half
 */
static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
                           bool itof, int rmode, int scale, int sf, int type)
{
    /* The low opcode bit selects unsigned (1) vs signed (0) */
    bool is_signed = !(opcode & 1);
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_shift, tcg_single;
    TCGv_i64 tcg_double;

    tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);

    tcg_shift = tcg_const_i32(64 - scale);

    if (itof) {
        /* Integer/fixed-point to FP */
        TCGv_i64 tcg_int = cpu_reg(s, rn);
        if (!sf) {
            /* 32-bit source: widen to 64 bits per signedness first */
            TCGv_i64 tcg_extend = new_tmp_a64(s);

            if (is_signed) {
                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
            } else {
                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
            }

            tcg_int = tcg_extend;
        }

        switch (type) {
        case 1: /* float64 */
            tcg_double = tcg_temp_new_i64();
            if (is_signed) {
                gen_helper_vfp_sqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_dreg(s, rd, tcg_double);
            tcg_temp_free_i64(tcg_double);
            break;

        case 0: /* float32 */
            tcg_single = tcg_temp_new_i32();
            if (is_signed) {
                gen_helper_vfp_sqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_sreg(s, rd, tcg_single);
            tcg_temp_free_i32(tcg_single);
            break;

        case 3: /* float16 */
            tcg_single = tcg_temp_new_i32();
            if (is_signed) {
                gen_helper_vfp_sqtoh(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtoh(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_sreg(s, rd, tcg_single);
            tcg_temp_free_i32(tcg_single);
            break;

        default:
            g_assert_not_reached();
        }
    } else {
        /* FP to integer/fixed-point */
        TCGv_i64 tcg_int = cpu_reg(s, rd);
        TCGv_i32 tcg_rmode;

        if (extract32(opcode, 2, 1)) {
            /* There are too many rounding modes to all fit into rmode,
             * so FCVTA[US] is a special case.
             */
            rmode = FPROUNDING_TIEAWAY;
        }

        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));

        /* Install the requested rounding mode; the helper writes the
         * previous mode back into tcg_rmode so it can be restored below.
         */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);

        switch (type) {
        case 1: /* float64 */
            tcg_double = read_fp_dreg(s, rn);
            if (is_signed) {
                if (!sf) {
                    gen_helper_vfp_tosld(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                if (!sf) {
                    gen_helper_vfp_tould(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            }
            if (!sf) {
                /* 32-bit destinations are written zero-extended */
                tcg_gen_ext32u_i64(tcg_int, tcg_int);
            }
            tcg_temp_free_i64(tcg_double);
            break;

        case 0: /* float32 */
            tcg_single = read_fp_sreg(s, rn);
            if (sf) {
                if (is_signed) {
                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                /* 32-bit result: convert into an i32 then zero-extend */
                TCGv_i32 tcg_dest = tcg_temp_new_i32();
                if (is_signed) {
                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
                tcg_temp_free_i32(tcg_dest);
            }
            tcg_temp_free_i32(tcg_single);
            break;

        case 3: /* float16 */
            tcg_single = read_fp_sreg(s, rn);
            if (sf) {
                if (is_signed) {
                    gen_helper_vfp_tosqh(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqh(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                /* 32-bit result: convert into an i32 then zero-extend */
                TCGv_i32 tcg_dest = tcg_temp_new_i32();
                if (is_signed) {
                    gen_helper_vfp_toslh(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_toulh(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
                tcg_temp_free_i32(tcg_dest);
            }
            tcg_temp_free_i32(tcg_single);
            break;

        default:
            g_assert_not_reached();
        }

        /* Restore the rounding mode that was saved above */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
        tcg_temp_free_i32(tcg_rmode);
    }

    tcg_temp_free_ptr(tcg_fpstatus);
    tcg_temp_free_i32(tcg_shift);
}
7161
7162/* Floating point <-> fixed point conversions
7163 *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
7164 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
7165 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
7166 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
7167 */
7168static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
7169{
7170    int rd = extract32(insn, 0, 5);
7171    int rn = extract32(insn, 5, 5);
7172    int scale = extract32(insn, 10, 6);
7173    int opcode = extract32(insn, 16, 3);
7174    int rmode = extract32(insn, 19, 2);
7175    int type = extract32(insn, 22, 2);
7176    bool sbit = extract32(insn, 29, 1);
7177    bool sf = extract32(insn, 31, 1);
7178    bool itof;
7179
7180    if (sbit || (!sf && scale < 32)) {
7181        unallocated_encoding(s);
7182        return;
7183    }
7184
7185    switch (type) {
7186    case 0: /* float32 */
7187    case 1: /* float64 */
7188        break;
7189    case 3: /* float16 */
7190        if (dc_isar_feature(aa64_fp16, s)) {
7191            break;
7192        }
7193        /* fallthru */
7194    default:
7195        unallocated_encoding(s);
7196        return;
7197    }
7198
7199    switch ((rmode << 3) | opcode) {
7200    case 0x2: /* SCVTF */
7201    case 0x3: /* UCVTF */
7202        itof = true;
7203        break;
7204    case 0x18: /* FCVTZS */
7205    case 0x19: /* FCVTZU */
7206        itof = false;
7207        break;
7208    default:
7209        unallocated_encoding(s);
7210        return;
7211    }
7212
7213    if (!fp_access_check(s)) {
7214        return;
7215    }
7216
7217    handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
7218}
7219
7220static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
7221{
7222    /* FMOV: gpr to or from float, double, or top half of quad fp reg,
7223     * without conversion.
7224     */
7225
7226    if (itof) {
7227        TCGv_i64 tcg_rn = cpu_reg(s, rn);
7228        TCGv_i64 tmp;
7229
7230        switch (type) {
7231        case 0:
7232            /* 32 bit */
7233            tmp = tcg_temp_new_i64();
7234            tcg_gen_ext32u_i64(tmp, tcg_rn);
7235            write_fp_dreg(s, rd, tmp);
7236            tcg_temp_free_i64(tmp);
7237            break;
7238        case 1:
7239            /* 64 bit */
7240            write_fp_dreg(s, rd, tcg_rn);
7241            break;
7242        case 2:
7243            /* 64 bit to top half. */
7244            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
7245            clear_vec_high(s, true, rd);
7246            break;
7247        case 3:
7248            /* 16 bit */
7249            tmp = tcg_temp_new_i64();
7250            tcg_gen_ext16u_i64(tmp, tcg_rn);
7251            write_fp_dreg(s, rd, tmp);
7252            tcg_temp_free_i64(tmp);
7253            break;
7254        default:
7255            g_assert_not_reached();
7256        }
7257    } else {
7258        TCGv_i64 tcg_rd = cpu_reg(s, rd);
7259
7260        switch (type) {
7261        case 0:
7262            /* 32 bit */
7263            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
7264            break;
7265        case 1:
7266            /* 64 bit */
7267            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
7268            break;
7269        case 2:
7270            /* 64 bits from top half */
7271            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
7272            break;
7273        case 3:
7274            /* 16 bit */
7275            tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
7276            break;
7277        default:
7278            g_assert_not_reached();
7279        }
7280    }
7281}
7282
7283static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
7284{
7285    TCGv_i64 t = read_fp_dreg(s, rn);
7286    TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);
7287
7288    gen_helper_fjcvtzs(t, t, fpstatus);
7289
7290    tcg_temp_free_ptr(fpstatus);
7291
7292    tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
7293    tcg_gen_extrh_i64_i32(cpu_ZF, t);
7294    tcg_gen_movi_i32(cpu_CF, 0);
7295    tcg_gen_movi_i32(cpu_NF, 0);
7296    tcg_gen_movi_i32(cpu_VF, 0);
7297
7298    tcg_temp_free_i64(t);
7299}
7300
/* Floating point <-> integer conversions
 *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
 */
static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 16, 3);
    int rmode = extract32(insn, 19, 2);
    int type = extract32(insn, 22, 2);
    bool sbit = extract32(insn, 29, 1);
    bool sf = extract32(insn, 31, 1);
    bool itof = false;

    /* S must be zero */
    if (sbit) {
        goto do_unallocated;
    }

    switch (opcode) {
    case 2: /* SCVTF */
    case 3: /* UCVTF */
        itof = true;
        /* fallthru */
    case 4: /* FCVTAS */
    case 5: /* FCVTAU */
        /* SCVTF/UCVTF/FCVTA[SU] only allow rmode == 0 */
        if (rmode != 0) {
            goto do_unallocated;
        }
        /* fallthru */
    case 0: /* FCVT[NPMZ]S */
    case 1: /* FCVT[NPMZ]U */
        switch (type) {
        case 0: /* float32 */
        case 1: /* float64 */
            break;
        case 3: /* float16 */
            if (!dc_isar_feature(aa64_fp16, s)) {
                goto do_unallocated;
            }
            break;
        default:
            goto do_unallocated;
        }
        if (!fp_access_check(s)) {
            return;
        }
        /* scale == 64 selects the integer (non-fixed-point) case */
        handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
        break;

    default:
        /* Remaining encodings: decode sf:type:rmode:opcode as one value */
        switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
        case 0b01100110: /* FMOV half <-> 32-bit int */
        case 0b01100111:
        case 0b11100110: /* FMOV half <-> 64-bit int */
        case 0b11100111:
            if (!dc_isar_feature(aa64_fp16, s)) {
                goto do_unallocated;
            }
            /* fallthru */
        case 0b00000110: /* FMOV 32-bit */
        case 0b00000111:
        case 0b10100110: /* FMOV 64-bit */
        case 0b10100111:
        case 0b11001110: /* FMOV top half of 128-bit */
        case 0b11001111:
            if (!fp_access_check(s)) {
                return;
            }
            /* odd opcodes are the int -> FP direction */
            itof = opcode & 1;
            handle_fmov(s, rd, rn, type, itof);
            break;

        case 0b00111110: /* FJCVTZS */
            if (!dc_isar_feature(aa64_jscvt, s)) {
                goto do_unallocated;
            } else if (fp_access_check(s)) {
                handle_fjcvtzs(s, rd, rn);
            }
            break;

        default:
        do_unallocated:
            unallocated_encoding(s);
            return;
        }
        break;
    }
}
7392
/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
 *   31  30  29 28     25 24                          0
 * +---+---+---+---------+-----------------------------+
 * |   | 0 |   | 1 1 1 1 |                             |
 * +---+---+---+---------+-----------------------------+
 */
static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
{
    if (extract32(insn, 24, 1)) {
        /* Floating point data-processing (3 source) */
        disas_fp_3src(s, insn);
    } else if (extract32(insn, 21, 1) == 0) {
        /* Floating point to fixed point conversions */
        disas_fp_fixed_conv(s, insn);
    } else {
        switch (extract32(insn, 10, 2)) {
        case 1:
            /* Floating point conditional compare */
            disas_fp_ccomp(s, insn);
            break;
        case 2:
            /* Floating point data-processing (2 source) */
            disas_fp_2src(s, insn);
            break;
        case 3:
            /* Floating point conditional select */
            disas_fp_csel(s, insn);
            break;
        case 0:
            /* Dispatch on the position of the lowest set bit of
             * insn[15:12]; ctz32(0) == 32, which lands in "default".
             */
            switch (ctz32(extract32(insn, 12, 4))) {
            case 0: /* [15:12] == xxx1 */
                /* Floating point immediate */
                disas_fp_imm(s, insn);
                break;
            case 1: /* [15:12] == xx10 */
                /* Floating point compare */
                disas_fp_compare(s, insn);
                break;
            case 2: /* [15:12] == x100 */
                /* Floating point data-processing (1 source) */
                disas_fp_1src(s, insn);
                break;
            case 3: /* [15:12] == 1000 */
                unallocated_encoding(s);
                break;
            default: /* [15:12] == 0000 */
                /* Floating point <-> integer conversions */
                disas_fp_int_conv(s, insn);
                break;
            }
            break;
        }
    }
}
7447
7448static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
7449                     int pos)
7450{
7451    /* Extract 64 bits from the middle of two concatenated 64 bit
7452     * vector register slices left:right. The extracted bits start
7453     * at 'pos' bits into the right (least significant) side.
7454     * We return the result in tcg_right, and guarantee not to
7455     * trash tcg_left.
7456     */
7457    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7458    assert(pos > 0 && pos < 64);
7459
7460    tcg_gen_shri_i64(tcg_right, tcg_right, pos);
7461    tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
7462    tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
7463
7464    tcg_temp_free_i64(tcg_tmp);
7465}
7466
7467/* EXT
7468 *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
7469 * +---+---+-------------+-----+---+------+---+------+---+------+------+
7470 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
7471 * +---+---+-------------+-----+---+------+---+------+---+------+------+
7472 */
static void disas_simd_ext(DisasContext *s, uint32_t insn)
{
    int is_q = extract32(insn, 30, 1);
    int op2 = extract32(insn, 22, 2);
    int imm4 = extract32(insn, 11, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int pos = imm4 << 3; /* extraction start position, in bits */
    TCGv_i64 tcg_resl, tcg_resh;

    /* op2 must be zero; in the 64-bit (!is_q) form imm4<3> would start
     * the extraction beyond the 64:64 concatenation, so it is reserved.
     */
    if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_resh = tcg_temp_new_i64();
    tcg_resl = tcg_temp_new_i64();

    /* Vd gets bits starting at pos bits into Vm:Vn. This is
     * either extracting 128 bits from a 128:128 concatenation, or
     * extracting 64 bits from a 64:64 concatenation.
     */
    if (!is_q) {
        read_vec_element(s, tcg_resl, rn, 0, MO_64);
        if (pos != 0) {
            /* Shift bits from the bottom of Vm into the top of the result */
            read_vec_element(s, tcg_resh, rm, 0, MO_64);
            do_ext64(s, tcg_resh, tcg_resl, pos);
        }
    } else {
        TCGv_i64 tcg_hh;
        typedef struct {
            int reg;
            int elt;
        } EltPosns;
        /* The four 64-bit slices of Vm:Vn, least significant first */
        EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
        EltPosns *elt = eltposns;

        /* A whole-slice offset just advances the starting slice by one */
        if (pos >= 64) {
            elt++;
            pos -= 64;
        }

        read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
        elt++;
        read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
        elt++;
        if (pos != 0) {
            /* Shift bits in from the next-higher slice into each half */
            do_ext64(s, tcg_resh, tcg_resl, pos);
            tcg_hh = tcg_temp_new_i64();
            read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
            do_ext64(s, tcg_hh, tcg_resh, pos);
            tcg_temp_free_i64(tcg_hh);
        }
    }

    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_resl);
    if (is_q) {
        write_vec_element(s, tcg_resh, rd, 1, MO_64);
    }
    tcg_temp_free_i64(tcg_resh);
    clear_vec_high(s, is_q, rd);
}
7541
7542/* TBL/TBX
7543 *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
7544 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7545 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
7546 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
7547 */
7548static void disas_simd_tb(DisasContext *s, uint32_t insn)
7549{
7550    int op2 = extract32(insn, 22, 2);
7551    int is_q = extract32(insn, 30, 1);
7552    int rm = extract32(insn, 16, 5);
7553    int rn = extract32(insn, 5, 5);
7554    int rd = extract32(insn, 0, 5);
7555    int is_tbx = extract32(insn, 12, 1);
7556    int len = (extract32(insn, 13, 2) + 1) * 16;
7557
7558    if (op2 != 0) {
7559        unallocated_encoding(s);
7560        return;
7561    }
7562
7563    if (!fp_access_check(s)) {
7564        return;
7565    }
7566
7567    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
7568                       vec_full_reg_offset(s, rm), cpu_env,
7569                       is_q ? 16 : 8, vec_full_reg_size(s),
7570                       (len << 6) | (is_tbx << 5) | rn,
7571                       gen_helper_simd_tblx);
7572}
7573
7574/* ZIP/UZP/TRN
7575 *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
7576 * +---+---+-------------+------+---+------+---+------------------+------+
7577 * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
7578 * +---+---+-------------+------+---+------+---+------------------+------+
7579 */
static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    /* opc field bits [1:0] indicate ZIP/UZP/TRN;
     * bit 2 indicates 1 vs 2 variant of the insn.
     */
    int opcode = extract32(insn, 12, 2);
    bool part = extract32(insn, 14, 1);
    bool is_q = extract32(insn, 30, 1);
    int esize = 8 << size;
    int i, ofs;
    int datasize = is_q ? 128 : 64;
    int elements = datasize / esize;
    TCGv_i64 tcg_res, tcg_resl, tcg_resh;

    /* opcode 0 is unallocated; 64-bit elements require the Q form */
    if (opcode == 0 || (size == 3 && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* Build the result in two 64-bit accumulators so that reads of
     * the (possibly overlapping) source registers all happen before
     * any write to Vd. The high half only exists in the Q form.
     */
    tcg_resl = tcg_const_i64(0);
    tcg_resh = is_q ? tcg_const_i64(0) : NULL;
    tcg_res = tcg_temp_new_i64();

    for (i = 0; i < elements; i++) {
        /* Select the source element for output lane i */
        switch (opcode) {
        case 1: /* UZP1/2 */
        {
            /* Even (UZP1) or odd (UZP2) elements; Vn then Vm */
            int midpoint = elements / 2;
            if (i < midpoint) {
                read_vec_element(s, tcg_res, rn, 2 * i + part, size);
            } else {
                read_vec_element(s, tcg_res, rm,
                                 2 * (i - midpoint) + part, size);
            }
            break;
        }
        case 2: /* TRN1/2 */
            /* Interleave corresponding even/odd element pairs */
            if (i & 1) {
                read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
            } else {
                read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
            }
            break;
        case 3: /* ZIP1/2 */
        {
            /* Interleave the lower (ZIP1) or upper (ZIP2) halves */
            int base = part * elements / 2;
            if (i & 1) {
                read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
            } else {
                read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
            }
            break;
        }
        default:
            g_assert_not_reached();
        }

        /* OR the element into the right accumulator half; for !is_q
         * ofs never reaches 64 since elements * esize == 64.
         */
        ofs = i * esize;
        if (ofs < 64) {
            tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
            tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
        } else {
            tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
            tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
        }
    }

    tcg_temp_free_i64(tcg_res);

    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_resl);

    if (is_q) {
        write_vec_element(s, tcg_resh, rd, 1, MO_64);
        tcg_temp_free_i64(tcg_resh);
    }
    clear_vec_high(s, is_q, rd);
}
7666
7667/*
7668 * do_reduction_op helper
7669 *
7670 * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7671 * important for correct NaN propagation that we do these
7672 * operations in exactly the order specified by the pseudocode.
7673 *
7674 * This is a recursive function, TCG temps should be freed by the
7675 * calling function once it is done with the values.
7676 */
static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
                                int esize, int size, int vmap, TCGv_ptr fpst)
{
    /* fpopcode selects the operation (see switch below), rn is the
     * source vector register, esize the element size in bits, size the
     * width in bits of the slice still being reduced, and vmap a bitmap
     * of the element indices remaining in this slice.
     * Returns a new TCGv_i32 holding the (partial) reduction result;
     * the caller frees it.
     */
    if (esize == size) {
        /* Leaf of the recursion: a single element remains */
        int element;
        MemOp msize = esize == 16 ? MO_16 : MO_32;
        TCGv_i32 tcg_elem;

        /* We should have one register left here */
        assert(ctpop8(vmap) == 1);
        element = ctz32(vmap);
        assert(element < 8);

        tcg_elem = tcg_temp_new_i32();
        read_vec_element_i32(s, tcg_elem, rn, element, msize);
        return tcg_elem;
    } else {
        /* Split the element bitmap into low and high halves, reduce
         * each recursively, then combine: this reproduces the exact
         * pairwise tree order of the Reduce() pseudocode, which matters
         * for NaN propagation.
         */
        int bits = size / 2;
        int shift = ctpop8(vmap) / 2;
        int vmap_lo = (vmap >> shift) & vmap;
        int vmap_hi = (vmap & ~vmap_lo);
        TCGv_i32 tcg_hi, tcg_lo, tcg_res;

        tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
        tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
        tcg_res = tcg_temp_new_i32();

        switch (fpopcode) {
        case 0x0c: /* fmaxnmv half-precision */
            gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x0f: /* fmaxv half-precision */
            gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x1c: /* fminnmv half-precision */
            gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x1f: /* fminv half-precision */
            gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x2c: /* fmaxnmv */
            gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x2f: /* fmaxv */
            gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x3c: /* fminnmv */
            gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x3f: /* fminv */
            gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        default:
            g_assert_not_reached();
        }

        tcg_temp_free_i32(tcg_hi);
        tcg_temp_free_i32(tcg_lo);
        return tcg_res;
    }
}
7738
7739/* AdvSIMD across lanes
7740 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7741 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7742 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7743 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7744 */
static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    bool is_q = extract32(insn, 30, 1);
    bool is_u = extract32(insn, 29, 1);
    bool is_fp = false;
    bool is_min = false;
    int esize;
    int elements;
    int i;
    TCGv_i64 tcg_res, tcg_elt;

    /* Decode / reject unallocated encodings per opcode group */
    switch (opcode) {
    case 0x1b: /* ADDV */
        if (is_u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x3: /* SADDLV, UADDLV */
    case 0xa: /* SMAXV, UMAXV */
    case 0x1a: /* SMINV, UMINV */
        if (size == 3 || (size == 2 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0xc: /* FMAXNMV, FMINNMV */
    case 0xf: /* FMAXV, FMINV */
        /* Bit 1 of size field encodes min vs max and the actual size
         * depends on the encoding of the U bit. If not set (and FP16
         * enabled) then we do half-precision float instead of single
         * precision.
         */
        is_min = extract32(size, 1, 1);
        is_fp = true;
        if (!is_u && dc_isar_feature(aa64_fp16, s)) {
            size = 1;
        } else if (!is_u || !is_q || extract32(size, 0, 1)) {
            unallocated_encoding(s);
            return;
        } else {
            size = 2;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    esize = 8 << size;
    elements = (is_q ? 128 : 64) / esize;

    tcg_res = tcg_temp_new_i64();
    tcg_elt = tcg_temp_new_i64();

    /* These instructions operate across all lanes of a vector
     * to produce a single result. We can guarantee that a 64
     * bit intermediate is sufficient:
     *  + for [US]ADDLV the maximum element size is 32 bits, and
     *    the result type is 64 bits
     *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
     *    same as the element size, which is 32 bits at most
     * For the integer operations we can choose to work at 64
     * or 32 bits and truncate at the end; for simplicity
     * we use 64 bits always. The floating point
     * ops do require 32 bit intermediates, though.
     */
    if (!is_fp) {
        /* Seed the accumulator with element 0, then fold in the rest;
         * signed ops sign-extend each element into the 64-bit temp.
         */
        read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));

        for (i = 1; i < elements; i++) {
            read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));

            switch (opcode) {
            case 0x03: /* SADDLV / UADDLV */
            case 0x1b: /* ADDV */
                tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
                break;
            case 0x0a: /* SMAXV / UMAXV */
                if (is_u) {
                    tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
                } else {
                    tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
                }
                break;
            case 0x1a: /* SMINV / UMINV */
                if (is_u) {
                    tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
                } else {
                    tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
                }
                break;
            default:
                g_assert_not_reached();
            }

        }
    } else {
        /* Floating point vector reduction ops which work across 32
         * bit (single) or 16 bit (half-precision) intermediates.
         * Note that correct NaN propagation requires that we do these
         * operations in exactly the order specified by the pseudocode.
         */
        TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        int fpopcode = opcode | is_min << 4 | is_u << 5;
        int vmap = (1 << elements) - 1;
        TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
                                             (is_q ? 128 : 64), vmap, fpst);
        tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
        tcg_temp_free_i32(tcg_res32);
        tcg_temp_free_ptr(fpst);
    }

    tcg_temp_free_i64(tcg_elt);

    /* Now truncate the result to the width required for the final output */
    if (opcode == 0x03) {
        /* SADDLV, UADDLV: result is 2*esize */
        size++;
    }

    switch (size) {
    case 0:
        tcg_gen_ext8u_i64(tcg_res, tcg_res);
        break;
    case 1:
        tcg_gen_ext16u_i64(tcg_res, tcg_res);
        break;
    case 2:
        tcg_gen_ext32u_i64(tcg_res, tcg_res);
        break;
    case 3:
        break;
    default:
        g_assert_not_reached();
    }

    /* Scalar result: write_fp_dreg zeroes the rest of the register */
    write_fp_dreg(s, rd, tcg_res);
    tcg_temp_free_i64(tcg_res);
}
7893
7894/* DUP (Element, Vector)
7895 *
7896 *  31  30   29              21 20    16 15        10  9    5 4    0
7897 * +---+---+-------------------+--------+-------------+------+------+
7898 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7899 * +---+---+-------------------+--------+-------------+------+------+
7900 *
7901 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7902 */
7903static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7904                             int imm5)
7905{
7906    int size = ctz32(imm5);
7907    int index;
7908
7909    if (size > 3 || (size == 3 && !is_q)) {
7910        unallocated_encoding(s);
7911        return;
7912    }
7913
7914    if (!fp_access_check(s)) {
7915        return;
7916    }
7917
7918    index = imm5 >> (size + 1);
7919    tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7920                         vec_reg_offset(s, rn, index, size),
7921                         is_q ? 16 : 8, vec_full_reg_size(s));
7922}
7923
7924/* DUP (element, scalar)
7925 *  31                   21 20    16 15        10  9    5 4    0
7926 * +-----------------------+--------+-------------+------+------+
7927 * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7928 * +-----------------------+--------+-------------+------+------+
7929 */
7930static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7931                              int imm5)
7932{
7933    int size = ctz32(imm5);
7934    int index;
7935    TCGv_i64 tmp;
7936
7937    if (size > 3) {
7938        unallocated_encoding(s);
7939        return;
7940    }
7941
7942    if (!fp_access_check(s)) {
7943        return;
7944    }
7945
7946    index = imm5 >> (size + 1);
7947
7948    /* This instruction just extracts the specified element and
7949     * zero-extends it into the bottom of the destination register.
7950     */
7951    tmp = tcg_temp_new_i64();
7952    read_vec_element(s, tmp, rn, index, size);
7953    write_fp_dreg(s, rd, tmp);
7954    tcg_temp_free_i64(tmp);
7955}
7956
7957/* DUP (General)
7958 *
7959 *  31  30   29              21 20    16 15        10  9    5 4    0
7960 * +---+---+-------------------+--------+-------------+------+------+
7961 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
7962 * +---+---+-------------------+--------+-------------+------+------+
7963 *
7964 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7965 */
7966static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7967                             int imm5)
7968{
7969    int size = ctz32(imm5);
7970    uint32_t dofs, oprsz, maxsz;
7971
7972    if (size > 3 || ((size == 3) && !is_q)) {
7973        unallocated_encoding(s);
7974        return;
7975    }
7976
7977    if (!fp_access_check(s)) {
7978        return;
7979    }
7980
7981    dofs = vec_full_reg_offset(s, rd);
7982    oprsz = is_q ? 16 : 8;
7983    maxsz = vec_full_reg_size(s);
7984
7985    tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7986}
7987
7988/* INS (Element)
7989 *
7990 *  31                   21 20    16 15  14    11  10 9    5 4    0
7991 * +-----------------------+--------+------------+---+------+------+
7992 * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
7993 * +-----------------------+--------+------------+---+------+------+
7994 *
7995 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7996 * index: encoded in imm5<4:size+1>
7997 */
7998static void handle_simd_inse(DisasContext *s, int rd, int rn,
7999                             int imm4, int imm5)
8000{
8001    int size = ctz32(imm5);
8002    int src_index, dst_index;
8003    TCGv_i64 tmp;
8004
8005    if (size > 3) {
8006        unallocated_encoding(s);
8007        return;
8008    }
8009
8010    if (!fp_access_check(s)) {
8011        return;
8012    }
8013
8014    dst_index = extract32(imm5, 1+size, 5);
8015    src_index = extract32(imm4, size, 4);
8016
8017    tmp = tcg_temp_new_i64();
8018
8019    read_vec_element(s, tmp, rn, src_index, size);
8020    write_vec_element(s, tmp, rd, dst_index, size);
8021
8022    tcg_temp_free_i64(tmp);
8023
8024    /* INS is considered a 128-bit write for SVE. */
8025    clear_vec_high(s, true, rd);
8026}
8027
8028
8029/* INS (General)
8030 *
8031 *  31                   21 20    16 15        10  9    5 4    0
8032 * +-----------------------+--------+-------------+------+------+
8033 * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
8034 * +-----------------------+--------+-------------+------+------+
8035 *
8036 * size: encoded in imm5 (see ARM ARM LowestSetBit())
8037 * index: encoded in imm5<4:size+1>
8038 */
8039static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
8040{
8041    int size = ctz32(imm5);
8042    int idx;
8043
8044    if (size > 3) {
8045        unallocated_encoding(s);
8046        return;
8047    }
8048
8049    if (!fp_access_check(s)) {
8050        return;
8051    }
8052
8053    idx = extract32(imm5, 1 + size, 4 - size);
8054    write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
8055
8056    /* INS is considered a 128-bit write for SVE. */
8057    clear_vec_high(s, true, rd);
8058}
8059
8060/*
8061 * UMOV (General)
8062 * SMOV (General)
8063 *
8064 *  31  30   29              21 20    16 15    12   10 9    5 4    0
8065 * +---+---+-------------------+--------+-------------+------+------+
8066 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
8067 * +---+---+-------------------+--------+-------------+------+------+
8068 *
8069 * U: unsigned when set
8070 * size: encoded in imm5 (see ARM ARM LowestSetBit())
8071 */
8072static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
8073                                  int rn, int rd, int imm5)
8074{
8075    int size = ctz32(imm5);
8076    int element;
8077    TCGv_i64 tcg_rd;
8078
8079    /* Check for UnallocatedEncodings */
8080    if (is_signed) {
8081        if (size > 2 || (size == 2 && !is_q)) {
8082            unallocated_encoding(s);
8083            return;
8084        }
8085    } else {
8086        if (size > 3
8087            || (size < 3 && is_q)
8088            || (size == 3 && !is_q)) {
8089            unallocated_encoding(s);
8090            return;
8091        }
8092    }
8093
8094    if (!fp_access_check(s)) {
8095        return;
8096    }
8097
8098    element = extract32(imm5, 1+size, 4);
8099
8100    tcg_rd = cpu_reg(s, rd);
8101    read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
8102    if (is_signed && !is_q) {
8103        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
8104    }
8105}
8106
8107/* AdvSIMD copy
8108 *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
8109 * +---+---+----+-----------------+------+---+------+---+------+------+
8110 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
8111 * +---+---+----+-----------------+------+---+------+---+------+------+
8112 */
8113static void disas_simd_copy(DisasContext *s, uint32_t insn)
8114{
8115    int rd = extract32(insn, 0, 5);
8116    int rn = extract32(insn, 5, 5);
8117    int imm4 = extract32(insn, 11, 4);
8118    int op = extract32(insn, 29, 1);
8119    int is_q = extract32(insn, 30, 1);
8120    int imm5 = extract32(insn, 16, 5);
8121
8122    if (op) {
8123        if (is_q) {
8124            /* INS (element) */
8125            handle_simd_inse(s, rd, rn, imm4, imm5);
8126        } else {
8127            unallocated_encoding(s);
8128        }
8129    } else {
8130        switch (imm4) {
8131        case 0:
8132            /* DUP (element - vector) */
8133            handle_simd_dupe(s, is_q, rd, rn, imm5);
8134            break;
8135        case 1:
8136            /* DUP (general) */
8137            handle_simd_dupg(s, is_q, rd, rn, imm5);
8138            break;
8139        case 3:
8140            if (is_q) {
8141                /* INS (general) */
8142                handle_simd_insg(s, rd, rn, imm5);
8143            } else {
8144                unallocated_encoding(s);
8145            }
8146            break;
8147        case 5:
8148        case 7:
8149            /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
8150            handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
8151            break;
8152        default:
8153            unallocated_encoding(s);
8154            break;
8155        }
8156    }
8157}
8158
8159/* AdvSIMD modified immediate
8160 *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
8161 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
8162 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
8163 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
8164 *
8165 * There are a number of operations that can be carried out here:
8166 *   MOVI - move (shifted) imm into register
8167 *   MVNI - move inverted (shifted) imm into register
8168 *   ORR  - bitwise OR of (shifted) imm with register
8169 *   BIC  - bitwise clear of (shifted) imm with register
8170 * With ARMv8.2 we also have:
8171 *   FMOV half-precision
8172 */
static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int cmode = extract32(insn, 12, 4);
    int o2 = extract32(insn, 11, 1);
    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
    bool is_neg = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    uint64_t imm = 0;

    /* o2 == 1 is only allocated for the FP16 FMOV (o2=1, cmode=1111);
     * cmode == 1111 with op set is MOVI (64-bit), Q form only.
     */
    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
        /* Check for FMOV (vector, immediate) - half-precision */
        if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
            unallocated_encoding(s);
            return;
        }
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (cmode == 15 && o2 && !is_neg) {
        /* FMOV (vector, immediate) - half-precision */
        imm = vfp_expand_imm(MO_16, abcdefgh);
        /* now duplicate across the lanes */
        imm = dup_const(MO_16, imm);
    } else {
        /* Expand abcdefgh per cmode into the replicated 64-bit pattern;
         * is_neg selects the MVNI/BIC inverted forms.
         */
        imm = asimd_imm_const(abcdefgh, cmode, is_neg);
    }

    /* cmode patterns x0x1 and 10x1 are ORR/BIC; everything else MOVI/MVNI */
    if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
        /* MOVI or MVNI, with MVNI negation handled above.  */
        tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
                             vec_full_reg_size(s), imm);
    } else {
        /* ORR or BIC, with BIC negation to AND handled above.  */
        if (is_neg) {
            gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
        } else {
            gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
        }
    }
}
8217
8218/* AdvSIMD scalar copy
8219 *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
8220 * +-----+----+-----------------+------+---+------+---+------+------+
8221 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
8222 * +-----+----+-----------------+------+---+------+---+------+------+
8223 */
8224static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
8225{
8226    int rd = extract32(insn, 0, 5);
8227    int rn = extract32(insn, 5, 5);
8228    int imm4 = extract32(insn, 11, 4);
8229    int imm5 = extract32(insn, 16, 5);
8230    int op = extract32(insn, 29, 1);
8231
8232    if (op != 0 || imm4 != 0) {
8233        unallocated_encoding(s);
8234        return;
8235    }
8236
8237    /* DUP (element, scalar) */
8238    handle_simd_dupes(s, rd, rn, imm5);
8239}
8240
8241/* AdvSIMD scalar pairwise
8242 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
8243 * +-----+---+-----------+------+-----------+--------+-----+------+------+
8244 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
8245 * +-----+---+-----------+------+-----------+--------+-----+------+------+
8246 */
static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
{
    /* Decode fields: U selects the unsigned/single-double variants,
     * size and opcode together select the operation.
     */
    int u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    TCGv_ptr fpst;

    /* For some ops (the FP ones), size[1] is part of the encoding.
     * For ADDP strictly it is not but size[1] is always 1 for valid
     * encodings.
     */
    opcode |= (extract32(size, 1, 1) << 5);

    switch (opcode) {
    case 0x3b: /* ADDP */
        if (u || size != 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }

        /* Integer op: no float-status pointer needed. */
        fpst = NULL;
        break;
    case 0xc: /* FMAXNMP */
    case 0xd: /* FADDP */
    case 0xf: /* FMAXP */
    case 0x2c: /* FMINNMP */
    case 0x2f: /* FMINP */
        /* FP op, size[0] is 32 or 64 bit */
        if (!u) {
            /* U == 0 encodes the half-precision variant (FEAT_FP16). */
            if (!dc_isar_feature(aa64_fp16, s)) {
                unallocated_encoding(s);
                return;
            } else {
                size = MO_16;
            }
        } else {
            size = extract32(size, 0, 1) ? MO_64 : MO_32;
        }

        if (!fp_access_check(s)) {
            return;
        }

        /* FP16 uses a separate float_status with its own FPCR view. */
        fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == MO_64) {
        /* Pairwise op on the two 64-bit elements of Vn; scalar result. */
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element(s, tcg_op1, rn, 0, MO_64);
        read_vec_element(s, tcg_op2, rn, 1, MO_64);

        switch (opcode) {
        case 0x3b: /* ADDP */
            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
            break;
        case 0xc: /* FMAXNMP */
            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xd: /* FADDP */
            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xf: /* FMAXP */
            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2c: /* FMINNMP */
            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2f: /* FMINP */
            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    } else {
        /* 16- or 32-bit elements: operate on the low two elements of Vn. */
        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i32 tcg_res = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_op1, rn, 0, size);
        read_vec_element_i32(s, tcg_op2, rn, 1, size);

        if (size == MO_16) {
            switch (opcode) {
            case 0xc: /* FMAXNMP */
                gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xd: /* FADDP */
                gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xf: /* FMAXP */
                gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2c: /* FMINNMP */
                gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2f: /* FMINP */
                gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }
        } else {
            switch (opcode) {
            case 0xc: /* FMAXNMP */
                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xd: /* FADDP */
                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xf: /* FMAXP */
                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2c: /* FMINNMP */
                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2f: /* FMINP */
                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }
        }

        write_fp_sreg(s, rd, tcg_res);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i32(tcg_res);
    }

    if (fpst) {
        tcg_temp_free_ptr(fpst);
    }
}
8399
/*
 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
 *
 * This code handles the common shifting logic and is used by both
 * the vector and scalar code.
 */
static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
                                    TCGv_i64 tcg_rnd, bool accumulate,
                                    bool is_u, int size, int shift)
{
    bool extended_result = false;
    /* A non-NULL tcg_rnd means "round": it holds the rounding constant. */
    bool round = tcg_rnd != NULL;
    int ext_lshift = 0;
    TCGv_i64 tcg_src_hi;

    if (round && size == 3) {
        /* Rounding a 64-bit source can carry out of bit 63, so we need
         * a second word to hold the high half of the 128-bit sum.
         */
        extended_result = true;
        ext_lshift = 64 - shift;
        tcg_src_hi = tcg_temp_new_i64();
    } else if (shift == 64) {
        if (!accumulate && is_u) {
            /* result is zero */
            tcg_gen_movi_i64(tcg_res, 0);
            return;
        }
    }

    /* Deal with the rounding step */
    if (round) {
        if (extended_result) {
            TCGv_i64 tcg_zero = tcg_const_i64(0);
            if (!is_u) {
                /* take care of sign extending tcg_res */
                tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
                                 tcg_src, tcg_src_hi,
                                 tcg_rnd, tcg_zero);
            } else {
                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
                                 tcg_src, tcg_zero,
                                 tcg_rnd, tcg_zero);
            }
            tcg_temp_free_i64(tcg_zero);
        } else {
            tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
        }
    }

    /* Now do the shift right */
    if (round && extended_result) {
        /* extended case, >64 bit precision required */
        if (ext_lshift == 0) {
            /* special case, only high bits matter */
            tcg_gen_mov_i64(tcg_src, tcg_src_hi);
        } else {
            /* Recombine: low word shifted right, high word shifted left
             * into the vacated bits.
             */
            tcg_gen_shri_i64(tcg_src, tcg_src, shift);
            tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
            tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
        }
    } else {
        if (is_u) {
            if (shift == 64) {
                /* essentially shifting in 64 zeros */
                tcg_gen_movi_i64(tcg_src, 0);
            } else {
                tcg_gen_shri_i64(tcg_src, tcg_src, shift);
            }
        } else {
            if (shift == 64) {
                /* effectively extending the sign-bit */
                tcg_gen_sari_i64(tcg_src, tcg_src, 63);
            } else {
                tcg_gen_sari_i64(tcg_src, tcg_src, shift);
            }
        }
    }

    /* Either accumulate into the destination or overwrite it. */
    if (accumulate) {
        tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
    } else {
        tcg_gen_mov_i64(tcg_res, tcg_src);
    }

    if (extended_result) {
        tcg_temp_free_i64(tcg_src_hi);
    }
}
8487
8488/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
static void handle_scalar_simd_shri(DisasContext *s,
                                    bool is_u, int immh, int immb,
                                    int opcode, int rn, int rd)
{
    /* Scalar form only operates on 64-bit elements (size == 3). */
    const int size = 3;
    int immhb = immh << 3 | immb;
    int shift = 2 * (8 << size) - immhb;
    bool accumulate = false;
    bool round = false;
    bool insert = false;
    TCGv_i64 tcg_rn;
    TCGv_i64 tcg_rd;
    TCGv_i64 tcg_round;

    /* immh<3> must be set for the 64-bit element size. */
    if (!extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* Opcode 0x00 (SSHR/USHR) leaves all flags false: a plain shift. */
    switch (opcode) {
    case 0x02: /* SSRA / USRA (accumulate) */
        accumulate = true;
        break;
    case 0x04: /* SRSHR / URSHR (rounding) */
        round = true;
        break;
    case 0x06: /* SRSRA / URSRA (accum + rounding) */
        accumulate = round = true;
        break;
    case 0x08: /* SRI */
        insert = true;
        break;
    }

    if (round) {
        uint64_t round_const = 1ULL << (shift - 1);
        tcg_round = tcg_const_i64(round_const);
    } else {
        tcg_round = NULL;
    }

    tcg_rn = read_fp_dreg(s, rn);
    /* SRI and the accumulating forms read the old destination value. */
    tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();

    if (insert) {
        /* shift count same as element size is valid but does nothing;
         * special case to avoid potential shift by 64.
         */
        int esize = 8 << size;
        if (shift != esize) {
            tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
            tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
        }
    } else {
        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                                accumulate, is_u, size, shift);
    }

    write_fp_dreg(s, rd, tcg_rd);

    tcg_temp_free_i64(tcg_rn);
    tcg_temp_free_i64(tcg_rd);
    if (round) {
        tcg_temp_free_i64(tcg_round);
    }
}
8559
8560/* SHL/SLI - Scalar shift left */
8561static void handle_scalar_simd_shli(DisasContext *s, bool insert,
8562                                    int immh, int immb, int opcode,
8563                                    int rn, int rd)
8564{
8565    int size = 32 - clz32(immh) - 1;
8566    int immhb = immh << 3 | immb;
8567    int shift = immhb - (8 << size);
8568    TCGv_i64 tcg_rn;
8569    TCGv_i64 tcg_rd;
8570
8571    if (!extract32(immh, 3, 1)) {
8572        unallocated_encoding(s);
8573        return;
8574    }
8575
8576    if (!fp_access_check(s)) {
8577        return;
8578    }
8579
8580    tcg_rn = read_fp_dreg(s, rn);
8581    tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
8582
8583    if (insert) {
8584        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
8585    } else {
8586        tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
8587    }
8588
8589    write_fp_dreg(s, rd, tcg_rd);
8590
8591    tcg_temp_free_i64(tcg_rn);
8592    tcg_temp_free_i64(tcg_rd);
8593}
8594
8595/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
8596 * (signed/unsigned) narrowing */
static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
                                   bool is_u_shift, bool is_u_narrow,
                                   int immh, int immb, int opcode,
                                   int rn, int rd)
{
    int immhb = immh << 3 | immb;
    /* size is the *destination* element size; sources are double-width. */
    int size = 32 - clz32(immh) - 1;
    int esize = 8 << size;
    int shift = (2 * esize) - immhb;
    int elements = is_scalar ? 1 : (64 / esize);
    /* opcode bit 0 distinguishes the rounding (SQRSHR*) variants. */
    bool round = extract32(opcode, 0, 1);
    MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
    TCGv_i64 tcg_rn, tcg_rd, tcg_round;
    TCGv_i32 tcg_rd_narrowed;
    TCGv_i64 tcg_final;

    /* Narrowing helpers indexed by destination size; the second index
     * selects signed-to-unsigned saturation (SQSHRUN).
     */
    static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
        { gen_helper_neon_narrow_sat_s8,
          gen_helper_neon_unarrow_sat8 },
        { gen_helper_neon_narrow_sat_s16,
          gen_helper_neon_unarrow_sat16 },
        { gen_helper_neon_narrow_sat_s32,
          gen_helper_neon_unarrow_sat32 },
        { NULL, NULL },
    };
    static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
        gen_helper_neon_narrow_sat_u8,
        gen_helper_neon_narrow_sat_u16,
        gen_helper_neon_narrow_sat_u32,
        NULL
    };
    NeonGenNarrowEnvFn *narrowfn;

    int i;

    assert(size < 4);

    /* immh<3> set would imply a 64-bit destination, which cannot narrow. */
    if (extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (is_u_shift) {
        narrowfn = unsigned_narrow_fns[size];
    } else {
        narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
    }

    tcg_rn = tcg_temp_new_i64();
    tcg_rd = tcg_temp_new_i64();
    tcg_rd_narrowed = tcg_temp_new_i32();
    tcg_final = tcg_const_i64(0);

    if (round) {
        uint64_t round_const = 1ULL << (shift - 1);
        tcg_round = tcg_const_i64(round_const);
    } else {
        tcg_round = NULL;
    }

    /* Shift each double-width element, saturate-narrow it, and pack the
     * results into a single 64-bit value.
     */
    for (i = 0; i < elements; i++) {
        read_vec_element(s, tcg_rn, rn, i, ldop);
        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                                false, is_u_shift, size+1, shift);
        narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
    }

    /* The "2" variants (is_q) write the high half of Vd; otherwise the
     * low half is written (and the high half cleared below).
     */
    if (!is_q) {
        write_vec_element(s, tcg_final, rd, 0, MO_64);
    } else {
        write_vec_element(s, tcg_final, rd, 1, MO_64);
    }

    if (round) {
        tcg_temp_free_i64(tcg_round);
    }
    tcg_temp_free_i64(tcg_rn);
    tcg_temp_free_i64(tcg_rd);
    tcg_temp_free_i32(tcg_rd_narrowed);
    tcg_temp_free_i64(tcg_final);

    clear_vec_high(s, is_q, rd);
}
8686
8687/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
                             bool src_unsigned, bool dst_unsigned,
                             int immh, int immb, int rn, int rd)
{
    int immhb = immh << 3 | immb;
    int size = 32 - clz32(immh) - 1;
    int shift = immhb - (8 << size);
    int pass;

    assert(immh != 0);
    assert(!(scalar && is_q));

    if (!scalar) {
        if (!is_q && extract32(immh, 3, 1)) {
            unallocated_encoding(s);
            return;
        }

        /* Since we use the variable-shift helpers we must
         * replicate the shift count into each element of
         * the tcg_shift value.
         */
        switch (size) {
        case 0:
            shift |= shift << 8;
            /* fall through */
        case 1:
            shift |= shift << 16;
            break;
        case 2:
        case 3:
            break;
        default:
            g_assert_not_reached();
        }
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (size == 3) {
        TCGv_i64 tcg_shift = tcg_const_i64(shift);
        /* Helpers indexed by [src_unsigned][dst_unsigned]; an unsigned
         * source with a signed destination is not a valid combination.
         */
        static NeonGenTwo64OpEnvFn * const fns[2][2] = {
            { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
            { NULL, gen_helper_neon_qshl_u64 },
        };
        NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
        int maxpass = is_q ? 2 : 1;

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);
            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
            write_vec_element(s, tcg_op, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_op);
        }
        tcg_temp_free_i64(tcg_shift);
        clear_vec_high(s, is_q, rd);
    } else {
        TCGv_i32 tcg_shift = tcg_const_i32(shift);
        /* As above, indexed by [src_unsigned][dst_unsigned][size]. */
        static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
            {
                { gen_helper_neon_qshl_s8,
                  gen_helper_neon_qshl_s16,
                  gen_helper_neon_qshl_s32 },
                { gen_helper_neon_qshlu_s8,
                  gen_helper_neon_qshlu_s16,
                  gen_helper_neon_qshlu_s32 }
            }, {
                { NULL, NULL, NULL },
                { gen_helper_neon_qshl_u8,
                  gen_helper_neon_qshl_u16,
                  gen_helper_neon_qshl_u32 }
            }
        };
        NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
        MemOp memop = scalar ? size : MO_32;
        /* Vector forms process 32 bits per pass (the shift count was
         * replicated into sub-elements above).
         */
        int maxpass = scalar ? 1 : is_q ? 4 : 2;

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op, rn, pass, memop);
            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
            if (scalar) {
                /* Zero-extend the narrow result before the scalar write. */
                switch (size) {
                case 0:
                    tcg_gen_ext8u_i32(tcg_op, tcg_op);
                    break;
                case 1:
                    tcg_gen_ext16u_i32(tcg_op, tcg_op);
                    break;
                case 2:
                    break;
                default:
                    g_assert_not_reached();
                }
                write_fp_sreg(s, rd, tcg_op);
            } else {
                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_op);
        }
        tcg_temp_free_i32(tcg_shift);

        if (!scalar) {
            clear_vec_high(s, is_q, rd);
        }
    }
}
8802
8803/* Common vector code for handling integer to FP conversion */
static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
                                   int elements, int is_signed,
                                   int fracbits, int size)
{
    TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    TCGv_i32 tcg_shift = NULL;

    MemOp mop = size | (is_signed ? MO_SIGN : 0);
    int pass;

    /* fracbits != 0 selects the fixed-point helpers; the 64-bit helpers
     * always take a shift argument, so allocate it for MO_64 regardless.
     */
    if (fracbits || size == MO_64) {
        tcg_shift = tcg_const_i32(fracbits);
    }

    if (size == MO_64) {
        TCGv_i64 tcg_int64 = tcg_temp_new_i64();
        TCGv_i64 tcg_double = tcg_temp_new_i64();

        for (pass = 0; pass < elements; pass++) {
            read_vec_element(s, tcg_int64, rn, pass, mop);

            if (is_signed) {
                gen_helper_vfp_sqtod(tcg_double, tcg_int64,
                                     tcg_shift, tcg_fpst);
            } else {
                gen_helper_vfp_uqtod(tcg_double, tcg_int64,
                                     tcg_shift, tcg_fpst);
            }
            /* elements == 1 is the scalar case: write a zero-extended
             * D register rather than a vector element.
             */
            if (elements == 1) {
                write_fp_dreg(s, rd, tcg_double);
            } else {
                write_vec_element(s, tcg_double, rd, pass, MO_64);
            }
        }

        tcg_temp_free_i64(tcg_int64);
        tcg_temp_free_i64(tcg_double);

    } else {
        TCGv_i32 tcg_int32 = tcg_temp_new_i32();
        TCGv_i32 tcg_float = tcg_temp_new_i32();

        for (pass = 0; pass < elements; pass++) {
            read_vec_element_i32(s, tcg_int32, rn, pass, mop);

            /* Select the helper on element size, signedness, and whether
             * this is a fixed-point (fracbits != 0) conversion.
             */
            switch (size) {
            case MO_32:
                if (fracbits) {
                    if (is_signed) {
                        gen_helper_vfp_sltos(tcg_float, tcg_int32,
                                             tcg_shift, tcg_fpst);
                    } else {
                        gen_helper_vfp_ultos(tcg_float, tcg_int32,
                                             tcg_shift, tcg_fpst);
                    }
                } else {
                    if (is_signed) {
                        gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
                    } else {
                        gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
                    }
                }
                break;
            case MO_16:
                if (fracbits) {
                    if (is_signed) {
                        gen_helper_vfp_sltoh(tcg_float, tcg_int32,
                                             tcg_shift, tcg_fpst);
                    } else {
                        gen_helper_vfp_ultoh(tcg_float, tcg_int32,
                                             tcg_shift, tcg_fpst);
                    }
                } else {
                    if (is_signed) {
                        gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
                    } else {
                        gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
                    }
                }
                break;
            default:
                g_assert_not_reached();
            }

            if (elements == 1) {
                write_fp_sreg(s, rd, tcg_float);
            } else {
                write_vec_element_i32(s, tcg_float, rd, pass, size);
            }
        }

        tcg_temp_free_i32(tcg_int32);
        tcg_temp_free_i32(tcg_float);
    }

    tcg_temp_free_ptr(tcg_fpst);
    if (tcg_shift) {
        tcg_temp_free_i32(tcg_shift);
    }

    /* (elements << size) == 16 iff we wrote a full 128-bit result. */
    clear_vec_high(s, elements << size == 16, rd);
}
8906
8907/* UCVTF/SCVTF - Integer to FP conversion */
8908static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8909                                         bool is_q, bool is_u,
8910                                         int immh, int immb, int opcode,
8911                                         int rn, int rd)
8912{
8913    int size, elements, fracbits;
8914    int immhb = immh << 3 | immb;
8915
8916    if (immh & 8) {
8917        size = MO_64;
8918        if (!is_scalar && !is_q) {
8919            unallocated_encoding(s);
8920            return;
8921        }
8922    } else if (immh & 4) {
8923        size = MO_32;
8924    } else if (immh & 2) {
8925        size = MO_16;
8926        if (!dc_isar_feature(aa64_fp16, s)) {
8927            unallocated_encoding(s);
8928            return;
8929        }
8930    } else {
8931        /* immh == 0 would be a failure of the decode logic */
8932        g_assert(immh == 1);
8933        unallocated_encoding(s);
8934        return;
8935    }
8936
8937    if (is_scalar) {
8938        elements = 1;
8939    } else {
8940        elements = (8 << is_q) >> size;
8941    }
8942    fracbits = (16 << size) - immhb;
8943
8944    if (!fp_access_check(s)) {
8945        return;
8946    }
8947
8948    handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8949}
8950
/* FCVTZS, FCVTZU - FP to fixed-point conversion */
8952static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
8953                                         bool is_q, bool is_u,
8954                                         int immh, int immb, int rn, int rd)
8955{
8956    int immhb = immh << 3 | immb;
8957    int pass, size, fracbits;
8958    TCGv_ptr tcg_fpstatus;
8959    TCGv_i32 tcg_rmode, tcg_shift;
8960
8961    if (immh & 0x8) {
8962        size = MO_64;
8963        if (!is_scalar && !is_q) {
8964            unallocated_encoding(s);
8965            return;
8966        }
8967    } else if (immh & 0x4) {
8968        size = MO_32;
8969    } else if (immh & 0x2) {
8970        size = MO_16;
8971        if (!dc_isar_feature(aa64_fp16, s)) {
8972            unallocated_encoding(s);
8973            return;
8974        }
8975    } else {
8976        /* Should have split out AdvSIMD modified immediate earlier.  */
8977        assert(immh == 1);
8978        unallocated_encoding(s);
8979        return;
8980    }
8981
8982    if (!fp_access_check(s)) {
8983        return;
8984    }
8985
8986    assert(!(is_scalar && is_q));
8987
8988    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
8989    tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8990    gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
8991    fracbits = (16 << size) - immhb;
8992    tcg_shift = tcg_const_i32(fracbits);
8993
8994    if (size == MO_64) {
8995        int maxpass = is_scalar ? 1 : 2;
8996
8997        for (pass = 0; pass < maxpass; pass++) {
8998            TCGv_i64 tcg_op = tcg_temp_new_i64();
8999
9000            read_vec_element(s, tcg_op, rn, pass, MO_64);
9001            if (is_u) {
9002                gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9003            } else {
9004                gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9005            }
9006            write_vec_element(s, tcg_op, rd, pass, MO_64);
9007            tcg_temp_free_i64(tcg_op);
9008        }
9009        clear_vec_high(s, is_q, rd);
9010    } else {
9011        void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
9012        int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
9013
9014        switch (size) {
9015        case MO_16:
9016            if (is_u) {
9017                fn = gen_helper_vfp_touhh;
9018            } else {
9019                fn = gen_helper_vfp_toshh;
9020            }
9021            break;
9022        case MO_32:
9023            if (is_u) {
9024                fn = gen_helper_vfp_touls;
9025            } else {
9026                fn = gen_helper_vfp_tosls;
9027            }
9028            break;
9029        default:
9030            g_assert_not_reached();
9031        }
9032
9033        for (pass = 0; pass < maxpass; pass++) {
9034            TCGv_i32 tcg_op = tcg_temp_new_i32();
9035
9036            read_vec_element_i32(s, tcg_op, rn, pass, size);
9037            fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
9038            if (is_scalar) {
9039                write_fp_sreg(s, rd, tcg_op);
9040            } else {
9041                write_vec_element_i32(s, tcg_op, rd, pass, size);
9042            }
9043            tcg_temp_free_i32(tcg_op);
9044        }
9045        if (!is_scalar) {
9046            clear_vec_high(s, is_q, rd);
9047        }
9048    }
9049
9050    tcg_temp_free_ptr(tcg_fpstatus);
9051    tcg_temp_free_i32(tcg_shift);
9052    gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
9053    tcg_temp_free_i32(tcg_rmode);
9054}
9055
9056/* AdvSIMD scalar shift by immediate
9057 *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
9058 * +-----+---+-------------+------+------+--------+---+------+------+
9059 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
9060 * +-----+---+-------------+------+------+--------+---+------+------+
9061 *
 * This is the scalar version, so it works on fixed-size registers
9063 */
9064static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
9065{
9066    int rd = extract32(insn, 0, 5);
9067    int rn = extract32(insn, 5, 5);
9068    int opcode = extract32(insn, 11, 5);
9069    int immb = extract32(insn, 16, 3);
9070    int immh = extract32(insn, 19, 4);
9071    bool is_u = extract32(insn, 29, 1);
9072
9073    if (immh == 0) {
9074        unallocated_encoding(s);
9075        return;
9076    }
9077
9078    switch (opcode) {
9079    case 0x08: /* SRI */
9080        if (!is_u) {
9081            unallocated_encoding(s);
9082            return;
9083        }
9084        /* fall through */
9085    case 0x00: /* SSHR / USHR */
9086    case 0x02: /* SSRA / USRA */
9087    case 0x04: /* SRSHR / URSHR */
9088    case 0x06: /* SRSRA / URSRA */
9089        handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
9090        break;
9091    case 0x0a: /* SHL / SLI */
9092        handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
9093        break;
9094    case 0x1c: /* SCVTF, UCVTF */
9095        handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
9096                                     opcode, rn, rd);
9097        break;
9098    case 0x10: /* SQSHRUN, SQSHRUN2 */
9099    case 0x11: /* SQRSHRUN, SQRSHRUN2 */
9100        if (!is_u) {
9101            unallocated_encoding(s);
9102            return;
9103        }
9104        handle_vec_simd_sqshrn(s, true, false, false, true,
9105                               immh, immb, opcode, rn, rd);
9106        break;
9107    case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
9108    case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
9109        handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
9110                               immh, immb, opcode, rn, rd);
9111        break;
9112    case 0xc: /* SQSHLU */
9113        if (!is_u) {
9114            unallocated_encoding(s);
9115            return;
9116        }
9117        handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
9118        break;
9119    case 0xe: /* SQSHL, UQSHL */
9120        handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
9121        break;
9122    case 0x1f: /* FCVTZS, FCVTZU */
9123        handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
9124        break;
9125    default:
9126        unallocated_encoding(s);
9127        break;
9128    }
9129}
9130
9131/* AdvSIMD scalar three different
9132 *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
9133 * +-----+---+-----------+------+---+------+--------+-----+------+------+
9134 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
9135 * +-----+---+-----------+------+---+------+--------+-----+------+------+
9136 */
static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
{
    bool is_u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    /* No U == 1 encodings exist in this group. */
    if (is_u) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0x9: /* SQDMLAL, SQDMLAL2 */
    case 0xb: /* SQDMLSL, SQDMLSL2 */
    case 0xd: /* SQDMULL, SQDMULL2 */
        /* Only 16->32 (size 1) and 32->64 (size 2) are valid. */
        if (size == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (size == 2) {
        /* 32x32 -> 64 */
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
        read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);

        /* Saturating doubling: multiply, then saturating-add the
         * product to itself.
         */
        tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
        gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);

        switch (opcode) {
        case 0xd: /* SQDMULL, SQDMULL2 */
            break;
        case 0xb: /* SQDMLSL, SQDMLSL2 */
            /* Subtract is implemented as negate-then-accumulate. */
            tcg_gen_neg_i64(tcg_res, tcg_res);
            /* fall through */
        case 0x9: /* SQDMLAL, SQDMLAL2 */
            read_vec_element(s, tcg_op1, rd, 0, MO_64);
            gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
                                              tcg_res, tcg_op1);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    } else {
        /* 16x16 -> 32 */
        TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
        TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
        gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);

        switch (opcode) {
        case 0xd: /* SQDMULL, SQDMULL2 */
            break;
        case 0xb: /* SQDMLSL, SQDMLSL2 */
            gen_helper_neon_negl_u32(tcg_res, tcg_res);
            /* fall through */
        case 0x9: /* SQDMLAL, SQDMLAL2 */
        {
            TCGv_i64 tcg_op3 = tcg_temp_new_i64();
            read_vec_element(s, tcg_op3, rd, 0, MO_32);
            gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
                                              tcg_res, tcg_op3);
            tcg_temp_free_i64(tcg_op3);
            break;
        }
        default:
            g_assert_not_reached();
        }

        /* Scalar writes zero the upper bits of the result register. */
        tcg_gen_ext32u_i64(tcg_res, tcg_res);
        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    }
}
9235
9236static void handle_3same_64(DisasContext *s, int opcode, bool u,
9237                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
9238{
9239    /* Handle 64x64->64 opcodes which are shared between the scalar
9240     * and vector 3-same groups. We cover every opcode where size == 3
9241     * is valid in either the three-reg-same (integer, not pairwise)
9242     * or scalar-three-reg-same groups.
9243     */
9244    TCGCond cond;
9245
9246    switch (opcode) {
9247    case 0x1: /* SQADD */
9248        if (u) {
9249            gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9250        } else {
9251            gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9252        }
9253        break;
9254    case 0x5: /* SQSUB */
9255        if (u) {
9256            gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9257        } else {
9258            gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9259        }
9260        break;
9261    case 0x6: /* CMGT, CMHI */
9262        /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
9263         * We implement this using setcond (test) and then negating.
9264         */
9265        cond = u ? TCG_COND_GTU : TCG_COND_GT;
9266    do_cmop:
9267        tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
9268        tcg_gen_neg_i64(tcg_rd, tcg_rd);
9269        break;
9270    case 0x7: /* CMGE, CMHS */
9271        cond = u ? TCG_COND_GEU : TCG_COND_GE;
9272        goto do_cmop;
9273    case 0x11: /* CMTST, CMEQ */
9274        if (u) {
9275            cond = TCG_COND_EQ;
9276            goto do_cmop;
9277        }
9278        gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
9279        break;
9280    case 0x8: /* SSHL, USHL */
9281        if (u) {
9282            gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
9283        } else {
9284            gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
9285        }
9286        break;
9287    case 0x9: /* SQSHL, UQSHL */
9288        if (u) {
9289            gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9290        } else {
9291            gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9292        }
9293        break;
9294    case 0xa: /* SRSHL, URSHL */
9295        if (u) {
9296            gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
9297        } else {
9298            gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
9299        }
9300        break;
9301    case 0xb: /* SQRSHL, UQRSHL */
9302        if (u) {
9303            gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9304        } else {
9305            gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
9306        }
9307        break;
9308    case 0x10: /* ADD, SUB */
9309        if (u) {
9310            tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
9311        } else {
9312            tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
9313        }
9314        break;
9315    default:
9316        g_assert_not_reached();
9317    }
9318}
9319
/* Handle the 3-same-operands float operations; shared by the scalar
 * and vector encodings. The caller must filter out any encodings
 * not allocated for the encoding it is dealing with.
 *
 * @size: non-zero selects double-precision elements, zero single
 * @elements: number of elements to process; 1 indicates the scalar
 *            form (the high bits of the result register get cleared)
 * @fpopcode: combined opcode value identifying the operation
 * @rd, @rn, @rm: destination and source register numbers
 */
static void handle_3same_float(DisasContext *s, int size, int elements,
                               int fpopcode, int rd, int rn, int rm)
{
    int pass;
    /* All ops in this group use the standard FPCR-governed FP status */
    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);

    /* One iteration per element (a single pass for the scalar form) */
    for (pass = 0; pass < elements; pass++) {
        if (size) {
            /* Double */
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negd(tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                /* Accumulate into the existing destination element */
                read_vec_element(s, tcg_res, rd, pass, MO_64);
                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                /* No dedicated helper: computed as abs(op1 - op2) */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_absd(tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        } else {
            /* Single */
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negs(tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                /* Accumulate into the existing destination element */
                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                /* No dedicated helper: computed as abs(op1 - op2) */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_abss(tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            if (elements == 1) {
                /* scalar single so clear high part */
                TCGv_i64 tcg_tmp = tcg_temp_new_i64();

                tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
                tcg_temp_free_i64(tcg_tmp);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }
    }

    tcg_temp_free_ptr(fpst);

    /* Second argument is true iff more than 64 bits of result were written */
    clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
}
9507
/* AdvSIMD scalar three same
 *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 */
static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    TCGv_i64 tcg_rd;

    if (opcode >= 0x18) {
        /* Floating point: U, size[1] and opcode indicate operation */
        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
        switch (fpopcode) {
        case 0x1b: /* FMULX */
        case 0x1f: /* FRECPS */
        case 0x3f: /* FRSQRTS */
        case 0x5d: /* FACGE */
        case 0x7d: /* FACGT */
        case 0x1c: /* FCMEQ */
        case 0x5c: /* FCMGE */
        case 0x7c: /* FCMGT */
        case 0x7a: /* FABD */
            break;
        default:
            unallocated_encoding(s);
            return;
        }

        if (!fp_access_check(s)) {
            return;
        }

        /* size[0] selects double vs single precision; elements == 1
         * requests the scalar form of the shared 3-same-float handler.
         */
        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
        return;
    }

    /* Integer ops: validate the size field for each opcode */
    switch (opcode) {
    case 0x1: /* SQADD, UQADD */
    case 0x5: /* SQSUB, UQSUB */
    case 0x9: /* SQSHL, UQSHL */
    case 0xb: /* SQRSHL, UQRSHL */
        /* all element sizes valid */
        break;
    case 0x8: /* SSHL, USHL */
    case 0xa: /* SRSHL, URSHL */
    case 0x6: /* CMGT, CMHI */
    case 0x7: /* CMGE, CMHS */
    case 0x11: /* CMTST, CMEQ */
    case 0x10: /* ADD, SUB (vector) */
        /* these scalar forms exist only for 64-bit elements */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x16: /* SQDMULH, SQRDMULH (vector) */
        /* only 16-bit and 32-bit element sizes are allocated */
        if (size != 1 && size != 2) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_rd = tcg_temp_new_i64();

    if (size == 3) {
        /* 64-bit ops go through the shared 64x64->64 handler */
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rm = read_fp_dreg(s, rm);

        handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
        tcg_temp_free_i64(tcg_rn);
        tcg_temp_free_i64(tcg_rm);
    } else {
        /* Do a single operation on the lowest element in the vector.
         * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
         * no side effects for all these operations.
         * OPTME: special-purpose helpers would avoid doing some
         * unnecessary work in the helper for the 8 and 16 bit cases.
         */
        NeonGenTwoOpEnvFn *genenvfn;
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rm = tcg_temp_new_i32();
        TCGv_i32 tcg_rd32 = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_rn, rn, 0, size);
        read_vec_element_i32(s, tcg_rm, rm, 0, size);

        /* Select the helper from per-opcode [size][u] tables */
        switch (opcode) {
        case 0x1: /* SQADD, UQADD */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
                { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
                { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x5: /* SQSUB, UQSUB */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
                { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
                { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x9: /* SQSHL, UQSHL */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
                { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
                { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0xb: /* SQRSHL, UQRSHL */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
                { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
                { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x16: /* SQDMULH, SQRDMULH */
        {
            /* here u selects plain vs rounding doubling multiply */
            static NeonGenTwoOpEnvFn * const fns[2][2] = {
                { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
                { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
            };
            assert(size == 1 || size == 2);
            genenvfn = fns[size - 1][u];
            break;
        }
        default:
            g_assert_not_reached();
        }

        genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
        tcg_temp_free_i32(tcg_rd32);
        tcg_temp_free_i32(tcg_rn);
        tcg_temp_free_i32(tcg_rm);
    }

    write_fp_dreg(s, rd, tcg_rd);

    tcg_temp_free_i64(tcg_rd);
}
9673
9674/* AdvSIMD scalar three same FP16
9675 *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
9676 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9677 * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
9678 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9679 * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9680 * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9681 */
9682static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9683                                                  uint32_t insn)
9684{
9685    int rd = extract32(insn, 0, 5);
9686    int rn = extract32(insn, 5, 5);
9687    int opcode = extract32(insn, 11, 3);
9688    int rm = extract32(insn, 16, 5);
9689    bool u = extract32(insn, 29, 1);
9690    bool a = extract32(insn, 23, 1);
9691    int fpopcode = opcode | (a << 3) |  (u << 4);
9692    TCGv_ptr fpst;
9693    TCGv_i32 tcg_op1;
9694    TCGv_i32 tcg_op2;
9695    TCGv_i32 tcg_res;
9696
9697    switch (fpopcode) {
9698    case 0x03: /* FMULX */
9699    case 0x04: /* FCMEQ (reg) */
9700    case 0x07: /* FRECPS */
9701    case 0x0f: /* FRSQRTS */
9702    case 0x14: /* FCMGE (reg) */
9703    case 0x15: /* FACGE */
9704    case 0x1a: /* FABD */
9705    case 0x1c: /* FCMGT (reg) */
9706    case 0x1d: /* FACGT */
9707        break;
9708    default:
9709        unallocated_encoding(s);
9710        return;
9711    }
9712
9713    if (!dc_isar_feature(aa64_fp16, s)) {
9714        unallocated_encoding(s);
9715    }
9716
9717    if (!fp_access_check(s)) {
9718        return;
9719    }
9720
9721    fpst = fpstatus_ptr(FPST_FPCR_F16);
9722
9723    tcg_op1 = read_fp_hreg(s, rn);
9724    tcg_op2 = read_fp_hreg(s, rm);
9725    tcg_res = tcg_temp_new_i32();
9726
9727    switch (fpopcode) {
9728    case 0x03: /* FMULX */
9729        gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9730        break;
9731    case 0x04: /* FCMEQ (reg) */
9732        gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9733        break;
9734    case 0x07: /* FRECPS */
9735        gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9736        break;
9737    case 0x0f: /* FRSQRTS */
9738        gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9739        break;
9740    case 0x14: /* FCMGE (reg) */
9741        gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9742        break;
9743    case 0x15: /* FACGE */
9744        gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9745        break;
9746    case 0x1a: /* FABD */
9747        gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
9748        tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9749        break;
9750    case 0x1c: /* FCMGT (reg) */
9751        gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9752        break;
9753    case 0x1d: /* FACGT */
9754        gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9755        break;
9756    default:
9757        g_assert_not_reached();
9758    }
9759
9760    write_fp_sreg(s, rd, tcg_res);
9761
9762
9763    tcg_temp_free_i32(tcg_res);
9764    tcg_temp_free_i32(tcg_op1);
9765    tcg_temp_free_i32(tcg_op2);
9766    tcg_temp_free_ptr(fpst);
9767}
9768
9769/* AdvSIMD scalar three same extra
9770 *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
9771 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9772 * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
9773 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9774 */
9775static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
9776                                                   uint32_t insn)
9777{
9778    int rd = extract32(insn, 0, 5);
9779    int rn = extract32(insn, 5, 5);
9780    int opcode = extract32(insn, 11, 4);
9781    int rm = extract32(insn, 16, 5);
9782    int size = extract32(insn, 22, 2);
9783    bool u = extract32(insn, 29, 1);
9784    TCGv_i32 ele1, ele2, ele3;
9785    TCGv_i64 res;
9786    bool feature;
9787
9788    switch (u * 16 + opcode) {
9789    case 0x10: /* SQRDMLAH (vector) */
9790    case 0x11: /* SQRDMLSH (vector) */
9791        if (size != 1 && size != 2) {
9792            unallocated_encoding(s);
9793            return;
9794        }
9795        feature = dc_isar_feature(aa64_rdm, s);
9796        break;
9797    default:
9798        unallocated_encoding(s);
9799        return;
9800    }
9801    if (!feature) {
9802        unallocated_encoding(s);
9803        return;
9804    }
9805    if (!fp_access_check(s)) {
9806        return;
9807    }
9808
9809    /* Do a single operation on the lowest element in the vector.
9810     * We use the standard Neon helpers and rely on 0 OP 0 == 0
9811     * with no side effects for all these operations.
9812     * OPTME: special-purpose helpers would avoid doing some
9813     * unnecessary work in the helper for the 16 bit cases.
9814     */
9815    ele1 = tcg_temp_new_i32();
9816    ele2 = tcg_temp_new_i32();
9817    ele3 = tcg_temp_new_i32();
9818
9819    read_vec_element_i32(s, ele1, rn, 0, size);
9820    read_vec_element_i32(s, ele2, rm, 0, size);
9821    read_vec_element_i32(s, ele3, rd, 0, size);
9822
9823    switch (opcode) {
9824    case 0x0: /* SQRDMLAH */
9825        if (size == 1) {
9826            gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
9827        } else {
9828            gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
9829        }
9830        break;
9831    case 0x1: /* SQRDMLSH */
9832        if (size == 1) {
9833            gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
9834        } else {
9835            gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
9836        }
9837        break;
9838    default:
9839        g_assert_not_reached();
9840    }
9841    tcg_temp_free_i32(ele1);
9842    tcg_temp_free_i32(ele2);
9843
9844    res = tcg_temp_new_i64();
9845    tcg_gen_extu_i32_i64(res, ele3);
9846    tcg_temp_free_i32(ele3);
9847
9848    write_fp_dreg(s, rd, res);
9849    tcg_temp_free_i64(res);
9850}
9851
static void handle_2misc_64(DisasContext *s, int opcode, bool u,
                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
                            TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
{
    /* Handle 64->64 opcodes which are shared between the scalar and
     * vector 2-reg-misc groups. We cover every integer opcode where size == 3
     * is valid in either group and also the double-precision fp ops.
     * The caller only need provide tcg_rmode and tcg_fpstatus if the op
     * requires them.
     */
    TCGCond cond;

    switch (opcode) {
    case 0x4: /* CLS, CLZ */
        if (u) {
            /* CLZ; third argument is the result for a zero input */
            tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
        } else {
            /* CLS: count leading sign bits */
            tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
        }
        break;
    case 0x5: /* NOT */
        /* This opcode is shared with CNT and RBIT but we have earlier
         * enforced that size == 3 if and only if this is the NOT insn.
         */
        tcg_gen_not_i64(tcg_rd, tcg_rn);
        break;
    case 0x7: /* SQABS, SQNEG */
        if (u) {
            gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
        } else {
            gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
        }
        break;
    case 0xa: /* CMLT */
        /* 64 bit integer comparison against zero, result is
         * test ? (2^64 - 1) : 0. We implement this using setcond
         * (producing 0 or 1) and then negating.
         */
        cond = TCG_COND_LT;
    do_cmop:
        tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x8: /* CMGT, CMGE */
        cond = u ? TCG_COND_GE : TCG_COND_GT;
        goto do_cmop;
    case 0x9: /* CMEQ, CMLE */
        cond = u ? TCG_COND_LE : TCG_COND_EQ;
        goto do_cmop;
    case 0xb: /* ABS, NEG */
        if (u) {
            tcg_gen_neg_i64(tcg_rd, tcg_rn);
        } else {
            tcg_gen_abs_i64(tcg_rd, tcg_rn);
        }
        break;
    case 0x2f: /* FABS */
        gen_helper_vfp_absd(tcg_rd, tcg_rn);
        break;
    case 0x6f: /* FNEG */
        gen_helper_vfp_negd(tcg_rd, tcg_rn);
        break;
    case 0x7f: /* FSQRT */
        gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
        break;
    case 0x1a: /* FCVTNS */
    case 0x1b: /* FCVTMS */
    case 0x1c: /* FCVTAS */
    case 0x3a: /* FCVTPS */
    case 0x3b: /* FCVTZS */
    {
        /* Signed FP->int conversions; these differ only in the rounding
         * mode, which the caller has set up via tcg_rmode. The zero
         * shift means no fixed-point scaling.
         */
        TCGv_i32 tcg_shift = tcg_const_i32(0);
        gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
        tcg_temp_free_i32(tcg_shift);
        break;
    }
    case 0x5a: /* FCVTNU */
    case 0x5b: /* FCVTMU */
    case 0x5c: /* FCVTAU */
    case 0x7a: /* FCVTPU */
    case 0x7b: /* FCVTZU */
    {
        /* Unsigned counterparts of the conversions above */
        TCGv_i32 tcg_shift = tcg_const_i32(0);
        gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
        tcg_temp_free_i32(tcg_shift);
        break;
    }
    case 0x18: /* FRINTN */
    case 0x19: /* FRINTM */
    case 0x38: /* FRINTP */
    case 0x39: /* FRINTZ */
    case 0x58: /* FRINTA */
    case 0x79: /* FRINTI */
        /* Round to integral; the variants differ only in the rounding
         * mode, which the caller has set up via tcg_rmode.
         */
        gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
        break;
    case 0x59: /* FRINTX */
        /* FRINTX uses the "exact" round-to-integral helper */
        gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
        break;
    case 0x1e: /* FRINT32Z */
    case 0x5e: /* FRINT32X */
        gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
        break;
    case 0x1f: /* FRINT64Z */
    case 0x5f: /* FRINT64X */
        gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
        break;
    default:
        g_assert_not_reached();
    }
}
9962
/* Handle the floating-point compare-against-zero instructions
 * (FCMGT/FCMGE/FCMEQ/FCMLE/FCMLT with a zero operand), shared by the
 * scalar and vector 2-reg-misc groups for half, single and double
 * precision. FCMLT/FCMLE have no dedicated helpers: they are
 * implemented by swapping the operand order of the GT/GE helpers.
 */
static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
                                   bool is_scalar, bool is_u, bool is_q,
                                   int size, int rn, int rd)
{
    bool is_double = (size == MO_64);
    TCGv_ptr fpst;

    if (!fp_access_check(s)) {
        return;
    }

    /* Half-precision ops use the FP16 flavour of the FP status */
    fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    if (is_double) {
        TCGv_i64 tcg_op = tcg_temp_new_i64();
        TCGv_i64 tcg_zero = tcg_const_i64(0);
        TCGv_i64 tcg_res = tcg_temp_new_i64();
        NeonGenTwoDoubleOpFn *genfn;
        /* swap == true: put the zero on the left to get LT/LE from GT/GE */
        bool swap = false;
        int pass;

        switch (opcode) {
        case 0x2e: /* FCMLT (zero) */
            swap = true;
            /* fallthrough */
        case 0x2c: /* FCMGT (zero) */
            genfn = gen_helper_neon_cgt_f64;
            break;
        case 0x2d: /* FCMEQ (zero) */
            genfn = gen_helper_neon_ceq_f64;
            break;
        case 0x6d: /* FCMLE (zero) */
            swap = true;
            /* fall through */
        case 0x6c: /* FCMGE (zero) */
            genfn = gen_helper_neon_cge_f64;
            break;
        default:
            g_assert_not_reached();
        }

        /* One 64-bit pass for scalar, two for the vector form */
        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
            read_vec_element(s, tcg_op, rn, pass, MO_64);
            if (swap) {
                genfn(tcg_res, tcg_zero, tcg_op, fpst);
            } else {
                genfn(tcg_res, tcg_op, tcg_zero, fpst);
            }
            write_vec_element(s, tcg_res, rd, pass, MO_64);
        }
        tcg_temp_free_i64(tcg_res);
        tcg_temp_free_i64(tcg_zero);
        tcg_temp_free_i64(tcg_op);

        clear_vec_high(s, !is_scalar, rd);
    } else {
        TCGv_i32 tcg_op = tcg_temp_new_i32();
        TCGv_i32 tcg_zero = tcg_const_i32(0);
        TCGv_i32 tcg_res = tcg_temp_new_i32();
        NeonGenTwoSingleOpFn *genfn;
        /* swap == true: put the zero on the left to get LT/LE from GT/GE */
        bool swap = false;
        int pass, maxpasses;

        if (size == MO_16) {
            switch (opcode) {
            case 0x2e: /* FCMLT (zero) */
                swap = true;
                /* fall through */
            case 0x2c: /* FCMGT (zero) */
                genfn = gen_helper_advsimd_cgt_f16;
                break;
            case 0x2d: /* FCMEQ (zero) */
                genfn = gen_helper_advsimd_ceq_f16;
                break;
            case 0x6d: /* FCMLE (zero) */
                swap = true;
                /* fall through */
            case 0x6c: /* FCMGE (zero) */
                genfn = gen_helper_advsimd_cge_f16;
                break;
            default:
                g_assert_not_reached();
            }
        } else {
            switch (opcode) {
            case 0x2e: /* FCMLT (zero) */
                swap = true;
                /* fall through */
            case 0x2c: /* FCMGT (zero) */
                genfn = gen_helper_neon_cgt_f32;
                break;
            case 0x2d: /* FCMEQ (zero) */
                genfn = gen_helper_neon_ceq_f32;
                break;
            case 0x6d: /* FCMLE (zero) */
                swap = true;
                /* fall through */
            case 0x6c: /* FCMGE (zero) */
                genfn = gen_helper_neon_cge_f32;
                break;
            default:
                g_assert_not_reached();
            }
        }

        if (is_scalar) {
            maxpasses = 1;
        } else {
            /* Element count: 64 or 128 bit vector divided by element size */
            int vector_size = 8 << is_q;
            maxpasses = vector_size >> size;
        }

        for (pass = 0; pass < maxpasses; pass++) {
            read_vec_element_i32(s, tcg_op, rn, pass, size);
            if (swap) {
                genfn(tcg_res, tcg_zero, tcg_op, fpst);
            } else {
                genfn(tcg_res, tcg_op, tcg_zero, fpst);
            }
            if (is_scalar) {
                write_fp_sreg(s, rd, tcg_res);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, size);
            }
        }
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_zero);
        tcg_temp_free_i32(tcg_op);
        if (!is_scalar) {
            clear_vec_high(s, is_q, rd);
        }
    }

    tcg_temp_free_ptr(fpst);
}
10098
/*
 * Handle the 2-reg-misc reciprocal/reciprocal-sqrt estimate group:
 * URECPE, FRECPE, FRECPX and FRSQRTE (scalar and vector forms).
 * The per-element arithmetic is done by helper functions; this routine
 * just selects the helper and iterates over the elements (a single
 * pass for the scalar forms).
 */
static void handle_2misc_reciprocal(DisasContext *s, int opcode,
                                    bool is_scalar, bool is_u, bool is_q,
                                    int size, int rn, int rd)
{
    bool is_double = (size == 3);
    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);

    if (is_double) {
        TCGv_i64 tcg_op = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();
        int pass;

        /* One pass for the scalar form, two 64-bit lanes otherwise */
        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
            read_vec_element(s, tcg_op, rn, pass, MO_64);
            switch (opcode) {
            case 0x3d: /* FRECPE */
                gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
                break;
            case 0x3f: /* FRECPX */
                gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
                break;
            case 0x7d: /* FRSQRTE */
                gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
                break;
            default:
                g_assert_not_reached();
            }
            write_vec_element(s, tcg_res, rd, pass, MO_64);
        }
        tcg_temp_free_i64(tcg_res);
        tcg_temp_free_i64(tcg_op);
        /* Zero the unwritten upper portion of Vd */
        clear_vec_high(s, !is_scalar, rd);
    } else {
        TCGv_i32 tcg_op = tcg_temp_new_i32();
        TCGv_i32 tcg_res = tcg_temp_new_i32();
        int pass, maxpasses;

        if (is_scalar) {
            maxpasses = 1;
        } else {
            /* 32-bit elements: four passes for Q, two for the 64-bit form */
            maxpasses = is_q ? 4 : 2;
        }

        for (pass = 0; pass < maxpasses; pass++) {
            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);

            switch (opcode) {
            case 0x3c: /* URECPE */
                gen_helper_recpe_u32(tcg_res, tcg_op);
                break;
            case 0x3d: /* FRECPE */
                gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
                break;
            case 0x3f: /* FRECPX */
                gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
                break;
            case 0x7d: /* FRSQRTE */
                gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            if (is_scalar) {
                /* Scalar: single result written as an S register */
                write_fp_sreg(s, rd, tcg_res);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }
        }
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_op);
        if (!is_scalar) {
            clear_vec_high(s, is_q, rd);
        }
    }
    tcg_temp_free_ptr(fpst);
}
10176
/*
 * Handle 2-reg-misc narrowing ops: XTN, SQXTUN, SQXTN/UQXTN, FCVTN,
 * BFCVTN and FCVTXN (plus their "2" high-half variants).
 */
static void handle_2misc_narrow(DisasContext *s, bool scalar,
                                int opcode, bool u, bool is_q,
                                int size, int rn, int rd)
{
    /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
     * in the source becomes a size element in the destination).
     */
    int pass;
    TCGv_i32 tcg_res[2];
    /* The "2" (is_q) variants deposit into the high half of Vd */
    int destelt = is_q ? 2 : 0;
    int passes = scalar ? 1 : 2;

    if (scalar) {
        /* Scalar yields one result; the other 32-bit word written is zero */
        tcg_res[1] = tcg_const_i32(0);
    }

    for (pass = 0; pass < passes; pass++) {
        TCGv_i64 tcg_op = tcg_temp_new_i64();
        NeonGenNarrowFn *genfn = NULL;
        NeonGenNarrowEnvFn *genenvfn = NULL;

        if (scalar) {
            read_vec_element(s, tcg_op, rn, pass, size + 1);
        } else {
            read_vec_element(s, tcg_op, rn, pass, MO_64);
        }
        tcg_res[pass] = tcg_temp_new_i32();

        switch (opcode) {
        case 0x12: /* XTN, SQXTUN */
        {
            static NeonGenNarrowFn * const xtnfns[3] = {
                gen_helper_neon_narrow_u8,
                gen_helper_neon_narrow_u16,
                tcg_gen_extrl_i64_i32,
            };
            static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
                gen_helper_neon_unarrow_sat8,
                gen_helper_neon_unarrow_sat16,
                gen_helper_neon_unarrow_sat32,
            };
            if (u) {
                genenvfn = sqxtunfns[size];
            } else {
                genfn = xtnfns[size];
            }
            break;
        }
        case 0x14: /* SQXTN, UQXTN */
        {
            static NeonGenNarrowEnvFn * const fns[3][2] = {
                { gen_helper_neon_narrow_sat_s8,
                  gen_helper_neon_narrow_sat_u8 },
                { gen_helper_neon_narrow_sat_s16,
                  gen_helper_neon_narrow_sat_u16 },
                { gen_helper_neon_narrow_sat_s32,
                  gen_helper_neon_narrow_sat_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x16: /* FCVTN, FCVTN2 */
            /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
            if (size == 2) {
                gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
            } else {
                /* Split the 64-bit input into two singles and narrow
                 * each to half precision, then repack the pair.
                 */
                TCGv_i32 tcg_lo = tcg_temp_new_i32();
                TCGv_i32 tcg_hi = tcg_temp_new_i32();
                TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
                TCGv_i32 ahp = get_ahp_flag();

                tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
                gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
                gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
                tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
                tcg_temp_free_i32(tcg_lo);
                tcg_temp_free_i32(tcg_hi);
                tcg_temp_free_ptr(fpst);
                tcg_temp_free_i32(ahp);
            }
            break;
        case 0x36: /* BFCVTN, BFCVTN2 */
            {
                TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
                gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
                tcg_temp_free_ptr(fpst);
            }
            break;
        case 0x56:  /* FCVTXN, FCVTXN2 */
            /* 64 bit to 32 bit float conversion
             * with von Neumann rounding (round to odd)
             */
            assert(size == 2);
            gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
            break;
        default:
            g_assert_not_reached();
        }

        /* Integer narrowing ops set exactly one of these; the FP
         * conversions above have already produced tcg_res[pass].
         */
        if (genfn) {
            genfn(tcg_res[pass], tcg_op);
        } else if (genenvfn) {
            genenvfn(tcg_res[pass], cpu_env, tcg_op);
        }

        tcg_temp_free_i64(tcg_op);
    }

    /* Write results only after all inputs have been read (rd may == rn) */
    for (pass = 0; pass < 2; pass++) {
        write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
        tcg_temp_free_i32(tcg_res[pass]);
    }
    clear_vec_high(s, is_q, rd);
}
10291
/* Remaining saturating accumulating ops: USQADD (unsigned value plus
 * signed accumulator) and SUQADD (signed value plus unsigned
 * accumulator), in both scalar and vector forms.
 */
static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
                                bool is_q, int size, int rn, int rd)
{
    bool is_double = (size == 3);

    if (is_double) {
        TCGv_i64 tcg_rn = tcg_temp_new_i64();
        TCGv_i64 tcg_rd = tcg_temp_new_i64();
        int pass;

        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
            read_vec_element(s, tcg_rn, rn, pass, MO_64);
            read_vec_element(s, tcg_rd, rd, pass, MO_64);

            if (is_u) { /* USQADD */
                gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
            } else { /* SUQADD */
                gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
            }
            write_vec_element(s, tcg_rd, rd, pass, MO_64);
        }
        tcg_temp_free_i64(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
        /* Zero the unwritten upper portion of Vd */
        clear_vec_high(s, !is_scalar, rd);
    } else {
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rd = tcg_temp_new_i32();
        int pass, maxpasses;

        if (is_scalar) {
            maxpasses = 1;
        } else {
            maxpasses = is_q ? 4 : 2;
        }

        for (pass = 0; pass < maxpasses; pass++) {
            if (is_scalar) {
                /* Scalar: read just the sub-word element */
                read_vec_element_i32(s, tcg_rn, rn, pass, size);
                read_vec_element_i32(s, tcg_rd, rd, pass, size);
            } else {
                /* Vector: 32 bits per pass */
                read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
                read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
            }

            if (is_u) { /* USQADD */
                switch (size) {
                case 0:
                    gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                case 1:
                    gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                case 2:
                    gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                default:
                    g_assert_not_reached();
                }
            } else { /* SUQADD */
                switch (size) {
                case 0:
                    gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                case 1:
                    gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                case 2:
                    gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                default:
                    g_assert_not_reached();
                }
            }

            if (is_scalar) {
                /* Scalar: zero the whole low 64 bits of Vd first, then
                 * deposit the (at most 32-bit) result below.
                 */
                TCGv_i64 tcg_zero = tcg_const_i64(0);
                write_vec_element(s, tcg_zero, rd, 0, MO_64);
                tcg_temp_free_i64(tcg_zero);
            }
            write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
        }
        tcg_temp_free_i32(tcg_rd);
        tcg_temp_free_i32(tcg_rn);
        clear_vec_high(s, is_q, rd);
    }
}
10379
/* AdvSIMD scalar two reg misc
 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 *
 * Decode and emit code for the scalar two-register-miscellaneous group.
 * Most work is delegated to the shared handle_2misc_* helpers; the FCVT*
 * conversions are emitted inline below with an explicit rounding mode.
 */
static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 12, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    bool is_fcvt = false;
    int rmode;
    TCGv_i32 tcg_rmode;
    TCGv_ptr tcg_fpstatus;

    switch (opcode) {
    case 0x3: /* USQADD / SUQADD*/
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_satacc(s, true, u, false, size, rn, rd);
        return;
    case 0x7: /* SQABS / SQNEG */
        break;
    case 0xa: /* CMLT */
        if (u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x8: /* CMGT, CMGE */
    case 0x9: /* CMEQ, CMLE */
    case 0xb: /* ABS, NEG */
        /* Scalar compares and ABS/NEG exist only for 64-bit elements */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x12: /* SQXTUN */
        if (!u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x14: /* SQXTN, UQXTN */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
        return;
    case 0xc ... 0xf:
    case 0x16 ... 0x1d:
    case 0x1f:
        /* Floating point: U, size[1] and opcode indicate operation;
         * size[0] indicates single or double precision.
         */
        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
        size = extract32(size, 0, 1) ? 3 : 2;
        switch (opcode) {
        case 0x2c: /* FCMGT (zero) */
        case 0x2d: /* FCMEQ (zero) */
        case 0x2e: /* FCMLT (zero) */
        case 0x6c: /* FCMGE (zero) */
        case 0x6d: /* FCMLE (zero) */
            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
            return;
        case 0x1d: /* SCVTF */
        case 0x5d: /* UCVTF */
        {
            bool is_signed = (opcode == 0x1d);
            if (!fp_access_check(s)) {
                return;
            }
            handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
            return;
        }
        case 0x3d: /* FRECPE */
        case 0x3f: /* FRECPX */
        case 0x7d: /* FRSQRTE */
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
            return;
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
            is_fcvt = true;
            /* Rounding mode is encoded in opcode bits [5] and [0] */
            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
            break;
        case 0x1c: /* FCVTAS */
        case 0x5c: /* FCVTAU */
            /* TIEAWAY doesn't fit in the usual rounding mode encoding */
            is_fcvt = true;
            rmode = FPROUNDING_TIEAWAY;
            break;
        case 0x56: /* FCVTXN, FCVTXN2 */
            /* Only valid for double-to-single (size == 3 before remap) */
            if (size == 2) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
            return;
        default:
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (is_fcvt) {
        /* Switch FPCR to the requested rounding mode; set_rmode writes
         * the previous mode back into tcg_rmode so we can restore it.
         */
        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
        tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
    } else {
        tcg_rmode = NULL;
        tcg_fpstatus = NULL;
    }

    if (size == 3) {
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rd = tcg_temp_new_i64();

        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
        write_fp_dreg(s, rd, tcg_rd);
        tcg_temp_free_i64(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
    } else {
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rd = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_rn, rn, 0, size);

        switch (opcode) {
        case 0x7: /* SQABS, SQNEG */
        {
            NeonGenOneOpEnvFn *genfn;
            static NeonGenOneOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
                { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
                { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
            };
            genfn = fns[size][u];
            genfn(tcg_rd, cpu_env, tcg_rn);
            break;
        }
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x1c: /* FCVTAS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        {
            /* Shift of zero: plain float-to-int conversion */
            TCGv_i32 tcg_shift = tcg_const_i32(0);
            gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
            tcg_temp_free_i32(tcg_shift);
            break;
        }
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x5c: /* FCVTAU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
        {
            TCGv_i32 tcg_shift = tcg_const_i32(0);
            gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
            tcg_temp_free_i32(tcg_shift);
            break;
        }
        default:
            g_assert_not_reached();
        }

        write_fp_sreg(s, rd, tcg_rd);
        tcg_temp_free_i32(tcg_rd);
        tcg_temp_free_i32(tcg_rn);
    }

    if (is_fcvt) {
        /* Restore the original FPCR rounding mode saved above */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
        tcg_temp_free_i32(tcg_rmode);
        tcg_temp_free_ptr(tcg_fpstatus);
    }
}
10585
/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
                                 int immh, int immb, int opcode, int rn, int rd)
{
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    /* Decode yields a shift amount in the range 1 .. esize */
    int shift = 2 * (8 << size) - immhb;
    GVecGen2iFn *gvec_fn;

    /* 64-bit element size (immh bit 3) requires the Q form */
    if (extract32(immh, 3, 1) && !is_q) {
        unallocated_encoding(s);
        return;
    }
    tcg_debug_assert(size <= 3);

    if (!fp_access_check(s)) {
        return;
    }

    switch (opcode) {
    case 0x02: /* SSRA / USRA (accumulate) */
        gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
        break;

    case 0x08: /* SRI */
        gvec_fn = gen_gvec_sri;
        break;

    case 0x00: /* SSHR / USHR */
        if (is_u) {
            if (shift == 8 << size) {
                /* Shift count the same size as element size produces zero.  */
                tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
                                     is_q ? 16 : 8, vec_full_reg_size(s), 0);
                return;
            }
            gvec_fn = tcg_gen_gvec_shri;
        } else {
            /* Shift count the same size as element size produces all sign.  */
            if (shift == 8 << size) {
                shift -= 1;
            }
            gvec_fn = tcg_gen_gvec_sari;
        }
        break;

    case 0x04: /* SRSHR / URSHR (rounding) */
        gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
        break;

    case 0x06: /* SRSRA / URSRA (accum + rounding) */
        gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
        break;

    default:
        g_assert_not_reached();
    }

    /* Expand as a gvec operation over the whole vector */
    gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
}
10646
10647/* SHL/SLI - Vector shift left */
10648static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10649                                 int immh, int immb, int opcode, int rn, int rd)
10650{
10651    int size = 32 - clz32(immh) - 1;
10652    int immhb = immh << 3 | immb;
10653    int shift = immhb - (8 << size);
10654
10655    /* Range of size is limited by decode: immh is a non-zero 4 bit field */
10656    assert(size >= 0 && size <= 3);
10657
10658    if (extract32(immh, 3, 1) && !is_q) {
10659        unallocated_encoding(s);
10660        return;
10661    }
10662
10663    if (!fp_access_check(s)) {
10664        return;
10665    }
10666
10667    if (insert) {
10668        gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
10669    } else {
10670        gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10671    }
10672}
10673
/* USHLL/SHLL - Vector shift left with widening */
static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
                                 int immh, int immb, int opcode, int rn, int rd)
{
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    int shift = immhb - (8 << size);
    int dsize = 64;
    int esize = 8 << size;
    int elements = dsize/esize;
    TCGv_i64 tcg_rn = new_tmp_a64(s);
    TCGv_i64 tcg_rd = new_tmp_a64(s);
    int i;

    /* 64-bit source elements cannot widen: unallocated */
    if (size >= 3) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* For the LL variants the store is larger than the load,
     * so if rd == rn we would overwrite parts of our input.
     * So load everything right now and use shifts in the main loop.
     */
    read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);

    for (i = 0; i < elements; i++) {
        /* Move element i into the low bits... */
        tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
        /* ...extend it from esize bits (signed if !is_u)... */
        ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
        /* ...then apply the left shift and store the widened element */
        tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
        write_vec_element(s, tcg_rd, rd, i, size + 1);
    }
}
10710
/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
                                 int immh, int immb, int opcode, int rn, int rd)
{
    int immhb = immh << 3 | immb;
    int size = 32 - clz32(immh) - 1;
    int dsize = 64;
    int esize = 8 << size;
    int elements = dsize/esize;
    int shift = (2 * esize) - immhb;
    /* opcode bit 0 distinguishes RSHRN (rounding) from SHRN */
    bool round = extract32(opcode, 0, 1);
    TCGv_i64 tcg_rn, tcg_rd, tcg_final;
    TCGv_i64 tcg_round;
    int i;

    /* immh bit 3 would mean 64-bit source elements: unallocated */
    if (extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_rn = tcg_temp_new_i64();
    tcg_rd = tcg_temp_new_i64();
    tcg_final = tcg_temp_new_i64();
    /* Accumulate the narrowed results into tcg_final, starting from the
     * current contents of the destination half being written.
     */
    read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);

    if (round) {
        uint64_t round_const = 1ULL << (shift - 1);
        tcg_round = tcg_const_i64(round_const);
    } else {
        tcg_round = NULL;
    }

    for (i = 0; i < elements; i++) {
        read_vec_element(s, tcg_rn, rn, i, size+1);
        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                                false, true, size+1, shift);

        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
    }

    /* The "2" (is_q) variant writes the high half of Vd */
    if (!is_q) {
        write_vec_element(s, tcg_final, rd, 0, MO_64);
    } else {
        write_vec_element(s, tcg_final, rd, 1, MO_64);
    }
    if (round) {
        tcg_temp_free_i64(tcg_round);
    }
    tcg_temp_free_i64(tcg_rn);
    tcg_temp_free_i64(tcg_rd);
    tcg_temp_free_i64(tcg_final);

    clear_vec_high(s, is_q, rd);
}
10769
10770
/* AdvSIMD shift by immediate
 *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
 * +---+---+---+-------------+------+------+--------+---+------+------+
 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
 * +---+---+---+-------------+------+------+--------+---+------+------+
 *
 * Top-level decode for the vector shift-by-immediate group; validates
 * the opcode and dispatches to the per-operation handlers.
 */
static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int immb = extract32(insn, 16, 3);
    int immh = extract32(insn, 19, 4);
    bool is_u = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);

    /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
    assert(immh != 0);

    switch (opcode) {
    case 0x08: /* SRI */
        /* SRI exists only in the U form */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x00: /* SSHR / USHR */
    case 0x02: /* SSRA / USRA (accumulate) */
    case 0x04: /* SRSHR / URSHR (rounding) */
    case 0x06: /* SRSRA / URSRA (accum + rounding) */
        handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x0a: /* SHL / SLI */
        handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x10: /* SHRN */
    case 0x11: /* RSHRN / SQRSHRUN */
        if (is_u) {
            handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
                                   opcode, rn, rd);
        } else {
            handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
        }
        break;
    case 0x12: /* SQSHRN / UQSHRN */
    case 0x13: /* SQRSHRN / UQRSHRN */
        handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
                               opcode, rn, rd);
        break;
    case 0x14: /* SSHLL / USHLL */
        handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x1c: /* SCVTF / UCVTF */
        handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
                                     opcode, rn, rd);
        break;
    case 0xc: /* SQSHLU */
        /* SQSHLU exists only in the U form */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
        break;
    case 0xe: /* SQSHL, UQSHL */
        handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
        break;
    case 0x1f: /* FCVTZS/ FCVTZU */
        handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
        return;
    default:
        unallocated_encoding(s);
        return;
    }
}
10845
10846/* Generate code to do a "long" addition or subtraction, ie one done in
10847 * TCGv_i64 on vector lanes twice the width specified by size.
10848 */
10849static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
10850                          TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
10851{
10852    static NeonGenTwo64OpFn * const fns[3][2] = {
10853        { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
10854        { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
10855        { tcg_gen_add_i64, tcg_gen_sub_i64 },
10856    };
10857    NeonGenTwo64OpFn *genfn;
10858    assert(size < 3);
10859
10860    genfn = fns[size][is_sub];
10861    genfn(tcg_res, tcg_op1, tcg_op2);
10862}
10863
10864static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
10865                                int opcode, int rd, int rn, int rm)
10866{
10867    /* 3-reg-different widening insns: 64 x 64 -> 128 */
10868    TCGv_i64 tcg_res[2];
10869    int pass, accop;
10870
10871    tcg_res[0] = tcg_temp_new_i64();
10872    tcg_res[1] = tcg_temp_new_i64();
10873
10874    /* Does this op do an adding accumulate, a subtracting accumulate,
10875     * or no accumulate at all?
10876     */
10877    switch (opcode) {
10878    case 5:
10879    case 8:
10880    case 9:
10881        accop = 1;
10882        break;
10883    case 10:
10884    case 11:
10885        accop = -1;
10886        break;
10887    default:
10888        accop = 0;
10889        break;
10890    }
10891
10892    if (accop != 0) {
10893        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
10894        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
10895    }
10896
10897    /* size == 2 means two 32x32->64 operations; this is worth special
10898     * casing because we can generally handle it inline.
10899     */
10900    if (size == 2) {
10901        for (pass = 0; pass < 2; pass++) {
10902            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10903            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10904            TCGv_i64 tcg_passres;
10905            MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
10906
10907            int elt = pass + is_q * 2;
10908
10909            read_vec_element(s, tcg_op1, rn, elt, memop);
10910            read_vec_element(s, tcg_op2, rm, elt, memop);
10911
10912            if (accop == 0) {
10913                tcg_passres = tcg_res[pass];
10914            } else {
10915                tcg_passres = tcg_temp_new_i64();
10916            }
10917
10918            switch (opcode) {
10919            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10920                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
10921                break;
10922            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10923                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
10924                break;
10925            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10926            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10927            {
10928                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
10929                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
10930
10931                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
10932                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
10933                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
10934                                    tcg_passres,
10935                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
10936                tcg_temp_free_i64(tcg_tmp1);
10937                tcg_temp_free_i64(tcg_tmp2);
10938                break;
10939            }
10940            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10941            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10942            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10943                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10944                break;
10945            case 9: /* SQDMLAL, SQDMLAL2 */
10946            case 11: /* SQDMLSL, SQDMLSL2 */
10947            case 13: /* SQDMULL, SQDMULL2 */
10948                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10949                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10950                                                  tcg_passres, tcg_passres);
10951                break;
10952            default:
10953                g_assert_not_reached();
10954            }
10955
10956            if (opcode == 9 || opcode == 11) {
10957                /* saturating accumulate ops */
10958                if (accop < 0) {
10959                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
10960                }
10961                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10962                                                  tcg_res[pass], tcg_passres);
10963            } else if (accop > 0) {
10964                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10965            } else if (accop < 0) {
10966                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10967            }
10968
10969            if (accop != 0) {
10970                tcg_temp_free_i64(tcg_passres);
10971            }
10972
10973            tcg_temp_free_i64(tcg_op1);
10974            tcg_temp_free_i64(tcg_op2);
10975        }
10976    } else {
10977        /* size 0 or 1, generally helper functions */
10978        for (pass = 0; pass < 2; pass++) {
10979            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10980            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10981            TCGv_i64 tcg_passres;
10982            int elt = pass + is_q * 2;
10983
10984            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
10985            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
10986
10987            if (accop == 0) {
10988                tcg_passres = tcg_res[pass];
10989            } else {
10990                tcg_passres = tcg_temp_new_i64();
10991            }
10992
10993            switch (opcode) {
10994            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10995            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10996            {
10997                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
10998                static NeonGenWidenFn * const widenfns[2][2] = {
10999                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
11000                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
11001                };
11002                NeonGenWidenFn *widenfn = widenfns[size][is_u];
11003
11004                widenfn(tcg_op2_64, tcg_op2);
11005                widenfn(tcg_passres, tcg_op1);
11006                gen_neon_addl(size, (opcode == 2), tcg_passres,
11007                              tcg_passres, tcg_op2_64);
11008                tcg_temp_free_i64(tcg_op2_64);
11009                break;
11010            }
11011            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
11012            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
11013                if (size == 0) {
11014                    if (is_u) {
11015                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
11016                    } else {
11017                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
11018                    }
11019                } else {
11020                    if (is_u) {
11021                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
11022                    } else {
11023                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
11024                    }
11025                }
11026                break;
11027            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
11028            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
11029            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
11030                if (size == 0) {
11031                    if (is_u) {
11032                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
11033                    } else {
11034                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
11035                    }
11036                } else {
11037                    if (is_u) {
11038                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
11039                    } else {
11040                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
11041                    }
11042                }
11043                break;
11044            case 9: /* SQDMLAL, SQDMLAL2 */
11045            case 11: /* SQDMLSL, SQDMLSL2 */
11046            case 13: /* SQDMULL, SQDMULL2 */
11047                assert(size == 1);
11048                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
11049                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
11050                                                  tcg_passres, tcg_passres);
11051                break;
11052            default:
11053                g_assert_not_reached();
11054            }
11055            tcg_temp_free_i32(tcg_op1);
11056            tcg_temp_free_i32(tcg_op2);
11057
11058            if (accop != 0) {
11059                if (opcode == 9 || opcode == 11) {
11060                    /* saturating accumulate ops */
11061                    if (accop < 0) {
11062                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
11063                    }
11064                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
11065                                                      tcg_res[pass],
11066                                                      tcg_passres);
11067                } else {
11068                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
11069                                  tcg_res[pass], tcg_passres);
11070                }
11071                tcg_temp_free_i64(tcg_passres);
11072            }
11073        }
11074    }
11075
11076    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
11077    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
11078    tcg_temp_free_i64(tcg_res[0]);
11079    tcg_temp_free_i64(tcg_res[1]);
11080}
11081
11082static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
11083                            int opcode, int rd, int rn, int rm)
11084{
11085    TCGv_i64 tcg_res[2];
11086    int part = is_q ? 2 : 0;
11087    int pass;
11088
11089    for (pass = 0; pass < 2; pass++) {
11090        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11091        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11092        TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
11093        static NeonGenWidenFn * const widenfns[3][2] = {
11094            { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
11095            { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
11096            { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
11097        };
11098        NeonGenWidenFn *widenfn = widenfns[size][is_u];
11099
11100        read_vec_element(s, tcg_op1, rn, pass, MO_64);
11101        read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
11102        widenfn(tcg_op2_wide, tcg_op2);
11103        tcg_temp_free_i32(tcg_op2);
11104        tcg_res[pass] = tcg_temp_new_i64();
11105        gen_neon_addl(size, (opcode == 3),
11106                      tcg_res[pass], tcg_op1, tcg_op2_wide);
11107        tcg_temp_free_i64(tcg_op1);
11108        tcg_temp_free_i64(tcg_op2_wide);
11109    }
11110
11111    for (pass = 0; pass < 2; pass++) {
11112        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11113        tcg_temp_free_i64(tcg_res[pass]);
11114    }
11115}
11116
/* Narrow a 64-bit value to its high 32 bits with rounding: add the
 * rounding constant (1 << 31) and extract bits [63:32].  Note this
 * clobbers the input temporary, which callers free immediately after.
 */
static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
{
    tcg_gen_addi_i64(in, in, 1U << 31);
    tcg_gen_extrh_i64_i32(res, in);
}
11122
11123static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
11124                                 int opcode, int rd, int rn, int rm)
11125{
11126    TCGv_i32 tcg_res[2];
11127    int part = is_q ? 2 : 0;
11128    int pass;
11129
11130    for (pass = 0; pass < 2; pass++) {
11131        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11132        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11133        TCGv_i64 tcg_wideres = tcg_temp_new_i64();
11134        static NeonGenNarrowFn * const narrowfns[3][2] = {
11135            { gen_helper_neon_narrow_high_u8,
11136              gen_helper_neon_narrow_round_high_u8 },
11137            { gen_helper_neon_narrow_high_u16,
11138              gen_helper_neon_narrow_round_high_u16 },
11139            { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
11140        };
11141        NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
11142
11143        read_vec_element(s, tcg_op1, rn, pass, MO_64);
11144        read_vec_element(s, tcg_op2, rm, pass, MO_64);
11145
11146        gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
11147
11148        tcg_temp_free_i64(tcg_op1);
11149        tcg_temp_free_i64(tcg_op2);
11150
11151        tcg_res[pass] = tcg_temp_new_i32();
11152        gennarrow(tcg_res[pass], tcg_wideres);
11153        tcg_temp_free_i64(tcg_wideres);
11154    }
11155
11156    for (pass = 0; pass < 2; pass++) {
11157        write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
11158        tcg_temp_free_i32(tcg_res[pass]);
11159    }
11160    clear_vec_high(s, is_q, rd);
11161}
11162
/* AdvSIMD three different
 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 */
static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
{
    /* Instructions in this group fall into three basic classes
     * (in each case with the operation working on each element in
     * the input vectors):
     * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
     *     128 bit input)
     * (2) wide 64 x 128 -> 128
     * (3) narrowing 128 x 128 -> 64
     * Here we do initial decode, catch unallocated cases and
     * dispatch to separate functions for each class.
     */
    int is_q = extract32(insn, 30, 1);
    int is_u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    switch (opcode) {
    case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
    case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
        /* 64 x 128 -> 128 */
        /* size == 3 (64-bit elements) has no wide form */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
    case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
        /* 128 x 128 -> 64 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    case 14: /* PMULL, PMULL2 */
        /* Polynomial multiply is handled entirely here (note the
         * 'return' rather than 'break' below): each size variant does
         * its own fp_access_check before emitting a gvec helper call.
         */
        if (is_u) {
            unallocated_encoding(s);
            return;
        }
        switch (size) {
        case 0: /* PMULL.P8 */
            if (!fp_access_check(s)) {
                return;
            }
            /* The Q field specifies lo/hi half input for this insn.  */
            gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
                             gen_helper_neon_pmull_h);
            break;

        case 3: /* PMULL.P64 */
            /* 64x64 polynomial multiply requires FEAT_PMULL */
            if (!dc_isar_feature(aa64_pmull, s)) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            /* The Q field specifies lo/hi half input for this insn.  */
            gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
                             gen_helper_gvec_pmull_q);
            break;

        default: /* sizes 1 and 2 are not allocated for PMULL */
            unallocated_encoding(s);
            break;
        }
        return;
    case 9: /* SQDMLAL, SQDMLAL2 */
    case 11: /* SQDMLSL, SQDMLSL2 */
    case 13: /* SQDMULL, SQDMULL2 */
        /* Saturating doubling ops are signed-only and have no
         * byte-element form; otherwise decode as for the widening
         * group below.
         */
        if (is_u || size == 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
    case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
    case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
    case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
    case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
    case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
    case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
        /* 64 x 64 -> 128 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }

        handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    default:
        /* opcode 15 not allocated */
        unallocated_encoding(s);
        break;
    }
}
11279
11280/* Logic op (opcode == 3) subgroup of C3.6.16. */
11281static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
11282{
11283    int rd = extract32(insn, 0, 5);
11284    int rn = extract32(insn, 5, 5);
11285    int rm = extract32(insn, 16, 5);
11286    int size = extract32(insn, 22, 2);
11287    bool is_u = extract32(insn, 29, 1);
11288    bool is_q = extract32(insn, 30, 1);
11289
11290    if (!fp_access_check(s)) {
11291        return;
11292    }
11293
11294    switch (size + 4 * is_u) {
11295    case 0: /* AND */
11296        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
11297        return;
11298    case 1: /* BIC */
11299        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
11300        return;
11301    case 2: /* ORR */
11302        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
11303        return;
11304    case 3: /* ORN */
11305        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
11306        return;
11307    case 4: /* EOR */
11308        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
11309        return;
11310
11311    case 5: /* BSL bitwise select */
11312        gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
11313        return;
11314    case 6: /* BIT, bitwise insert if true */
11315        gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
11316        return;
11317    case 7: /* BIF, bitwise insert if false */
11318        gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
11319        return;
11320
11321    default:
11322        g_assert_not_reached();
11323    }
11324}
11325
11326/* Pairwise op subgroup of C3.6.16.
11327 *
11328 * This is called directly or via the handle_3same_float for float pairwise
11329 * operations where the opcode and size are calculated differently.
11330 */
11331static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
11332                                   int size, int rn, int rm, int rd)
11333{
11334    TCGv_ptr fpst;
11335    int pass;
11336
11337    /* Floating point operations need fpst */
11338    if (opcode >= 0x58) {
11339        fpst = fpstatus_ptr(FPST_FPCR);
11340    } else {
11341        fpst = NULL;
11342    }
11343
11344    if (!fp_access_check(s)) {
11345        return;
11346    }
11347
11348    /* These operations work on the concatenated rm:rn, with each pair of
11349     * adjacent elements being operated on to produce an element in the result.
11350     */
11351    if (size == 3) {
11352        TCGv_i64 tcg_res[2];
11353
11354        for (pass = 0; pass < 2; pass++) {
11355            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11356            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11357            int passreg = (pass == 0) ? rn : rm;
11358
11359            read_vec_element(s, tcg_op1, passreg, 0, MO_64);
11360            read_vec_element(s, tcg_op2, passreg, 1, MO_64);
11361            tcg_res[pass] = tcg_temp_new_i64();
11362
11363            switch (opcode) {
11364            case 0x17: /* ADDP */
11365                tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11366                break;
11367            case 0x58: /* FMAXNMP */
11368                gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11369                break;
11370            case 0x5a: /* FADDP */
11371                gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11372                break;
11373            case 0x5e: /* FMAXP */
11374                gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11375                break;
11376            case 0x78: /* FMINNMP */
11377                gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11378                break;
11379            case 0x7e: /* FMINP */
11380                gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11381                break;
11382            default:
11383                g_assert_not_reached();
11384            }
11385
11386            tcg_temp_free_i64(tcg_op1);
11387            tcg_temp_free_i64(tcg_op2);
11388        }
11389
11390        for (pass = 0; pass < 2; pass++) {
11391            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11392            tcg_temp_free_i64(tcg_res[pass]);
11393        }
11394    } else {
11395        int maxpass = is_q ? 4 : 2;
11396        TCGv_i32 tcg_res[4];
11397
11398        for (pass = 0; pass < maxpass; pass++) {
11399            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11400            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11401            NeonGenTwoOpFn *genfn = NULL;
11402            int passreg = pass < (maxpass / 2) ? rn : rm;
11403            int passelt = (is_q && (pass & 1)) ? 2 : 0;
11404
11405            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
11406            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
11407            tcg_res[pass] = tcg_temp_new_i32();
11408
11409            switch (opcode) {
11410            case 0x17: /* ADDP */
11411            {
11412                static NeonGenTwoOpFn * const fns[3] = {
11413                    gen_helper_neon_padd_u8,
11414                    gen_helper_neon_padd_u16,
11415                    tcg_gen_add_i32,
11416                };
11417                genfn = fns[size];
11418                break;
11419            }
11420            case 0x14: /* SMAXP, UMAXP */
11421            {
11422                static NeonGenTwoOpFn * const fns[3][2] = {
11423                    { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
11424                    { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
11425                    { tcg_gen_smax_i32, tcg_gen_umax_i32 },
11426                };
11427                genfn = fns[size][u];
11428                break;
11429            }
11430            case 0x15: /* SMINP, UMINP */
11431            {
11432                static NeonGenTwoOpFn * const fns[3][2] = {
11433                    { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
11434                    { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
11435                    { tcg_gen_smin_i32, tcg_gen_umin_i32 },
11436                };
11437                genfn = fns[size][u];
11438                break;
11439            }
11440            /* The FP operations are all on single floats (32 bit) */
11441            case 0x58: /* FMAXNMP */
11442                gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11443                break;
11444            case 0x5a: /* FADDP */
11445                gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11446                break;
11447            case 0x5e: /* FMAXP */
11448                gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11449                break;
11450            case 0x78: /* FMINNMP */
11451                gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11452                break;
11453            case 0x7e: /* FMINP */
11454                gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11455                break;
11456            default:
11457                g_assert_not_reached();
11458            }
11459
11460            /* FP ops called directly, otherwise call now */
11461            if (genfn) {
11462                genfn(tcg_res[pass], tcg_op1, tcg_op2);
11463            }
11464
11465            tcg_temp_free_i32(tcg_op1);
11466            tcg_temp_free_i32(tcg_op2);
11467        }
11468
11469        for (pass = 0; pass < maxpass; pass++) {
11470            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11471            tcg_temp_free_i32(tcg_res[pass]);
11472        }
11473        clear_vec_high(s, is_q, rd);
11474    }
11475
11476    if (fpst) {
11477        tcg_temp_free_ptr(fpst);
11478    }
11479}
11480
/* Floating point op subgroup of C3.6.16. */
static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
{
    /* For floating point ops, the U, size[1] and opcode bits
     * together indicate the operation. size[0] indicates single
     * or double.
     */
    int fpopcode = extract32(insn, 11, 5)
        | (extract32(insn, 23, 1) << 5)
        | (extract32(insn, 29, 1) << 6);
    int is_q = extract32(insn, 30, 1);
    int size = extract32(insn, 22, 1);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    int datasize = is_q ? 128 : 64;
    int esize = 32 << size;
    int elements = datasize / esize;

    /* Double-precision (size == 1) requires the full 128-bit vector */
    if (size == 1 && !is_q) {
        unallocated_encoding(s);
        return;
    }

    switch (fpopcode) {
    case 0x58: /* FMAXNMP */
    case 0x5a: /* FADDP */
    case 0x5e: /* FMAXP */
    case 0x78: /* FMINNMP */
    case 0x7e: /* FMINP */
        /* Pairwise ops: fp_access_check is done inside the handler. */
        if (size && !is_q) {
            unallocated_encoding(s);
            return;
        }
        handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
                               rn, rm, rd);
        return;
    case 0x1b: /* FMULX */
    case 0x1f: /* FRECPS */
    case 0x3f: /* FRSQRTS */
    case 0x5d: /* FACGE */
    case 0x7d: /* FACGT */
    case 0x19: /* FMLA */
    case 0x39: /* FMLS */
    case 0x18: /* FMAXNM */
    case 0x1a: /* FADD */
    case 0x1c: /* FCMEQ */
    case 0x1e: /* FMAX */
    case 0x38: /* FMINNM */
    case 0x3a: /* FSUB */
    case 0x3e: /* FMIN */
    case 0x5b: /* FMUL */
    case 0x5c: /* FCMGE */
    case 0x5f: /* FDIV */
    case 0x7a: /* FABD */
    case 0x7c: /* FCMGT */
        if (!fp_access_check(s)) {
            return;
        }
        handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
        return;

    case 0x1d: /* FMLAL  */
    case 0x3d: /* FMLSL  */
    case 0x59: /* FMLAL2 */
    case 0x79: /* FMLSL2 */
        /* Widening half-precision multiply-accumulate: needs FEAT_FHM,
         * and the size bit must be 0 (the inputs are fp16).
         */
        if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
            unallocated_encoding(s);
            return;
        }
        if (fp_access_check(s)) {
            int is_s = extract32(insn, 23, 1);  /* subtract (FMLSL*) */
            int is_2 = extract32(insn, 29, 1);  /* 2nd (upper) half inputs */
            int data = (is_2 << 1) | is_s;
            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                               vec_full_reg_offset(s, rn),
                               vec_full_reg_offset(s, rm), cpu_env,
                               is_q ? 16 : 8, vec_full_reg_size(s),
                               data, gen_helper_gvec_fmlal_a64);
        }
        return;

    default:
        unallocated_encoding(s);
        return;
    }
}
11569
11570/* Integer op subgroup of C3.6.16. */
11571static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
11572{
11573    int is_q = extract32(insn, 30, 1);
11574    int u = extract32(insn, 29, 1);
11575    int size = extract32(insn, 22, 2);
11576    int opcode = extract32(insn, 11, 5);
11577    int rm = extract32(insn, 16, 5);
11578    int rn = extract32(insn, 5, 5);
11579    int rd = extract32(insn, 0, 5);
11580    int pass;
11581    TCGCond cond;
11582
11583    switch (opcode) {
11584    case 0x13: /* MUL, PMUL */
11585        if (u && size != 0) {
11586            unallocated_encoding(s);
11587            return;
11588        }
11589        /* fall through */
11590    case 0x0: /* SHADD, UHADD */
11591    case 0x2: /* SRHADD, URHADD */
11592    case 0x4: /* SHSUB, UHSUB */
11593    case 0xc: /* SMAX, UMAX */
11594    case 0xd: /* SMIN, UMIN */
11595    case 0xe: /* SABD, UABD */
11596    case 0xf: /* SABA, UABA */
11597    case 0x12: /* MLA, MLS */
11598        if (size == 3) {
11599            unallocated_encoding(s);
11600            return;
11601        }
11602        break;
11603    case 0x16: /* SQDMULH, SQRDMULH */
11604        if (size == 0 || size == 3) {
11605            unallocated_encoding(s);
11606            return;
11607        }
11608        break;
11609    default:
11610        if (size == 3 && !is_q) {
11611            unallocated_encoding(s);
11612            return;
11613        }
11614        break;
11615    }
11616
11617    if (!fp_access_check(s)) {
11618        return;
11619    }
11620
11621    switch (opcode) {
11622    case 0x01: /* SQADD, UQADD */
11623        if (u) {
11624            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
11625        } else {
11626            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
11627        }
11628        return;
11629    case 0x05: /* SQSUB, UQSUB */
11630        if (u) {
11631            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
11632        } else {
11633            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
11634        }
11635        return;
11636    case 0x08: /* SSHL, USHL */
11637        if (u) {
11638            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
11639        } else {
11640            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
11641        }
11642        return;
11643    case 0x0c: /* SMAX, UMAX */
11644        if (u) {
11645            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
11646        } else {
11647            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
11648        }
11649        return;
11650    case 0x0d: /* SMIN, UMIN */
11651        if (u) {
11652            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
11653        } else {
11654            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
11655        }
11656        return;
11657    case 0xe: /* SABD, UABD */
11658        if (u) {
11659            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
11660        } else {
11661            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
11662        }
11663        return;
11664    case 0xf: /* SABA, UABA */
11665        if (u) {
11666            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
11667        } else {
11668            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
11669        }
11670        return;
11671    case 0x10: /* ADD, SUB */
11672        if (u) {
11673            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
11674        } else {
11675            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
11676        }
11677        return;
11678    case 0x13: /* MUL, PMUL */
11679        if (!u) { /* MUL */
11680            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
11681        } else {  /* PMUL */
11682            gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
11683        }
11684        return;
11685    case 0x12: /* MLA, MLS */
11686        if (u) {
11687            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
11688        } else {
11689            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
11690        }
11691        return;
11692    case 0x16: /* SQDMULH, SQRDMULH */
11693        {
11694            static gen_helper_gvec_3_ptr * const fns[2][2] = {
11695                { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
11696                { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
11697            };
11698            gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
11699        }
11700        return;
11701    case 0x11:
11702        if (!u) { /* CMTST */
11703            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
11704            return;
11705        }
11706        /* else CMEQ */
11707        cond = TCG_COND_EQ;
11708        goto do_gvec_cmp;
11709    case 0x06: /* CMGT, CMHI */
11710        cond = u ? TCG_COND_GTU : TCG_COND_GT;
11711        goto do_gvec_cmp;
11712    case 0x07: /* CMGE, CMHS */
11713        cond = u ? TCG_COND_GEU : TCG_COND_GE;
11714    do_gvec_cmp:
11715        tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
11716                         vec_full_reg_offset(s, rn),
11717                         vec_full_reg_offset(s, rm),
11718                         is_q ? 16 : 8, vec_full_reg_size(s));
11719        return;
11720    }
11721
11722    if (size == 3) {
11723        assert(is_q);
11724        for (pass = 0; pass < 2; pass++) {
11725            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11726            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11727            TCGv_i64 tcg_res = tcg_temp_new_i64();
11728
11729            read_vec_element(s, tcg_op1, rn, pass, MO_64);
11730            read_vec_element(s, tcg_op2, rm, pass, MO_64);
11731
11732            handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
11733
11734            write_vec_element(s, tcg_res, rd, pass, MO_64);
11735
11736            tcg_temp_free_i64(tcg_res);
11737            tcg_temp_free_i64(tcg_op1);
11738            tcg_temp_free_i64(tcg_op2);
11739        }
11740    } else {
11741        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11742            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11743            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11744            TCGv_i32 tcg_res = tcg_temp_new_i32();
11745            NeonGenTwoOpFn *genfn = NULL;
11746            NeonGenTwoOpEnvFn *genenvfn = NULL;
11747
11748            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
11749            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
11750
11751            switch (opcode) {
11752            case 0x0: /* SHADD, UHADD */
11753            {
11754                static NeonGenTwoOpFn * const fns[3][2] = {
11755                    { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
11756                    { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
11757                    { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
11758                };
11759                genfn = fns[size][u];
11760                break;
11761            }
11762            case 0x2: /* SRHADD, URHADD */
11763            {
11764                static NeonGenTwoOpFn * const fns[3][2] = {
11765                    { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
11766                    { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
11767                    { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
11768                };
11769                genfn = fns[size][u];
11770                break;
11771            }
11772            case 0x4: /* SHSUB, UHSUB */
11773            {
11774                static NeonGenTwoOpFn * const fns[3][2] = {
11775                    { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
11776                    { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
11777                    { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
11778                };
11779                genfn = fns[size][u];
11780                break;
11781            }
11782            case 0x9: /* SQSHL, UQSHL */
11783            {
11784                static NeonGenTwoOpEnvFn * const fns[3][2] = {
11785                    { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
11786                    { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
11787                    { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
11788                };
11789                genenvfn = fns[size][u];
11790                break;
11791            }
11792            case 0xa: /* SRSHL, URSHL */
11793            {
11794                static NeonGenTwoOpFn * const fns[3][2] = {
11795                    { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
11796                    { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
11797                    { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
11798                };
11799                genfn = fns[size][u];
11800                break;
11801            }
11802            case 0xb: /* SQRSHL, UQRSHL */
11803            {
11804                static NeonGenTwoOpEnvFn * const fns[3][2] = {
11805                    { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
11806                    { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
11807                    { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
11808                };
11809                genenvfn = fns[size][u];
11810                break;
11811            }
11812            default:
11813                g_assert_not_reached();
11814            }
11815
11816            if (genenvfn) {
11817                genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
11818            } else {
11819                genfn(tcg_res, tcg_op1, tcg_op2);
11820            }
11821
11822            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11823
11824            tcg_temp_free_i32(tcg_res);
11825            tcg_temp_free_i32(tcg_op1);
11826            tcg_temp_free_i32(tcg_op2);
11827        }
11828    }
11829    clear_vec_high(s, is_q, rd);
11830}
11831
11832/* AdvSIMD three same
11833 *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
11834 * +---+---+---+-----------+------+---+------+--------+---+------+------+
11835 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
11836 * +---+---+---+-----------+------+---+------+--------+---+------+------+
11837 */
11838static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11839{
11840    int opcode = extract32(insn, 11, 5);
11841
11842    switch (opcode) {
11843    case 0x3: /* logic ops */
11844        disas_simd_3same_logic(s, insn);
11845        break;
11846    case 0x17: /* ADDP */
11847    case 0x14: /* SMAXP, UMAXP */
11848    case 0x15: /* SMINP, UMINP */
11849    {
11850        /* Pairwise operations */
11851        int is_q = extract32(insn, 30, 1);
11852        int u = extract32(insn, 29, 1);
11853        int size = extract32(insn, 22, 2);
11854        int rm = extract32(insn, 16, 5);
11855        int rn = extract32(insn, 5, 5);
11856        int rd = extract32(insn, 0, 5);
11857        if (opcode == 0x17) {
11858            if (u || (size == 3 && !is_q)) {
11859                unallocated_encoding(s);
11860                return;
11861            }
11862        } else {
11863            if (size == 3) {
11864                unallocated_encoding(s);
11865                return;
11866            }
11867        }
11868        handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11869        break;
11870    }
11871    case 0x18 ... 0x31:
11872        /* floating point ops, sz[1] and U are part of opcode */
11873        disas_simd_3same_float(s, insn);
11874        break;
11875    default:
11876        disas_simd_3same_int(s, insn);
11877        break;
11878    }
11879}
11880
11881/*
11882 * Advanced SIMD three same (ARMv8.2 FP16 variants)
11883 *
11884 *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
11885 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11886 * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
11887 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11888 *
11889 * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11890 * (register), FACGE, FABD, FCMGT (register) and FACGT.
11891 *
11892 */
static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
{
    int opcode = extract32(insn, 11, 3);
    int u = extract32(insn, 29, 1);
    int a = extract32(insn, 23, 1);
    int is_q = extract32(insn, 30, 1);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    /*
     * For these floating point ops, the U, a and opcode bits
     * together indicate the operation.
     */
    int fpopcode = opcode | (a << 3) | (u << 4);
    int datasize = is_q ? 128 : 64;
    int elements = datasize / 16;   /* number of 16-bit lanes */
    bool pairwise;
    TCGv_ptr fpst;
    int pass;

    /*
     * Classify the operation first: rejecting an unallocated fpopcode
     * takes precedence over the FEAT_FP16 and FP access checks below.
     */
    switch (fpopcode) {
    case 0x0: /* FMAXNM */
    case 0x1: /* FMLA */
    case 0x2: /* FADD */
    case 0x3: /* FMULX */
    case 0x4: /* FCMEQ */
    case 0x6: /* FMAX */
    case 0x7: /* FRECPS */
    case 0x8: /* FMINNM */
    case 0x9: /* FMLS */
    case 0xa: /* FSUB */
    case 0xe: /* FMIN */
    case 0xf: /* FRSQRTS */
    case 0x13: /* FMUL */
    case 0x14: /* FCMGE */
    case 0x15: /* FACGE */
    case 0x17: /* FDIV */
    case 0x1a: /* FABD */
    case 0x1c: /* FCMGT */
    case 0x1d: /* FACGT */
        pairwise = false;
        break;
    case 0x10: /* FMAXNMP */
    case 0x12: /* FADDP */
    case 0x16: /* FMAXP */
    case 0x18: /* FMINNMP */
    case 0x1e: /* FMINP */
        pairwise = true;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!dc_isar_feature(aa64_fp16, s)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* Half-precision ops use the FP16 flavour of the FP status flags */
    fpst = fpstatus_ptr(FPST_FPCR_F16);

    if (pairwise) {
        int maxpass = is_q ? 8 : 4;
        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i32 tcg_res[8];

        /*
         * All results are buffered in tcg_res[] and written back only
         * after the loop, so that rd may overlap rn or rm.
         */
        for (pass = 0; pass < maxpass; pass++) {
            /* low half of the passes pairs elements of rn, high half rm */
            int passreg = pass < (maxpass / 2) ? rn : rm;
            int passelt = (pass << 1) & (maxpass - 1);

            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
            tcg_res[pass] = tcg_temp_new_i32();

            switch (fpopcode) {
            case 0x10: /* FMAXNMP */
                gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
                                           fpst);
                break;
            case 0x12: /* FADDP */
                gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x16: /* FMAXP */
                gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x18: /* FMINNMP */
                gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
                                           fpst);
                break;
            case 0x1e: /* FMINP */
                gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }
        }

        for (pass = 0; pass < maxpass; pass++) {
            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
            tcg_temp_free_i32(tcg_res[pass]);
        }

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);

    } else {
        /* Non-pairwise: straightforward per-lane operation */
        for (pass = 0; pass < elements; pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);

            switch (fpopcode) {
            case 0x0: /* FMAXNM */
                gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1: /* FMLA */
                /* fused multiply-add: accumulate into the old rd element */
                read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
                gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
                                           fpst);
                break;
            case 0x2: /* FADD */
                gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3: /* FMULX */
                gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x4: /* FCMEQ */
                gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x6: /* FMAX */
                gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7: /* FRECPS */
                gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x8: /* FMINNM */
                gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x9: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
                read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
                gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
                                           fpst);
                break;
            case 0xa: /* FSUB */
                gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xe: /* FMIN */
                gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xf: /* FRSQRTS */
                gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x13: /* FMUL */
                gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x14: /* FCMGE */
                gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x15: /* FACGE */
                gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x17: /* FDIV */
                gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1a: /* FABD */
                gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
                /* clear the fp16 sign bit to take the absolute value */
                tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
                break;
            case 0x1c: /* FCMGT */
                gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1d: /* FACGT */
                gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }
    }

    tcg_temp_free_ptr(fpst);

    clear_vec_high(s, is_q, rd);
}
12092
12093/* AdvSIMD three same extra
12094 *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
12095 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
12096 * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
12097 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
12098 */
static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 4);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    bool feature;
    int rot;

    /*
     * Decode phase: reject unallocated encodings and identify the ISA
     * feature bit that gates the instruction.  The switch key folds the
     * U bit in as bit 4 above the 4-bit opcode.
     */
    switch (u * 16 + opcode) {
    case 0x10: /* SQRDMLAH (vector) */
    case 0x11: /* SQRDMLSH (vector) */
        if (size != 1 && size != 2) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_rdm, s);
        break;
    case 0x02: /* SDOT (vector) */
    case 0x12: /* UDOT (vector) */
        if (size != MO_32) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_dp, s);
        break;
    case 0x03: /* USDOT */
        if (size != MO_32) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_i8mm, s);
        break;
    case 0x04: /* SMMLA */
    case 0x14: /* UMMLA */
    case 0x05: /* USMMLA */
        if (!is_q || size != MO_32) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_i8mm, s);
        break;
    case 0x18: /* FCMLA, #0 */
    case 0x19: /* FCMLA, #90 */
    case 0x1a: /* FCMLA, #180 */
    case 0x1b: /* FCMLA, #270 */
    case 0x1c: /* FCADD, #90 */
    case 0x1e: /* FCADD, #270 */
        /*
         * size 0 is unallocated; size 1 additionally requires FEAT_FP16;
         * size 3 requires the full 128-bit (Q) form.
         */
        if (size == 0
            || (size == 1 && !dc_isar_feature(aa64_fp16, s))
            || (size == 3 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_fcma, s);
        break;
    case 0x1d: /* BFMMLA */
        if (size != MO_16 || !is_q) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_bf16, s);
        break;
    case 0x1f:
        switch (size) {
        case 1: /* BFDOT */
        case 3: /* BFMLAL{B,T} */
            feature = dc_isar_feature(aa64_bf16, s);
            break;
        default:
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }
    if (!feature) {
        unallocated_encoding(s);
        return;
    }
    if (!fp_access_check(s)) {
        return;
    }

    /*
     * Emit phase: U has already been validated above, so we switch on
     * opcode alone and pick signed/unsigned helpers via u where needed.
     */
    switch (opcode) {
    case 0x0: /* SQRDMLAH (vector) */
        gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size);
        return;

    case 0x1: /* SQRDMLSH (vector) */
        gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size);
        return;

    case 0x2: /* SDOT / UDOT */
        gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
                         u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
        return;

    case 0x3: /* USDOT */
        gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
        return;

    case 0x04: /* SMMLA, UMMLA */
        gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
                         u ? gen_helper_gvec_ummla_b
                         : gen_helper_gvec_smmla_b);
        return;
    case 0x05: /* USMMLA */
        gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
        return;

    case 0x8: /* FCMLA, #0 */
    case 0x9: /* FCMLA, #90 */
    case 0xa: /* FCMLA, #180 */
    case 0xb: /* FCMLA, #270 */
        /* the rotation is encoded in the low 2 bits of the opcode */
        rot = extract32(opcode, 0, 2);
        switch (size) {
        case 1:
            gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
                              gen_helper_gvec_fcmlah);
            break;
        case 2:
            gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
                              gen_helper_gvec_fcmlas);
            break;
        case 3:
            gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
                              gen_helper_gvec_fcmlad);
            break;
        default:
            g_assert_not_reached();
        }
        return;

    case 0xc: /* FCADD, #90 */
    case 0xe: /* FCADD, #270 */
        /* opcode bit 1 selects the rotation: 0 -> #90, 1 -> #270 */
        rot = extract32(opcode, 1, 1);
        switch (size) {
        case 1:
            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
                              gen_helper_gvec_fcaddh);
            break;
        case 2:
            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
                              gen_helper_gvec_fcadds);
            break;
        case 3:
            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
                              gen_helper_gvec_fcaddd);
            break;
        default:
            g_assert_not_reached();
        }
        return;

    case 0xd: /* BFMMLA */
        gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
        return;
    case 0xf:
        switch (size) {
        case 1: /* BFDOT */
            gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
            break;
        case 3: /* BFMLAL{B,T} */
            gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
                              gen_helper_gvec_bfmlal);
            break;
        default:
            g_assert_not_reached();
        }
        return;

    default:
        g_assert_not_reached();
    }
}
12280
static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
                                  int size, int rn, int rd)
{
    /* Handle 2-reg-misc ops which are widening (so each size element
     * in the source becomes a 2*size element in the destination.
     * The only instruction like this is FCVTL.
     */
    int pass;

    if (size == 3) {
        /* 32 -> 64 bit fp conversion */
        TCGv_i64 tcg_res[2];
        int srcelt = is_q ? 2 : 0;   /* FCVTL2 converts the high half of rn */

        /* Convert both elements before writing back, since rd may
         * overlap rn.
         */
        for (pass = 0; pass < 2; pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();
            tcg_res[pass] = tcg_temp_new_i64();

            read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
            gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
            tcg_temp_free_i32(tcg_op);
        }
        for (pass = 0; pass < 2; pass++) {
            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
            tcg_temp_free_i64(tcg_res[pass]);
        }
    } else {
        /* 16 -> 32 bit fp conversion */
        int srcelt = is_q ? 4 : 0;   /* FCVTL2 converts the high half of rn */
        TCGv_i32 tcg_res[4];
        TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
        /* AHP (alternative half-precision) flag affects the conversion */
        TCGv_i32 ahp = get_ahp_flag();

        for (pass = 0; pass < 4; pass++) {
            tcg_res[pass] = tcg_temp_new_i32();

            /* in-place widen: read as 16-bit, convert to 32-bit */
            read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
            gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
                                           fpst, ahp);
        }
        for (pass = 0; pass < 4; pass++) {
            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
            tcg_temp_free_i32(tcg_res[pass]);
        }

        tcg_temp_free_ptr(fpst);
        tcg_temp_free_i32(ahp);
    }
}
12330
static void handle_rev(DisasContext *s, int opcode, bool u,
                       bool is_q, int size, int rn, int rd)
{
    /* Handle REV64, REV32, REV16: reverse the order of the size-sized
     * elements within each group of the vector.
     */
    int op = (opcode << 1) | u;   /* 0: REV64, 1: REV32, 2: REV16 */
    int opsz = op + size;
    int grp_size = 3 - opsz;      /* log2(elements per reversed group) */
    int dsize = is_q ? 128 : 64;
    int i;

    /* element size must be strictly smaller than the group size */
    if (opsz >= 3) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (size == 0) {
        /* Special case bytes, use bswap op on each group of elements */
        int groups = dsize / (8 << grp_size);

        for (i = 0; i < groups; i++) {
            TCGv_i64 tcg_tmp = tcg_temp_new_i64();

            /* read each whole group as one element and byte-swap it */
            read_vec_element(s, tcg_tmp, rn, i, grp_size);
            switch (grp_size) {
            case MO_16:
                tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
                break;
            case MO_32:
                tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ);
                break;
            case MO_64:
                tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
                break;
            default:
                g_assert_not_reached();
            }
            write_vec_element(s, tcg_tmp, rd, i, grp_size);
            tcg_temp_free_i64(tcg_tmp);
        }
        clear_vec_high(s, is_q, rd);
    } else {
        /* General case: build the result by depositing each source
         * element at its reversed position within its group.
         */
        int revmask = (1 << grp_size) - 1;   /* element-index mask in a group */
        int esize = 8 << size;               /* element size in bits */
        int elements = dsize / esize;
        TCGv_i64 tcg_rn = tcg_temp_new_i64();
        TCGv_i64 tcg_rd = tcg_const_i64(0);
        TCGv_i64 tcg_rd_hi = tcg_const_i64(0);

        for (i = 0; i < elements; i++) {
            /* XOR with revmask reverses the index within the group */
            int e_rev = (i & 0xf) ^ revmask;
            int off = e_rev * esize;          /* destination bit offset */
            read_vec_element(s, tcg_rn, rn, i, size);
            if (off >= 64) {
                tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
                                    tcg_rn, off - 64, esize);
            } else {
                tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
            }
        }
        /* The high half is always written: for the 64-bit form it stays
         * zero, which clears the upper 64 bits of rd as required.
         */
        write_vec_element(s, tcg_rd, rd, 0, MO_64);
        write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);

        tcg_temp_free_i64(tcg_rd_hi);
        tcg_temp_free_i64(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
    }
}
12401
static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
                                  bool is_q, int size, int rn, int rd)
{
    /* Implement the pairwise operations from 2-misc:
     * SADDLP, UADDLP, SADALP, UADALP.
     * These all add pairs of elements in the input to produce a
     * double-width result element in the output (possibly accumulating).
     */
    bool accum = (opcode == 0x6);   /* 0x6: SADALP/UADALP accumulate into rd */
    int maxpass = is_q ? 2 : 1;     /* number of 64-bit lanes processed */
    int pass;
    TCGv_i64 tcg_res[2];

    if (size == 2) {
        /* 32 + 32 -> 64 op */
        /* sign-extend the 32-bit inputs for the signed forms */
        MemOp memop = size + (u ? 0 : MO_SIGN);

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();

            tcg_res[pass] = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass * 2, memop);
            read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
            tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
            if (accum) {
                read_vec_element(s, tcg_op1, rd, pass, MO_64);
                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
            }

            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        /* 8- and 16-bit elements: helpers do the pairwise long add
         * across a whole 64-bit lane at once.
         */
        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();
            NeonGenOne64OpFn *genfn;
            static NeonGenOne64OpFn * const fns[2][2] = {
                { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
                { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
            };

            genfn = fns[size][u];

            tcg_res[pass] = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);
            genfn(tcg_res[pass], tcg_op);

            if (accum) {
                read_vec_element(s, tcg_op, rd, pass, MO_64);
                if (size == 0) {
                    gen_helper_neon_addl_u16(tcg_res[pass],
                                             tcg_res[pass], tcg_op);
                } else {
                    gen_helper_neon_addl_u32(tcg_res[pass],
                                             tcg_res[pass], tcg_op);
                }
            }
            tcg_temp_free_i64(tcg_op);
        }
    }
    if (!is_q) {
        /* zero the high 64 bits of rd for the 64-bit form */
        tcg_res[1] = tcg_const_i64(0);
    }
    for (pass = 0; pass < 2; pass++) {
        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
        tcg_temp_free_i64(tcg_res[pass]);
    }
}
12473
12474static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
12475{
12476    /* Implement SHLL and SHLL2 */
12477    int pass;
12478    int part = is_q ? 2 : 0;
12479    TCGv_i64 tcg_res[2];
12480
12481    for (pass = 0; pass < 2; pass++) {
12482        static NeonGenWidenFn * const widenfns[3] = {
12483            gen_helper_neon_widen_u8,
12484            gen_helper_neon_widen_u16,
12485            tcg_gen_extu_i32_i64,
12486        };
12487        NeonGenWidenFn *widenfn = widenfns[size];
12488        TCGv_i32 tcg_op = tcg_temp_new_i32();
12489
12490        read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
12491        tcg_res[pass] = tcg_temp_new_i64();
12492        widenfn(tcg_res[pass], tcg_op);
12493        tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
12494
12495        tcg_temp_free_i32(tcg_op);
12496    }
12497
12498    for (pass = 0; pass < 2; pass++) {
12499        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12500        tcg_temp_free_i64(tcg_res[pass]);
12501    }
12502}
12503
12504/* AdvSIMD two reg misc
12505 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
12506 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
12507 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12508 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
12509 */
12510static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
12511{
12512    int size = extract32(insn, 22, 2);
12513    int opcode = extract32(insn, 12, 5);
12514    bool u = extract32(insn, 29, 1);
12515    bool is_q = extract32(insn, 30, 1);
12516    int rn = extract32(insn, 5, 5);
12517    int rd = extract32(insn, 0, 5);
12518    bool need_fpstatus = false;
12519    bool need_rmode = false;
12520    int rmode = -1;
12521    TCGv_i32 tcg_rmode;
12522    TCGv_ptr tcg_fpstatus;
12523
12524    switch (opcode) {
12525    case 0x0: /* REV64, REV32 */
12526    case 0x1: /* REV16 */
12527        handle_rev(s, opcode, u, is_q, size, rn, rd);
12528        return;
12529    case 0x5: /* CNT, NOT, RBIT */
12530        if (u && size == 0) {
12531            /* NOT */
12532            break;
12533        } else if (u && size == 1) {
12534            /* RBIT */
12535            break;
12536        } else if (!u && size == 0) {
12537            /* CNT */
12538            break;
12539        }
12540        unallocated_encoding(s);
12541        return;
12542    case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
12543    case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
12544        if (size == 3) {
12545            unallocated_encoding(s);
12546            return;
12547        }
12548        if (!fp_access_check(s)) {
12549            return;
12550        }
12551
12552        handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
12553        return;
12554    case 0x4: /* CLS, CLZ */
12555        if (size == 3) {
12556            unallocated_encoding(s);
12557            return;
12558        }
12559        break;
12560    case 0x2: /* SADDLP, UADDLP */
12561    case 0x6: /* SADALP, UADALP */
12562        if (size == 3) {
12563            unallocated_encoding(s);
12564            return;
12565        }
12566        if (!fp_access_check(s)) {
12567            return;
12568        }
12569        handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
12570        return;
12571    case 0x13: /* SHLL, SHLL2 */
12572        if (u == 0 || size == 3) {
12573            unallocated_encoding(s);
12574            return;
12575        }
12576        if (!fp_access_check(s)) {
12577            return;
12578        }
12579        handle_shll(s, is_q, size, rn, rd);
12580        return;
12581    case 0xa: /* CMLT */
12582        if (u == 1) {
12583            unallocated_encoding(s);
12584            return;
12585        }
12586        /* fall through */
12587    case 0x8: /* CMGT, CMGE */
12588    case 0x9: /* CMEQ, CMLE */
12589    case 0xb: /* ABS, NEG */
12590        if (size == 3 && !is_q) {
12591            unallocated_encoding(s);
12592            return;
12593        }
12594        break;
12595    case 0x3: /* SUQADD, USQADD */
12596        if (size == 3 && !is_q) {
12597            unallocated_encoding(s);
12598            return;
12599        }
12600        if (!fp_access_check(s)) {
12601            return;
12602        }
12603        handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
12604        return;
12605    case 0x7: /* SQABS, SQNEG */
12606        if (size == 3 && !is_q) {
12607            unallocated_encoding(s);
12608            return;
12609        }
12610        break;
12611    case 0xc ... 0xf:
12612    case 0x16 ... 0x1f:
12613    {
12614        /* Floating point: U, size[1] and opcode indicate operation;
12615         * size[0] indicates single or double precision.
12616         */
12617        int is_double = extract32(size, 0, 1);
12618        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
12619        size = is_double ? 3 : 2;
12620        switch (opcode) {
12621        case 0x2f: /* FABS */
12622        case 0x6f: /* FNEG */
12623            if (size == 3 && !is_q) {
12624                unallocated_encoding(s);
12625                return;
12626            }
12627            break;
12628        case 0x1d: /* SCVTF */
12629        case 0x5d: /* UCVTF */
12630        {
12631            bool is_signed = (opcode == 0x1d) ? true : false;
12632            int elements = is_double ? 2 : is_q ? 4 : 2;
12633            if (is_double && !is_q) {
12634                unallocated_encoding(s);
12635                return;
12636            }
12637            if (!fp_access_check(s)) {
12638                return;
12639            }
12640            handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
12641            return;
12642        }
12643        case 0x2c: /* FCMGT (zero) */
12644        case 0x2d: /* FCMEQ (zero) */
12645        case 0x2e: /* FCMLT (zero) */
12646        case 0x6c: /* FCMGE (zero) */
12647        case 0x6d: /* FCMLE (zero) */
12648            if (size == 3 && !is_q) {
12649                unallocated_encoding(s);
12650                return;
12651            }
12652            handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
12653            return;
12654        case 0x7f: /* FSQRT */
12655            if (size == 3 && !is_q) {
12656                unallocated_encoding(s);
12657                return;
12658            }
12659            break;
12660        case 0x1a: /* FCVTNS */
12661        case 0x1b: /* FCVTMS */
12662        case 0x3a: /* FCVTPS */
12663        case 0x3b: /* FCVTZS */
12664        case 0x5a: /* FCVTNU */
12665        case 0x5b: /* FCVTMU */
12666        case 0x7a: /* FCVTPU */
12667        case 0x7b: /* FCVTZU */
12668            need_fpstatus = true;
12669            need_rmode = true;
12670            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12671            if (size == 3 && !is_q) {
12672                unallocated_encoding(s);
12673                return;
12674            }
12675            break;
12676        case 0x5c: /* FCVTAU */
12677        case 0x1c: /* FCVTAS */
12678            need_fpstatus = true;
12679            need_rmode = true;
12680            rmode = FPROUNDING_TIEAWAY;
12681            if (size == 3 && !is_q) {
12682                unallocated_encoding(s);
12683                return;
12684            }
12685            break;
12686        case 0x3c: /* URECPE */
12687            if (size == 3) {
12688                unallocated_encoding(s);
12689                return;
12690            }
12691            /* fall through */
12692        case 0x3d: /* FRECPE */
12693        case 0x7d: /* FRSQRTE */
12694            if (size == 3 && !is_q) {
12695                unallocated_encoding(s);
12696                return;
12697            }
12698            if (!fp_access_check(s)) {
12699                return;
12700            }
12701            handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
12702            return;
12703        case 0x56: /* FCVTXN, FCVTXN2 */
12704            if (size == 2) {
12705                unallocated_encoding(s);
12706                return;
12707            }
12708            /* fall through */
12709        case 0x16: /* FCVTN, FCVTN2 */
12710            /* handle_2misc_narrow does a 2*size -> size operation, but these
12711             * instructions encode the source size rather than dest size.
12712             */
12713            if (!fp_access_check(s)) {
12714                return;
12715            }
12716            handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12717            return;
12718        case 0x36: /* BFCVTN, BFCVTN2 */
12719            if (!dc_isar_feature(aa64_bf16, s) || size != 2) {
12720                unallocated_encoding(s);
12721                return;
12722            }
12723            if (!fp_access_check(s)) {
12724                return;
12725            }
12726            handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12727            return;
12728        case 0x17: /* FCVTL, FCVTL2 */
12729            if (!fp_access_check(s)) {
12730                return;
12731            }
12732            handle_2misc_widening(s, opcode, is_q, size, rn, rd);
12733            return;
12734        case 0x18: /* FRINTN */
12735        case 0x19: /* FRINTM */
12736        case 0x38: /* FRINTP */
12737        case 0x39: /* FRINTZ */
12738            need_rmode = true;
12739            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12740            /* fall through */
12741        case 0x59: /* FRINTX */
12742        case 0x79: /* FRINTI */
12743            need_fpstatus = true;
12744            if (size == 3 && !is_q) {
12745                unallocated_encoding(s);
12746                return;
12747            }
12748            break;
12749        case 0x58: /* FRINTA */
12750            need_rmode = true;
12751            rmode = FPROUNDING_TIEAWAY;
12752            need_fpstatus = true;
12753            if (size == 3 && !is_q) {
12754                unallocated_encoding(s);
12755                return;
12756            }
12757            break;
12758        case 0x7c: /* URSQRTE */
12759            if (size == 3) {
12760                unallocated_encoding(s);
12761                return;
12762            }
12763            break;
12764        case 0x1e: /* FRINT32Z */
12765        case 0x1f: /* FRINT64Z */
12766            need_rmode = true;
12767            rmode = FPROUNDING_ZERO;
12768            /* fall through */
12769        case 0x5e: /* FRINT32X */
12770        case 0x5f: /* FRINT64X */
12771            need_fpstatus = true;
12772            if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12773                unallocated_encoding(s);
12774                return;
12775            }
12776            break;
12777        default:
12778            unallocated_encoding(s);
12779            return;
12780        }
12781        break;
12782    }
12783    default:
12784        unallocated_encoding(s);
12785        return;
12786    }
12787
12788    if (!fp_access_check(s)) {
12789        return;
12790    }
12791
12792    if (need_fpstatus || need_rmode) {
12793        tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
12794    } else {
12795        tcg_fpstatus = NULL;
12796    }
12797    if (need_rmode) {
12798        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
12799        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12800    } else {
12801        tcg_rmode = NULL;
12802    }
12803
12804    switch (opcode) {
12805    case 0x5:
12806        if (u && size == 0) { /* NOT */
12807            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12808            return;
12809        }
12810        break;
12811    case 0x8: /* CMGT, CMGE */
12812        if (u) {
12813            gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size);
12814        } else {
12815            gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size);
12816        }
12817        return;
12818    case 0x9: /* CMEQ, CMLE */
12819        if (u) {
12820            gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size);
12821        } else {
12822            gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size);
12823        }
12824        return;
12825    case 0xa: /* CMLT */
12826        gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size);
12827        return;
12828    case 0xb:
12829        if (u) { /* ABS, NEG */
12830            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12831        } else {
12832            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12833        }
12834        return;
12835    }
12836
12837    if (size == 3) {
12838        /* All 64-bit element operations can be shared with scalar 2misc */
12839        int pass;
12840
12841        /* Coverity claims (size == 3 && !is_q) has been eliminated
12842         * from all paths leading to here.
12843         */
12844        tcg_debug_assert(is_q);
12845        for (pass = 0; pass < 2; pass++) {
12846            TCGv_i64 tcg_op = tcg_temp_new_i64();
12847            TCGv_i64 tcg_res = tcg_temp_new_i64();
12848
12849            read_vec_element(s, tcg_op, rn, pass, MO_64);
12850
12851            handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12852                            tcg_rmode, tcg_fpstatus);
12853
12854            write_vec_element(s, tcg_res, rd, pass, MO_64);
12855
12856            tcg_temp_free_i64(tcg_res);
12857            tcg_temp_free_i64(tcg_op);
12858        }
12859    } else {
12860        int pass;
12861
12862        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12863            TCGv_i32 tcg_op = tcg_temp_new_i32();
12864            TCGv_i32 tcg_res = tcg_temp_new_i32();
12865
12866            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12867
12868            if (size == 2) {
12869                /* Special cases for 32 bit elements */
12870                switch (opcode) {
12871                case 0x4: /* CLS */
12872                    if (u) {
12873                        tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12874                    } else {
12875                        tcg_gen_clrsb_i32(tcg_res, tcg_op);
12876                    }
12877                    break;
12878                case 0x7: /* SQABS, SQNEG */
12879                    if (u) {
12880                        gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
12881                    } else {
12882                        gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
12883                    }
12884                    break;
12885                case 0x2f: /* FABS */
12886                    gen_helper_vfp_abss(tcg_res, tcg_op);
12887                    break;
12888                case 0x6f: /* FNEG */
12889                    gen_helper_vfp_negs(tcg_res, tcg_op);
12890                    break;
12891                case 0x7f: /* FSQRT */
12892                    gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
12893                    break;
12894                case 0x1a: /* FCVTNS */
12895                case 0x1b: /* FCVTMS */
12896                case 0x1c: /* FCVTAS */
12897                case 0x3a: /* FCVTPS */
12898                case 0x3b: /* FCVTZS */
12899                {
12900                    TCGv_i32 tcg_shift = tcg_const_i32(0);
12901                    gen_helper_vfp_tosls(tcg_res, tcg_op,
12902                                         tcg_shift, tcg_fpstatus);
12903                    tcg_temp_free_i32(tcg_shift);
12904                    break;
12905                }
12906                case 0x5a: /* FCVTNU */
12907                case 0x5b: /* FCVTMU */
12908                case 0x5c: /* FCVTAU */
12909                case 0x7a: /* FCVTPU */
12910                case 0x7b: /* FCVTZU */
12911                {
12912                    TCGv_i32 tcg_shift = tcg_const_i32(0);
12913                    gen_helper_vfp_touls(tcg_res, tcg_op,
12914                                         tcg_shift, tcg_fpstatus);
12915                    tcg_temp_free_i32(tcg_shift);
12916                    break;
12917                }
12918                case 0x18: /* FRINTN */
12919                case 0x19: /* FRINTM */
12920                case 0x38: /* FRINTP */
12921                case 0x39: /* FRINTZ */
12922                case 0x58: /* FRINTA */
12923                case 0x79: /* FRINTI */
12924                    gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12925                    break;
12926                case 0x59: /* FRINTX */
12927                    gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12928                    break;
12929                case 0x7c: /* URSQRTE */
12930                    gen_helper_rsqrte_u32(tcg_res, tcg_op);
12931                    break;
12932                case 0x1e: /* FRINT32Z */
12933                case 0x5e: /* FRINT32X */
12934                    gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12935                    break;
12936                case 0x1f: /* FRINT64Z */
12937                case 0x5f: /* FRINT64X */
12938                    gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12939                    break;
12940                default:
12941                    g_assert_not_reached();
12942                }
12943            } else {
12944                /* Use helpers for 8 and 16 bit elements */
12945                switch (opcode) {
12946                case 0x5: /* CNT, RBIT */
12947                    /* For these two insns size is part of the opcode specifier
12948                     * (handled earlier); they always operate on byte elements.
12949                     */
12950                    if (u) {
12951                        gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12952                    } else {
12953                        gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12954                    }
12955                    break;
12956                case 0x7: /* SQABS, SQNEG */
12957                {
12958                    NeonGenOneOpEnvFn *genfn;
12959                    static NeonGenOneOpEnvFn * const fns[2][2] = {
12960                        { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12961                        { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12962                    };
12963                    genfn = fns[size][u];
12964                    genfn(tcg_res, cpu_env, tcg_op);
12965                    break;
12966                }
12967                case 0x4: /* CLS, CLZ */
12968                    if (u) {
12969                        if (size == 0) {
12970                            gen_helper_neon_clz_u8(tcg_res, tcg_op);
12971                        } else {
12972                            gen_helper_neon_clz_u16(tcg_res, tcg_op);
12973                        }
12974                    } else {
12975                        if (size == 0) {
12976                            gen_helper_neon_cls_s8(tcg_res, tcg_op);
12977                        } else {
12978                            gen_helper_neon_cls_s16(tcg_res, tcg_op);
12979                        }
12980                    }
12981                    break;
12982                default:
12983                    g_assert_not_reached();
12984                }
12985            }
12986
12987            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12988
12989            tcg_temp_free_i32(tcg_res);
12990            tcg_temp_free_i32(tcg_op);
12991        }
12992    }
12993    clear_vec_high(s, is_q, rd);
12994
12995    if (need_rmode) {
12996        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12997        tcg_temp_free_i32(tcg_rmode);
12998    }
12999    if (need_fpstatus) {
13000        tcg_temp_free_ptr(tcg_fpstatus);
13001    }
13002}
13003
13004/* AdvSIMD [scalar] two register miscellaneous (FP16)
13005 *
13006 *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
13007 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
13008 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13009 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
13010 *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
13011 *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
13012 *
13013 * This actually covers two groups where scalar access is governed by
13014 * bit 28. A bunch of the instructions (float to integral) only exist
13015 * in the vector form and are un-allocated for the scalar decode. Also
13016 * in the scalar decode Q is always 1.
13017 */
13018static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
13019{
13020    int fpop, opcode, a, u;
13021    int rn, rd;
13022    bool is_q;
13023    bool is_scalar;
13024    bool only_in_vector = false;
13025
13026    int pass;
13027    TCGv_i32 tcg_rmode = NULL;
13028    TCGv_ptr tcg_fpstatus = NULL;
13029    bool need_rmode = false;
13030    bool need_fpst = true;
13031    int rmode;
13032
13033    if (!dc_isar_feature(aa64_fp16, s)) {
13034        unallocated_encoding(s);
13035        return;
13036    }
13037
13038    rd = extract32(insn, 0, 5);
13039    rn = extract32(insn, 5, 5);
13040
13041    a = extract32(insn, 23, 1);
13042    u = extract32(insn, 29, 1);
13043    is_scalar = extract32(insn, 28, 1);
13044    is_q = extract32(insn, 30, 1);
13045
13046    opcode = extract32(insn, 12, 5);
13047    fpop = deposit32(opcode, 5, 1, a);
13048    fpop = deposit32(fpop, 6, 1, u);
13049
13050    switch (fpop) {
13051    case 0x1d: /* SCVTF */
13052    case 0x5d: /* UCVTF */
13053    {
13054        int elements;
13055
13056        if (is_scalar) {
13057            elements = 1;
13058        } else {
13059            elements = (is_q ? 8 : 4);
13060        }
13061
13062        if (!fp_access_check(s)) {
13063            return;
13064        }
13065        handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
13066        return;
13067    }
13068    break;
13069    case 0x2c: /* FCMGT (zero) */
13070    case 0x2d: /* FCMEQ (zero) */
13071    case 0x2e: /* FCMLT (zero) */
13072    case 0x6c: /* FCMGE (zero) */
13073    case 0x6d: /* FCMLE (zero) */
13074        handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
13075        return;
13076    case 0x3d: /* FRECPE */
13077    case 0x3f: /* FRECPX */
13078        break;
13079    case 0x18: /* FRINTN */
13080        need_rmode = true;
13081        only_in_vector = true;
13082        rmode = FPROUNDING_TIEEVEN;
13083        break;
13084    case 0x19: /* FRINTM */
13085        need_rmode = true;
13086        only_in_vector = true;
13087        rmode = FPROUNDING_NEGINF;
13088        break;
13089    case 0x38: /* FRINTP */
13090        need_rmode = true;
13091        only_in_vector = true;
13092        rmode = FPROUNDING_POSINF;
13093        break;
13094    case 0x39: /* FRINTZ */
13095        need_rmode = true;
13096        only_in_vector = true;
13097        rmode = FPROUNDING_ZERO;
13098        break;
13099    case 0x58: /* FRINTA */
13100        need_rmode = true;
13101        only_in_vector = true;
13102        rmode = FPROUNDING_TIEAWAY;
13103        break;
13104    case 0x59: /* FRINTX */
13105    case 0x79: /* FRINTI */
13106        only_in_vector = true;
13107        /* current rounding mode */
13108        break;
13109    case 0x1a: /* FCVTNS */
13110        need_rmode = true;
13111        rmode = FPROUNDING_TIEEVEN;
13112        break;
13113    case 0x1b: /* FCVTMS */
13114        need_rmode = true;
13115        rmode = FPROUNDING_NEGINF;
13116        break;
13117    case 0x1c: /* FCVTAS */
13118        need_rmode = true;
13119        rmode = FPROUNDING_TIEAWAY;
13120        break;
13121    case 0x3a: /* FCVTPS */
13122        need_rmode = true;
13123        rmode = FPROUNDING_POSINF;
13124        break;
13125    case 0x3b: /* FCVTZS */
13126        need_rmode = true;
13127        rmode = FPROUNDING_ZERO;
13128        break;
13129    case 0x5a: /* FCVTNU */
13130        need_rmode = true;
13131        rmode = FPROUNDING_TIEEVEN;
13132        break;
13133    case 0x5b: /* FCVTMU */
13134        need_rmode = true;
13135        rmode = FPROUNDING_NEGINF;
13136        break;
13137    case 0x5c: /* FCVTAU */
13138        need_rmode = true;
13139        rmode = FPROUNDING_TIEAWAY;
13140        break;
13141    case 0x7a: /* FCVTPU */
13142        need_rmode = true;
13143        rmode = FPROUNDING_POSINF;
13144        break;
13145    case 0x7b: /* FCVTZU */
13146        need_rmode = true;
13147        rmode = FPROUNDING_ZERO;
13148        break;
13149    case 0x2f: /* FABS */
13150    case 0x6f: /* FNEG */
13151        need_fpst = false;
13152        break;
13153    case 0x7d: /* FRSQRTE */
13154    case 0x7f: /* FSQRT (vector) */
13155        break;
13156    default:
13157        unallocated_encoding(s);
13158        return;
13159    }
13160
13161
13162    /* Check additional constraints for the scalar encoding */
13163    if (is_scalar) {
13164        if (!is_q) {
13165            unallocated_encoding(s);
13166            return;
13167        }
13168        /* FRINTxx is only in the vector form */
13169        if (only_in_vector) {
13170            unallocated_encoding(s);
13171            return;
13172        }
13173    }
13174
13175    if (!fp_access_check(s)) {
13176        return;
13177    }
13178
13179    if (need_rmode || need_fpst) {
13180        tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16);
13181    }
13182
13183    if (need_rmode) {
13184        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
13185        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
13186    }
13187
13188    if (is_scalar) {
13189        TCGv_i32 tcg_op = read_fp_hreg(s, rn);
13190        TCGv_i32 tcg_res = tcg_temp_new_i32();
13191
13192        switch (fpop) {
13193        case 0x1a: /* FCVTNS */
13194        case 0x1b: /* FCVTMS */
13195        case 0x1c: /* FCVTAS */
13196        case 0x3a: /* FCVTPS */
13197        case 0x3b: /* FCVTZS */
13198            gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
13199            break;
13200        case 0x3d: /* FRECPE */
13201            gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
13202            break;
13203        case 0x3f: /* FRECPX */
13204            gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
13205            break;
13206        case 0x5a: /* FCVTNU */
13207        case 0x5b: /* FCVTMU */
13208        case 0x5c: /* FCVTAU */
13209        case 0x7a: /* FCVTPU */
13210        case 0x7b: /* FCVTZU */
13211            gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
13212            break;
13213        case 0x6f: /* FNEG */
13214            tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
13215            break;
13216        case 0x7d: /* FRSQRTE */
13217            gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
13218            break;
13219        default:
13220            g_assert_not_reached();
13221        }
13222
13223        /* limit any sign extension going on */
13224        tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
13225        write_fp_sreg(s, rd, tcg_res);
13226
13227        tcg_temp_free_i32(tcg_res);
13228        tcg_temp_free_i32(tcg_op);
13229    } else {
13230        for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
13231            TCGv_i32 tcg_op = tcg_temp_new_i32();
13232            TCGv_i32 tcg_res = tcg_temp_new_i32();
13233
13234            read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
13235
13236            switch (fpop) {
13237            case 0x1a: /* FCVTNS */
13238            case 0x1b: /* FCVTMS */
13239            case 0x1c: /* FCVTAS */
13240            case 0x3a: /* FCVTPS */
13241            case 0x3b: /* FCVTZS */
13242                gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
13243                break;
13244            case 0x3d: /* FRECPE */
13245                gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
13246                break;
13247            case 0x5a: /* FCVTNU */
13248            case 0x5b: /* FCVTMU */
13249            case 0x5c: /* FCVTAU */
13250            case 0x7a: /* FCVTPU */
13251            case 0x7b: /* FCVTZU */
13252                gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
13253                break;
13254            case 0x18: /* FRINTN */
13255            case 0x19: /* FRINTM */
13256            case 0x38: /* FRINTP */
13257            case 0x39: /* FRINTZ */
13258            case 0x58: /* FRINTA */
13259            case 0x79: /* FRINTI */
13260                gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
13261                break;
13262            case 0x59: /* FRINTX */
13263                gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
13264                break;
13265            case 0x2f: /* FABS */
13266                tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
13267                break;
13268            case 0x6f: /* FNEG */
13269                tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
13270                break;
13271            case 0x7d: /* FRSQRTE */
13272                gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
13273                break;
13274            case 0x7f: /* FSQRT */
13275                gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
13276                break;
13277            default:
13278                g_assert_not_reached();
13279            }
13280
13281            write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
13282
13283            tcg_temp_free_i32(tcg_res);
13284            tcg_temp_free_i32(tcg_op);
13285        }
13286
13287        clear_vec_high(s, is_q, rd);
13288    }
13289
13290    if (tcg_rmode) {
13291        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
13292        tcg_temp_free_i32(tcg_rmode);
13293    }
13294
13295    if (tcg_fpstatus) {
13296        tcg_temp_free_ptr(tcg_fpstatus);
13297    }
13298}
13299
13300/* AdvSIMD scalar x indexed element
13301 *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
13302 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
13303 * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
13304 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
13305 * AdvSIMD vector x indexed element
13306 *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
13307 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
13308 * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
13309 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
13310 */
13311static void disas_simd_indexed(DisasContext *s, uint32_t insn)
13312{
13313    /* This encoding has two kinds of instruction:
13314     *  normal, where we perform elt x idxelt => elt for each
13315     *     element in the vector
13316     *  long, where we perform elt x idxelt and generate a result of
13317     *     double the width of the input element
13318     * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
13319     */
13320    bool is_scalar = extract32(insn, 28, 1);
13321    bool is_q = extract32(insn, 30, 1);
13322    bool u = extract32(insn, 29, 1);
13323    int size = extract32(insn, 22, 2);
13324    int l = extract32(insn, 21, 1);
13325    int m = extract32(insn, 20, 1);
13326    /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
13327    int rm = extract32(insn, 16, 4);
13328    int opcode = extract32(insn, 12, 4);
13329    int h = extract32(insn, 11, 1);
13330    int rn = extract32(insn, 5, 5);
13331    int rd = extract32(insn, 0, 5);
13332    bool is_long = false;
13333    int is_fp = 0;
13334    bool is_fp16 = false;
13335    int index;
13336    TCGv_ptr fpst;
13337
13338    switch (16 * u + opcode) {
13339    case 0x08: /* MUL */
13340    case 0x10: /* MLA */
13341    case 0x14: /* MLS */
13342        if (is_scalar) {
13343            unallocated_encoding(s);
13344            return;
13345        }
13346        break;
13347    case 0x02: /* SMLAL, SMLAL2 */
13348    case 0x12: /* UMLAL, UMLAL2 */
13349    case 0x06: /* SMLSL, SMLSL2 */
13350    case 0x16: /* UMLSL, UMLSL2 */
13351    case 0x0a: /* SMULL, SMULL2 */
13352    case 0x1a: /* UMULL, UMULL2 */
13353        if (is_scalar) {
13354            unallocated_encoding(s);
13355            return;
13356        }
13357        is_long = true;
13358        break;
13359    case 0x03: /* SQDMLAL, SQDMLAL2 */
13360    case 0x07: /* SQDMLSL, SQDMLSL2 */
13361    case 0x0b: /* SQDMULL, SQDMULL2 */
13362        is_long = true;
13363        break;
13364    case 0x0c: /* SQDMULH */
13365    case 0x0d: /* SQRDMULH */
13366        break;
13367    case 0x01: /* FMLA */
13368    case 0x05: /* FMLS */
13369    case 0x09: /* FMUL */
13370    case 0x19: /* FMULX */
13371        is_fp = 1;
13372        break;
13373    case 0x1d: /* SQRDMLAH */
13374    case 0x1f: /* SQRDMLSH */
13375        if (!dc_isar_feature(aa64_rdm, s)) {
13376            unallocated_encoding(s);
13377            return;
13378        }
13379        break;
13380    case 0x0e: /* SDOT */
13381    case 0x1e: /* UDOT */
13382        if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
13383            unallocated_encoding(s);
13384            return;
13385        }
13386        break;
13387    case 0x0f:
13388        switch (size) {
13389        case 0: /* SUDOT */
13390        case 2: /* USDOT */
13391            if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) {
13392                unallocated_encoding(s);
13393                return;
13394            }
13395            size = MO_32;
13396            break;
13397        case 1: /* BFDOT */
13398            if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
13399                unallocated_encoding(s);
13400                return;
13401            }
13402            size = MO_32;
13403            break;
13404        case 3: /* BFMLAL{B,T} */
13405            if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
13406                unallocated_encoding(s);
13407                return;
13408            }
13409            /* can't set is_fp without other incorrect size checks */
13410            size = MO_16;
13411            break;
13412        default:
13413            unallocated_encoding(s);
13414            return;
13415        }
13416        break;
13417    case 0x11: /* FCMLA #0 */
13418    case 0x13: /* FCMLA #90 */
13419    case 0x15: /* FCMLA #180 */
13420    case 0x17: /* FCMLA #270 */
13421        if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
13422            unallocated_encoding(s);
13423            return;
13424        }
13425        is_fp = 2;
13426        break;
13427    case 0x00: /* FMLAL */
13428    case 0x04: /* FMLSL */
13429    case 0x18: /* FMLAL2 */
13430    case 0x1c: /* FMLSL2 */
13431        if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
13432            unallocated_encoding(s);
13433            return;
13434        }
13435        size = MO_16;
13436        /* is_fp, but we pass cpu_env not fp_status.  */
13437        break;
13438    default:
13439        unallocated_encoding(s);
13440        return;
13441    }
13442
13443    switch (is_fp) {
13444    case 1: /* normal fp */
13445        /* convert insn encoded size to MemOp size */
13446        switch (size) {
13447        case 0: /* half-precision */
13448            size = MO_16;
13449            is_fp16 = true;
13450            break;
13451        case MO_32: /* single precision */
13452        case MO_64: /* double precision */
13453            break;
13454        default:
13455            unallocated_encoding(s);
13456            return;
13457        }
13458        break;
13459
13460    case 2: /* complex fp */
13461        /* Each indexable element is a complex pair.  */
13462        size += 1;
13463        switch (size) {
13464        case MO_32:
13465            if (h && !is_q) {
13466                unallocated_encoding(s);
13467                return;
13468            }
13469            is_fp16 = true;
13470            break;
13471        case MO_64:
13472            break;
13473        default:
13474            unallocated_encoding(s);
13475            return;
13476        }
13477        break;
13478
13479    default: /* integer */
13480        switch (size) {
13481        case MO_8:
13482        case MO_64:
13483            unallocated_encoding(s);
13484            return;
13485        }
13486        break;
13487    }
13488    if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
13489        unallocated_encoding(s);
13490        return;
13491    }
13492
13493    /* Given MemOp size, adjust register and indexing.  */
13494    switch (size) {
13495    case MO_16:
13496        index = h << 2 | l << 1 | m;
13497        break;
13498    case MO_32:
13499        index = h << 1 | l;
13500        rm |= m << 4;
13501        break;
13502    case MO_64:
13503        if (l || !is_q) {
13504            unallocated_encoding(s);
13505            return;
13506        }
13507        index = h;
13508        rm |= m << 4;
13509        break;
13510    default:
13511        g_assert_not_reached();
13512    }
13513
13514    if (!fp_access_check(s)) {
13515        return;
13516    }
13517
13518    if (is_fp) {
13519        fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
13520    } else {
13521        fpst = NULL;
13522    }
13523
13524    switch (16 * u + opcode) {
13525    case 0x0e: /* SDOT */
13526    case 0x1e: /* UDOT */
13527        gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
13528                         u ? gen_helper_gvec_udot_idx_b
13529                         : gen_helper_gvec_sdot_idx_b);
13530        return;
13531    case 0x0f:
13532        switch (extract32(insn, 22, 2)) {
13533        case 0: /* SUDOT */
13534            gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
13535                             gen_helper_gvec_sudot_idx_b);
13536            return;
13537        case 1: /* BFDOT */
13538            gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
13539                             gen_helper_gvec_bfdot_idx);
13540            return;
13541        case 2: /* USDOT */
13542            gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
13543                             gen_helper_gvec_usdot_idx_b);
13544            return;
13545        case 3: /* BFMLAL{B,T} */
13546            gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
13547                              gen_helper_gvec_bfmlal_idx);
13548            return;
13549        }
13550        g_assert_not_reached();
13551    case 0x11: /* FCMLA #0 */
13552    case 0x13: /* FCMLA #90 */
13553    case 0x15: /* FCMLA #180 */
13554    case 0x17: /* FCMLA #270 */
13555        {
13556            int rot = extract32(insn, 13, 2);
13557            int data = (index << 2) | rot;
13558            tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
13559                               vec_full_reg_offset(s, rn),
13560                               vec_full_reg_offset(s, rm),
13561                               vec_full_reg_offset(s, rd), fpst,
13562                               is_q ? 16 : 8, vec_full_reg_size(s), data,
13563                               size == MO_64
13564                               ? gen_helper_gvec_fcmlas_idx
13565                               : gen_helper_gvec_fcmlah_idx);
13566            tcg_temp_free_ptr(fpst);
13567        }
13568        return;
13569
13570    case 0x00: /* FMLAL */
13571    case 0x04: /* FMLSL */
13572    case 0x18: /* FMLAL2 */
13573    case 0x1c: /* FMLSL2 */
13574        {
13575            int is_s = extract32(opcode, 2, 1);
13576            int is_2 = u;
13577            int data = (index << 2) | (is_2 << 1) | is_s;
13578            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
13579                               vec_full_reg_offset(s, rn),
13580                               vec_full_reg_offset(s, rm), cpu_env,
13581                               is_q ? 16 : 8, vec_full_reg_size(s),
13582                               data, gen_helper_gvec_fmlal_idx_a64);
13583        }
13584        return;
13585
13586    case 0x08: /* MUL */
13587        if (!is_long && !is_scalar) {
13588            static gen_helper_gvec_3 * const fns[3] = {
13589                gen_helper_gvec_mul_idx_h,
13590                gen_helper_gvec_mul_idx_s,
13591                gen_helper_gvec_mul_idx_d,
13592            };
13593            tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
13594                               vec_full_reg_offset(s, rn),
13595                               vec_full_reg_offset(s, rm),
13596                               is_q ? 16 : 8, vec_full_reg_size(s),
13597                               index, fns[size - 1]);
13598            return;
13599        }
13600        break;
13601
13602    case 0x10: /* MLA */
13603        if (!is_long && !is_scalar) {
13604            static gen_helper_gvec_4 * const fns[3] = {
13605                gen_helper_gvec_mla_idx_h,
13606                gen_helper_gvec_mla_idx_s,
13607                gen_helper_gvec_mla_idx_d,
13608            };
13609            tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13610                               vec_full_reg_offset(s, rn),
13611                               vec_full_reg_offset(s, rm),
13612                               vec_full_reg_offset(s, rd),
13613                               is_q ? 16 : 8, vec_full_reg_size(s),
13614                               index, fns[size - 1]);
13615            return;
13616        }
13617        break;
13618
13619    case 0x14: /* MLS */
13620        if (!is_long && !is_scalar) {
13621            static gen_helper_gvec_4 * const fns[3] = {
13622                gen_helper_gvec_mls_idx_h,
13623                gen_helper_gvec_mls_idx_s,
13624                gen_helper_gvec_mls_idx_d,
13625            };
13626            tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
13627                               vec_full_reg_offset(s, rn),
13628                               vec_full_reg_offset(s, rm),
13629                               vec_full_reg_offset(s, rd),
13630                               is_q ? 16 : 8, vec_full_reg_size(s),
13631                               index, fns[size - 1]);
13632            return;
13633        }
13634        break;
13635    }
13636
13637    if (size == 3) {
13638        TCGv_i64 tcg_idx = tcg_temp_new_i64();
13639        int pass;
13640
13641        assert(is_fp && is_q && !is_long);
13642
13643        read_vec_element(s, tcg_idx, rm, index, MO_64);
13644
13645        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13646            TCGv_i64 tcg_op = tcg_temp_new_i64();
13647            TCGv_i64 tcg_res = tcg_temp_new_i64();
13648
13649            read_vec_element(s, tcg_op, rn, pass, MO_64);
13650
13651            switch (16 * u + opcode) {
13652            case 0x05: /* FMLS */
13653                /* As usual for ARM, separate negation for fused multiply-add */
13654                gen_helper_vfp_negd(tcg_op, tcg_op);
13655                /* fall through */
13656            case 0x01: /* FMLA */
13657                read_vec_element(s, tcg_res, rd, pass, MO_64);
13658                gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
13659                break;
13660            case 0x09: /* FMUL */
13661                gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
13662                break;
13663            case 0x19: /* FMULX */
13664                gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
13665                break;
13666            default:
13667                g_assert_not_reached();
13668            }
13669
13670            write_vec_element(s, tcg_res, rd, pass, MO_64);
13671            tcg_temp_free_i64(tcg_op);
13672            tcg_temp_free_i64(tcg_res);
13673        }
13674
13675        tcg_temp_free_i64(tcg_idx);
13676        clear_vec_high(s, !is_scalar, rd);
13677    } else if (!is_long) {
13678        /* 32 bit floating point, or 16 or 32 bit integer.
13679         * For the 16 bit scalar case we use the usual Neon helpers and
13680         * rely on the fact that 0 op 0 == 0 with no side effects.
13681         */
13682        TCGv_i32 tcg_idx = tcg_temp_new_i32();
13683        int pass, maxpasses;
13684
13685        if (is_scalar) {
13686            maxpasses = 1;
13687        } else {
13688            maxpasses = is_q ? 4 : 2;
13689        }
13690
13691        read_vec_element_i32(s, tcg_idx, rm, index, size);
13692
13693        if (size == 1 && !is_scalar) {
13694            /* The simplest way to handle the 16x16 indexed ops is to duplicate
13695             * the index into both halves of the 32 bit tcg_idx and then use
13696             * the usual Neon helpers.
13697             */
13698            tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13699        }
13700
13701        for (pass = 0; pass < maxpasses; pass++) {
13702            TCGv_i32 tcg_op = tcg_temp_new_i32();
13703            TCGv_i32 tcg_res = tcg_temp_new_i32();
13704
13705            read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
13706
13707            switch (16 * u + opcode) {
13708            case 0x08: /* MUL */
13709            case 0x10: /* MLA */
13710            case 0x14: /* MLS */
13711            {
13712                static NeonGenTwoOpFn * const fns[2][2] = {
13713                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
13714                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
13715                };
13716                NeonGenTwoOpFn *genfn;
13717                bool is_sub = opcode == 0x4;
13718
13719                if (size == 1) {
13720                    gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
13721                } else {
13722                    tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
13723                }
13724                if (opcode == 0x8) {
13725                    break;
13726                }
13727                read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
13728                genfn = fns[size - 1][is_sub];
13729                genfn(tcg_res, tcg_op, tcg_res);
13730                break;
13731            }
13732            case 0x05: /* FMLS */
13733            case 0x01: /* FMLA */
13734                read_vec_element_i32(s, tcg_res, rd, pass,
13735                                     is_scalar ? size : MO_32);
13736                switch (size) {
13737                case 1:
13738                    if (opcode == 0x5) {
13739                        /* As usual for ARM, separate negation for fused
13740                         * multiply-add */
13741                        tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
13742                    }
13743                    if (is_scalar) {
13744                        gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
13745                                                   tcg_res, fpst);
13746                    } else {
13747                        gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
13748                                                    tcg_res, fpst);
13749                    }
13750                    break;
13751                case 2:
13752                    if (opcode == 0x5) {
13753                        /* As usual for ARM, separate negation for
13754                         * fused multiply-add */
13755                        tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
13756                    }
13757                    gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
13758                                           tcg_res, fpst);
13759                    break;
13760                default:
13761                    g_assert_not_reached();
13762                }
13763                break;
13764            case 0x09: /* FMUL */
13765                switch (size) {
13766                case 1:
13767                    if (is_scalar) {
13768                        gen_helper_advsimd_mulh(tcg_res, tcg_op,
13769                                                tcg_idx, fpst);
13770                    } else {
13771                        gen_helper_advsimd_mul2h(tcg_res, tcg_op,
13772                                                 tcg_idx, fpst);
13773                    }
13774                    break;
13775                case 2:
13776                    gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
13777                    break;
13778                default:
13779                    g_assert_not_reached();
13780                }
13781                break;
13782            case 0x19: /* FMULX */
13783                switch (size) {
13784                case 1:
13785                    if (is_scalar) {
13786                        gen_helper_advsimd_mulxh(tcg_res, tcg_op,
13787                                                 tcg_idx, fpst);
13788                    } else {
13789                        gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
13790                                                  tcg_idx, fpst);
13791                    }
13792                    break;
13793                case 2:
13794                    gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
13795                    break;
13796                default:
13797                    g_assert_not_reached();
13798                }
13799                break;
13800            case 0x0c: /* SQDMULH */
13801                if (size == 1) {
13802                    gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
13803                                               tcg_op, tcg_idx);
13804                } else {
13805                    gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
13806                                               tcg_op, tcg_idx);
13807                }
13808                break;
13809            case 0x0d: /* SQRDMULH */
13810                if (size == 1) {
13811                    gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
13812                                                tcg_op, tcg_idx);
13813                } else {
13814                    gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
13815                                                tcg_op, tcg_idx);
13816                }
13817                break;
13818            case 0x1d: /* SQRDMLAH */
13819                read_vec_element_i32(s, tcg_res, rd, pass,
13820                                     is_scalar ? size : MO_32);
13821                if (size == 1) {
13822                    gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
13823                                                tcg_op, tcg_idx, tcg_res);
13824                } else {
13825                    gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
13826                                                tcg_op, tcg_idx, tcg_res);
13827                }
13828                break;
13829            case 0x1f: /* SQRDMLSH */
13830                read_vec_element_i32(s, tcg_res, rd, pass,
13831                                     is_scalar ? size : MO_32);
13832                if (size == 1) {
13833                    gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
13834                                                tcg_op, tcg_idx, tcg_res);
13835                } else {
13836                    gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
13837                                                tcg_op, tcg_idx, tcg_res);
13838                }
13839                break;
13840            default:
13841                g_assert_not_reached();
13842            }
13843
13844            if (is_scalar) {
13845                write_fp_sreg(s, rd, tcg_res);
13846            } else {
13847                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13848            }
13849
13850            tcg_temp_free_i32(tcg_op);
13851            tcg_temp_free_i32(tcg_res);
13852        }
13853
13854        tcg_temp_free_i32(tcg_idx);
13855        clear_vec_high(s, is_q, rd);
13856    } else {
13857        /* long ops: 16x16->32 or 32x32->64 */
13858        TCGv_i64 tcg_res[2];
13859        int pass;
13860        bool satop = extract32(opcode, 0, 1);
13861        MemOp memop = MO_32;
13862
13863        if (satop || !u) {
13864            memop |= MO_SIGN;
13865        }
13866
13867        if (size == 2) {
13868            TCGv_i64 tcg_idx = tcg_temp_new_i64();
13869
13870            read_vec_element(s, tcg_idx, rm, index, memop);
13871
13872            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13873                TCGv_i64 tcg_op = tcg_temp_new_i64();
13874                TCGv_i64 tcg_passres;
13875                int passelt;
13876
13877                if (is_scalar) {
13878                    passelt = 0;
13879                } else {
13880                    passelt = pass + (is_q * 2);
13881                }
13882
13883                read_vec_element(s, tcg_op, rn, passelt, memop);
13884
13885                tcg_res[pass] = tcg_temp_new_i64();
13886
13887                if (opcode == 0xa || opcode == 0xb) {
13888                    /* Non-accumulating ops */
13889                    tcg_passres = tcg_res[pass];
13890                } else {
13891                    tcg_passres = tcg_temp_new_i64();
13892                }
13893
13894                tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13895                tcg_temp_free_i64(tcg_op);
13896
13897                if (satop) {
13898                    /* saturating, doubling */
13899                    gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
13900                                                      tcg_passres, tcg_passres);
13901                }
13902
13903                if (opcode == 0xa || opcode == 0xb) {
13904                    continue;
13905                }
13906
13907                /* Accumulating op: handle accumulate step */
13908                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13909
13910                switch (opcode) {
13911                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13912                    tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13913                    break;
13914                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13915                    tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13916                    break;
13917                case 0x7: /* SQDMLSL, SQDMLSL2 */
13918                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
13919                    /* fall through */
13920                case 0x3: /* SQDMLAL, SQDMLAL2 */
13921                    gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
13922                                                      tcg_res[pass],
13923                                                      tcg_passres);
13924                    break;
13925                default:
13926                    g_assert_not_reached();
13927                }
13928                tcg_temp_free_i64(tcg_passres);
13929            }
13930            tcg_temp_free_i64(tcg_idx);
13931
13932            clear_vec_high(s, !is_scalar, rd);
13933        } else {
13934            TCGv_i32 tcg_idx = tcg_temp_new_i32();
13935
13936            assert(size == 1);
13937            read_vec_element_i32(s, tcg_idx, rm, index, size);
13938
13939            if (!is_scalar) {
13940                /* The simplest way to handle the 16x16 indexed ops is to
13941                 * duplicate the index into both halves of the 32 bit tcg_idx
13942                 * and then use the usual Neon helpers.
13943                 */
13944                tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13945            }
13946
13947            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13948                TCGv_i32 tcg_op = tcg_temp_new_i32();
13949                TCGv_i64 tcg_passres;
13950
13951                if (is_scalar) {
13952                    read_vec_element_i32(s, tcg_op, rn, pass, size);
13953                } else {
13954                    read_vec_element_i32(s, tcg_op, rn,
13955                                         pass + (is_q * 2), MO_32);
13956                }
13957
13958                tcg_res[pass] = tcg_temp_new_i64();
13959
13960                if (opcode == 0xa || opcode == 0xb) {
13961                    /* Non-accumulating ops */
13962                    tcg_passres = tcg_res[pass];
13963                } else {
13964                    tcg_passres = tcg_temp_new_i64();
13965                }
13966
13967                if (memop & MO_SIGN) {
13968                    gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13969                } else {
13970                    gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13971                }
13972                if (satop) {
13973                    gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
13974                                                      tcg_passres, tcg_passres);
13975                }
13976                tcg_temp_free_i32(tcg_op);
13977
13978                if (opcode == 0xa || opcode == 0xb) {
13979                    continue;
13980                }
13981
13982                /* Accumulating op: handle accumulate step */
13983                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13984
13985                switch (opcode) {
13986                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13987                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13988                                             tcg_passres);
13989                    break;
13990                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13991                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13992                                             tcg_passres);
13993                    break;
13994                case 0x7: /* SQDMLSL, SQDMLSL2 */
13995                    gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13996                    /* fall through */
13997                case 0x3: /* SQDMLAL, SQDMLAL2 */
13998                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
13999                                                      tcg_res[pass],
14000                                                      tcg_passres);
14001                    break;
14002                default:
14003                    g_assert_not_reached();
14004                }
14005                tcg_temp_free_i64(tcg_passres);
14006            }
14007            tcg_temp_free_i32(tcg_idx);
14008
14009            if (is_scalar) {
14010                tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
14011            }
14012        }
14013
14014        if (is_scalar) {
14015            tcg_res[1] = tcg_const_i64(0);
14016        }
14017
14018        for (pass = 0; pass < 2; pass++) {
14019            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
14020            tcg_temp_free_i64(tcg_res[pass]);
14021        }
14022    }
14023
14024    if (fpst) {
14025        tcg_temp_free_ptr(fpst);
14026    }
14027}
14028
14029/* Crypto AES
14030 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
14031 * +-----------------+------+-----------+--------+-----+------+------+
14032 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
14033 * +-----------------+------+-----------+--------+-----+------+------+
14034 */
14035static void disas_crypto_aes(DisasContext *s, uint32_t insn)
14036{
14037    int size = extract32(insn, 22, 2);
14038    int opcode = extract32(insn, 12, 5);
14039    int rn = extract32(insn, 5, 5);
14040    int rd = extract32(insn, 0, 5);
14041    int decrypt;
14042    gen_helper_gvec_2 *genfn2 = NULL;
14043    gen_helper_gvec_3 *genfn3 = NULL;
14044
14045    if (!dc_isar_feature(aa64_aes, s) || size != 0) {
14046        unallocated_encoding(s);
14047        return;
14048    }
14049
14050    switch (opcode) {
14051    case 0x4: /* AESE */
14052        decrypt = 0;
14053        genfn3 = gen_helper_crypto_aese;
14054        break;
14055    case 0x6: /* AESMC */
14056        decrypt = 0;
14057        genfn2 = gen_helper_crypto_aesmc;
14058        break;
14059    case 0x5: /* AESD */
14060        decrypt = 1;
14061        genfn3 = gen_helper_crypto_aese;
14062        break;
14063    case 0x7: /* AESIMC */
14064        decrypt = 1;
14065        genfn2 = gen_helper_crypto_aesmc;
14066        break;
14067    default:
14068        unallocated_encoding(s);
14069        return;
14070    }
14071
14072    if (!fp_access_check(s)) {
14073        return;
14074    }
14075    if (genfn2) {
14076        gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2);
14077    } else {
14078        gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3);
14079    }
14080}
14081
14082/* Crypto three-reg SHA
14083 *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
14084 * +-----------------+------+---+------+---+--------+-----+------+------+
14085 * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
14086 * +-----------------+------+---+------+---+--------+-----+------+------+
14087 */
14088static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
14089{
14090    int size = extract32(insn, 22, 2);
14091    int opcode = extract32(insn, 12, 3);
14092    int rm = extract32(insn, 16, 5);
14093    int rn = extract32(insn, 5, 5);
14094    int rd = extract32(insn, 0, 5);
14095    gen_helper_gvec_3 *genfn;
14096    bool feature;
14097
14098    if (size != 0) {
14099        unallocated_encoding(s);
14100        return;
14101    }
14102
14103    switch (opcode) {
14104    case 0: /* SHA1C */
14105        genfn = gen_helper_crypto_sha1c;
14106        feature = dc_isar_feature(aa64_sha1, s);
14107        break;
14108    case 1: /* SHA1P */
14109        genfn = gen_helper_crypto_sha1p;
14110        feature = dc_isar_feature(aa64_sha1, s);
14111        break;
14112    case 2: /* SHA1M */
14113        genfn = gen_helper_crypto_sha1m;
14114        feature = dc_isar_feature(aa64_sha1, s);
14115        break;
14116    case 3: /* SHA1SU0 */
14117        genfn = gen_helper_crypto_sha1su0;
14118        feature = dc_isar_feature(aa64_sha1, s);
14119        break;
14120    case 4: /* SHA256H */
14121        genfn = gen_helper_crypto_sha256h;
14122        feature = dc_isar_feature(aa64_sha256, s);
14123        break;
14124    case 5: /* SHA256H2 */
14125        genfn = gen_helper_crypto_sha256h2;
14126        feature = dc_isar_feature(aa64_sha256, s);
14127        break;
14128    case 6: /* SHA256SU1 */
14129        genfn = gen_helper_crypto_sha256su1;
14130        feature = dc_isar_feature(aa64_sha256, s);
14131        break;
14132    default:
14133        unallocated_encoding(s);
14134        return;
14135    }
14136
14137    if (!feature) {
14138        unallocated_encoding(s);
14139        return;
14140    }
14141
14142    if (!fp_access_check(s)) {
14143        return;
14144    }
14145    gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
14146}
14147
14148/* Crypto two-reg SHA
14149 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
14150 * +-----------------+------+-----------+--------+-----+------+------+
14151 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
14152 * +-----------------+------+-----------+--------+-----+------+------+
14153 */
14154static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
14155{
14156    int size = extract32(insn, 22, 2);
14157    int opcode = extract32(insn, 12, 5);
14158    int rn = extract32(insn, 5, 5);
14159    int rd = extract32(insn, 0, 5);
14160    gen_helper_gvec_2 *genfn;
14161    bool feature;
14162
14163    if (size != 0) {
14164        unallocated_encoding(s);
14165        return;
14166    }
14167
14168    switch (opcode) {
14169    case 0: /* SHA1H */
14170        feature = dc_isar_feature(aa64_sha1, s);
14171        genfn = gen_helper_crypto_sha1h;
14172        break;
14173    case 1: /* SHA1SU1 */
14174        feature = dc_isar_feature(aa64_sha1, s);
14175        genfn = gen_helper_crypto_sha1su1;
14176        break;
14177    case 2: /* SHA256SU0 */
14178        feature = dc_isar_feature(aa64_sha256, s);
14179        genfn = gen_helper_crypto_sha256su0;
14180        break;
14181    default:
14182        unallocated_encoding(s);
14183        return;
14184    }
14185
14186    if (!feature) {
14187        unallocated_encoding(s);
14188        return;
14189    }
14190
14191    if (!fp_access_check(s)) {
14192        return;
14193    }
14194    gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
14195}
14196
14197static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
14198{
14199    tcg_gen_rotli_i64(d, m, 1);
14200    tcg_gen_xor_i64(d, d, n);
14201}
14202
14203static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
14204{
14205    tcg_gen_rotli_vec(vece, d, m, 1);
14206    tcg_gen_xor_vec(vece, d, d, n);
14207}
14208
14209void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
14210                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
14211{
14212    static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
14213    static const GVecGen3 op = {
14214        .fni8 = gen_rax1_i64,
14215        .fniv = gen_rax1_vec,
14216        .opt_opc = vecop_list,
14217        .fno = gen_helper_crypto_rax1,
14218        .vece = MO_64,
14219    };
14220    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
14221}
14222
14223/* Crypto three-reg SHA512
14224 *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
14225 * +-----------------------+------+---+---+-----+--------+------+------+
14226 * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
14227 * +-----------------------+------+---+---+-----+--------+------+------+
14228 */
14229static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
14230{
14231    int opcode = extract32(insn, 10, 2);
14232    int o =  extract32(insn, 14, 1);
14233    int rm = extract32(insn, 16, 5);
14234    int rn = extract32(insn, 5, 5);
14235    int rd = extract32(insn, 0, 5);
14236    bool feature;
14237    gen_helper_gvec_3 *oolfn = NULL;
14238    GVecGen3Fn *gvecfn = NULL;
14239
14240    if (o == 0) {
14241        switch (opcode) {
14242        case 0: /* SHA512H */
14243            feature = dc_isar_feature(aa64_sha512, s);
14244            oolfn = gen_helper_crypto_sha512h;
14245            break;
14246        case 1: /* SHA512H2 */
14247            feature = dc_isar_feature(aa64_sha512, s);
14248            oolfn = gen_helper_crypto_sha512h2;
14249            break;
14250        case 2: /* SHA512SU1 */
14251            feature = dc_isar_feature(aa64_sha512, s);
14252            oolfn = gen_helper_crypto_sha512su1;
14253            break;
14254        case 3: /* RAX1 */
14255            feature = dc_isar_feature(aa64_sha3, s);
14256            gvecfn = gen_gvec_rax1;
14257            break;
14258        default:
14259            g_assert_not_reached();
14260        }
14261    } else {
14262        switch (opcode) {
14263        case 0: /* SM3PARTW1 */
14264            feature = dc_isar_feature(aa64_sm3, s);
14265            oolfn = gen_helper_crypto_sm3partw1;
14266            break;
14267        case 1: /* SM3PARTW2 */
14268            feature = dc_isar_feature(aa64_sm3, s);
14269            oolfn = gen_helper_crypto_sm3partw2;
14270            break;
14271        case 2: /* SM4EKEY */
14272            feature = dc_isar_feature(aa64_sm4, s);
14273            oolfn = gen_helper_crypto_sm4ekey;
14274            break;
14275        default:
14276            unallocated_encoding(s);
14277            return;
14278        }
14279    }
14280
14281    if (!feature) {
14282        unallocated_encoding(s);
14283        return;
14284    }
14285
14286    if (!fp_access_check(s)) {
14287        return;
14288    }
14289
14290    if (oolfn) {
14291        gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
14292    } else {
14293        gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
14294    }
14295}
14296
14297/* Crypto two-reg SHA512
14298 *  31                                     12  11  10  9    5 4    0
14299 * +-----------------------------------------+--------+------+------+
14300 * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
14301 * +-----------------------------------------+--------+------+------+
14302 */
14303static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
14304{
14305    int opcode = extract32(insn, 10, 2);
14306    int rn = extract32(insn, 5, 5);
14307    int rd = extract32(insn, 0, 5);
14308    bool feature;
14309
14310    switch (opcode) {
14311    case 0: /* SHA512SU0 */
14312        feature = dc_isar_feature(aa64_sha512, s);
14313        break;
14314    case 1: /* SM4E */
14315        feature = dc_isar_feature(aa64_sm4, s);
14316        break;
14317    default:
14318        unallocated_encoding(s);
14319        return;
14320    }
14321
14322    if (!feature) {
14323        unallocated_encoding(s);
14324        return;
14325    }
14326
14327    if (!fp_access_check(s)) {
14328        return;
14329    }
14330
14331    switch (opcode) {
14332    case 0: /* SHA512SU0 */
14333        gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
14334        break;
14335    case 1: /* SM4E */
14336        gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
14337        break;
14338    default:
14339        g_assert_not_reached();
14340    }
14341}
14342
/* Crypto four-register
 *  31               23 22 21 20  16 15  14  10 9    5 4    0
 * +-------------------+-----+------+---+------+------+------+
 * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
 * +-------------------+-----+------+---+------+------+------+
 */
static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
{
    int op0 = extract32(insn, 21, 2);
    int rm = extract32(insn, 16, 5);
    int ra = extract32(insn, 10, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    bool feature;

    /* Map op0 to the ISA extension that gates it; op0 == 3 is unallocated. */
    switch (op0) {
    case 0: /* EOR3 */
    case 1: /* BCAX */
        feature = dc_isar_feature(aa64_sha3, s);
        break;
    case 2: /* SM3SS1 */
        feature = dc_isar_feature(aa64_sm3, s);
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!feature) {
        unallocated_encoding(s);
        return;
    }

    /* FP/SIMD access may trap; emit nothing if the check fails. */
    if (!fp_access_check(s)) {
        return;
    }

    if (op0 < 2) {
        /*
         * EOR3 / BCAX: three-way logical ops, computed per 64-bit lane:
         *   EOR3: rd = rn ^ rm ^ ra
         *   BCAX: rd = rn ^ (rm & ~ra)
         */
        TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
        int pass;

        tcg_op1 = tcg_temp_new_i64();
        tcg_op2 = tcg_temp_new_i64();
        tcg_op3 = tcg_temp_new_i64();
        tcg_res[0] = tcg_temp_new_i64();
        tcg_res[1] = tcg_temp_new_i64();

        /* Buffer results in temps so rd may alias any source register. */
        for (pass = 0; pass < 2; pass++) {
            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);
            read_vec_element(s, tcg_op3, ra, pass, MO_64);

            if (op0 == 0) {
                /* EOR3 */
                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
            } else {
                /* BCAX */
                tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
            }
            tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
        }
        write_vec_element(s, tcg_res[0], rd, 0, MO_64);
        write_vec_element(s, tcg_res[1], rd, 1, MO_64);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_op3);
        tcg_temp_free_i64(tcg_res[0]);
        tcg_temp_free_i64(tcg_res[1]);
    } else {
        /*
         * SM3SS1: operates only on the top 32-bit element (index 3) of
         * each source; the low three elements of rd are zeroed.
         */
        TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;

        tcg_op1 = tcg_temp_new_i32();
        tcg_op2 = tcg_temp_new_i32();
        tcg_op3 = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        tcg_zero = tcg_const_i32(0);

        read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
        read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
        read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);

        /* res = ROR32(ROL32(op1, 12) + op2 + op3, 25) */
        tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
        tcg_gen_rotri_i32(tcg_res, tcg_res, 25);

        /* Zero elements 0..2; the result lands in element 3. */
        write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
        write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
        write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
        write_vec_element_i32(s, tcg_res, rd, 3, MO_32);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i32(tcg_op3);
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_zero);
    }
}
14442
14443/* Crypto XAR
14444 *  31                   21 20  16 15    10 9    5 4    0
14445 * +-----------------------+------+--------+------+------+
14446 * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
14447 * +-----------------------+------+--------+------+------+
14448 */
14449static void disas_crypto_xar(DisasContext *s, uint32_t insn)
14450{
14451    int rm = extract32(insn, 16, 5);
14452    int imm6 = extract32(insn, 10, 6);
14453    int rn = extract32(insn, 5, 5);
14454    int rd = extract32(insn, 0, 5);
14455
14456    if (!dc_isar_feature(aa64_sha3, s)) {
14457        unallocated_encoding(s);
14458        return;
14459    }
14460
14461    if (!fp_access_check(s)) {
14462        return;
14463    }
14464
14465    gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
14466                 vec_full_reg_offset(s, rn),
14467                 vec_full_reg_offset(s, rm), imm6, 16,
14468                 vec_full_reg_size(s));
14469}
14470
14471/* Crypto three-reg imm2
14472 *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
14473 * +-----------------------+------+-----+------+--------+------+------+
14474 * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
14475 * +-----------------------+------+-----+------+--------+------+------+
14476 */
14477static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
14478{
14479    static gen_helper_gvec_3 * const fns[4] = {
14480        gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
14481        gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
14482    };
14483    int opcode = extract32(insn, 10, 2);
14484    int imm2 = extract32(insn, 12, 2);
14485    int rm = extract32(insn, 16, 5);
14486    int rn = extract32(insn, 5, 5);
14487    int rd = extract32(insn, 0, 5);
14488
14489    if (!dc_isar_feature(aa64_sm3, s)) {
14490        unallocated_encoding(s);
14491        return;
14492    }
14493
14494    if (!fp_access_check(s)) {
14495        return;
14496    }
14497
14498    gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
14499}
14500
/* C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table based
 * approach for this part of the decode.
 *
 * An insn matches an entry when (insn & mask) == pattern; entries are
 * tried in order, so more specific patterns must come before ones they
 * overlap with.  The all-zero row terminates the table (its NULL fn is
 * returned for anything not matched earlier).
 */
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
    { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
    { 0xce000000, 0xff808000, disas_crypto_four_reg },
    { 0xce800000, 0xffe00000, disas_crypto_xar },
    { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
    { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
    { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
    { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
    { 0x00000000, 0x00000000, NULL }
};
14542
14543static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
14544{
14545    /* Note that this is called with all non-FP cases from
14546     * table C3-6 so it must UNDEF for entries not specifically
14547     * allocated to instructions in that table.
14548     */
14549    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
14550    if (fn) {
14551        fn(s, insn);
14552    } else {
14553        unallocated_encoding(s);
14554    }
14555}
14556
14557/* C3.6 Data processing - SIMD and floating point */
14558static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
14559{
14560    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
14561        disas_data_proc_fp(s, insn);
14562    } else {
14563        /* SIMD, including crypto */
14564        disas_data_proc_simd(s, insn);
14565    }
14566}
14567
14568/**
14569 * is_guarded_page:
14570 * @env: The cpu environment
14571 * @s: The DisasContext
14572 *
14573 * Return true if the page is guarded.
14574 */
14575static bool is_guarded_page(CPUARMState *env, DisasContext *s)
14576{
14577    uint64_t addr = s->base.pc_first;
14578#ifdef CONFIG_USER_ONLY
14579    return page_get_flags(addr) & PAGE_BTI;
14580#else
14581    int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
14582    unsigned int index = tlb_index(env, mmu_idx, addr);
14583    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
14584
14585    /*
14586     * We test this immediately after reading an insn, which means
14587     * that any normal page must be in the TLB.  The only exception
14588     * would be for executing from flash or device memory, which
14589     * does not retain the TLB entry.
14590     *
14591     * FIXME: Assume false for those, for now.  We could use
14592     * arm_cpu_get_phys_page_attrs_debug to re-read the page
14593     * table entry even for that case.
14594     */
14595    return (tlb_hit(entry->addr_code, addr) &&
14596            arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].iotlb[index].attrs));
14597#endif
14598}
14599
14600/**
14601 * btype_destination_ok:
14602 * @insn: The instruction at the branch destination
14603 * @bt: SCTLR_ELx.BT
14604 * @btype: PSTATE.BTYPE, and is non-zero
14605 *
14606 * On a guarded page, there are a limited number of insns
14607 * that may be present at the branch target:
14608 *   - branch target identifiers,
14609 *   - paciasp, pacibsp,
14610 *   - BRK insn
14611 *   - HLT insn
14612 * Anything else causes a Branch Target Exception.
14613 *
14614 * Return true if the branch is compatible, false to raise BTITRAP.
14615 */
14616static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
14617{
14618    if ((insn & 0xfffff01fu) == 0xd503201fu) {
14619        /* HINT space */
14620        switch (extract32(insn, 5, 7)) {
14621        case 0b011001: /* PACIASP */
14622        case 0b011011: /* PACIBSP */
14623            /*
14624             * If SCTLR_ELx.BT, then PACI*SP are not compatible
14625             * with btype == 3.  Otherwise all btype are ok.
14626             */
14627            return !bt || btype != 3;
14628        case 0b100000: /* BTI */
14629            /* Not compatible with any btype.  */
14630            return false;
14631        case 0b100010: /* BTI c */
14632            /* Not compatible with btype == 3 */
14633            return btype != 3;
14634        case 0b100100: /* BTI j */
14635            /* Not compatible with btype == 2 */
14636            return btype != 2;
14637        case 0b100110: /* BTI jc */
14638            /* Compatible with any btype.  */
14639            return true;
14640        }
14641    } else {
14642        switch (insn & 0xffe0001fu) {
14643        case 0xd4200000u: /* BRK */
14644        case 0xd4400000u: /* HLT */
14645            /* Give priority to the breakpoint exception.  */
14646            return true;
14647        }
14648    }
14649    return false;
14650}
14651
/* C3.1 A64 instruction index by encoding
 *
 * Fetch, BTI-check and decode a single A64 instruction, emitting the
 * corresponding TCG ops into the current translation block.
 */
static void disas_a64_insn(CPUARMState *env, DisasContext *s)
{
    uint32_t insn;

    /* Fetch the insn and advance pc_next past it. */
    s->pc_curr = s->base.pc_next;
    insn = arm_ldl_code(env, s->base.pc_next, s->sctlr_b);
    s->insn = insn;
    s->base.pc_next += 4;

    /* Reset the per-insn access-check latches before decoding. */
    s->fp_access_checked = false;
    s->sve_access_checked = false;

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /*
             * At the first insn of the TB, compute s->guarded_page.
             * We delayed computing this until successfully reading
             * the first insn of the TB, above.  This (mostly) ensures
             * that the softmmu tlb entry has been populated, and the
             * page table GP bit is available.
             *
             * Note that we need to compute this even if btype == 0,
             * because this value is used for BR instructions later
             * where ENV is not available.
             */
            s->guarded_page = is_guarded_page(env, s);

            /* First insn can have btype set to non-zero.  */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above most
             * everything else.  This allows us to handle this now
             * instead of waiting until the insn is otherwise decoded.
             */
            if (s->btype != 0
                && s->guarded_page
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
                                   syn_btitrap(s->btype),
                                   default_exception_el(s));
                return;
            }
        } else {
            /* Not the first insn: btype must be 0.  */
            tcg_debug_assert(s->btype == 0);
        }
    }

    /* Main decode: dispatch on the major opcode group, insn bits [28:25]. */
    switch (extract32(insn, 25, 4)) {
    case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
        unallocated_encoding(s);
        break;
    case 0x2:
        /* SVE encodings; decodetree-generated disas_sve does the decode. */
        if (!dc_isar_feature(aa64_sve, s) || !disas_sve(s, insn)) {
            unallocated_encoding(s);
        }
        break;
    case 0x8: case 0x9: /* Data processing - immediate */
        disas_data_proc_imm(s, insn);
        break;
    case 0xa: case 0xb: /* Branch, exception generation and system insns */
        disas_b_exc_sys(s, insn);
        break;
    case 0x4:
    case 0x6:
    case 0xc:
    case 0xe:      /* Loads and stores */
        disas_ldst(s, insn);
        break;
    case 0x5:
    case 0xd:      /* Data processing - register */
        disas_data_proc_reg(s, insn);
        break;
    case 0x7:
    case 0xf:      /* Data processing - SIMD and floating point */
        disas_data_proc_simd_fp(s, insn);
        break;
    default:
        assert(FALSE); /* all 15 cases should be handled above */
        break;
    }

    /* if we allocated any temporaries, free them here */
    free_tmp_a64(s);

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}
14748
/*
 * Translator hook: initialise the DisasContext for a new AArch64 TB,
 * mainly by unpacking the pre-computed TB flags into individual fields.
 */
static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    ARMCPU *arm_cpu = env_archcpu(env);
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
    int bound, core_mmu_idx;

    dc->isar = &arm_cpu->isar;
    dc->condjmp = 0;

    dc->aarch64 = 1;
    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
     * there is no secure EL1, so we route exceptions to EL3.
     */
    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
                               !arm_el_is_aa64(env, 3);
    /* AArch32-only state is fixed to zero for an A64 TB. */
    dc->thumb = 0;
    dc->sctlr_b = 0;
    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
    dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
    dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
    dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
    dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
    dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
    /* ZCR_LEN encodes (vector length / 16) - 1. */
    dc->sve_len = (EX_TBFLAG_A64(tb_flags, ZCR_LEN) + 1) * 16;
    dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
    dc->bt = EX_TBFLAG_A64(tb_flags, BT);
    dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
    dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
    dc->ata = EX_TBFLAG_A64(tb_flags, ATA);
    dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
    dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;
    dc->dcz_blocksize = arm_cpu->dcz_blocksize;

#ifdef CONFIG_USER_ONLY
    /* In sve_probe_page, we assume TBI is enabled. */
    tcg_debug_assert(dc->tbid & 1);
#endif

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
    dc->is_ldex = false;
    dc->debug_target_el = EX_TBFLAG_ANY(tb_flags, DEBUG_TARGET_EL);

    /* Bound the number of insns to execute to those left on the page.
     * (-(pc | TARGET_PAGE_MASK) is the byte count to the end of the page;
     * each A64 insn is 4 bytes.)
     */
    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;

    /* If architectural single step active, limit to 1.  */
    if (dc->ss_active) {
        bound = 1;
    }
    dc->base.max_insns = MIN(dc->base.max_insns, bound);

    init_tmp_a64_array(dc);
}
14834
/* Translator hook: no per-TB setup is required for AArch64. */
static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}
14838
/*
 * Translator hook: mark the start of a guest insn in the TCG op stream.
 * The two trailing zero arguments match the AArch32 translator's extra
 * insn-start data, which A64 does not use.  NOTE(review): assumption
 * based on the constant zeros here -- confirm against translate.c.
 */
static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    tcg_gen_insn_start(dc->base.pc_next, 0, 0);
    /* Remember the insn_start op; kept in dc->insn_start for later use. */
    dc->insn_start = tcg_last_op();
}
14846
14847static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
14848{
14849    DisasContext *dc = container_of(dcbase, DisasContext, base);
14850    CPUARMState *env = cpu->env_ptr;
14851
14852    if (dc->ss_active && !dc->pstate_ss) {
14853        /* Singlestep state is Active-pending.
14854         * If we're in this state at the start of a TB then either
14855         *  a) we just took an exception to an EL which is being debugged
14856         *     and this is the first insn in the exception handler
14857         *  b) debug exceptions were masked and we just unmasked them
14858         *     without changing EL (eg by clearing PSTATE.D)
14859         * In either case we're going to take a swstep exception in the
14860         * "did not step an insn" case, and so the syndrome ISV and EX
14861         * bits should be zero.
14862         */
14863        assert(dc->base.num_insns == 1);
14864        gen_swstep_exception(dc, 0, 0);
14865        dc->base.is_jmp = DISAS_NORETURN;
14866    } else {
14867        disas_a64_insn(env, dc);
14868    }
14869
14870    translator_loop_temp_check(&dc->base);
14871}
14872
/*
 * Translator hook: emit the epilogue that ends a translation block,
 * according to how the last insn left dc->base.is_jmp.
 */
static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->base.singlestep_enabled || dc->ss_active)) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            /* Update the PC before raising the debug/step exception. */
            gen_a64_set_pc_im(dc->base.pc_next);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            if (dc->base.singlestep_enabled) {
                /* gdb single-step */
                gen_exception_internal(EXCP_DEBUG);
            } else {
                /* architectural single-step */
                gen_step_complete_exception(dc);
            }
            break;
        case DISAS_NORETURN:
            /* An exception was already generated; nothing more to emit. */
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            /* Chain straight to the next TB. */
            gen_goto_tb(dc, 1, dc->base.pc_next);
            break;
        default:
        case DISAS_UPDATE_EXIT:
            gen_a64_set_pc_im(dc->base.pc_next);
            /* fall through */
        case DISAS_EXIT:
            /* Return to the main loop without chaining. */
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_a64_set_pc_im(dc->base.pc_next);
            /* fall through */
        case DISAS_JUMP:
            /* Look up the next TB by PC instead of a direct jump. */
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            /* No epilogue needed. */
            break;
        case DISAS_WFE:
            gen_a64_set_pc_im(dc->base.pc_next);
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_a64_set_pc_im(dc->base.pc_next);
            gen_helper_yield(cpu_env);
            break;
        case DISAS_WFI:
        {
            /* This is a special case because we don't want to just halt the CPU
             * if trying to debug across a WFI.
             */
            TCGv_i32 tmp = tcg_const_i32(4);

            gen_a64_set_pc_im(dc->base.pc_next);
            gen_helper_wfi(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            /* The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
        }
    }
}
14947
14948static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
14949                                      CPUState *cpu)
14950{
14951    DisasContext *dc = container_of(dcbase, DisasContext, base);
14952
14953    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
14954    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
14955}
14956
/* Hooks used by the generic translator loop for AArch64 TBs. */
const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
    .disas_log          = aarch64_tr_disas_log,
};
14965