qemu/target/arm/translate-a64.c
<<
>>
Prefs
   1/*
   2 *  AArch64 translation
   3 *
   4 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "qemu/osdep.h"
  20
  21#include "cpu.h"
  22#include "exec/exec-all.h"
  23#include "tcg-op.h"
  24#include "tcg-op-gvec.h"
  25#include "qemu/log.h"
  26#include "arm_ldst.h"
  27#include "translate.h"
  28#include "internals.h"
  29#include "qemu/host-utils.h"
  30
  31#include "hw/semihosting/semihost.h"
  32#include "exec/gen-icount.h"
  33
  34#include "exec/helper-proto.h"
  35#include "exec/helper-gen.h"
  36#include "exec/log.h"
  37
  38#include "trace-tcg.h"
  39#include "translate-a64.h"
  40#include "qemu/atomic128.h"
  41
  42static TCGv_i64 cpu_X[32];
  43static TCGv_i64 cpu_pc;
  44
  45/* Load/store exclusive handling */
  46static TCGv_i64 cpu_exclusive_high;
  47
  48static const char *regnames[] = {
  49    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  50    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  51    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  52    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  53};
  54
  55enum a64_shift_type {
  56    A64_SHIFT_TYPE_LSL = 0,
  57    A64_SHIFT_TYPE_LSR = 1,
  58    A64_SHIFT_TYPE_ASR = 2,
  59    A64_SHIFT_TYPE_ROR = 3
  60};
  61
  62/* Table based decoder typedefs - used when the relevant bits for decode
  63 * are too awkwardly scattered across the instruction (eg SIMD).
  64 */
  65typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
  66
  67typedef struct AArch64DecodeTable {
  68    uint32_t pattern;
  69    uint32_t mask;
  70    AArch64DecodeFn *disas_fn;
  71} AArch64DecodeTable;
  72
  73/* Function prototype for gen_ functions for calling Neon helpers */
  74typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
  75typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
  76typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
  77typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
  78typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
  79typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
  80typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
  81typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
  82typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
  83typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
  84typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
  85typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
  86typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
  87typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
  88typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);
  89
  90/* initialize TCG globals.  */
  91void a64_translate_init(void)
  92{
  93    int i;
  94
  95    cpu_pc = tcg_global_mem_new_i64(cpu_env,
  96                                    offsetof(CPUARMState, pc),
  97                                    "pc");
  98    for (i = 0; i < 32; i++) {
  99        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
 100                                          offsetof(CPUARMState, xregs[i]),
 101                                          regnames[i]);
 102    }
 103
 104    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
 105        offsetof(CPUARMState, exclusive_high), "exclusive_high");
 106}
 107
 108static inline int get_a64_user_mem_index(DisasContext *s)
 109{
 110    /* Return the core mmu_idx to use for A64 "unprivileged load/store" insns:
 111     *  if EL1, access as if EL0; otherwise access at current EL
 112     */
 113    ARMMMUIdx useridx;
 114
 115    switch (s->mmu_idx) {
 116    case ARMMMUIdx_S12NSE1:
 117        useridx = ARMMMUIdx_S12NSE0;
 118        break;
 119    case ARMMMUIdx_S1SE1:
 120        useridx = ARMMMUIdx_S1SE0;
 121        break;
 122    case ARMMMUIdx_S2NS:
 123        g_assert_not_reached();
 124    default:
 125        useridx = s->mmu_idx;
 126        break;
 127    }
 128    return arm_to_core_mmu_idx(useridx);
 129}
 130
 131static void reset_btype(DisasContext *s)
 132{
 133    if (s->btype != 0) {
 134        TCGv_i32 zero = tcg_const_i32(0);
 135        tcg_gen_st_i32(zero, cpu_env, offsetof(CPUARMState, btype));
 136        tcg_temp_free_i32(zero);
 137        s->btype = 0;
 138    }
 139}
 140
 141static void set_btype(DisasContext *s, int val)
 142{
 143    TCGv_i32 tcg_val;
 144
 145    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
 146    tcg_debug_assert(val >= 1 && val <= 3);
 147
 148    tcg_val = tcg_const_i32(val);
 149    tcg_gen_st_i32(tcg_val, cpu_env, offsetof(CPUARMState, btype));
 150    tcg_temp_free_i32(tcg_val);
 151    s->btype = -1;
 152}
 153
 154void gen_a64_set_pc_im(uint64_t val)
 155{
 156    tcg_gen_movi_i64(cpu_pc, val);
 157}
 158
 159/*
 160 * Handle Top Byte Ignore (TBI) bits.
 161 *
 162 * If address tagging is enabled via the TCR TBI bits:
 163 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 164 *    then the address is zero-extended, clearing bits [63:56]
 165 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 166 *    and TBI1 controls addressses with bit 55 == 1.
 167 *    If the appropriate TBI bit is set for the address then
 168 *    the address is sign-extended from bit 55 into bits [63:56]
 169 *
 170 * Here We have concatenated TBI{1,0} into tbi.
 171 */
 172static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
 173                                TCGv_i64 src, int tbi)
 174{
 175    if (tbi == 0) {
 176        /* Load unmodified address */
 177        tcg_gen_mov_i64(dst, src);
 178    } else if (s->current_el >= 2) {
 179        /* FIXME: ARMv8.1-VHE S2 translation regime.  */
 180        /* Force tag byte to all zero */
 181        tcg_gen_extract_i64(dst, src, 0, 56);
 182    } else {
 183        /* Sign-extend from bit 55.  */
 184        tcg_gen_sextract_i64(dst, src, 0, 56);
 185
 186        if (tbi != 3) {
 187            TCGv_i64 tcg_zero = tcg_const_i64(0);
 188
 189            /*
 190             * The two TBI bits differ.
 191             * If tbi0, then !tbi1: only use the extension if positive.
 192             * if !tbi0, then tbi1: only use the extension if negative.
 193             */
 194            tcg_gen_movcond_i64(tbi == 1 ? TCG_COND_GE : TCG_COND_LT,
 195                                dst, dst, tcg_zero, dst, src);
 196            tcg_temp_free_i64(tcg_zero);
 197        }
 198    }
 199}
 200
 201static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
 202{
 203    /*
 204     * If address tagging is enabled for instructions via the TCR TBI bits,
 205     * then loading an address into the PC will clear out any tag.
 206     */
 207    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
 208}
 209
 210/*
 211 * Return a "clean" address for ADDR according to TBID.
 212 * This is always a fresh temporary, as we need to be able to
 213 * increment this independently of a dirty write-back address.
 214 */
 215static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
 216{
 217    TCGv_i64 clean = new_tmp_a64(s);
 218    gen_top_byte_ignore(s, clean, addr, s->tbid);
 219    return clean;
 220}
 221
 222typedef struct DisasCompare64 {
 223    TCGCond cond;
 224    TCGv_i64 value;
 225} DisasCompare64;
 226
 227static void a64_test_cc(DisasCompare64 *c64, int cc)
 228{
 229    DisasCompare c32;
 230
 231    arm_test_cc(&c32, cc);
 232
 233    /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 234       * properly.  The NE/EQ comparisons are also fine with this choice.  */
 235    c64->cond = c32.cond;
 236    c64->value = tcg_temp_new_i64();
 237    tcg_gen_ext_i32_i64(c64->value, c32.value);
 238
 239    arm_free_cc(&c32);
 240}
 241
 242static void a64_free_cc(DisasCompare64 *c64)
 243{
 244    tcg_temp_free_i64(c64->value);
 245}
 246
 247static void gen_exception_internal(int excp)
 248{
 249    TCGv_i32 tcg_excp = tcg_const_i32(excp);
 250
 251    assert(excp_is_internal(excp));
 252    gen_helper_exception_internal(cpu_env, tcg_excp);
 253    tcg_temp_free_i32(tcg_excp);
 254}
 255
 256static void gen_exception_internal_insn(DisasContext *s, uint64_t pc, int excp)
 257{
 258    gen_a64_set_pc_im(pc);
 259    gen_exception_internal(excp);
 260    s->base.is_jmp = DISAS_NORETURN;
 261}
 262
 263static void gen_exception_insn(DisasContext *s, uint64_t pc, int excp,
 264                               uint32_t syndrome, uint32_t target_el)
 265{
 266    gen_a64_set_pc_im(pc);
 267    gen_exception(excp, syndrome, target_el);
 268    s->base.is_jmp = DISAS_NORETURN;
 269}
 270
 271static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
 272{
 273    TCGv_i32 tcg_syn;
 274
 275    gen_a64_set_pc_im(s->pc_curr);
 276    tcg_syn = tcg_const_i32(syndrome);
 277    gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
 278    tcg_temp_free_i32(tcg_syn);
 279    s->base.is_jmp = DISAS_NORETURN;
 280}
 281
 282static void gen_step_complete_exception(DisasContext *s)
 283{
 284    /* We just completed step of an insn. Move from Active-not-pending
 285     * to Active-pending, and then also take the swstep exception.
 286     * This corresponds to making the (IMPDEF) choice to prioritize
 287     * swstep exceptions over asynchronous exceptions taken to an exception
 288     * level where debug is disabled. This choice has the advantage that
 289     * we do not need to maintain internal state corresponding to the
 290     * ISV/EX syndrome bits between completion of the step and generation
 291     * of the exception, and our syndrome information is always correct.
 292     */
 293    gen_ss_advance(s);
 294    gen_swstep_exception(s, 1, s->is_ldex);
 295    s->base.is_jmp = DISAS_NORETURN;
 296}
 297
 298static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 299{
 300    /* No direct tb linking with singlestep (either QEMU's or the ARM
 301     * debug architecture kind) or deterministic io
 302     */
 303    if (s->base.singlestep_enabled || s->ss_active ||
 304        (tb_cflags(s->base.tb) & CF_LAST_IO)) {
 305        return false;
 306    }
 307
 308#ifndef CONFIG_USER_ONLY
 309    /* Only link tbs from inside the same guest page */
 310    if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 311        return false;
 312    }
 313#endif
 314
 315    return true;
 316}
 317
 318static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 319{
 320    TranslationBlock *tb;
 321
 322    tb = s->base.tb;
 323    if (use_goto_tb(s, n, dest)) {
 324        tcg_gen_goto_tb(n);
 325        gen_a64_set_pc_im(dest);
 326        tcg_gen_exit_tb(tb, n);
 327        s->base.is_jmp = DISAS_NORETURN;
 328    } else {
 329        gen_a64_set_pc_im(dest);
 330        if (s->ss_active) {
 331            gen_step_complete_exception(s);
 332        } else if (s->base.singlestep_enabled) {
 333            gen_exception_internal(EXCP_DEBUG);
 334        } else {
 335            tcg_gen_lookup_and_goto_ptr();
 336            s->base.is_jmp = DISAS_NORETURN;
 337        }
 338    }
 339}
 340
 341void unallocated_encoding(DisasContext *s)
 342{
 343    /* Unallocated and reserved encodings are uncategorized */
 344    gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
 345                       default_exception_el(s));
 346}
 347
 348static void init_tmp_a64_array(DisasContext *s)
 349{
 350#ifdef CONFIG_DEBUG_TCG
 351    memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
 352#endif
 353    s->tmp_a64_count = 0;
 354}
 355
 356static void free_tmp_a64(DisasContext *s)
 357{
 358    int i;
 359    for (i = 0; i < s->tmp_a64_count; i++) {
 360        tcg_temp_free_i64(s->tmp_a64[i]);
 361    }
 362    init_tmp_a64_array(s);
 363}
 364
 365TCGv_i64 new_tmp_a64(DisasContext *s)
 366{
 367    assert(s->tmp_a64_count < TMP_A64_MAX);
 368    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 369}
 370
 371TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 372{
 373    TCGv_i64 t = new_tmp_a64(s);
 374    tcg_gen_movi_i64(t, 0);
 375    return t;
 376}
 377
 378/*
 379 * Register access functions
 380 *
 381 * These functions are used for directly accessing a register in where
 382 * changes to the final register value are likely to be made. If you
 383 * need to use a register for temporary calculation (e.g. index type
 384 * operations) use the read_* form.
 385 *
 386 * B1.2.1 Register mappings
 387 *
 388 * In instruction register encoding 31 can refer to ZR (zero register) or
 389 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 390 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 391 * This is the point of the _sp forms.
 392 */
 393TCGv_i64 cpu_reg(DisasContext *s, int reg)
 394{
 395    if (reg == 31) {
 396        return new_tmp_a64_zero(s);
 397    } else {
 398        return cpu_X[reg];
 399    }
 400}
 401
 402/* register access for when 31 == SP */
 403TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
 404{
 405    return cpu_X[reg];
 406}
 407
 408/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 409 * representing the register contents. This TCGv is an auto-freed
 410 * temporary so it need not be explicitly freed, and may be modified.
 411 */
 412TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 413{
 414    TCGv_i64 v = new_tmp_a64(s);
 415    if (reg != 31) {
 416        if (sf) {
 417            tcg_gen_mov_i64(v, cpu_X[reg]);
 418        } else {
 419            tcg_gen_ext32u_i64(v, cpu_X[reg]);
 420        }
 421    } else {
 422        tcg_gen_movi_i64(v, 0);
 423    }
 424    return v;
 425}
 426
 427TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 428{
 429    TCGv_i64 v = new_tmp_a64(s);
 430    if (sf) {
 431        tcg_gen_mov_i64(v, cpu_X[reg]);
 432    } else {
 433        tcg_gen_ext32u_i64(v, cpu_X[reg]);
 434    }
 435    return v;
 436}
 437
 438/* Return the offset into CPUARMState of a slice (from
 439 * the least significant end) of FP register Qn (ie
 440 * Dn, Sn, Hn or Bn).
 441 * (Note that this is not the same mapping as for A32; see cpu.h)
 442 */
 443static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
 444{
 445    return vec_reg_offset(s, regno, 0, size);
 446}
 447
 448/* Offset of the high half of the 128 bit vector Qn */
 449static inline int fp_reg_hi_offset(DisasContext *s, int regno)
 450{
 451    return vec_reg_offset(s, regno, 1, MO_64);
 452}
 453
 454/* Convenience accessors for reading and writing single and double
 455 * FP registers. Writing clears the upper parts of the associated
 456 * 128 bit vector register, as required by the architecture.
 457 * Note that unlike the GP register accessors, the values returned
 458 * by the read functions must be manually freed.
 459 */
 460static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 461{
 462    TCGv_i64 v = tcg_temp_new_i64();
 463
 464    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 465    return v;
 466}
 467
 468static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 469{
 470    TCGv_i32 v = tcg_temp_new_i32();
 471
 472    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 473    return v;
 474}
 475
 476static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
 477{
 478    TCGv_i32 v = tcg_temp_new_i32();
 479
 480    tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
 481    return v;
 482}
 483
 484/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 485 * If SVE is not enabled, then there are only 128 bits in the vector.
 486 */
 487static void clear_vec_high(DisasContext *s, bool is_q, int rd)
 488{
 489    unsigned ofs = fp_reg_offset(s, rd, MO_64);
 490    unsigned vsz = vec_full_reg_size(s);
 491
 492    if (!is_q) {
 493        TCGv_i64 tcg_zero = tcg_const_i64(0);
 494        tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8);
 495        tcg_temp_free_i64(tcg_zero);
 496    }
 497    if (vsz > 16) {
 498        tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0);
 499    }
 500}
 501
 502void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 503{
 504    unsigned ofs = fp_reg_offset(s, reg, MO_64);
 505
 506    tcg_gen_st_i64(v, cpu_env, ofs);
 507    clear_vec_high(s, false, reg);
 508}
 509
 510static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 511{
 512    TCGv_i64 tmp = tcg_temp_new_i64();
 513
 514    tcg_gen_extu_i32_i64(tmp, v);
 515    write_fp_dreg(s, reg, tmp);
 516    tcg_temp_free_i64(tmp);
 517}
 518
 519TCGv_ptr get_fpstatus_ptr(bool is_f16)
 520{
 521    TCGv_ptr statusptr = tcg_temp_new_ptr();
 522    int offset;
 523
 524    /* In A64 all instructions (both FP and Neon) use the FPCR; there
 525     * is no equivalent of the A32 Neon "standard FPSCR value".
 526     * However half-precision operations operate under a different
 527     * FZ16 flag and use vfp.fp_status_f16 instead of vfp.fp_status.
 528     */
 529    if (is_f16) {
 530        offset = offsetof(CPUARMState, vfp.fp_status_f16);
 531    } else {
 532        offset = offsetof(CPUARMState, vfp.fp_status);
 533    }
 534    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 535    return statusptr;
 536}
 537
 538/* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
 539static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
 540                         GVecGen2Fn *gvec_fn, int vece)
 541{
 542    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 543            is_q ? 16 : 8, vec_full_reg_size(s));
 544}
 545
 546/* Expand a 2-operand + immediate AdvSIMD vector operation using
 547 * an expander function.
 548 */
 549static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
 550                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
 551{
 552    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 553            imm, is_q ? 16 : 8, vec_full_reg_size(s));
 554}
 555
 556/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
 557static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
 558                         GVecGen3Fn *gvec_fn, int vece)
 559{
 560    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 561            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
 562}
 563
 564/* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
 565static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
 566                         int rx, GVecGen4Fn *gvec_fn, int vece)
 567{
 568    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 569            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
 570            is_q ? 16 : 8, vec_full_reg_size(s));
 571}
 572
 573/* Expand a 2-operand + immediate AdvSIMD vector operation using
 574 * an op descriptor.
 575 */
 576static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd,
 577                          int rn, int64_t imm, const GVecGen2i *gvec_op)
 578{
 579    tcg_gen_gvec_2i(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 580                    is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op);
 581}
 582
 583/* Expand a 3-operand AdvSIMD vector operation using an op descriptor.  */
 584static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
 585                         int rn, int rm, const GVecGen3 *gvec_op)
 586{
 587    tcg_gen_gvec_3(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 588                   vec_full_reg_offset(s, rm), is_q ? 16 : 8,
 589                   vec_full_reg_size(s), gvec_op);
 590}
 591
 592/* Expand a 3-operand operation using an out-of-line helper.  */
 593static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
 594                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
 595{
 596    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 597                       vec_full_reg_offset(s, rn),
 598                       vec_full_reg_offset(s, rm),
 599                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 600}
 601
 602/* Expand a 3-operand + env pointer operation using
 603 * an out-of-line helper.
 604 */
 605static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd,
 606                             int rn, int rm, gen_helper_gvec_3_ptr *fn)
 607{
 608    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
 609                       vec_full_reg_offset(s, rn),
 610                       vec_full_reg_offset(s, rm), cpu_env,
 611                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
 612}
 613
 614/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 615 * an out-of-line helper.
 616 */
 617static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
 618                              int rm, bool is_fp16, int data,
 619                              gen_helper_gvec_3_ptr *fn)
 620{
 621    TCGv_ptr fpst = get_fpstatus_ptr(is_fp16);
 622    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
 623                       vec_full_reg_offset(s, rn),
 624                       vec_full_reg_offset(s, rm), fpst,
 625                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 626    tcg_temp_free_ptr(fpst);
 627}
 628
 629/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 630 * than the 32 bit equivalent.
 631 */
 632static inline void gen_set_NZ64(TCGv_i64 result)
 633{
 634    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
 635    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
 636}
 637
 638/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 639static inline void gen_logic_CC(int sf, TCGv_i64 result)
 640{
 641    if (sf) {
 642        gen_set_NZ64(result);
 643    } else {
 644        tcg_gen_extrl_i64_i32(cpu_ZF, result);
 645        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 646    }
 647    tcg_gen_movi_i32(cpu_CF, 0);
 648    tcg_gen_movi_i32(cpu_VF, 0);
 649}
 650
 651/* dest = T0 + T1; compute C, N, V and Z flags */
 652static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 653{
 654    if (sf) {
 655        TCGv_i64 result, flag, tmp;
 656        result = tcg_temp_new_i64();
 657        flag = tcg_temp_new_i64();
 658        tmp = tcg_temp_new_i64();
 659
 660        tcg_gen_movi_i64(tmp, 0);
 661        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
 662
 663        tcg_gen_extrl_i64_i32(cpu_CF, flag);
 664
 665        gen_set_NZ64(result);
 666
 667        tcg_gen_xor_i64(flag, result, t0);
 668        tcg_gen_xor_i64(tmp, t0, t1);
 669        tcg_gen_andc_i64(flag, flag, tmp);
 670        tcg_temp_free_i64(tmp);
 671        tcg_gen_extrh_i64_i32(cpu_VF, flag);
 672
 673        tcg_gen_mov_i64(dest, result);
 674        tcg_temp_free_i64(result);
 675        tcg_temp_free_i64(flag);
 676    } else {
 677        /* 32 bit arithmetic */
 678        TCGv_i32 t0_32 = tcg_temp_new_i32();
 679        TCGv_i32 t1_32 = tcg_temp_new_i32();
 680        TCGv_i32 tmp = tcg_temp_new_i32();
 681
 682        tcg_gen_movi_i32(tmp, 0);
 683        tcg_gen_extrl_i64_i32(t0_32, t0);
 684        tcg_gen_extrl_i64_i32(t1_32, t1);
 685        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
 686        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 687        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 688        tcg_gen_xor_i32(tmp, t0_32, t1_32);
 689        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 690        tcg_gen_extu_i32_i64(dest, cpu_NF);
 691
 692        tcg_temp_free_i32(tmp);
 693        tcg_temp_free_i32(t0_32);
 694        tcg_temp_free_i32(t1_32);
 695    }
 696}
 697
 698/* dest = T0 - T1; compute C, N, V and Z flags */
 699static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 700{
 701    if (sf) {
 702        /* 64 bit arithmetic */
 703        TCGv_i64 result, flag, tmp;
 704
 705        result = tcg_temp_new_i64();
 706        flag = tcg_temp_new_i64();
 707        tcg_gen_sub_i64(result, t0, t1);
 708
 709        gen_set_NZ64(result);
 710
 711        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
 712        tcg_gen_extrl_i64_i32(cpu_CF, flag);
 713
 714        tcg_gen_xor_i64(flag, result, t0);
 715        tmp = tcg_temp_new_i64();
 716        tcg_gen_xor_i64(tmp, t0, t1);
 717        tcg_gen_and_i64(flag, flag, tmp);
 718        tcg_temp_free_i64(tmp);
 719        tcg_gen_extrh_i64_i32(cpu_VF, flag);
 720        tcg_gen_mov_i64(dest, result);
 721        tcg_temp_free_i64(flag);
 722        tcg_temp_free_i64(result);
 723    } else {
 724        /* 32 bit arithmetic */
 725        TCGv_i32 t0_32 = tcg_temp_new_i32();
 726        TCGv_i32 t1_32 = tcg_temp_new_i32();
 727        TCGv_i32 tmp;
 728
 729        tcg_gen_extrl_i64_i32(t0_32, t0);
 730        tcg_gen_extrl_i64_i32(t1_32, t1);
 731        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
 732        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 733        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
 734        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 735        tmp = tcg_temp_new_i32();
 736        tcg_gen_xor_i32(tmp, t0_32, t1_32);
 737        tcg_temp_free_i32(t0_32);
 738        tcg_temp_free_i32(t1_32);
 739        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 740        tcg_temp_free_i32(tmp);
 741        tcg_gen_extu_i32_i64(dest, cpu_NF);
 742    }
 743}
 744
 745/* dest = T0 + T1 + CF; do not compute flags. */
 746static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 747{
 748    TCGv_i64 flag = tcg_temp_new_i64();
 749    tcg_gen_extu_i32_i64(flag, cpu_CF);
 750    tcg_gen_add_i64(dest, t0, t1);
 751    tcg_gen_add_i64(dest, dest, flag);
 752    tcg_temp_free_i64(flag);
 753
 754    if (!sf) {
 755        tcg_gen_ext32u_i64(dest, dest);
 756    }
 757}
 758
 759/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
 760static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 761{
 762    if (sf) {
 763        TCGv_i64 result, cf_64, vf_64, tmp;
 764        result = tcg_temp_new_i64();
 765        cf_64 = tcg_temp_new_i64();
 766        vf_64 = tcg_temp_new_i64();
 767        tmp = tcg_const_i64(0);
 768
 769        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
 770        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
 771        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
 772        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
 773        gen_set_NZ64(result);
 774
 775        tcg_gen_xor_i64(vf_64, result, t0);
 776        tcg_gen_xor_i64(tmp, t0, t1);
 777        tcg_gen_andc_i64(vf_64, vf_64, tmp);
 778        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 779
 780        tcg_gen_mov_i64(dest, result);
 781
 782        tcg_temp_free_i64(tmp);
 783        tcg_temp_free_i64(vf_64);
 784        tcg_temp_free_i64(cf_64);
 785        tcg_temp_free_i64(result);
 786    } else {
 787        TCGv_i32 t0_32, t1_32, tmp;
 788        t0_32 = tcg_temp_new_i32();
 789        t1_32 = tcg_temp_new_i32();
 790        tmp = tcg_const_i32(0);
 791
 792        tcg_gen_extrl_i64_i32(t0_32, t0);
 793        tcg_gen_extrl_i64_i32(t1_32, t1);
 794        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
 795        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
 796
 797        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 798        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 799        tcg_gen_xor_i32(tmp, t0_32, t1_32);
 800        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 801        tcg_gen_extu_i32_i64(dest, cpu_NF);
 802
 803        tcg_temp_free_i32(tmp);
 804        tcg_temp_free_i32(t1_32);
 805        tcg_temp_free_i32(t0_32);
 806    }
 807}
 808
 809/*
 810 * Load/Store generators
 811 */
 812
 813/*
 814 * Store from GPR register to memory.
 815 */
 816static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 817                             TCGv_i64 tcg_addr, int size, int memidx,
 818                             bool iss_valid,
 819                             unsigned int iss_srt,
 820                             bool iss_sf, bool iss_ar)
 821{
 822    g_assert(size <= 3);
 823    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
 824
 825    if (iss_valid) {
 826        uint32_t syn;
 827
 828        syn = syn_data_abort_with_iss(0,
 829                                      size,
 830                                      false,
 831                                      iss_srt,
 832                                      iss_sf,
 833                                      iss_ar,
 834                                      0, 0, 0, 0, 0, false);
 835        disas_set_insn_syndrome(s, syn);
 836    }
 837}
 838
 839static void do_gpr_st(DisasContext *s, TCGv_i64 source,
 840                      TCGv_i64 tcg_addr, int size,
 841                      bool iss_valid,
 842                      unsigned int iss_srt,
 843                      bool iss_sf, bool iss_ar)
 844{
 845    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
 846                     iss_valid, iss_srt, iss_sf, iss_ar);
 847}
 848
 849/*
 850 * Load from memory to GPR register
 851 */
 852static void do_gpr_ld_memidx(DisasContext *s,
 853                             TCGv_i64 dest, TCGv_i64 tcg_addr,
 854                             int size, bool is_signed,
 855                             bool extend, int memidx,
 856                             bool iss_valid, unsigned int iss_srt,
 857                             bool iss_sf, bool iss_ar)
 858{
 859    MemOp memop = s->be_data + size;
 860
 861    g_assert(size <= 3);
 862
 863    if (is_signed) {
 864        memop += MO_SIGN;
 865    }
 866
 867    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 868
 869    if (extend && is_signed) {
 870        g_assert(size < 3);
 871        tcg_gen_ext32u_i64(dest, dest);
 872    }
 873
 874    if (iss_valid) {
 875        uint32_t syn;
 876
 877        syn = syn_data_abort_with_iss(0,
 878                                      size,
 879                                      is_signed,
 880                                      iss_srt,
 881                                      iss_sf,
 882                                      iss_ar,
 883                                      0, 0, 0, 0, 0, false);
 884        disas_set_insn_syndrome(s, syn);
 885    }
 886}
 887
 888static void do_gpr_ld(DisasContext *s,
 889                      TCGv_i64 dest, TCGv_i64 tcg_addr,
 890                      int size, bool is_signed, bool extend,
 891                      bool iss_valid, unsigned int iss_srt,
 892                      bool iss_sf, bool iss_ar)
 893{
 894    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
 895                     get_mem_index(s),
 896                     iss_valid, iss_srt, iss_sf, iss_ar);
 897}
 898
 899/*
 900 * Store from FP register to memory
 901 */
 902static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 903{
 904    /* This writes the bottom N bits of a 128 bit wide vector to memory */
 905    TCGv_i64 tmp = tcg_temp_new_i64();
 906    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
 907    if (size < 4) {
 908        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
 909                            s->be_data + size);
 910    } else {
 911        bool be = s->be_data == MO_BE;
 912        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
 913
 914        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 915        tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
 916                            s->be_data | MO_Q);
 917        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
 918        tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
 919                            s->be_data | MO_Q);
 920        tcg_temp_free_i64(tcg_hiaddr);
 921    }
 922
 923    tcg_temp_free_i64(tmp);
 924}
 925
 926/*
 927 * Load from memory to FP register
 928 */
 929static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 930{
 931    /* This always zero-extends and writes to a full 128 bit wide vector */
 932    TCGv_i64 tmplo = tcg_temp_new_i64();
 933    TCGv_i64 tmphi;
 934
 935    if (size < 4) {
 936        MemOp memop = s->be_data + size;
 937        tmphi = tcg_const_i64(0);
 938        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
 939    } else {
 940        bool be = s->be_data == MO_BE;
 941        TCGv_i64 tcg_hiaddr;
 942
 943        tmphi = tcg_temp_new_i64();
 944        tcg_hiaddr = tcg_temp_new_i64();
 945
 946        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 947        tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
 948                            s->be_data | MO_Q);
 949        tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
 950                            s->be_data | MO_Q);
 951        tcg_temp_free_i64(tcg_hiaddr);
 952    }
 953
 954    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
 955    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
 956
 957    tcg_temp_free_i64(tmplo);
 958    tcg_temp_free_i64(tmphi);
 959
 960    clear_vec_high(s, true, destidx);
 961}
 962
 963/*
 964 * Vector load/store helpers.
 965 *
 966 * The principal difference between this and a FP load is that we don't
 967 * zero extend as we are filling a partial chunk of the vector register.
 968 * These functions don't support 128 bit loads/stores, which would be
 969 * normal load/store operations.
 970 *
 971 * The _i32 versions are useful when operating on 32 bit quantities
 972 * (eg for floating point single or using Neon helper functions).
 973 */
 974
 975/* Get value of an element within a vector register */
 976static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
 977                             int element, MemOp memop)
 978{
 979    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 980    switch (memop) {
 981    case MO_8:
 982        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
 983        break;
 984    case MO_16:
 985        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
 986        break;
 987    case MO_32:
 988        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
 989        break;
 990    case MO_8|MO_SIGN:
 991        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
 992        break;
 993    case MO_16|MO_SIGN:
 994        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
 995        break;
 996    case MO_32|MO_SIGN:
 997        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
 998        break;
 999    case MO_64:
1000    case MO_64|MO_SIGN:
1001        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
1002        break;
1003    default:
1004        g_assert_not_reached();
1005    }
1006}
1007
1008static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1009                                 int element, MemOp memop)
1010{
1011    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1012    switch (memop) {
1013    case MO_8:
1014        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
1015        break;
1016    case MO_16:
1017        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
1018        break;
1019    case MO_8|MO_SIGN:
1020        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
1021        break;
1022    case MO_16|MO_SIGN:
1023        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
1024        break;
1025    case MO_32:
1026    case MO_32|MO_SIGN:
1027        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
1028        break;
1029    default:
1030        g_assert_not_reached();
1031    }
1032}
1033
1034/* Set value of an element within a vector register */
1035static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1036                              int element, MemOp memop)
1037{
1038    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1039    switch (memop) {
1040    case MO_8:
1041        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
1042        break;
1043    case MO_16:
1044        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
1045        break;
1046    case MO_32:
1047        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
1048        break;
1049    case MO_64:
1050        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
1051        break;
1052    default:
1053        g_assert_not_reached();
1054    }
1055}
1056
1057static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1058                                  int destidx, int element, MemOp memop)
1059{
1060    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1061    switch (memop) {
1062    case MO_8:
1063        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
1064        break;
1065    case MO_16:
1066        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
1067        break;
1068    case MO_32:
1069        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
1070        break;
1071    default:
1072        g_assert_not_reached();
1073    }
1074}
1075
1076/* Store from vector register to memory */
1077static void do_vec_st(DisasContext *s, int srcidx, int element,
1078                      TCGv_i64 tcg_addr, int size, MemOp endian)
1079{
1080    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1081
1082    read_vec_element(s, tcg_tmp, srcidx, element, size);
1083    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
1084
1085    tcg_temp_free_i64(tcg_tmp);
1086}
1087
1088/* Load from memory to vector register */
1089static void do_vec_ld(DisasContext *s, int destidx, int element,
1090                      TCGv_i64 tcg_addr, int size, MemOp endian)
1091{
1092    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1093
1094    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
1095    write_vec_element(s, tcg_tmp, destidx, element, size);
1096
1097    tcg_temp_free_i64(tcg_tmp);
1098}
1099
1100/* Check that FP/Neon access is enabled. If it is, return
1101 * true. If not, emit code to generate an appropriate exception,
1102 * and return false; the caller should not emit any code for
1103 * the instruction. Note that this check must happen after all
1104 * unallocated-encoding checks (otherwise the syndrome information
1105 * for the resulting exception will be incorrect).
1106 */
1107static inline bool fp_access_check(DisasContext *s)
1108{
1109    assert(!s->fp_access_checked);
1110    s->fp_access_checked = true;
1111
1112    if (!s->fp_excp_el) {
1113        return true;
1114    }
1115
1116    gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
1117                       syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
1118    return false;
1119}
1120
1121/* Check that SVE access is enabled.  If it is, return true.
1122 * If not, emit code to generate an appropriate exception and return false.
1123 */
1124bool sve_access_check(DisasContext *s)
1125{
1126    if (s->sve_excp_el) {
1127        gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_sve_access_trap(),
1128                           s->sve_excp_el);
1129        return false;
1130    }
1131    return fp_access_check(s);
1132}
1133
1134/*
1135 * This utility function is for doing register extension with an
1136 * optional shift. You will likely want to pass a temporary for the
1137 * destination register. See DecodeRegExtend() in the ARM ARM.
1138 */
1139static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1140                              int option, unsigned int shift)
1141{
1142    int extsize = extract32(option, 0, 2);
1143    bool is_signed = extract32(option, 2, 1);
1144
1145    if (is_signed) {
1146        switch (extsize) {
1147        case 0:
1148            tcg_gen_ext8s_i64(tcg_out, tcg_in);
1149            break;
1150        case 1:
1151            tcg_gen_ext16s_i64(tcg_out, tcg_in);
1152            break;
1153        case 2:
1154            tcg_gen_ext32s_i64(tcg_out, tcg_in);
1155            break;
1156        case 3:
1157            tcg_gen_mov_i64(tcg_out, tcg_in);
1158            break;
1159        }
1160    } else {
1161        switch (extsize) {
1162        case 0:
1163            tcg_gen_ext8u_i64(tcg_out, tcg_in);
1164            break;
1165        case 1:
1166            tcg_gen_ext16u_i64(tcg_out, tcg_in);
1167            break;
1168        case 2:
1169            tcg_gen_ext32u_i64(tcg_out, tcg_in);
1170            break;
1171        case 3:
1172            tcg_gen_mov_i64(tcg_out, tcg_in);
1173            break;
1174        }
1175    }
1176
1177    if (shift) {
1178        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1179    }
1180}
1181
1182static inline void gen_check_sp_alignment(DisasContext *s)
1183{
1184    /* The AArch64 architecture mandates that (if enabled via PSTATE
1185     * or SCTLR bits) there is a check that SP is 16-aligned on every
1186     * SP-relative load or store (with an exception generated if it is not).
1187     * In line with general QEMU practice regarding misaligned accesses,
1188     * we omit these checks for the sake of guest program performance.
1189     * This function is provided as a hook so we can more easily add these
1190     * checks in future (possibly as a "favour catching guest program bugs
1191     * over speed" user selectable option).
1192     */
1193}
1194
1195/*
1196 * This provides a simple table based table lookup decoder. It is
1197 * intended to be used when the relevant bits for decode are too
1198 * awkwardly placed and switch/if based logic would be confusing and
1199 * deeply nested. Since it's a linear search through the table, tables
1200 * should be kept small.
1201 *
1202 * It returns the first handler where insn & mask == pattern, or
1203 * NULL if there is no match.
1204 * The table is terminated by an empty mask (i.e. 0)
1205 */
1206static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1207                                               uint32_t insn)
1208{
1209    const AArch64DecodeTable *tptr = table;
1210
1211    while (tptr->mask) {
1212        if ((insn & tptr->mask) == tptr->pattern) {
1213            return tptr->disas_fn;
1214        }
1215        tptr++;
1216    }
1217    return NULL;
1218}
1219
1220/*
1221 * The instruction disassembly implemented here matches
1222 * the instruction encoding classifications in chapter C4
1223 * of the ARM Architecture Reference Manual (DDI0487B_a);
1224 * classification names and decode diagrams here should generally
1225 * match up with those in the manual.
1226 */
1227
1228/* Unconditional branch (immediate)
1229 *   31  30       26 25                                  0
1230 * +----+-----------+-------------------------------------+
1231 * | op | 0 0 1 0 1 |                 imm26               |
1232 * +----+-----------+-------------------------------------+
1233 */
1234static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1235{
1236    uint64_t addr = s->pc_curr + sextract32(insn, 0, 26) * 4;
1237
1238    if (insn & (1U << 31)) {
1239        /* BL Branch with link */
1240        tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
1241    }
1242
1243    /* B Branch / BL Branch with link */
1244    reset_btype(s);
1245    gen_goto_tb(s, 0, addr);
1246}
1247
1248/* Compare and branch (immediate)
1249 *   31  30         25  24  23                  5 4      0
1250 * +----+-------------+----+---------------------+--------+
1251 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1252 * +----+-------------+----+---------------------+--------+
1253 */
1254static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1255{
1256    unsigned int sf, op, rt;
1257    uint64_t addr;
1258    TCGLabel *label_match;
1259    TCGv_i64 tcg_cmp;
1260
1261    sf = extract32(insn, 31, 1);
1262    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1263    rt = extract32(insn, 0, 5);
1264    addr = s->pc_curr + sextract32(insn, 5, 19) * 4;
1265
1266    tcg_cmp = read_cpu_reg(s, rt, sf);
1267    label_match = gen_new_label();
1268
1269    reset_btype(s);
1270    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1271                        tcg_cmp, 0, label_match);
1272
1273    gen_goto_tb(s, 0, s->base.pc_next);
1274    gen_set_label(label_match);
1275    gen_goto_tb(s, 1, addr);
1276}
1277
1278/* Test and branch (immediate)
1279 *   31  30         25  24  23   19 18          5 4    0
1280 * +----+-------------+----+-------+-------------+------+
1281 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1282 * +----+-------------+----+-------+-------------+------+
1283 */
1284static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1285{
1286    unsigned int bit_pos, op, rt;
1287    uint64_t addr;
1288    TCGLabel *label_match;
1289    TCGv_i64 tcg_cmp;
1290
1291    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1292    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1293    addr = s->pc_curr + sextract32(insn, 5, 14) * 4;
1294    rt = extract32(insn, 0, 5);
1295
1296    tcg_cmp = tcg_temp_new_i64();
1297    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1298    label_match = gen_new_label();
1299
1300    reset_btype(s);
1301    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1302                        tcg_cmp, 0, label_match);
1303    tcg_temp_free_i64(tcg_cmp);
1304    gen_goto_tb(s, 0, s->base.pc_next);
1305    gen_set_label(label_match);
1306    gen_goto_tb(s, 1, addr);
1307}
1308
1309/* Conditional branch (immediate)
1310 *  31           25  24  23                  5   4  3    0
1311 * +---------------+----+---------------------+----+------+
1312 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1313 * +---------------+----+---------------------+----+------+
1314 */
1315static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1316{
1317    unsigned int cond;
1318    uint64_t addr;
1319
1320    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1321        unallocated_encoding(s);
1322        return;
1323    }
1324    addr = s->pc_curr + sextract32(insn, 5, 19) * 4;
1325    cond = extract32(insn, 0, 4);
1326
1327    reset_btype(s);
1328    if (cond < 0x0e) {
1329        /* genuinely conditional branches */
1330        TCGLabel *label_match = gen_new_label();
1331        arm_gen_test_cc(cond, label_match);
1332        gen_goto_tb(s, 0, s->base.pc_next);
1333        gen_set_label(label_match);
1334        gen_goto_tb(s, 1, addr);
1335    } else {
1336        /* 0xe and 0xf are both "always" conditions */
1337        gen_goto_tb(s, 0, addr);
1338    }
1339}
1340
1341/* HINT instruction group, including various allocated HINTs */
1342static void handle_hint(DisasContext *s, uint32_t insn,
1343                        unsigned int op1, unsigned int op2, unsigned int crm)
1344{
1345    unsigned int selector = crm << 3 | op2;
1346
1347    if (op1 != 3) {
1348        unallocated_encoding(s);
1349        return;
1350    }
1351
1352    switch (selector) {
1353    case 0b00000: /* NOP */
1354        break;
1355    case 0b00011: /* WFI */
1356        s->base.is_jmp = DISAS_WFI;
1357        break;
1358    case 0b00001: /* YIELD */
1359        /* When running in MTTCG we don't generate jumps to the yield and
1360         * WFE helpers as it won't affect the scheduling of other vCPUs.
1361         * If we wanted to more completely model WFE/SEV so we don't busy
1362         * spin unnecessarily we would need to do something more involved.
1363         */
1364        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1365            s->base.is_jmp = DISAS_YIELD;
1366        }
1367        break;
1368    case 0b00010: /* WFE */
1369        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1370            s->base.is_jmp = DISAS_WFE;
1371        }
1372        break;
1373    case 0b00100: /* SEV */
1374    case 0b00101: /* SEVL */
1375        /* we treat all as NOP at least for now */
1376        break;
1377    case 0b00111: /* XPACLRI */
1378        if (s->pauth_active) {
1379            gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
1380        }
1381        break;
1382    case 0b01000: /* PACIA1716 */
1383        if (s->pauth_active) {
1384            gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1385        }
1386        break;
1387    case 0b01010: /* PACIB1716 */
1388        if (s->pauth_active) {
1389            gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1390        }
1391        break;
1392    case 0b01100: /* AUTIA1716 */
1393        if (s->pauth_active) {
1394            gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1395        }
1396        break;
1397    case 0b01110: /* AUTIB1716 */
1398        if (s->pauth_active) {
1399            gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
1400        }
1401        break;
1402    case 0b11000: /* PACIAZ */
1403        if (s->pauth_active) {
1404            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
1405                                new_tmp_a64_zero(s));
1406        }
1407        break;
1408    case 0b11001: /* PACIASP */
1409        if (s->pauth_active) {
1410            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1411        }
1412        break;
1413    case 0b11010: /* PACIBZ */
1414        if (s->pauth_active) {
1415            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
1416                                new_tmp_a64_zero(s));
1417        }
1418        break;
1419    case 0b11011: /* PACIBSP */
1420        if (s->pauth_active) {
1421            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1422        }
1423        break;
1424    case 0b11100: /* AUTIAZ */
1425        if (s->pauth_active) {
1426            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
1427                              new_tmp_a64_zero(s));
1428        }
1429        break;
1430    case 0b11101: /* AUTIASP */
1431        if (s->pauth_active) {
1432            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1433        }
1434        break;
1435    case 0b11110: /* AUTIBZ */
1436        if (s->pauth_active) {
1437            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
1438                              new_tmp_a64_zero(s));
1439        }
1440        break;
1441    case 0b11111: /* AUTIBSP */
1442        if (s->pauth_active) {
1443            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
1444        }
1445        break;
1446    default:
1447        /* default specified as NOP equivalent */
1448        break;
1449    }
1450}
1451
1452static void gen_clrex(DisasContext *s, uint32_t insn)
1453{
1454    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1455}
1456
1457/* CLREX, DSB, DMB, ISB */
1458static void handle_sync(DisasContext *s, uint32_t insn,
1459                        unsigned int op1, unsigned int op2, unsigned int crm)
1460{
1461    TCGBar bar;
1462
1463    if (op1 != 3) {
1464        unallocated_encoding(s);
1465        return;
1466    }
1467
1468    switch (op2) {
1469    case 2: /* CLREX */
1470        gen_clrex(s, insn);
1471        return;
1472    case 4: /* DSB */
1473    case 5: /* DMB */
1474        switch (crm & 3) {
1475        case 1: /* MBReqTypes_Reads */
1476            bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1477            break;
1478        case 2: /* MBReqTypes_Writes */
1479            bar = TCG_BAR_SC | TCG_MO_ST_ST;
1480            break;
1481        default: /* MBReqTypes_All */
1482            bar = TCG_BAR_SC | TCG_MO_ALL;
1483            break;
1484        }
1485        tcg_gen_mb(bar);
1486        return;
1487    case 6: /* ISB */
1488        /* We need to break the TB after this insn to execute
1489         * a self-modified code correctly and also to take
1490         * any pending interrupts immediately.
1491         */
1492        reset_btype(s);
1493        gen_goto_tb(s, 0, s->base.pc_next);
1494        return;
1495
1496    case 7: /* SB */
1497        if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
1498            goto do_unallocated;
1499        }
1500        /*
1501         * TODO: There is no speculation barrier opcode for TCG;
1502         * MB and end the TB instead.
1503         */
1504        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1505        gen_goto_tb(s, 0, s->base.pc_next);
1506        return;
1507
1508    default:
1509    do_unallocated:
1510        unallocated_encoding(s);
1511        return;
1512    }
1513}
1514
1515static void gen_xaflag(void)
1516{
1517    TCGv_i32 z = tcg_temp_new_i32();
1518
1519    tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1520
1521    /*
1522     * (!C & !Z) << 31
1523     * (!(C | Z)) << 31
1524     * ~((C | Z) << 31)
1525     * ~-(C | Z)
1526     * (C | Z) - 1
1527     */
1528    tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1529    tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1530
1531    /* !(Z & C) */
1532    tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1533    tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1534
1535    /* (!C & Z) << 31 -> -(Z & ~C) */
1536    tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1537    tcg_gen_neg_i32(cpu_VF, cpu_VF);
1538
1539    /* C | Z */
1540    tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1541
1542    tcg_temp_free_i32(z);
1543}
1544
1545static void gen_axflag(void)
1546{
1547    tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
1548    tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
1549
1550    /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1551    tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1552
1553    tcg_gen_movi_i32(cpu_NF, 0);
1554    tcg_gen_movi_i32(cpu_VF, 0);
1555}
1556
1557/* MSR (immediate) - move immediate to processor state field */
1558static void handle_msr_i(DisasContext *s, uint32_t insn,
1559                         unsigned int op1, unsigned int op2, unsigned int crm)
1560{
1561    TCGv_i32 t1;
1562    int op = op1 << 3 | op2;
1563
1564    /* End the TB by default, chaining is ok.  */
1565    s->base.is_jmp = DISAS_TOO_MANY;
1566
1567    switch (op) {
1568    case 0x00: /* CFINV */
1569        if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
1570            goto do_unallocated;
1571        }
1572        tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1573        s->base.is_jmp = DISAS_NEXT;
1574        break;
1575
1576    case 0x01: /* XAFlag */
1577        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1578            goto do_unallocated;
1579        }
1580        gen_xaflag();
1581        s->base.is_jmp = DISAS_NEXT;
1582        break;
1583
1584    case 0x02: /* AXFlag */
1585        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1586            goto do_unallocated;
1587        }
1588        gen_axflag();
1589        s->base.is_jmp = DISAS_NEXT;
1590        break;
1591
1592    case 0x05: /* SPSel */
1593        if (s->current_el == 0) {
1594            goto do_unallocated;
1595        }
1596        t1 = tcg_const_i32(crm & PSTATE_SP);
1597        gen_helper_msr_i_spsel(cpu_env, t1);
1598        tcg_temp_free_i32(t1);
1599        break;
1600
1601    case 0x1e: /* DAIFSet */
1602        t1 = tcg_const_i32(crm);
1603        gen_helper_msr_i_daifset(cpu_env, t1);
1604        tcg_temp_free_i32(t1);
1605        break;
1606
1607    case 0x1f: /* DAIFClear */
1608        t1 = tcg_const_i32(crm);
1609        gen_helper_msr_i_daifclear(cpu_env, t1);
1610        tcg_temp_free_i32(t1);
1611        /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
1612        s->base.is_jmp = DISAS_UPDATE;
1613        break;
1614
1615    default:
1616    do_unallocated:
1617        unallocated_encoding(s);
1618        return;
1619    }
1620}
1621
1622static void gen_get_nzcv(TCGv_i64 tcg_rt)
1623{
1624    TCGv_i32 tmp = tcg_temp_new_i32();
1625    TCGv_i32 nzcv = tcg_temp_new_i32();
1626
1627    /* build bit 31, N */
1628    tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1629    /* build bit 30, Z */
1630    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1631    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1632    /* build bit 29, C */
1633    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1634    /* build bit 28, V */
1635    tcg_gen_shri_i32(tmp, cpu_VF, 31);
1636    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1637    /* generate result */
1638    tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1639
1640    tcg_temp_free_i32(nzcv);
1641    tcg_temp_free_i32(tmp);
1642}
1643
1644static void gen_set_nzcv(TCGv_i64 tcg_rt)
1645{
1646    TCGv_i32 nzcv = tcg_temp_new_i32();
1647
1648    /* take NZCV from R[t] */
1649    tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1650
1651    /* bit 31, N */
1652    tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1653    /* bit 30, Z */
1654    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1655    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1656    /* bit 29, C */
1657    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1658    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1659    /* bit 28, V */
1660    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1661    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1662    tcg_temp_free_i32(nzcv);
1663}
1664
1665/* MRS - move from system register
1666 * MSR (register) - move to system register
1667 * SYS
1668 * SYSL
1669 * These are all essentially the same insn in 'read' and 'write'
1670 * versions, with varying op0 fields.
1671 */
1672static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1673                       unsigned int op0, unsigned int op1, unsigned int op2,
1674                       unsigned int crn, unsigned int crm, unsigned int rt)
1675{
1676    const ARMCPRegInfo *ri;
1677    TCGv_i64 tcg_rt;
1678
1679    ri = get_arm_cp_reginfo(s->cp_regs,
1680                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1681                                               crn, crm, op0, op1, op2));
1682
1683    if (!ri) {
1684        /* Unknown register; this might be a guest error or a QEMU
1685         * unimplemented feature.
1686         */
1687        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1688                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1689                      isread ? "read" : "write", op0, op1, crn, crm, op2);
1690        unallocated_encoding(s);
1691        return;
1692    }
1693
1694    /* Check access permissions */
1695    if (!cp_access_ok(s->current_el, ri, isread)) {
1696        unallocated_encoding(s);
1697        return;
1698    }
1699
1700    if (ri->accessfn) {
1701        /* Emit code to perform further access permissions checks at
1702         * runtime; this may result in an exception.
1703         */
1704        TCGv_ptr tmpptr;
1705        TCGv_i32 tcg_syn, tcg_isread;
1706        uint32_t syndrome;
1707
1708        gen_a64_set_pc_im(s->pc_curr);
1709        tmpptr = tcg_const_ptr(ri);
1710        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1711        tcg_syn = tcg_const_i32(syndrome);
1712        tcg_isread = tcg_const_i32(isread);
1713        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1714        tcg_temp_free_ptr(tmpptr);
1715        tcg_temp_free_i32(tcg_syn);
1716        tcg_temp_free_i32(tcg_isread);
1717    } else if (ri->type & ARM_CP_RAISES_EXC) {
1718        /*
1719         * The readfn or writefn might raise an exception;
1720         * synchronize the CPU state in case it does.
1721         */
1722        gen_a64_set_pc_im(s->pc_curr);
1723    }
1724
1725    /* Handle special cases first */
1726    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1727    case ARM_CP_NOP:
1728        return;
1729    case ARM_CP_NZCV:
1730        tcg_rt = cpu_reg(s, rt);
1731        if (isread) {
1732            gen_get_nzcv(tcg_rt);
1733        } else {
1734            gen_set_nzcv(tcg_rt);
1735        }
1736        return;
1737    case ARM_CP_CURRENTEL:
1738        /* Reads as current EL value from pstate, which is
1739         * guaranteed to be constant by the tb flags.
1740         */
1741        tcg_rt = cpu_reg(s, rt);
1742        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1743        return;
1744    case ARM_CP_DC_ZVA:
1745        /* Writes clear the aligned block of memory which rt points into. */
1746        tcg_rt = cpu_reg(s, rt);
1747        gen_helper_dc_zva(cpu_env, tcg_rt);
1748        return;
1749    default:
1750        break;
1751    }
1752    if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
1753        return;
1754    } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
1755        return;
1756    }
1757
1758    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1759        gen_io_start();
1760    }
1761
1762    tcg_rt = cpu_reg(s, rt);
1763
1764    if (isread) {
1765        if (ri->type & ARM_CP_CONST) {
1766            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1767        } else if (ri->readfn) {
1768            TCGv_ptr tmpptr;
1769            tmpptr = tcg_const_ptr(ri);
1770            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1771            tcg_temp_free_ptr(tmpptr);
1772        } else {
1773            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1774        }
1775    } else {
1776        if (ri->type & ARM_CP_CONST) {
1777            /* If not forbidden by access permissions, treat as WI */
1778            return;
1779        } else if (ri->writefn) {
1780            TCGv_ptr tmpptr;
1781            tmpptr = tcg_const_ptr(ri);
1782            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1783            tcg_temp_free_ptr(tmpptr);
1784        } else {
1785            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1786        }
1787    }
1788
1789    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1790        /* I/O operations must end the TB here (whether read or write) */
1791        s->base.is_jmp = DISAS_UPDATE;
1792    }
1793    if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1794        /*
1795         * A write to any coprocessor regiser that ends a TB
1796         * must rebuild the hflags for the next TB.
1797         */
1798        TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
1799        gen_helper_rebuild_hflags_a64(cpu_env, tcg_el);
1800        tcg_temp_free_i32(tcg_el);
1801        /*
1802         * We default to ending the TB on a coprocessor register write,
1803         * but allow this to be suppressed by the register definition
1804         * (usually only necessary to work around guest bugs).
1805         */
1806        s->base.is_jmp = DISAS_UPDATE;
1807    }
1808}
1809
1810/* System
1811 *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1812 * +---------------------+---+-----+-----+-------+-------+-----+------+
1813 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1814 * +---------------------+---+-----+-----+-------+-------+-----+------+
1815 */
1816static void disas_system(DisasContext *s, uint32_t insn)
1817{
1818    unsigned int l, op0, op1, crn, crm, op2, rt;
1819    l = extract32(insn, 21, 1);
1820    op0 = extract32(insn, 19, 2);
1821    op1 = extract32(insn, 16, 3);
1822    crn = extract32(insn, 12, 4);
1823    crm = extract32(insn, 8, 4);
1824    op2 = extract32(insn, 5, 3);
1825    rt = extract32(insn, 0, 5);
1826
1827    if (op0 == 0) {
1828        if (l || rt != 31) {
1829            unallocated_encoding(s);
1830            return;
1831        }
1832        switch (crn) {
1833        case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
1834            handle_hint(s, insn, op1, op2, crm);
1835            break;
1836        case 3: /* CLREX, DSB, DMB, ISB */
1837            handle_sync(s, insn, op1, op2, crm);
1838            break;
1839        case 4: /* MSR (immediate) */
1840            handle_msr_i(s, insn, op1, op2, crm);
1841            break;
1842        default:
1843            unallocated_encoding(s);
1844            break;
1845        }
1846        return;
1847    }
1848    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1849}
1850
1851/* Exception generation
1852 *
1853 *  31             24 23 21 20                     5 4   2 1  0
1854 * +-----------------+-----+------------------------+-----+----+
1855 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1856 * +-----------------------+------------------------+----------+
1857 */
1858static void disas_exc(DisasContext *s, uint32_t insn)
1859{
1860    int opc = extract32(insn, 21, 3);
1861    int op2_ll = extract32(insn, 0, 5);
1862    int imm16 = extract32(insn, 5, 16);
1863    TCGv_i32 tmp;
1864
1865    switch (opc) {
1866    case 0:
1867        /* For SVC, HVC and SMC we advance the single-step state
1868         * machine before taking the exception. This is architecturally
1869         * mandated, to ensure that single-stepping a system call
1870         * instruction works properly.
1871         */
1872        switch (op2_ll) {
1873        case 1:                                                     /* SVC */
1874            gen_ss_advance(s);
1875            gen_exception_insn(s, s->base.pc_next, EXCP_SWI,
1876                               syn_aa64_svc(imm16), default_exception_el(s));
1877            break;
1878        case 2:                                                     /* HVC */
1879            if (s->current_el == 0) {
1880                unallocated_encoding(s);
1881                break;
1882            }
1883            /* The pre HVC helper handles cases when HVC gets trapped
1884             * as an undefined insn by runtime configuration.
1885             */
1886            gen_a64_set_pc_im(s->pc_curr);
1887            gen_helper_pre_hvc(cpu_env);
1888            gen_ss_advance(s);
1889            gen_exception_insn(s, s->base.pc_next, EXCP_HVC,
1890                               syn_aa64_hvc(imm16), 2);
1891            break;
1892        case 3:                                                     /* SMC */
1893            if (s->current_el == 0) {
1894                unallocated_encoding(s);
1895                break;
1896            }
1897            gen_a64_set_pc_im(s->pc_curr);
1898            tmp = tcg_const_i32(syn_aa64_smc(imm16));
1899            gen_helper_pre_smc(cpu_env, tmp);
1900            tcg_temp_free_i32(tmp);
1901            gen_ss_advance(s);
1902            gen_exception_insn(s, s->base.pc_next, EXCP_SMC,
1903                               syn_aa64_smc(imm16), 3);
1904            break;
1905        default:
1906            unallocated_encoding(s);
1907            break;
1908        }
1909        break;
1910    case 1:
1911        if (op2_ll != 0) {
1912            unallocated_encoding(s);
1913            break;
1914        }
1915        /* BRK */
1916        gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16));
1917        break;
1918    case 2:
1919        if (op2_ll != 0) {
1920            unallocated_encoding(s);
1921            break;
1922        }
1923        /* HLT. This has two purposes.
1924         * Architecturally, it is an external halting debug instruction.
1925         * Since QEMU doesn't implement external debug, we treat this as
1926         * it is required for halting debug disabled: it will UNDEF.
1927         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1928         */
1929        if (semihosting_enabled() && imm16 == 0xf000) {
1930#ifndef CONFIG_USER_ONLY
1931            /* In system mode, don't allow userspace access to semihosting,
1932             * to provide some semblance of security (and for consistency
1933             * with our 32-bit semihosting).
1934             */
1935            if (s->current_el == 0) {
1936                unsupported_encoding(s, insn);
1937                break;
1938            }
1939#endif
1940            gen_exception_internal_insn(s, s->base.pc_next, EXCP_SEMIHOST);
1941        } else {
1942            unsupported_encoding(s, insn);
1943        }
1944        break;
1945    case 5:
1946        if (op2_ll < 1 || op2_ll > 3) {
1947            unallocated_encoding(s);
1948            break;
1949        }
1950        /* DCPS1, DCPS2, DCPS3 */
1951        unsupported_encoding(s, insn);
1952        break;
1953    default:
1954        unallocated_encoding(s);
1955        break;
1956    }
1957}
1958
1959/* Unconditional branch (register)
1960 *  31           25 24   21 20   16 15   10 9    5 4     0
1961 * +---------------+-------+-------+-------+------+-------+
1962 * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1963 * +---------------+-------+-------+-------+------+-------+
1964 */
1965static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1966{
1967    unsigned int opc, op2, op3, rn, op4;
1968    unsigned btype_mod = 2;   /* 0: BR, 1: BLR, 2: other */
1969    TCGv_i64 dst;
1970    TCGv_i64 modifier;
1971
1972    opc = extract32(insn, 21, 4);
1973    op2 = extract32(insn, 16, 5);
1974    op3 = extract32(insn, 10, 6);
1975    rn = extract32(insn, 5, 5);
1976    op4 = extract32(insn, 0, 5);
1977
1978    if (op2 != 0x1f) {
1979        goto do_unallocated;
1980    }
1981
1982    switch (opc) {
1983    case 0: /* BR */
1984    case 1: /* BLR */
1985    case 2: /* RET */
1986        btype_mod = opc;
1987        switch (op3) {
1988        case 0:
1989            /* BR, BLR, RET */
1990            if (op4 != 0) {
1991                goto do_unallocated;
1992            }
1993            dst = cpu_reg(s, rn);
1994            break;
1995
1996        case 2:
1997        case 3:
1998            if (!dc_isar_feature(aa64_pauth, s)) {
1999                goto do_unallocated;
2000            }
2001            if (opc == 2) {
2002                /* RETAA, RETAB */
2003                if (rn != 0x1f || op4 != 0x1f) {
2004                    goto do_unallocated;
2005                }
2006                rn = 30;
2007                modifier = cpu_X[31];
2008            } else {
2009                /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */
2010                if (op4 != 0x1f) {
2011                    goto do_unallocated;
2012                }
2013                modifier = new_tmp_a64_zero(s);
2014            }
2015            if (s->pauth_active) {
2016                dst = new_tmp_a64(s);
2017                if (op3 == 2) {
2018                    gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2019                } else {
2020                    gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2021                }
2022            } else {
2023                dst = cpu_reg(s, rn);
2024            }
2025            break;
2026
2027        default:
2028            goto do_unallocated;
2029        }
2030        gen_a64_set_pc(s, dst);
2031        /* BLR also needs to load return address */
2032        if (opc == 1) {
2033            tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
2034        }
2035        break;
2036
2037    case 8: /* BRAA */
2038    case 9: /* BLRAA */
2039        if (!dc_isar_feature(aa64_pauth, s)) {
2040            goto do_unallocated;
2041        }
2042        if ((op3 & ~1) != 2) {
2043            goto do_unallocated;
2044        }
2045        btype_mod = opc & 1;
2046        if (s->pauth_active) {
2047            dst = new_tmp_a64(s);
2048            modifier = cpu_reg_sp(s, op4);
2049            if (op3 == 2) {
2050                gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2051            } else {
2052                gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2053            }
2054        } else {
2055            dst = cpu_reg(s, rn);
2056        }
2057        gen_a64_set_pc(s, dst);
2058        /* BLRAA also needs to load return address */
2059        if (opc == 9) {
2060            tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
2061        }
2062        break;
2063
2064    case 4: /* ERET */
2065        if (s->current_el == 0) {
2066            goto do_unallocated;
2067        }
2068        switch (op3) {
2069        case 0: /* ERET */
2070            if (op4 != 0) {
2071                goto do_unallocated;
2072            }
2073            dst = tcg_temp_new_i64();
2074            tcg_gen_ld_i64(dst, cpu_env,
2075                           offsetof(CPUARMState, elr_el[s->current_el]));
2076            break;
2077
2078        case 2: /* ERETAA */
2079        case 3: /* ERETAB */
2080            if (!dc_isar_feature(aa64_pauth, s)) {
2081                goto do_unallocated;
2082            }
2083            if (rn != 0x1f || op4 != 0x1f) {
2084                goto do_unallocated;
2085            }
2086            dst = tcg_temp_new_i64();
2087            tcg_gen_ld_i64(dst, cpu_env,
2088                           offsetof(CPUARMState, elr_el[s->current_el]));
2089            if (s->pauth_active) {
2090                modifier = cpu_X[31];
2091                if (op3 == 2) {
2092                    gen_helper_autia(dst, cpu_env, dst, modifier);
2093                } else {
2094                    gen_helper_autib(dst, cpu_env, dst, modifier);
2095                }
2096            }
2097            break;
2098
2099        default:
2100            goto do_unallocated;
2101        }
2102        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2103            gen_io_start();
2104        }
2105
2106        gen_helper_exception_return(cpu_env, dst);
2107        tcg_temp_free_i64(dst);
2108        /* Must exit loop to check un-masked IRQs */
2109        s->base.is_jmp = DISAS_EXIT;
2110        return;
2111
2112    case 5: /* DRPS */
2113        if (op3 != 0 || op4 != 0 || rn != 0x1f) {
2114            goto do_unallocated;
2115        } else {
2116            unsupported_encoding(s, insn);
2117        }
2118        return;
2119
2120    default:
2121    do_unallocated:
2122        unallocated_encoding(s);
2123        return;
2124    }
2125
2126    switch (btype_mod) {
2127    case 0: /* BR */
2128        if (dc_isar_feature(aa64_bti, s)) {
2129            /* BR to {x16,x17} or !guard -> 1, else 3.  */
2130            set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
2131        }
2132        break;
2133
2134    case 1: /* BLR */
2135        if (dc_isar_feature(aa64_bti, s)) {
2136            /* BLR sets BTYPE to 2, regardless of source guarded page.  */
2137            set_btype(s, 2);
2138        }
2139        break;
2140
2141    default: /* RET or none of the above.  */
2142        /* BTYPE will be set to 0 by normal end-of-insn processing.  */
2143        break;
2144    }
2145
2146    s->base.is_jmp = DISAS_JUMP;
2147}
2148
2149/* Branches, exception generating and system instructions */
2150static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
2151{
2152    switch (extract32(insn, 25, 7)) {
2153    case 0x0a: case 0x0b:
2154    case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
2155        disas_uncond_b_imm(s, insn);
2156        break;
2157    case 0x1a: case 0x5a: /* Compare & branch (immediate) */
2158        disas_comp_b_imm(s, insn);
2159        break;
2160    case 0x1b: case 0x5b: /* Test & branch (immediate) */
2161        disas_test_b_imm(s, insn);
2162        break;
2163    case 0x2a: /* Conditional branch (immediate) */
2164        disas_cond_b_imm(s, insn);
2165        break;
2166    case 0x6a: /* Exception generation / System */
2167        if (insn & (1 << 24)) {
2168            if (extract32(insn, 22, 2) == 0) {
2169                disas_system(s, insn);
2170            } else {
2171                unallocated_encoding(s);
2172            }
2173        } else {
2174            disas_exc(s, insn);
2175        }
2176        break;
2177    case 0x6b: /* Unconditional branch (register) */
2178        disas_uncond_b_reg(s, insn);
2179        break;
2180    default:
2181        unallocated_encoding(s);
2182        break;
2183    }
2184}
2185
2186/*
2187 * Load/Store exclusive instructions are implemented by remembering
2188 * the value/address loaded, and seeing if these are the same
2189 * when the store is performed. This is not actually the architecturally
2190 * mandated semantics, but it works for typical guest code sequences
2191 * and avoids having to monitor regular stores.
2192 *
2193 * The store exclusive uses the atomic cmpxchg primitives to avoid
2194 * races in multi-threaded linux-user and when MTTCG softmmu is
2195 * enabled.
2196 */
2197static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
2198                               TCGv_i64 addr, int size, bool is_pair)
2199{
2200    int idx = get_mem_index(s);
2201    MemOp memop = s->be_data;
2202
2203    g_assert(size <= 3);
2204    if (is_pair) {
2205        g_assert(size >= 2);
2206        if (size == 2) {
2207            /* The pair must be single-copy atomic for the doubleword.  */
2208            memop |= MO_64 | MO_ALIGN;
2209            tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2210            if (s->be_data == MO_LE) {
2211                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2212                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2213            } else {
2214                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2215                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2216            }
2217        } else {
2218            /* The pair must be single-copy atomic for *each* doubleword, not
2219               the entire quadword, however it must be quadword aligned.  */
2220            memop |= MO_64;
2221            tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
2222                                memop | MO_ALIGN_16);
2223
2224            TCGv_i64 addr2 = tcg_temp_new_i64();
2225            tcg_gen_addi_i64(addr2, addr, 8);
2226            tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
2227            tcg_temp_free_i64(addr2);
2228
2229            tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2230            tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2231        }
2232    } else {
2233        memop |= size | MO_ALIGN;
2234        tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2235        tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2236    }
2237    tcg_gen_mov_i64(cpu_exclusive_addr, addr);
2238}
2239
2240static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2241                                TCGv_i64 addr, int size, int is_pair)
2242{
2243    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2244     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2245     *     [addr] = {Rt};
2246     *     if (is_pair) {
2247     *         [addr + datasize] = {Rt2};
2248     *     }
2249     *     {Rd} = 0;
2250     * } else {
2251     *     {Rd} = 1;
2252     * }
2253     * env->exclusive_addr = -1;
2254     */
2255    TCGLabel *fail_label = gen_new_label();
2256    TCGLabel *done_label = gen_new_label();
2257    TCGv_i64 tmp;
2258
2259    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
2260
2261    tmp = tcg_temp_new_i64();
2262    if (is_pair) {
2263        if (size == 2) {
2264            if (s->be_data == MO_LE) {
2265                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2266            } else {
2267                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2268            }
2269            tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2270                                       cpu_exclusive_val, tmp,
2271                                       get_mem_index(s),
2272                                       MO_64 | MO_ALIGN | s->be_data);
2273            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2274        } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2275            if (!HAVE_CMPXCHG128) {
2276                gen_helper_exit_atomic(cpu_env);
2277                s->base.is_jmp = DISAS_NORETURN;
2278            } else if (s->be_data == MO_LE) {
2279                gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
2280                                                        cpu_exclusive_addr,
2281                                                        cpu_reg(s, rt),
2282                                                        cpu_reg(s, rt2));
2283            } else {
2284                gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
2285                                                        cpu_exclusive_addr,
2286                                                        cpu_reg(s, rt),
2287                                                        cpu_reg(s, rt2));
2288            }
2289        } else if (s->be_data == MO_LE) {
2290            gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
2291                                           cpu_reg(s, rt), cpu_reg(s, rt2));
2292        } else {
2293            gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
2294                                           cpu_reg(s, rt), cpu_reg(s, rt2));
2295        }
2296    } else {
2297        tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2298                                   cpu_reg(s, rt), get_mem_index(s),
2299                                   size | MO_ALIGN | s->be_data);
2300        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2301    }
2302    tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2303    tcg_temp_free_i64(tmp);
2304    tcg_gen_br(done_label);
2305
2306    gen_set_label(fail_label);
2307    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2308    gen_set_label(done_label);
2309    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2310}
2311
2312static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2313                                 int rn, int size)
2314{
2315    TCGv_i64 tcg_rs = cpu_reg(s, rs);
2316    TCGv_i64 tcg_rt = cpu_reg(s, rt);
2317    int memidx = get_mem_index(s);
2318    TCGv_i64 clean_addr;
2319
2320    if (rn == 31) {
2321        gen_check_sp_alignment(s);
2322    }
2323    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2324    tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
2325                               size | MO_ALIGN | s->be_data);
2326}
2327
2328static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2329                                      int rn, int size)
2330{
2331    TCGv_i64 s1 = cpu_reg(s, rs);
2332    TCGv_i64 s2 = cpu_reg(s, rs + 1);
2333    TCGv_i64 t1 = cpu_reg(s, rt);
2334    TCGv_i64 t2 = cpu_reg(s, rt + 1);
2335    TCGv_i64 clean_addr;
2336    int memidx = get_mem_index(s);
2337
2338    if (rn == 31) {
2339        gen_check_sp_alignment(s);
2340    }
2341    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2342
2343    if (size == 2) {
2344        TCGv_i64 cmp = tcg_temp_new_i64();
2345        TCGv_i64 val = tcg_temp_new_i64();
2346
2347        if (s->be_data == MO_LE) {
2348            tcg_gen_concat32_i64(val, t1, t2);
2349            tcg_gen_concat32_i64(cmp, s1, s2);
2350        } else {
2351            tcg_gen_concat32_i64(val, t2, t1);
2352            tcg_gen_concat32_i64(cmp, s2, s1);
2353        }
2354
2355        tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
2356                                   MO_64 | MO_ALIGN | s->be_data);
2357        tcg_temp_free_i64(val);
2358
2359        if (s->be_data == MO_LE) {
2360            tcg_gen_extr32_i64(s1, s2, cmp);
2361        } else {
2362            tcg_gen_extr32_i64(s2, s1, cmp);
2363        }
2364        tcg_temp_free_i64(cmp);
2365    } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2366        if (HAVE_CMPXCHG128) {
2367            TCGv_i32 tcg_rs = tcg_const_i32(rs);
2368            if (s->be_data == MO_LE) {
2369                gen_helper_casp_le_parallel(cpu_env, tcg_rs,
2370                                            clean_addr, t1, t2);
2371            } else {
2372                gen_helper_casp_be_parallel(cpu_env, tcg_rs,
2373                                            clean_addr, t1, t2);
2374            }
2375            tcg_temp_free_i32(tcg_rs);
2376        } else {
2377            gen_helper_exit_atomic(cpu_env);
2378            s->base.is_jmp = DISAS_NORETURN;
2379        }
2380    } else {
2381        TCGv_i64 d1 = tcg_temp_new_i64();
2382        TCGv_i64 d2 = tcg_temp_new_i64();
2383        TCGv_i64 a2 = tcg_temp_new_i64();
2384        TCGv_i64 c1 = tcg_temp_new_i64();
2385        TCGv_i64 c2 = tcg_temp_new_i64();
2386        TCGv_i64 zero = tcg_const_i64(0);
2387
2388        /* Load the two words, in memory order.  */
2389        tcg_gen_qemu_ld_i64(d1, clean_addr, memidx,
2390                            MO_64 | MO_ALIGN_16 | s->be_data);
2391        tcg_gen_addi_i64(a2, clean_addr, 8);
2392        tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data);
2393
2394        /* Compare the two words, also in memory order.  */
2395        tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
2396        tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
2397        tcg_gen_and_i64(c2, c2, c1);
2398
2399        /* If compare equal, write back new data, else write back old data.  */
2400        tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
2401        tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
2402        tcg_gen_qemu_st_i64(c1, clean_addr, memidx, MO_64 | s->be_data);
2403        tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
2404        tcg_temp_free_i64(a2);
2405        tcg_temp_free_i64(c1);
2406        tcg_temp_free_i64(c2);
2407        tcg_temp_free_i64(zero);
2408
2409        /* Write back the data from memory to Rs.  */
2410        tcg_gen_mov_i64(s1, d1);
2411        tcg_gen_mov_i64(s2, d2);
2412        tcg_temp_free_i64(d1);
2413        tcg_temp_free_i64(d2);
2414    }
2415}
2416
2417/* Update the Sixty-Four bit (SF) registersize. This logic is derived
2418 * from the ARMv8 specs for LDR (Shared decode for all encodings).
2419 */
2420static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
2421{
2422    int opc0 = extract32(opc, 0, 1);
2423    int regsize;
2424
2425    if (is_signed) {
2426        regsize = opc0 ? 32 : 64;
2427    } else {
2428        regsize = size == 3 ? 64 : 32;
2429    }
2430    return regsize == 64;
2431}
2432
2433/* Load/store exclusive
2434 *
2435 *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
2436 * +-----+-------------+----+---+----+------+----+-------+------+------+
2437 * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
2438 * +-----+-------------+----+---+----+------+----+-------+------+------+
2439 *
2440 *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2441 *   L: 0 -> store, 1 -> load
2442 *  o2: 0 -> exclusive, 1 -> not
2443 *  o1: 0 -> single register, 1 -> register pair
2444 *  o0: 1 -> load-acquire/store-release, 0 -> not
2445 */
2446static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2447{
2448    int rt = extract32(insn, 0, 5);
2449    int rn = extract32(insn, 5, 5);
2450    int rt2 = extract32(insn, 10, 5);
2451    int rs = extract32(insn, 16, 5);
2452    int is_lasr = extract32(insn, 15, 1);
2453    int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
2454    int size = extract32(insn, 30, 2);
2455    TCGv_i64 clean_addr;
2456
2457    switch (o2_L_o1_o0) {
2458    case 0x0: /* STXR */
2459    case 0x1: /* STLXR */
2460        if (rn == 31) {
2461            gen_check_sp_alignment(s);
2462        }
2463        if (is_lasr) {
2464            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2465        }
2466        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2467        gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
2468        return;
2469
2470    case 0x4: /* LDXR */
2471    case 0x5: /* LDAXR */
2472        if (rn == 31) {
2473            gen_check_sp_alignment(s);
2474        }
2475        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2476        s->is_ldex = true;
2477        gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
2478        if (is_lasr) {
2479            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2480        }
2481        return;
2482
2483    case 0x8: /* STLLR */
2484        if (!dc_isar_feature(aa64_lor, s)) {
2485            break;
2486        }
2487        /* StoreLORelease is the same as Store-Release for QEMU.  */
2488        /* fall through */
2489    case 0x9: /* STLR */
2490        /* Generate ISS for non-exclusive accesses including LASR.  */
2491        if (rn == 31) {
2492            gen_check_sp_alignment(s);
2493        }
2494        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2495        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2496        do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt,
2497                  disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2498        return;
2499
2500    case 0xc: /* LDLAR */
2501        if (!dc_isar_feature(aa64_lor, s)) {
2502            break;
2503        }
2504        /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2505        /* fall through */
2506    case 0xd: /* LDAR */
2507        /* Generate ISS for non-exclusive accesses including LASR.  */
2508        if (rn == 31) {
2509            gen_check_sp_alignment(s);
2510        }
2511        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2512        do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt,
2513                  disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2514        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2515        return;
2516
2517    case 0x2: case 0x3: /* CASP / STXP */
2518        if (size & 2) { /* STXP / STLXP */
2519            if (rn == 31) {
2520                gen_check_sp_alignment(s);
2521            }
2522            if (is_lasr) {
2523                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2524            }
2525            clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2526            gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
2527            return;
2528        }
2529        if (rt2 == 31
2530            && ((rt | rs) & 1) == 0
2531            && dc_isar_feature(aa64_atomics, s)) {
2532            /* CASP / CASPL */
2533            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2534            return;
2535        }
2536        break;
2537
2538    case 0x6: case 0x7: /* CASPA / LDXP */
2539        if (size & 2) { /* LDXP / LDAXP */
2540            if (rn == 31) {
2541                gen_check_sp_alignment(s);
2542            }
2543            clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2544            s->is_ldex = true;
2545            gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
2546            if (is_lasr) {
2547                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2548            }
2549            return;
2550        }
2551        if (rt2 == 31
2552            && ((rt | rs) & 1) == 0
2553            && dc_isar_feature(aa64_atomics, s)) {
2554            /* CASPA / CASPAL */
2555            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2556            return;
2557        }
2558        break;
2559
2560    case 0xa: /* CAS */
2561    case 0xb: /* CASL */
2562    case 0xe: /* CASA */
2563    case 0xf: /* CASAL */
2564        if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
2565            gen_compare_and_swap(s, rs, rt, rn, size);
2566            return;
2567        }
2568        break;
2569    }
2570    unallocated_encoding(s);
2571}
2572
2573/*
2574 * Load register (literal)
2575 *
2576 *  31 30 29   27  26 25 24 23                5 4     0
2577 * +-----+-------+---+-----+-------------------+-------+
2578 * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2579 * +-----+-------+---+-----+-------------------+-------+
2580 *
2581 * V: 1 -> vector (simd/fp)
2582 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2583 *                   10-> 32 bit signed, 11 -> prefetch
2584 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2585 */
2586static void disas_ld_lit(DisasContext *s, uint32_t insn)
2587{
2588    int rt = extract32(insn, 0, 5);
2589    int64_t imm = sextract32(insn, 5, 19) << 2;
2590    bool is_vector = extract32(insn, 26, 1);
2591    int opc = extract32(insn, 30, 2);
2592    bool is_signed = false;
2593    int size = 2;
2594    TCGv_i64 tcg_rt, clean_addr;
2595
2596    if (is_vector) {
2597        if (opc == 3) {
2598            unallocated_encoding(s);
2599            return;
2600        }
2601        size = 2 + opc;
2602        if (!fp_access_check(s)) {
2603            return;
2604        }
2605    } else {
2606        if (opc == 3) {
2607            /* PRFM (literal) : prefetch */
2608            return;
2609        }
2610        size = 2 + extract32(opc, 0, 1);
2611        is_signed = extract32(opc, 1, 1);
2612    }
2613
2614    tcg_rt = cpu_reg(s, rt);
2615
2616    clean_addr = tcg_const_i64(s->pc_curr + imm);
2617    if (is_vector) {
2618        do_fp_ld(s, rt, clean_addr, size);
2619    } else {
2620        /* Only unsigned 32bit loads target 32bit registers.  */
2621        bool iss_sf = opc != 0;
2622
2623        do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false,
2624                  true, rt, iss_sf, false);
2625    }
2626    tcg_temp_free_i64(clean_addr);
2627}
2628
2629/*
2630 * LDNP (Load Pair - non-temporal hint)
2631 * LDP (Load Pair - non vector)
2632 * LDPSW (Load Pair Signed Word - non vector)
2633 * STNP (Store Pair - non-temporal hint)
2634 * STP (Store Pair - non vector)
2635 * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2636 * LDP (Load Pair of SIMD&FP)
2637 * STNP (Store Pair of SIMD&FP - non-temporal hint)
2638 * STP (Store Pair of SIMD&FP)
2639 *
2640 *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2641 * +-----+-------+---+---+-------+---+-----------------------------+
2642 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2643 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2644 *
2645 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2646 *      LDPSW                    01
2647 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2648 *   V: 0 -> GPR, 1 -> Vector
2649 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2650 *      10 -> signed offset, 11 -> pre-index
2651 *   L: 0 -> Store 1 -> Load
2652 *
2653 * Rt, Rt2 = GPR or SIMD registers to be stored
2654 * Rn = general purpose register containing address
2655 * imm7 = signed offset (multiple of 4 or 8 depending on size)
2656 */
2657static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2658{
2659    int rt = extract32(insn, 0, 5);
2660    int rn = extract32(insn, 5, 5);
2661    int rt2 = extract32(insn, 10, 5);
2662    uint64_t offset = sextract64(insn, 15, 7);
2663    int index = extract32(insn, 23, 2);
2664    bool is_vector = extract32(insn, 26, 1);
2665    bool is_load = extract32(insn, 22, 1);
2666    int opc = extract32(insn, 30, 2);
2667
2668    bool is_signed = false;
2669    bool postindex = false;
2670    bool wback = false;
2671
2672    TCGv_i64 clean_addr, dirty_addr;
2673
2674    int size;
2675
2676    if (opc == 3) {
2677        unallocated_encoding(s);
2678        return;
2679    }
2680
2681    if (is_vector) {
2682        size = 2 + opc;
2683    } else {
2684        size = 2 + extract32(opc, 1, 1);
2685        is_signed = extract32(opc, 0, 1);
2686        if (!is_load && is_signed) {
2687            unallocated_encoding(s);
2688            return;
2689        }
2690    }
2691
2692    switch (index) {
2693    case 1: /* post-index */
2694        postindex = true;
2695        wback = true;
2696        break;
2697    case 0:
2698        /* signed offset with "non-temporal" hint. Since we don't emulate
2699         * caches we don't care about hints to the cache system about
2700         * data access patterns, and handle this identically to plain
2701         * signed offset.
2702         */
2703        if (is_signed) {
2704            /* There is no non-temporal-hint version of LDPSW */
2705            unallocated_encoding(s);
2706            return;
2707        }
2708        postindex = false;
2709        break;
2710    case 2: /* signed offset, rn not updated */
2711        postindex = false;
2712        break;
2713    case 3: /* pre-index */
2714        postindex = false;
2715        wback = true;
2716        break;
2717    }
2718
2719    if (is_vector && !fp_access_check(s)) {
2720        return;
2721    }
2722
2723    offset <<= size;
2724
2725    if (rn == 31) {
2726        gen_check_sp_alignment(s);
2727    }
2728
2729    dirty_addr = read_cpu_reg_sp(s, rn, 1);
2730    if (!postindex) {
2731        tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2732    }
2733    clean_addr = clean_data_tbi(s, dirty_addr);
2734
2735    if (is_vector) {
2736        if (is_load) {
2737            do_fp_ld(s, rt, clean_addr, size);
2738        } else {
2739            do_fp_st(s, rt, clean_addr, size);
2740        }
2741        tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2742        if (is_load) {
2743            do_fp_ld(s, rt2, clean_addr, size);
2744        } else {
2745            do_fp_st(s, rt2, clean_addr, size);
2746        }
2747    } else {
2748        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2749        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2750
2751        if (is_load) {
2752            TCGv_i64 tmp = tcg_temp_new_i64();
2753
2754            /* Do not modify tcg_rt before recognizing any exception
2755             * from the second load.
2756             */
2757            do_gpr_ld(s, tmp, clean_addr, size, is_signed, false,
2758                      false, 0, false, false);
2759            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2760            do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false,
2761                      false, 0, false, false);
2762
2763            tcg_gen_mov_i64(tcg_rt, tmp);
2764            tcg_temp_free_i64(tmp);
2765        } else {
2766            do_gpr_st(s, tcg_rt, clean_addr, size,
2767                      false, 0, false, false);
2768            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2769            do_gpr_st(s, tcg_rt2, clean_addr, size,
2770                      false, 0, false, false);
2771        }
2772    }
2773
2774    if (wback) {
2775        if (postindex) {
2776            tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2777        }
2778        tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
2779    }
2780}
2781
2782/*
2783 * Load/store (immediate post-indexed)
2784 * Load/store (immediate pre-indexed)
2785 * Load/store (unscaled immediate)
2786 *
2787 * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2788 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2789 * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2790 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2791 *
2792 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2793         10 -> unprivileged
2794 * V = 0 -> non-vector
2795 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2796 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2797 */
2798static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2799                                int opc,
2800                                int size,
2801                                int rt,
2802                                bool is_vector)
2803{
2804    int rn = extract32(insn, 5, 5);
2805    int imm9 = sextract32(insn, 12, 9);
2806    int idx = extract32(insn, 10, 2);
2807    bool is_signed = false;
2808    bool is_store = false;
2809    bool is_extended = false;
2810    bool is_unpriv = (idx == 2);
2811    bool iss_valid = !is_vector;
2812    bool post_index;
2813    bool writeback;
2814
2815    TCGv_i64 clean_addr, dirty_addr;
2816
2817    if (is_vector) {
2818        size |= (opc & 2) << 1;
2819        if (size > 4 || is_unpriv) {
2820            unallocated_encoding(s);
2821            return;
2822        }
2823        is_store = ((opc & 1) == 0);
2824        if (!fp_access_check(s)) {
2825            return;
2826        }
2827    } else {
2828        if (size == 3 && opc == 2) {
2829            /* PRFM - prefetch */
2830            if (idx != 0) {
2831                unallocated_encoding(s);
2832                return;
2833            }
2834            return;
2835        }
2836        if (opc == 3 && size > 1) {
2837            unallocated_encoding(s);
2838            return;
2839        }
2840        is_store = (opc == 0);
2841        is_signed = extract32(opc, 1, 1);
2842        is_extended = (size < 3) && extract32(opc, 0, 1);
2843    }
2844
2845    switch (idx) {
2846    case 0:
2847    case 2:
2848        post_index = false;
2849        writeback = false;
2850        break;
2851    case 1:
2852        post_index = true;
2853        writeback = true;
2854        break;
2855    case 3:
2856        post_index = false;
2857        writeback = true;
2858        break;
2859    default:
2860        g_assert_not_reached();
2861    }
2862
2863    if (rn == 31) {
2864        gen_check_sp_alignment(s);
2865    }
2866
2867    dirty_addr = read_cpu_reg_sp(s, rn, 1);
2868    if (!post_index) {
2869        tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
2870    }
2871    clean_addr = clean_data_tbi(s, dirty_addr);
2872
2873    if (is_vector) {
2874        if (is_store) {
2875            do_fp_st(s, rt, clean_addr, size);
2876        } else {
2877            do_fp_ld(s, rt, clean_addr, size);
2878        }
2879    } else {
2880        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2881        int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2882        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2883
2884        if (is_store) {
2885            do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
2886                             iss_valid, rt, iss_sf, false);
2887        } else {
2888            do_gpr_ld_memidx(s, tcg_rt, clean_addr, size,
2889                             is_signed, is_extended, memidx,
2890                             iss_valid, rt, iss_sf, false);
2891        }
2892    }
2893
2894    if (writeback) {
2895        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2896        if (post_index) {
2897            tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
2898        }
2899        tcg_gen_mov_i64(tcg_rn, dirty_addr);
2900    }
2901}
2902
2903/*
2904 * Load/store (register offset)
2905 *
2906 * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2907 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2908 * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2909 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2910 *
2911 * For non-vector:
2912 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2913 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2914 * For vector:
2915 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2916 *   opc<0>: 0 -> store, 1 -> load
2917 * V: 1 -> vector/simd
2918 * opt: extend encoding (see DecodeRegExtend)
2919 * S: if S=1 then scale (essentially index by sizeof(size))
2920 * Rt: register to transfer into/out of
2921 * Rn: address register or SP for base
2922 * Rm: offset register or ZR for offset
2923 */
2924static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2925                                   int opc,
2926                                   int size,
2927                                   int rt,
2928                                   bool is_vector)
2929{
2930    int rn = extract32(insn, 5, 5);
2931    int shift = extract32(insn, 12, 1);
2932    int rm = extract32(insn, 16, 5);
2933    int opt = extract32(insn, 13, 3);
2934    bool is_signed = false;
2935    bool is_store = false;
2936    bool is_extended = false;
2937
2938    TCGv_i64 tcg_rm, clean_addr, dirty_addr;
2939
2940    if (extract32(opt, 1, 1) == 0) {
2941        unallocated_encoding(s);
2942        return;
2943    }
2944
2945    if (is_vector) {
2946        size |= (opc & 2) << 1;
2947        if (size > 4) {
2948            unallocated_encoding(s);
2949            return;
2950        }
2951        is_store = !extract32(opc, 0, 1);
2952        if (!fp_access_check(s)) {
2953            return;
2954        }
2955    } else {
2956        if (size == 3 && opc == 2) {
2957            /* PRFM - prefetch */
2958            return;
2959        }
2960        if (opc == 3 && size > 1) {
2961            unallocated_encoding(s);
2962            return;
2963        }
2964        is_store = (opc == 0);
2965        is_signed = extract32(opc, 1, 1);
2966        is_extended = (size < 3) && extract32(opc, 0, 1);
2967    }
2968
2969    if (rn == 31) {
2970        gen_check_sp_alignment(s);
2971    }
2972    dirty_addr = read_cpu_reg_sp(s, rn, 1);
2973
2974    tcg_rm = read_cpu_reg(s, rm, 1);
2975    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2976
2977    tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
2978    clean_addr = clean_data_tbi(s, dirty_addr);
2979
2980    if (is_vector) {
2981        if (is_store) {
2982            do_fp_st(s, rt, clean_addr, size);
2983        } else {
2984            do_fp_ld(s, rt, clean_addr, size);
2985        }
2986    } else {
2987        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2988        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2989        if (is_store) {
2990            do_gpr_st(s, tcg_rt, clean_addr, size,
2991                      true, rt, iss_sf, false);
2992        } else {
2993            do_gpr_ld(s, tcg_rt, clean_addr, size,
2994                      is_signed, is_extended,
2995                      true, rt, iss_sf, false);
2996        }
2997    }
2998}
2999
3000/*
3001 * Load/store (unsigned immediate)
3002 *
3003 * 31 30 29   27  26 25 24 23 22 21        10 9     5
3004 * +----+-------+---+-----+-----+------------+-------+------+
3005 * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
3006 * +----+-------+---+-----+-----+------------+-------+------+
3007 *
3008 * For non-vector:
3009 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
3010 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3011 * For vector:
3012 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3013 *   opc<0>: 0 -> store, 1 -> load
3014 * Rn: base address register (inc SP)
3015 * Rt: target register
3016 */
3017static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
3018                                        int opc,
3019                                        int size,
3020                                        int rt,
3021                                        bool is_vector)
3022{
3023    int rn = extract32(insn, 5, 5);
3024    unsigned int imm12 = extract32(insn, 10, 12);
3025    unsigned int offset;
3026
3027    TCGv_i64 clean_addr, dirty_addr;
3028
3029    bool is_store;
3030    bool is_signed = false;
3031    bool is_extended = false;
3032
3033    if (is_vector) {
3034        size |= (opc & 2) << 1;
3035        if (size > 4) {
3036            unallocated_encoding(s);
3037            return;
3038        }
3039        is_store = !extract32(opc, 0, 1);
3040        if (!fp_access_check(s)) {
3041            return;
3042        }
3043    } else {
3044        if (size == 3 && opc == 2) {
3045            /* PRFM - prefetch */
3046            return;
3047        }
3048        if (opc == 3 && size > 1) {
3049            unallocated_encoding(s);
3050            return;
3051        }
3052        is_store = (opc == 0);
3053        is_signed = extract32(opc, 1, 1);
3054        is_extended = (size < 3) && extract32(opc, 0, 1);
3055    }
3056
3057    if (rn == 31) {
3058        gen_check_sp_alignment(s);
3059    }
3060    dirty_addr = read_cpu_reg_sp(s, rn, 1);
3061    offset = imm12 << size;
3062    tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3063    clean_addr = clean_data_tbi(s, dirty_addr);
3064
3065    if (is_vector) {
3066        if (is_store) {
3067            do_fp_st(s, rt, clean_addr, size);
3068        } else {
3069            do_fp_ld(s, rt, clean_addr, size);
3070        }
3071    } else {
3072        TCGv_i64 tcg_rt = cpu_reg(s, rt);
3073        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3074        if (is_store) {
3075            do_gpr_st(s, tcg_rt, clean_addr, size,
3076                      true, rt, iss_sf, false);
3077        } else {
3078            do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended,
3079                      true, rt, iss_sf, false);
3080        }
3081    }
3082}
3083
3084/* Atomic memory operations
3085 *
3086 *  31  30      27  26    24    22  21   16   15    12    10    5     0
3087 * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3088 * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn |  Rt |
3089 * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+
3090 *
3091 * Rt: the result register
3092 * Rn: base address or SP
3093 * Rs: the source register for the operation
3094 * V: vector flag (always 0 as of v8.3)
3095 * A: acquire flag
3096 * R: release flag
3097 */
3098static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
3099                              int size, int rt, bool is_vector)
3100{
3101    int rs = extract32(insn, 16, 5);
3102    int rn = extract32(insn, 5, 5);
3103    int o3_opc = extract32(insn, 12, 4);
3104    TCGv_i64 tcg_rs, clean_addr;
3105    AtomicThreeOpFn *fn;
3106
3107    if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
3108        unallocated_encoding(s);
3109        return;
3110    }
3111    switch (o3_opc) {
3112    case 000: /* LDADD */
3113        fn = tcg_gen_atomic_fetch_add_i64;
3114        break;
3115    case 001: /* LDCLR */
3116        fn = tcg_gen_atomic_fetch_and_i64;
3117        break;
3118    case 002: /* LDEOR */
3119        fn = tcg_gen_atomic_fetch_xor_i64;
3120        break;
3121    case 003: /* LDSET */
3122        fn = tcg_gen_atomic_fetch_or_i64;
3123        break;
3124    case 004: /* LDSMAX */
3125        fn = tcg_gen_atomic_fetch_smax_i64;
3126        break;
3127    case 005: /* LDSMIN */
3128        fn = tcg_gen_atomic_fetch_smin_i64;
3129        break;
3130    case 006: /* LDUMAX */
3131        fn = tcg_gen_atomic_fetch_umax_i64;
3132        break;
3133    case 007: /* LDUMIN */
3134        fn = tcg_gen_atomic_fetch_umin_i64;
3135        break;
3136    case 010: /* SWP */
3137        fn = tcg_gen_atomic_xchg_i64;
3138        break;
3139    default:
3140        unallocated_encoding(s);
3141        return;
3142    }
3143
3144    if (rn == 31) {
3145        gen_check_sp_alignment(s);
3146    }
3147    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
3148    tcg_rs = read_cpu_reg(s, rs, true);
3149
3150    if (o3_opc == 1) { /* LDCLR */
3151        tcg_gen_not_i64(tcg_rs, tcg_rs);
3152    }
3153
3154    /* The tcg atomic primitives are all full barriers.  Therefore we
3155     * can ignore the Acquire and Release bits of this instruction.
3156     */
3157    fn(cpu_reg(s, rt), clean_addr, tcg_rs, get_mem_index(s),
3158       s->be_data | size | MO_ALIGN);
3159}
3160
3161/*
3162 * PAC memory operations
3163 *
3164 *  31  30      27  26    24    22  21       12  11  10    5     0
3165 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3166 * | size | 1 1 1 | V | 0 0 | M S | 1 |  imm9  | W | 1 | Rn |  Rt |
3167 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3168 *
3169 * Rt: the result register
3170 * Rn: base address or SP
3171 * V: vector flag (always 0 as of v8.3)
3172 * M: clear for key DA, set for key DB
3173 * W: pre-indexing flag
3174 * S: sign for imm9.
3175 */
3176static void disas_ldst_pac(DisasContext *s, uint32_t insn,
3177                           int size, int rt, bool is_vector)
3178{
3179    int rn = extract32(insn, 5, 5);
3180    bool is_wback = extract32(insn, 11, 1);
3181    bool use_key_a = !extract32(insn, 23, 1);
3182    int offset;
3183    TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3184
3185    if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
3186        unallocated_encoding(s);
3187        return;
3188    }
3189
3190    if (rn == 31) {
3191        gen_check_sp_alignment(s);
3192    }
3193    dirty_addr = read_cpu_reg_sp(s, rn, 1);
3194
3195    if (s->pauth_active) {
3196        if (use_key_a) {
3197            gen_helper_autda(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
3198        } else {
3199            gen_helper_autdb(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
3200        }
3201    }
3202
3203    /* Form the 10-bit signed, scaled offset.  */
3204    offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
3205    offset = sextract32(offset << size, 0, 10 + size);
3206    tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3207
3208    /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3209    clean_addr = clean_data_tbi(s, dirty_addr);
3210
3211    tcg_rt = cpu_reg(s, rt);
3212    do_gpr_ld(s, tcg_rt, clean_addr, size, /* is_signed */ false,
3213              /* extend */ false, /* iss_valid */ !is_wback,
3214              /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);
3215
3216    if (is_wback) {
3217        tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
3218    }
3219}
3220
3221/* Load/store register (all forms) */
3222static void disas_ldst_reg(DisasContext *s, uint32_t insn)
3223{
3224    int rt = extract32(insn, 0, 5);
3225    int opc = extract32(insn, 22, 2);
3226    bool is_vector = extract32(insn, 26, 1);
3227    int size = extract32(insn, 30, 2);
3228
3229    switch (extract32(insn, 24, 2)) {
3230    case 0:
3231        if (extract32(insn, 21, 1) == 0) {
3232            /* Load/store register (unscaled immediate)
3233             * Load/store immediate pre/post-indexed
3234             * Load/store register unprivileged
3235             */
3236            disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
3237            return;
3238        }
3239        switch (extract32(insn, 10, 2)) {
3240        case 0:
3241            disas_ldst_atomic(s, insn, size, rt, is_vector);
3242            return;
3243        case 2:
3244            disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
3245            return;
3246        default:
3247            disas_ldst_pac(s, insn, size, rt, is_vector);
3248            return;
3249        }
3250        break;
3251    case 1:
3252        disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
3253        return;
3254    }
3255    unallocated_encoding(s);
3256}
3257
3258/* AdvSIMD load/store multiple structures
3259 *
3260 *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
3261 * +---+---+---------------+---+-------------+--------+------+------+------+
3262 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
3263 * +---+---+---------------+---+-------------+--------+------+------+------+
3264 *
3265 * AdvSIMD load/store multiple structures (post-indexed)
3266 *
3267 *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
3268 * +---+---+---------------+---+---+---------+--------+------+------+------+
3269 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
3270 * +---+---+---------------+---+---+---------+--------+------+------+------+
3271 *
3272 * Rt: first (or only) SIMD&FP register to be transferred
3273 * Rn: base address or SP
3274 * Rm (post-index only): post-index register (when !31) or size dependent #imm
3275 */
3276static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
3277{
3278    int rt = extract32(insn, 0, 5);
3279    int rn = extract32(insn, 5, 5);
3280    int rm = extract32(insn, 16, 5);
3281    int size = extract32(insn, 10, 2);
3282    int opcode = extract32(insn, 12, 4);
3283    bool is_store = !extract32(insn, 22, 1);
3284    bool is_postidx = extract32(insn, 23, 1);
3285    bool is_q = extract32(insn, 30, 1);
3286    TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3287    MemOp endian = s->be_data;
3288
3289    int ebytes;   /* bytes per element */
3290    int elements; /* elements per vector */
3291    int rpt;    /* num iterations */
3292    int selem;  /* structure elements */
3293    int r;
3294
3295    if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
3296        unallocated_encoding(s);
3297        return;
3298    }
3299
3300    if (!is_postidx && rm != 0) {
3301        unallocated_encoding(s);
3302        return;
3303    }
3304
3305    /* From the shared decode logic */
3306    switch (opcode) {
3307    case 0x0:
3308        rpt = 1;
3309        selem = 4;
3310        break;
3311    case 0x2:
3312        rpt = 4;
3313        selem = 1;
3314        break;
3315    case 0x4:
3316        rpt = 1;
3317        selem = 3;
3318        break;
3319    case 0x6:
3320        rpt = 3;
3321        selem = 1;
3322        break;
3323    case 0x7:
3324        rpt = 1;
3325        selem = 1;
3326        break;
3327    case 0x8:
3328        rpt = 1;
3329        selem = 2;
3330        break;
3331    case 0xa:
3332        rpt = 2;
3333        selem = 1;
3334        break;
3335    default:
3336        unallocated_encoding(s);
3337        return;
3338    }
3339
3340    if (size == 3 && !is_q && selem != 1) {
3341        /* reserved */
3342        unallocated_encoding(s);
3343        return;
3344    }
3345
3346    if (!fp_access_check(s)) {
3347        return;
3348    }
3349
3350    if (rn == 31) {
3351        gen_check_sp_alignment(s);
3352    }
3353
3354    /* For our purposes, bytes are always little-endian.  */
3355    if (size == 0) {
3356        endian = MO_LE;
3357    }
3358
3359    /* Consecutive little-endian elements from a single register
3360     * can be promoted to a larger little-endian operation.
3361     */
3362    if (selem == 1 && endian == MO_LE) {
3363        size = 3;
3364    }
3365    ebytes = 1 << size;
3366    elements = (is_q ? 16 : 8) / ebytes;
3367
3368    tcg_rn = cpu_reg_sp(s, rn);
3369    clean_addr = clean_data_tbi(s, tcg_rn);
3370    tcg_ebytes = tcg_const_i64(ebytes);
3371
3372    for (r = 0; r < rpt; r++) {
3373        int e;
3374        for (e = 0; e < elements; e++) {
3375            int xs;
3376            for (xs = 0; xs < selem; xs++) {
3377                int tt = (rt + r + xs) % 32;
3378                if (is_store) {
3379                    do_vec_st(s, tt, e, clean_addr, size, endian);
3380                } else {
3381                    do_vec_ld(s, tt, e, clean_addr, size, endian);
3382                }
3383                tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3384            }
3385        }
3386    }
3387    tcg_temp_free_i64(tcg_ebytes);
3388
3389    if (!is_store) {
3390        /* For non-quad operations, setting a slice of the low
3391         * 64 bits of the register clears the high 64 bits (in
3392         * the ARM ARM pseudocode this is implicit in the fact
3393         * that 'rval' is a 64 bit wide variable).
3394         * For quad operations, we might still need to zero the
3395         * high bits of SVE.
3396         */
3397        for (r = 0; r < rpt * selem; r++) {
3398            int tt = (rt + r) % 32;
3399            clear_vec_high(s, is_q, tt);
3400        }
3401    }
3402
3403    if (is_postidx) {
3404        if (rm == 31) {
3405            tcg_gen_addi_i64(tcg_rn, tcg_rn, rpt * elements * selem * ebytes);
3406        } else {
3407            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3408        }
3409    }
3410}
3411
3412/* AdvSIMD load/store single structure
3413 *
3414 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3415 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3416 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
3417 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3418 *
3419 * AdvSIMD load/store single structure (post-indexed)
3420 *
3421 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3422 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3423 * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
3424 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3425 *
3426 * Rt: first (or only) SIMD&FP register to be transferred
3427 * Rn: base address or SP
3428 * Rm (post-index only): post-index register (when !31) or size dependent #imm
3429 * index = encoded in Q:S:size dependent on size
3430 *
3431 * lane_size = encoded in R, opc
3432 * transfer width = encoded in opc, S, size
3433 */
3434static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
3435{
3436    int rt = extract32(insn, 0, 5);
3437    int rn = extract32(insn, 5, 5);
3438    int rm = extract32(insn, 16, 5);
3439    int size = extract32(insn, 10, 2);
3440    int S = extract32(insn, 12, 1);
3441    int opc = extract32(insn, 13, 3);
3442    int R = extract32(insn, 21, 1);
3443    int is_load = extract32(insn, 22, 1);
3444    int is_postidx = extract32(insn, 23, 1);
3445    int is_q = extract32(insn, 30, 1);
3446
3447    int scale = extract32(opc, 1, 2);
3448    int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
3449    bool replicate = false;
3450    int index = is_q << 3 | S << 2 | size;
3451    int ebytes, xs;
3452    TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3453
3454    if (extract32(insn, 31, 1)) {
3455        unallocated_encoding(s);
3456        return;
3457    }
3458    if (!is_postidx && rm != 0) {
3459        unallocated_encoding(s);
3460        return;
3461    }
3462
3463    switch (scale) {
3464    case 3:
3465        if (!is_load || S) {
3466            unallocated_encoding(s);
3467            return;
3468        }
3469        scale = size;
3470        replicate = true;
3471        break;
3472    case 0:
3473        break;
3474    case 1:
3475        if (extract32(size, 0, 1)) {
3476            unallocated_encoding(s);
3477            return;
3478        }
3479        index >>= 1;
3480        break;
3481    case 2:
3482        if (extract32(size, 1, 1)) {
3483            unallocated_encoding(s);
3484            return;
3485        }
3486        if (!extract32(size, 0, 1)) {
3487            index >>= 2;
3488        } else {
3489            if (S) {
3490                unallocated_encoding(s);
3491                return;
3492            }
3493            index >>= 3;
3494            scale = 3;
3495        }
3496        break;
3497    default:
3498        g_assert_not_reached();
3499    }
3500
3501    if (!fp_access_check(s)) {
3502        return;
3503    }
3504
3505    ebytes = 1 << scale;
3506
3507    if (rn == 31) {
3508        gen_check_sp_alignment(s);
3509    }
3510
3511    tcg_rn = cpu_reg_sp(s, rn);
3512    clean_addr = clean_data_tbi(s, tcg_rn);
3513    tcg_ebytes = tcg_const_i64(ebytes);
3514
3515    for (xs = 0; xs < selem; xs++) {
3516        if (replicate) {
3517            /* Load and replicate to all elements */
3518            TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3519
3520            tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr,
3521                                get_mem_index(s), s->be_data + scale);
3522            tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
3523                                 (is_q + 1) * 8, vec_full_reg_size(s),
3524                                 tcg_tmp);
3525            tcg_temp_free_i64(tcg_tmp);
3526        } else {
3527            /* Load/store one element per register */
3528            if (is_load) {
3529                do_vec_ld(s, rt, index, clean_addr, scale, s->be_data);
3530            } else {
3531                do_vec_st(s, rt, index, clean_addr, scale, s->be_data);
3532            }
3533        }
3534        tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3535        rt = (rt + 1) % 32;
3536    }
3537    tcg_temp_free_i64(tcg_ebytes);
3538
3539    if (is_postidx) {
3540        if (rm == 31) {
3541            tcg_gen_addi_i64(tcg_rn, tcg_rn, selem * ebytes);
3542        } else {
3543            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3544        }
3545    }
3546}
3547
3548/* Loads and stores */
3549static void disas_ldst(DisasContext *s, uint32_t insn)
3550{
3551    switch (extract32(insn, 24, 6)) {
3552    case 0x08: /* Load/store exclusive */
3553        disas_ldst_excl(s, insn);
3554        break;
3555    case 0x18: case 0x1c: /* Load register (literal) */
3556        disas_ld_lit(s, insn);
3557        break;
3558    case 0x28: case 0x29:
3559    case 0x2c: case 0x2d: /* Load/store pair (all forms) */
3560        disas_ldst_pair(s, insn);
3561        break;
3562    case 0x38: case 0x39:
3563    case 0x3c: case 0x3d: /* Load/store register (all forms) */
3564        disas_ldst_reg(s, insn);
3565        break;
3566    case 0x0c: /* AdvSIMD load/store multiple structures */
3567        disas_ldst_multiple_struct(s, insn);
3568        break;
3569    case 0x0d: /* AdvSIMD load/store single structure */
3570        disas_ldst_single_struct(s, insn);
3571        break;
3572    default:
3573        unallocated_encoding(s);
3574        break;
3575    }
3576}
3577
3578/* PC-rel. addressing
3579 *   31  30   29 28       24 23                5 4    0
3580 * +----+-------+-----------+-------------------+------+
3581 * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
3582 * +----+-------+-----------+-------------------+------+
3583 */
3584static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
3585{
3586    unsigned int page, rd;
3587    uint64_t base;
3588    uint64_t offset;
3589
3590    page = extract32(insn, 31, 1);
3591    /* SignExtend(immhi:immlo) -> offset */
3592    offset = sextract64(insn, 5, 19);
3593    offset = offset << 2 | extract32(insn, 29, 2);
3594    rd = extract32(insn, 0, 5);
3595    base = s->pc_curr;
3596
3597    if (page) {
3598        /* ADRP (page based) */
3599        base &= ~0xfff;
3600        offset <<= 12;
3601    }
3602
3603    tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
3604}
3605
3606/*
3607 * Add/subtract (immediate)
3608 *
3609 *  31 30 29 28       24 23 22 21         10 9   5 4   0
3610 * +--+--+--+-----------+-----+-------------+-----+-----+
3611 * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
3612 * +--+--+--+-----------+-----+-------------+-----+-----+
3613 *
3614 *    sf: 0 -> 32bit, 1 -> 64bit
3615 *    op: 0 -> add  , 1 -> sub
3616 *     S: 1 -> set flags
3617 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
3618 */
3619static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
3620{
3621    int rd = extract32(insn, 0, 5);
3622    int rn = extract32(insn, 5, 5);
3623    uint64_t imm = extract32(insn, 10, 12);
3624    int shift = extract32(insn, 22, 2);
3625    bool setflags = extract32(insn, 29, 1);
3626    bool sub_op = extract32(insn, 30, 1);
3627    bool is_64bit = extract32(insn, 31, 1);
3628
3629    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3630    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
3631    TCGv_i64 tcg_result;
3632
3633    switch (shift) {
3634    case 0x0:
3635        break;
3636    case 0x1:
3637        imm <<= 12;
3638        break;
3639    default:
3640        unallocated_encoding(s);
3641        return;
3642    }
3643
3644    tcg_result = tcg_temp_new_i64();
3645    if (!setflags) {
3646        if (sub_op) {
3647            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
3648        } else {
3649            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
3650        }
3651    } else {
3652        TCGv_i64 tcg_imm = tcg_const_i64(imm);
3653        if (sub_op) {
3654            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3655        } else {
3656            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3657        }
3658        tcg_temp_free_i64(tcg_imm);
3659    }
3660
3661    if (is_64bit) {
3662        tcg_gen_mov_i64(tcg_rd, tcg_result);
3663    } else {
3664        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3665    }
3666
3667    tcg_temp_free_i64(tcg_result);
3668}
3669
3670/* The input should be a value in the bottom e bits (with higher
3671 * bits zero); returns that value replicated into every element
3672 * of size e in a 64 bit integer.
3673 */
3674static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
3675{
3676    assert(e != 0);
3677    while (e < 64) {
3678        mask |= mask << e;
3679        e *= 2;
3680    }
3681    return mask;
3682}
3683
3684/* Return a value with the bottom len bits set (where 0 < len <= 64) */
3685static inline uint64_t bitmask64(unsigned int length)
3686{
3687    assert(length > 0 && length <= 64);
3688    return ~0ULL >> (64 - length);
3689}
3690
3691/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
3692 * only require the wmask. Returns false if the imms/immr/immn are a reserved
3693 * value (ie should cause a guest UNDEF exception), and true if they are
3694 * valid, in which case the decoded bit pattern is written to result.
3695 */
3696bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
3697                            unsigned int imms, unsigned int immr)
3698{
3699    uint64_t mask;
3700    unsigned e, levels, s, r;
3701    int len;
3702
3703    assert(immn < 2 && imms < 64 && immr < 64);
3704
3705    /* The bit patterns we create here are 64 bit patterns which
3706     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
3707     * 64 bits each. Each element contains the same value: a run
3708     * of between 1 and e-1 non-zero bits, rotated within the
3709     * element by between 0 and e-1 bits.
3710     *
3711     * The element size and run length are encoded into immn (1 bit)
3712     * and imms (6 bits) as follows:
3713     * 64 bit elements: immn = 1, imms = <length of run - 1>
3714     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
3715     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
3716     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
3717     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
3718     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
3719     * Notice that immn = 0, imms = 11111x is the only combination
3720     * not covered by one of the above options; this is reserved.
3721     * Further, <length of run - 1> all-ones is a reserved pattern.
3722     *
3723     * In all cases the rotation is by immr % e (and immr is 6 bits).
3724     */
3725
3726    /* First determine the element size */
3727    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
3728    if (len < 1) {
3729        /* This is the immn == 0, imms == 0x11111x case */
3730        return false;
3731    }
3732    e = 1 << len;
3733
3734    levels = e - 1;
3735    s = imms & levels;
3736    r = immr & levels;
3737
3738    if (s == levels) {
3739        /* <length of run - 1> mustn't be all-ones. */
3740        return false;
3741    }
3742
3743    /* Create the value of one element: s+1 set bits rotated
3744     * by r within the element (which is e bits wide)...
3745     */
3746    mask = bitmask64(s + 1);
3747    if (r) {
3748        mask = (mask >> r) | (mask << (e - r));
3749        mask &= bitmask64(e);
3750    }
3751    /* ...then replicate the element over the whole 64 bit value */
3752    mask = bitfield_replicate(mask, e);
3753    *result = mask;
3754    return true;
3755}
3756
3757/* Logical (immediate)
3758 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3759 * +----+-----+-------------+---+------+------+------+------+
3760 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
3761 * +----+-----+-------------+---+------+------+------+------+
3762 */
3763static void disas_logic_imm(DisasContext *s, uint32_t insn)
3764{
3765    unsigned int sf, opc, is_n, immr, imms, rn, rd;
3766    TCGv_i64 tcg_rd, tcg_rn;
3767    uint64_t wmask;
3768    bool is_and = false;
3769
3770    sf = extract32(insn, 31, 1);
3771    opc = extract32(insn, 29, 2);
3772    is_n = extract32(insn, 22, 1);
3773    immr = extract32(insn, 16, 6);
3774    imms = extract32(insn, 10, 6);
3775    rn = extract32(insn, 5, 5);
3776    rd = extract32(insn, 0, 5);
3777
3778    if (!sf && is_n) {
3779        unallocated_encoding(s);
3780        return;
3781    }
3782
3783    if (opc == 0x3) { /* ANDS */
3784        tcg_rd = cpu_reg(s, rd);
3785    } else {
3786        tcg_rd = cpu_reg_sp(s, rd);
3787    }
3788    tcg_rn = cpu_reg(s, rn);
3789
3790    if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3791        /* some immediate field values are reserved */
3792        unallocated_encoding(s);
3793        return;
3794    }
3795
3796    if (!sf) {
3797        wmask &= 0xffffffff;
3798    }
3799
3800    switch (opc) {
3801    case 0x3: /* ANDS */
3802    case 0x0: /* AND */
3803        tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3804        is_and = true;
3805        break;
3806    case 0x1: /* ORR */
3807        tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3808        break;
3809    case 0x2: /* EOR */
3810        tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3811        break;
3812    default:
3813        assert(FALSE); /* must handle all above */
3814        break;
3815    }
3816
3817    if (!sf && !is_and) {
3818        /* zero extend final result; we know we can skip this for AND
3819         * since the immediate had the high 32 bits clear.
3820         */
3821        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3822    }
3823
3824    if (opc == 3) { /* ANDS */
3825        gen_logic_CC(sf, tcg_rd);
3826    }
3827}
3828
3829/*
3830 * Move wide (immediate)
3831 *
3832 *  31 30 29 28         23 22 21 20             5 4    0
3833 * +--+-----+-------------+-----+----------------+------+
3834 * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
3835 * +--+-----+-------------+-----+----------------+------+
3836 *
3837 * sf: 0 -> 32 bit, 1 -> 64 bit
3838 * opc: 00 -> N, 10 -> Z, 11 -> K
3839 * hw: shift/16 (0,16, and sf only 32, 48)
3840 */
3841static void disas_movw_imm(DisasContext *s, uint32_t insn)
3842{
3843    int rd = extract32(insn, 0, 5);
3844    uint64_t imm = extract32(insn, 5, 16);
3845    int sf = extract32(insn, 31, 1);
3846    int opc = extract32(insn, 29, 2);
3847    int pos = extract32(insn, 21, 2) << 4;
3848    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3849    TCGv_i64 tcg_imm;
3850
3851    if (!sf && (pos >= 32)) {
3852        unallocated_encoding(s);
3853        return;
3854    }
3855
3856    switch (opc) {
3857    case 0: /* MOVN */
3858    case 2: /* MOVZ */
3859        imm <<= pos;
3860        if (opc == 0) {
3861            imm = ~imm;
3862        }
3863        if (!sf) {
3864            imm &= 0xffffffffu;
3865        }
3866        tcg_gen_movi_i64(tcg_rd, imm);
3867        break;
3868    case 3: /* MOVK */
3869        tcg_imm = tcg_const_i64(imm);
3870        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3871        tcg_temp_free_i64(tcg_imm);
3872        if (!sf) {
3873            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3874        }
3875        break;
3876    default:
3877        unallocated_encoding(s);
3878        break;
3879    }
3880}
3881
3882/* Bitfield
3883 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3884 * +----+-----+-------------+---+------+------+------+------+
3885 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
3886 * +----+-----+-------------+---+------+------+------+------+
3887 */
3888static void disas_bitfield(DisasContext *s, uint32_t insn)
3889{
3890    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3891    TCGv_i64 tcg_rd, tcg_tmp;
3892
3893    sf = extract32(insn, 31, 1);
3894    opc = extract32(insn, 29, 2);
3895    n = extract32(insn, 22, 1);
3896    ri = extract32(insn, 16, 6);
3897    si = extract32(insn, 10, 6);
3898    rn = extract32(insn, 5, 5);
3899    rd = extract32(insn, 0, 5);
3900    bitsize = sf ? 64 : 32;
3901
3902    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3903        unallocated_encoding(s);
3904        return;
3905    }
3906
3907    tcg_rd = cpu_reg(s, rd);
3908
3909    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3910       to be smaller than bitsize, we'll never reference data outside the
3911       low 32-bits anyway.  */
3912    tcg_tmp = read_cpu_reg(s, rn, 1);
3913
3914    /* Recognize simple(r) extractions.  */
3915    if (si >= ri) {
3916        /* Wd<s-r:0> = Wn<s:r> */
3917        len = (si - ri) + 1;
3918        if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
3919            tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
3920            goto done;
3921        } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
3922            tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
3923            return;
3924        }
3925        /* opc == 1, BFXIL fall through to deposit */
3926        tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
3927        pos = 0;
3928    } else {
3929        /* Handle the ri > si case with a deposit
3930         * Wd<32+s-r,32-r> = Wn<s:0>
3931         */
3932        len = si + 1;
3933        pos = (bitsize - ri) & (bitsize - 1);
3934    }
3935
3936    if (opc == 0 && len < ri) {
3937        /* SBFM: sign extend the destination field from len to fill
3938           the balance of the word.  Let the deposit below insert all
3939           of those sign bits.  */
3940        tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
3941        len = ri;
3942    }
3943
3944    if (opc == 1) { /* BFM, BFXIL */
3945        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3946    } else {
3947        /* SBFM or UBFM: We start with zero, and we haven't modified
3948           any bits outside bitsize, therefore the zero-extension
3949           below is unneeded.  */
3950        tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
3951        return;
3952    }
3953
3954 done:
3955    if (!sf) { /* zero extend final result */
3956        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3957    }
3958}
3959
3960/* Extract
3961 *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3962 * +----+------+-------------+---+----+------+--------+------+------+
3963 * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3964 * +----+------+-------------+---+----+------+--------+------+------+
3965 */
3966static void disas_extract(DisasContext *s, uint32_t insn)
3967{
3968    unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3969
3970    sf = extract32(insn, 31, 1);
3971    n = extract32(insn, 22, 1);
3972    rm = extract32(insn, 16, 5);
3973    imm = extract32(insn, 10, 6);
3974    rn = extract32(insn, 5, 5);
3975    rd = extract32(insn, 0, 5);
3976    op21 = extract32(insn, 29, 2);
3977    op0 = extract32(insn, 21, 1);
3978    bitsize = sf ? 64 : 32;
3979
3980    if (sf != n || op21 || op0 || imm >= bitsize) {
3981        unallocated_encoding(s);
3982    } else {
3983        TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3984
3985        tcg_rd = cpu_reg(s, rd);
3986
3987        if (unlikely(imm == 0)) {
3988            /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3989             * so an extract from bit 0 is a special case.
3990             */
3991            if (sf) {
3992                tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3993            } else {
3994                tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3995            }
3996        } else {
3997            tcg_rm = cpu_reg(s, rm);
3998            tcg_rn = cpu_reg(s, rn);
3999
4000            if (sf) {
4001                /* Specialization to ROR happens in EXTRACT2.  */
4002                tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, imm);
4003            } else {
4004                TCGv_i32 t0 = tcg_temp_new_i32();
4005
4006                tcg_gen_extrl_i64_i32(t0, tcg_rm);
4007                if (rm == rn) {
4008                    tcg_gen_rotri_i32(t0, t0, imm);
4009                } else {
4010                    TCGv_i32 t1 = tcg_temp_new_i32();
4011                    tcg_gen_extrl_i64_i32(t1, tcg_rn);
4012                    tcg_gen_extract2_i32(t0, t0, t1, imm);
4013                    tcg_temp_free_i32(t1);
4014                }
4015                tcg_gen_extu_i32_i64(tcg_rd, t0);
4016                tcg_temp_free_i32(t0);
4017            }
4018        }
4019    }
4020}
4021
4022/* Data processing - immediate */
4023static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
4024{
4025    switch (extract32(insn, 23, 6)) {
4026    case 0x20: case 0x21: /* PC-rel. addressing */
4027        disas_pc_rel_adr(s, insn);
4028        break;
4029    case 0x22: case 0x23: /* Add/subtract (immediate) */
4030        disas_add_sub_imm(s, insn);
4031        break;
4032    case 0x24: /* Logical (immediate) */
4033        disas_logic_imm(s, insn);
4034        break;
4035    case 0x25: /* Move wide (immediate) */
4036        disas_movw_imm(s, insn);
4037        break;
4038    case 0x26: /* Bitfield */
4039        disas_bitfield(s, insn);
4040        break;
4041    case 0x27: /* Extract */
4042        disas_extract(s, insn);
4043        break;
4044    default:
4045        unallocated_encoding(s);
4046        break;
4047    }
4048}
4049
4050/* Shift a TCGv src by TCGv shift_amount, put result in dst.
4051 * Note that it is the caller's responsibility to ensure that the
4052 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
4053 * mandated semantics for out of range shifts.
4054 */
4055static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4056                      enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4057{
4058    switch (shift_type) {
4059    case A64_SHIFT_TYPE_LSL:
4060        tcg_gen_shl_i64(dst, src, shift_amount);
4061        break;
4062    case A64_SHIFT_TYPE_LSR:
4063        tcg_gen_shr_i64(dst, src, shift_amount);
4064        break;
4065    case A64_SHIFT_TYPE_ASR:
4066        if (!sf) {
4067            tcg_gen_ext32s_i64(dst, src);
4068        }
4069        tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4070        break;
4071    case A64_SHIFT_TYPE_ROR:
4072        if (sf) {
4073            tcg_gen_rotr_i64(dst, src, shift_amount);
4074        } else {
4075            TCGv_i32 t0, t1;
4076            t0 = tcg_temp_new_i32();
4077            t1 = tcg_temp_new_i32();
4078            tcg_gen_extrl_i64_i32(t0, src);
4079            tcg_gen_extrl_i64_i32(t1, shift_amount);
4080            tcg_gen_rotr_i32(t0, t0, t1);
4081            tcg_gen_extu_i32_i64(dst, t0);
4082            tcg_temp_free_i32(t0);
4083            tcg_temp_free_i32(t1);
4084        }
4085        break;
4086    default:
4087        assert(FALSE); /* all shift types should be handled */
4088        break;
4089    }
4090
4091    if (!sf) { /* zero extend final result */
4092        tcg_gen_ext32u_i64(dst, dst);
4093    }
4094}
4095
4096/* Shift a TCGv src by immediate, put result in dst.
4097 * The shift amount must be in range (this should always be true as the
4098 * relevant instructions will UNDEF on bad shift immediates).
4099 */
4100static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4101                          enum a64_shift_type shift_type, unsigned int shift_i)
4102{
4103    assert(shift_i < (sf ? 64 : 32));
4104
4105    if (shift_i == 0) {
4106        tcg_gen_mov_i64(dst, src);
4107    } else {
4108        TCGv_i64 shift_const;
4109
4110        shift_const = tcg_const_i64(shift_i);
4111        shift_reg(dst, src, sf, shift_type, shift_const);
4112        tcg_temp_free_i64(shift_const);
4113    }
4114}
4115
4116/* Logical (shifted register)
4117 *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
4118 * +----+-----+-----------+-------+---+------+--------+------+------+
4119 * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
4120 * +----+-----+-----------+-------+---+------+--------+------+------+
4121 */
4122static void disas_logic_reg(DisasContext *s, uint32_t insn)
4123{
4124    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4125    unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4126
4127    sf = extract32(insn, 31, 1);
4128    opc = extract32(insn, 29, 2);
4129    shift_type = extract32(insn, 22, 2);
4130    invert = extract32(insn, 21, 1);
4131    rm = extract32(insn, 16, 5);
4132    shift_amount = extract32(insn, 10, 6);
4133    rn = extract32(insn, 5, 5);
4134    rd = extract32(insn, 0, 5);
4135
4136    if (!sf && (shift_amount & (1 << 5))) {
4137        unallocated_encoding(s);
4138        return;
4139    }
4140
4141    tcg_rd = cpu_reg(s, rd);
4142
4143    if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4144        /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
4145         * register-register MOV and MVN, so it is worth special casing.
4146         */
4147        tcg_rm = cpu_reg(s, rm);
4148        if (invert) {
4149            tcg_gen_not_i64(tcg_rd, tcg_rm);
4150            if (!sf) {
4151                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4152            }
4153        } else {
4154            if (sf) {
4155                tcg_gen_mov_i64(tcg_rd, tcg_rm);
4156            } else {
4157                tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4158            }
4159        }
4160        return;
4161    }
4162
4163    tcg_rm = read_cpu_reg(s, rm, sf);
4164
4165    if (shift_amount) {
4166        shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4167    }
4168
4169    tcg_rn = cpu_reg(s, rn);
4170
4171    switch (opc | (invert << 2)) {
4172    case 0: /* AND */
4173    case 3: /* ANDS */
4174        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4175        break;
4176    case 1: /* ORR */
4177        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4178        break;
4179    case 2: /* EOR */
4180        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4181        break;
4182    case 4: /* BIC */
4183    case 7: /* BICS */
4184        tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4185        break;
4186    case 5: /* ORN */
4187        tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4188        break;
4189    case 6: /* EON */
4190        tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4191        break;
4192    default:
4193        assert(FALSE);
4194        break;
4195    }
4196
4197    if (!sf) {
4198        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4199    }
4200
4201    if (opc == 3) {
4202        gen_logic_CC(sf, tcg_rd);
4203    }
4204}
4205
4206/*
4207 * Add/subtract (extended register)
4208 *
4209 *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
4210 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4211 * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
4212 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4213 *
4214 *  sf: 0 -> 32bit, 1 -> 64bit
4215 *  op: 0 -> add  , 1 -> sub
4216 *   S: 1 -> set flags
4217 * opt: 00
4218 * option: extension type (see DecodeRegExtend)
4219 * imm3: optional shift to Rm
4220 *
4221 * Rd = Rn + LSL(extend(Rm), amount)
4222 */
4223static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4224{
4225    int rd = extract32(insn, 0, 5);
4226    int rn = extract32(insn, 5, 5);
4227    int imm3 = extract32(insn, 10, 3);
4228    int option = extract32(insn, 13, 3);
4229    int rm = extract32(insn, 16, 5);
4230    int opt = extract32(insn, 22, 2);
4231    bool setflags = extract32(insn, 29, 1);
4232    bool sub_op = extract32(insn, 30, 1);
4233    bool sf = extract32(insn, 31, 1);
4234
4235    TCGv_i64 tcg_rm, tcg_rn; /* temps */
4236    TCGv_i64 tcg_rd;
4237    TCGv_i64 tcg_result;
4238
4239    if (imm3 > 4 || opt != 0) {
4240        unallocated_encoding(s);
4241        return;
4242    }
4243
4244    /* non-flag setting ops may use SP */
4245    if (!setflags) {
4246        tcg_rd = cpu_reg_sp(s, rd);
4247    } else {
4248        tcg_rd = cpu_reg(s, rd);
4249    }
4250    tcg_rn = read_cpu_reg_sp(s, rn, sf);
4251
4252    tcg_rm = read_cpu_reg(s, rm, sf);
4253    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4254
4255    tcg_result = tcg_temp_new_i64();
4256
4257    if (!setflags) {
4258        if (sub_op) {
4259            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4260        } else {
4261            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4262        }
4263    } else {
4264        if (sub_op) {
4265            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4266        } else {
4267            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4268        }
4269    }
4270
4271    if (sf) {
4272        tcg_gen_mov_i64(tcg_rd, tcg_result);
4273    } else {
4274        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4275    }
4276
4277    tcg_temp_free_i64(tcg_result);
4278}
4279
4280/*
4281 * Add/subtract (shifted register)
4282 *
4283 *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4284 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4285 * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4286 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4287 *
4288 *    sf: 0 -> 32bit, 1 -> 64bit
4289 *    op: 0 -> add  , 1 -> sub
4290 *     S: 1 -> set flags
4291 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4292 *  imm6: Shift amount to apply to Rm before the add/sub
4293 */
4294static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4295{
4296    int rd = extract32(insn, 0, 5);
4297    int rn = extract32(insn, 5, 5);
4298    int imm6 = extract32(insn, 10, 6);
4299    int rm = extract32(insn, 16, 5);
4300    int shift_type = extract32(insn, 22, 2);
4301    bool setflags = extract32(insn, 29, 1);
4302    bool sub_op = extract32(insn, 30, 1);
4303    bool sf = extract32(insn, 31, 1);
4304
4305    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4306    TCGv_i64 tcg_rn, tcg_rm;
4307    TCGv_i64 tcg_result;
4308
4309    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4310        unallocated_encoding(s);
4311        return;
4312    }
4313
4314    tcg_rn = read_cpu_reg(s, rn, sf);
4315    tcg_rm = read_cpu_reg(s, rm, sf);
4316
4317    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4318
4319    tcg_result = tcg_temp_new_i64();
4320
4321    if (!setflags) {
4322        if (sub_op) {
4323            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4324        } else {
4325            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4326        }
4327    } else {
4328        if (sub_op) {
4329            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4330        } else {
4331            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4332        }
4333    }
4334
4335    if (sf) {
4336        tcg_gen_mov_i64(tcg_rd, tcg_result);
4337    } else {
4338        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4339    }
4340
4341    tcg_temp_free_i64(tcg_result);
4342}
4343
4344/* Data-processing (3 source)
4345 *
4346 *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
4347 *  +--+------+-----------+------+------+----+------+------+------+
4348 *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4349 *  +--+------+-----------+------+------+----+------+------+------+
4350 */
4351static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4352{
4353    int rd = extract32(insn, 0, 5);
4354    int rn = extract32(insn, 5, 5);
4355    int ra = extract32(insn, 10, 5);
4356    int rm = extract32(insn, 16, 5);
4357    int op_id = (extract32(insn, 29, 3) << 4) |
4358        (extract32(insn, 21, 3) << 1) |
4359        extract32(insn, 15, 1);
4360    bool sf = extract32(insn, 31, 1);
4361    bool is_sub = extract32(op_id, 0, 1);
4362    bool is_high = extract32(op_id, 2, 1);
4363    bool is_signed = false;
4364    TCGv_i64 tcg_op1;
4365    TCGv_i64 tcg_op2;
4366    TCGv_i64 tcg_tmp;
4367
4368    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4369    switch (op_id) {
4370    case 0x42: /* SMADDL */
4371    case 0x43: /* SMSUBL */
4372    case 0x44: /* SMULH */
4373        is_signed = true;
4374        break;
4375    case 0x0: /* MADD (32bit) */
4376    case 0x1: /* MSUB (32bit) */
4377    case 0x40: /* MADD (64bit) */
4378    case 0x41: /* MSUB (64bit) */
4379    case 0x4a: /* UMADDL */
4380    case 0x4b: /* UMSUBL */
4381    case 0x4c: /* UMULH */
4382        break;
4383    default:
4384        unallocated_encoding(s);
4385        return;
4386    }
4387
4388    if (is_high) {
4389        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4390        TCGv_i64 tcg_rd = cpu_reg(s, rd);
4391        TCGv_i64 tcg_rn = cpu_reg(s, rn);
4392        TCGv_i64 tcg_rm = cpu_reg(s, rm);
4393
4394        if (is_signed) {
4395            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4396        } else {
4397            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4398        }
4399
4400        tcg_temp_free_i64(low_bits);
4401        return;
4402    }
4403
4404    tcg_op1 = tcg_temp_new_i64();
4405    tcg_op2 = tcg_temp_new_i64();
4406    tcg_tmp = tcg_temp_new_i64();
4407
4408    if (op_id < 0x42) {
4409        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4410        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4411    } else {
4412        if (is_signed) {
4413            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4414            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4415        } else {
4416            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
4417            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
4418        }
4419    }
4420
4421    if (ra == 31 && !is_sub) {
4422        /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4423        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
4424    } else {
4425        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
4426        if (is_sub) {
4427            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4428        } else {
4429            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4430        }
4431    }
4432
4433    if (!sf) {
4434        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
4435    }
4436
4437    tcg_temp_free_i64(tcg_op1);
4438    tcg_temp_free_i64(tcg_op2);
4439    tcg_temp_free_i64(tcg_tmp);
4440}
4441
4442/* Add/subtract (with carry)
4443 *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
4444 * +--+--+--+------------------------+------+-------------+------+-----+
4445 * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
4446 * +--+--+--+------------------------+------+-------------+------+-----+
4447 */
4448
4449static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4450{
4451    unsigned int sf, op, setflags, rm, rn, rd;
4452    TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4453
4454    sf = extract32(insn, 31, 1);
4455    op = extract32(insn, 30, 1);
4456    setflags = extract32(insn, 29, 1);
4457    rm = extract32(insn, 16, 5);
4458    rn = extract32(insn, 5, 5);
4459    rd = extract32(insn, 0, 5);
4460
4461    tcg_rd = cpu_reg(s, rd);
4462    tcg_rn = cpu_reg(s, rn);
4463
4464    if (op) {
4465        tcg_y = new_tmp_a64(s);
4466        tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4467    } else {
4468        tcg_y = cpu_reg(s, rm);
4469    }
4470
4471    if (setflags) {
4472        gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4473    } else {
4474        gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4475    }
4476}
4477
4478/*
4479 * Rotate right into flags
4480 *  31 30 29                21       15          10      5  4      0
4481 * +--+--+--+-----------------+--------+-----------+------+--+------+
4482 * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
4483 * +--+--+--+-----------------+--------+-----------+------+--+------+
4484 */
4485static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
4486{
4487    int mask = extract32(insn, 0, 4);
4488    int o2 = extract32(insn, 4, 1);
4489    int rn = extract32(insn, 5, 5);
4490    int imm6 = extract32(insn, 15, 6);
4491    int sf_op_s = extract32(insn, 29, 3);
4492    TCGv_i64 tcg_rn;
4493    TCGv_i32 nzcv;
4494
4495    if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
4496        unallocated_encoding(s);
4497        return;
4498    }
4499
4500    tcg_rn = read_cpu_reg(s, rn, 1);
4501    tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
4502
4503    nzcv = tcg_temp_new_i32();
4504    tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
4505
4506    if (mask & 8) { /* N */
4507        tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
4508    }
4509    if (mask & 4) { /* Z */
4510        tcg_gen_not_i32(cpu_ZF, nzcv);
4511        tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
4512    }
4513    if (mask & 2) { /* C */
4514        tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
4515    }
4516    if (mask & 1) { /* V */
4517        tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
4518    }
4519
4520    tcg_temp_free_i32(nzcv);
4521}
4522
4523/*
4524 * Evaluate into flags
4525 *  31 30 29                21        15   14        10      5  4      0
4526 * +--+--+--+-----------------+---------+----+---------+------+--+------+
4527 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
4528 * +--+--+--+-----------------+---------+----+---------+------+--+------+
4529 */
4530static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
4531{
4532    int o3_mask = extract32(insn, 0, 5);
4533    int rn = extract32(insn, 5, 5);
4534    int o2 = extract32(insn, 15, 6);
4535    int sz = extract32(insn, 14, 1);
4536    int sf_op_s = extract32(insn, 29, 3);
4537    TCGv_i32 tmp;
4538    int shift;
4539
4540    if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
4541        !dc_isar_feature(aa64_condm_4, s)) {
4542        unallocated_encoding(s);
4543        return;
4544    }
4545    shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
4546
4547    tmp = tcg_temp_new_i32();
4548    tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
4549    tcg_gen_shli_i32(cpu_NF, tmp, shift);
4550    tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
4551    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
4552    tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
4553    tcg_temp_free_i32(tmp);
4554}
4555
4556/* Conditional compare (immediate / register)
4557 *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
4558 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4559 * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
4560 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4561 *        [1]                             y                [0]       [0]
4562 */
4563static void disas_cc(DisasContext *s, uint32_t insn)
4564{
4565    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
4566    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
4567    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
4568    DisasCompare c;
4569
4570    if (!extract32(insn, 29, 1)) {
4571        unallocated_encoding(s);
4572        return;
4573    }
4574    if (insn & (1 << 10 | 1 << 4)) {
4575        unallocated_encoding(s);
4576        return;
4577    }
4578    sf = extract32(insn, 31, 1);
4579    op = extract32(insn, 30, 1);
4580    is_imm = extract32(insn, 11, 1);
4581    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
4582    cond = extract32(insn, 12, 4);
4583    rn = extract32(insn, 5, 5);
4584    nzcv = extract32(insn, 0, 4);
4585
4586    /* Set T0 = !COND.  */
4587    tcg_t0 = tcg_temp_new_i32();
4588    arm_test_cc(&c, cond);
4589    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
4590    arm_free_cc(&c);
4591
4592    /* Load the arguments for the new comparison.  */
4593    if (is_imm) {
4594        tcg_y = new_tmp_a64(s);
4595        tcg_gen_movi_i64(tcg_y, y);
4596    } else {
4597        tcg_y = cpu_reg(s, y);
4598    }
4599    tcg_rn = cpu_reg(s, rn);
4600
4601    /* Set the flags for the new comparison.  */
4602    tcg_tmp = tcg_temp_new_i64();
4603    if (op) {
4604        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4605    } else {
4606        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4607    }
4608    tcg_temp_free_i64(tcg_tmp);
4609
4610    /* If COND was false, force the flags to #nzcv.  Compute two masks
4611     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
4612     * For tcg hosts that support ANDC, we can make do with just T1.
4613     * In either case, allow the tcg optimizer to delete any unused mask.
4614     */
4615    tcg_t1 = tcg_temp_new_i32();
4616    tcg_t2 = tcg_temp_new_i32();
4617    tcg_gen_neg_i32(tcg_t1, tcg_t0);
4618    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
4619
4620    if (nzcv & 8) { /* N */
4621        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
4622    } else {
4623        if (TCG_TARGET_HAS_andc_i32) {
4624            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
4625        } else {
4626            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
4627        }
4628    }
4629    if (nzcv & 4) { /* Z */
4630        if (TCG_TARGET_HAS_andc_i32) {
4631            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
4632        } else {
4633            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
4634        }
4635    } else {
4636        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
4637    }
4638    if (nzcv & 2) { /* C */
4639        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
4640    } else {
4641        if (TCG_TARGET_HAS_andc_i32) {
4642            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
4643        } else {
4644            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
4645        }
4646    }
4647    if (nzcv & 1) { /* V */
4648        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
4649    } else {
4650        if (TCG_TARGET_HAS_andc_i32) {
4651            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
4652        } else {
4653            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
4654        }
4655    }
4656    tcg_temp_free_i32(tcg_t0);
4657    tcg_temp_free_i32(tcg_t1);
4658    tcg_temp_free_i32(tcg_t2);
4659}
4660
4661/* Conditional select
4662 *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
4663 * +----+----+---+-----------------+------+------+-----+------+------+
4664 * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
4665 * +----+----+---+-----------------+------+------+-----+------+------+
4666 */
4667static void disas_cond_select(DisasContext *s, uint32_t insn)
4668{
4669    unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
4670    TCGv_i64 tcg_rd, zero;
4671    DisasCompare64 c;
4672
4673    if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
4674        /* S == 1 or op2<1> == 1 */
4675        unallocated_encoding(s);
4676        return;
4677    }
4678    sf = extract32(insn, 31, 1);
4679    else_inv = extract32(insn, 30, 1);
4680    rm = extract32(insn, 16, 5);
4681    cond = extract32(insn, 12, 4);
4682    else_inc = extract32(insn, 10, 1);
4683    rn = extract32(insn, 5, 5);
4684    rd = extract32(insn, 0, 5);
4685
4686    tcg_rd = cpu_reg(s, rd);
4687
4688    a64_test_cc(&c, cond);
4689    zero = tcg_const_i64(0);
4690
4691    if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
4692        /* CSET & CSETM.  */
4693        tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
4694        if (else_inv) {
4695            tcg_gen_neg_i64(tcg_rd, tcg_rd);
4696        }
4697    } else {
4698        TCGv_i64 t_true = cpu_reg(s, rn);
4699        TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
4700        if (else_inv && else_inc) {
4701            tcg_gen_neg_i64(t_false, t_false);
4702        } else if (else_inv) {
4703            tcg_gen_not_i64(t_false, t_false);
4704        } else if (else_inc) {
4705            tcg_gen_addi_i64(t_false, t_false, 1);
4706        }
4707        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
4708    }
4709
4710    tcg_temp_free_i64(zero);
4711    a64_free_cc(&c);
4712
4713    if (!sf) {
4714        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4715    }
4716}
4717
4718static void handle_clz(DisasContext *s, unsigned int sf,
4719                       unsigned int rn, unsigned int rd)
4720{
4721    TCGv_i64 tcg_rd, tcg_rn;
4722    tcg_rd = cpu_reg(s, rd);
4723    tcg_rn = cpu_reg(s, rn);
4724
4725    if (sf) {
4726        tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
4727    } else {
4728        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4729        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4730        tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
4731        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4732        tcg_temp_free_i32(tcg_tmp32);
4733    }
4734}
4735
4736static void handle_cls(DisasContext *s, unsigned int sf,
4737                       unsigned int rn, unsigned int rd)
4738{
4739    TCGv_i64 tcg_rd, tcg_rn;
4740    tcg_rd = cpu_reg(s, rd);
4741    tcg_rn = cpu_reg(s, rn);
4742
4743    if (sf) {
4744        tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
4745    } else {
4746        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4747        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4748        tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
4749        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4750        tcg_temp_free_i32(tcg_tmp32);
4751    }
4752}
4753
4754static void handle_rbit(DisasContext *s, unsigned int sf,
4755                        unsigned int rn, unsigned int rd)
4756{
4757    TCGv_i64 tcg_rd, tcg_rn;
4758    tcg_rd = cpu_reg(s, rd);
4759    tcg_rn = cpu_reg(s, rn);
4760
4761    if (sf) {
4762        gen_helper_rbit64(tcg_rd, tcg_rn);
4763    } else {
4764        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4765        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4766        gen_helper_rbit(tcg_tmp32, tcg_tmp32);
4767        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4768        tcg_temp_free_i32(tcg_tmp32);
4769    }
4770}
4771
4772/* REV with sf==1, opcode==3 ("REV64") */
4773static void handle_rev64(DisasContext *s, unsigned int sf,
4774                         unsigned int rn, unsigned int rd)
4775{
4776    if (!sf) {
4777        unallocated_encoding(s);
4778        return;
4779    }
4780    tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
4781}
4782
4783/* REV with sf==0, opcode==2
4784 * REV32 (sf==1, opcode==2)
4785 */
4786static void handle_rev32(DisasContext *s, unsigned int sf,
4787                         unsigned int rn, unsigned int rd)
4788{
4789    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4790
4791    if (sf) {
4792        TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4793        TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4794
4795        /* bswap32_i64 requires zero high word */
4796        tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
4797        tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
4798        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4799        tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
4800        tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
4801
4802        tcg_temp_free_i64(tcg_tmp);
4803    } else {
4804        tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
4805        tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
4806    }
4807}
4808
4809/* REV16 (opcode==1) */
4810static void handle_rev16(DisasContext *s, unsigned int sf,
4811                         unsigned int rn, unsigned int rd)
4812{
4813    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4814    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4815    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4816    TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
4817
4818    tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
4819    tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
4820    tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
4821    tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
4822    tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
4823
4824    tcg_temp_free_i64(mask);
4825    tcg_temp_free_i64(tcg_tmp);
4826}
4827
4828/* Data-processing (1 source)
4829 *   31  30  29  28             21 20     16 15    10 9    5 4    0
4830 * +----+---+---+-----------------+---------+--------+------+------+
4831 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
4832 * +----+---+---+-----------------+---------+--------+------+------+
4833 */
4834static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4835{
4836    unsigned int sf, opcode, opcode2, rn, rd;
4837    TCGv_i64 tcg_rd;
4838
4839    if (extract32(insn, 29, 1)) {
4840        unallocated_encoding(s);
4841        return;
4842    }
4843
4844    sf = extract32(insn, 31, 1);
4845    opcode = extract32(insn, 10, 6);
4846    opcode2 = extract32(insn, 16, 5);
4847    rn = extract32(insn, 5, 5);
4848    rd = extract32(insn, 0, 5);
4849
4850#define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
4851
4852    switch (MAP(sf, opcode2, opcode)) {
4853    case MAP(0, 0x00, 0x00): /* RBIT */
4854    case MAP(1, 0x00, 0x00):
4855        handle_rbit(s, sf, rn, rd);
4856        break;
4857    case MAP(0, 0x00, 0x01): /* REV16 */
4858    case MAP(1, 0x00, 0x01):
4859        handle_rev16(s, sf, rn, rd);
4860        break;
4861    case MAP(0, 0x00, 0x02): /* REV/REV32 */
4862    case MAP(1, 0x00, 0x02):
4863        handle_rev32(s, sf, rn, rd);
4864        break;
4865    case MAP(1, 0x00, 0x03): /* REV64 */
4866        handle_rev64(s, sf, rn, rd);
4867        break;
4868    case MAP(0, 0x00, 0x04): /* CLZ */
4869    case MAP(1, 0x00, 0x04):
4870        handle_clz(s, sf, rn, rd);
4871        break;
4872    case MAP(0, 0x00, 0x05): /* CLS */
4873    case MAP(1, 0x00, 0x05):
4874        handle_cls(s, sf, rn, rd);
4875        break;
4876    case MAP(1, 0x01, 0x00): /* PACIA */
4877        if (s->pauth_active) {
4878            tcg_rd = cpu_reg(s, rd);
4879            gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4880        } else if (!dc_isar_feature(aa64_pauth, s)) {
4881            goto do_unallocated;
4882        }
4883        break;
4884    case MAP(1, 0x01, 0x01): /* PACIB */
4885        if (s->pauth_active) {
4886            tcg_rd = cpu_reg(s, rd);
4887            gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4888        } else if (!dc_isar_feature(aa64_pauth, s)) {
4889            goto do_unallocated;
4890        }
4891        break;
4892    case MAP(1, 0x01, 0x02): /* PACDA */
4893        if (s->pauth_active) {
4894            tcg_rd = cpu_reg(s, rd);
4895            gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4896        } else if (!dc_isar_feature(aa64_pauth, s)) {
4897            goto do_unallocated;
4898        }
4899        break;
4900    case MAP(1, 0x01, 0x03): /* PACDB */
4901        if (s->pauth_active) {
4902            tcg_rd = cpu_reg(s, rd);
4903            gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4904        } else if (!dc_isar_feature(aa64_pauth, s)) {
4905            goto do_unallocated;
4906        }
4907        break;
4908    case MAP(1, 0x01, 0x04): /* AUTIA */
4909        if (s->pauth_active) {
4910            tcg_rd = cpu_reg(s, rd);
4911            gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4912        } else if (!dc_isar_feature(aa64_pauth, s)) {
4913            goto do_unallocated;
4914        }
4915        break;
4916    case MAP(1, 0x01, 0x05): /* AUTIB */
4917        if (s->pauth_active) {
4918            tcg_rd = cpu_reg(s, rd);
4919            gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4920        } else if (!dc_isar_feature(aa64_pauth, s)) {
4921            goto do_unallocated;
4922        }
4923        break;
4924    case MAP(1, 0x01, 0x06): /* AUTDA */
4925        if (s->pauth_active) {
4926            tcg_rd = cpu_reg(s, rd);
4927            gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4928        } else if (!dc_isar_feature(aa64_pauth, s)) {
4929            goto do_unallocated;
4930        }
4931        break;
4932    case MAP(1, 0x01, 0x07): /* AUTDB */
4933        if (s->pauth_active) {
4934            tcg_rd = cpu_reg(s, rd);
4935            gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4936        } else if (!dc_isar_feature(aa64_pauth, s)) {
4937            goto do_unallocated;
4938        }
4939        break;
4940    case MAP(1, 0x01, 0x08): /* PACIZA */
4941        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4942            goto do_unallocated;
4943        } else if (s->pauth_active) {
4944            tcg_rd = cpu_reg(s, rd);
4945            gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4946        }
4947        break;
4948    case MAP(1, 0x01, 0x09): /* PACIZB */
4949        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4950            goto do_unallocated;
4951        } else if (s->pauth_active) {
4952            tcg_rd = cpu_reg(s, rd);
4953            gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4954        }
4955        break;
4956    case MAP(1, 0x01, 0x0a): /* PACDZA */
4957        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4958            goto do_unallocated;
4959        } else if (s->pauth_active) {
4960            tcg_rd = cpu_reg(s, rd);
4961            gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4962        }
4963        break;
4964    case MAP(1, 0x01, 0x0b): /* PACDZB */
4965        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4966            goto do_unallocated;
4967        } else if (s->pauth_active) {
4968            tcg_rd = cpu_reg(s, rd);
4969            gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4970        }
4971        break;
4972    case MAP(1, 0x01, 0x0c): /* AUTIZA */
4973        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4974            goto do_unallocated;
4975        } else if (s->pauth_active) {
4976            tcg_rd = cpu_reg(s, rd);
4977            gen_helper_autia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4978        }
4979        break;
4980    case MAP(1, 0x01, 0x0d): /* AUTIZB */
4981        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4982            goto do_unallocated;
4983        } else if (s->pauth_active) {
4984            tcg_rd = cpu_reg(s, rd);
4985            gen_helper_autib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4986        }
4987        break;
4988    case MAP(1, 0x01, 0x0e): /* AUTDZA */
4989        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4990            goto do_unallocated;
4991        } else if (s->pauth_active) {
4992            tcg_rd = cpu_reg(s, rd);
4993            gen_helper_autda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4994        }
4995        break;
4996    case MAP(1, 0x01, 0x0f): /* AUTDZB */
4997        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4998            goto do_unallocated;
4999        } else if (s->pauth_active) {
5000            tcg_rd = cpu_reg(s, rd);
5001            gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5002        }
5003        break;
5004    case MAP(1, 0x01, 0x10): /* XPACI */
5005        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5006            goto do_unallocated;
5007        } else if (s->pauth_active) {
5008            tcg_rd = cpu_reg(s, rd);
5009            gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
5010        }
5011        break;
5012    case MAP(1, 0x01, 0x11): /* XPACD */
5013        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5014            goto do_unallocated;
5015        } else if (s->pauth_active) {
5016            tcg_rd = cpu_reg(s, rd);
5017            gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
5018        }
5019        break;
5020    default:
5021    do_unallocated:
5022        unallocated_encoding(s);
5023        break;
5024    }
5025
5026#undef MAP
5027}
5028
5029static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5030                       unsigned int rm, unsigned int rn, unsigned int rd)
5031{
5032    TCGv_i64 tcg_n, tcg_m, tcg_rd;
5033    tcg_rd = cpu_reg(s, rd);
5034
5035    if (!sf && is_signed) {
5036        tcg_n = new_tmp_a64(s);
5037        tcg_m = new_tmp_a64(s);
5038        tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5039        tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5040    } else {
5041        tcg_n = read_cpu_reg(s, rn, sf);
5042        tcg_m = read_cpu_reg(s, rm, sf);
5043    }
5044
5045    if (is_signed) {
5046        gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5047    } else {
5048        gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5049    }
5050
5051    if (!sf) { /* zero extend final result */
5052        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5053    }
5054}
5055
5056/* LSLV, LSRV, ASRV, RORV */
5057static void handle_shift_reg(DisasContext *s,
5058                             enum a64_shift_type shift_type, unsigned int sf,
5059                             unsigned int rm, unsigned int rn, unsigned int rd)
5060{
5061    TCGv_i64 tcg_shift = tcg_temp_new_i64();
5062    TCGv_i64 tcg_rd = cpu_reg(s, rd);
5063    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5064
5065    tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5066    shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5067    tcg_temp_free_i64(tcg_shift);
5068}
5069
5070/* CRC32[BHWX], CRC32C[BHWX] */
5071static void handle_crc32(DisasContext *s,
5072                         unsigned int sf, unsigned int sz, bool crc32c,
5073                         unsigned int rm, unsigned int rn, unsigned int rd)
5074{
5075    TCGv_i64 tcg_acc, tcg_val;
5076    TCGv_i32 tcg_bytes;
5077
5078    if (!dc_isar_feature(aa64_crc32, s)
5079        || (sf == 1 && sz != 3)
5080        || (sf == 0 && sz == 3)) {
5081        unallocated_encoding(s);
5082        return;
5083    }
5084
5085    if (sz == 3) {
5086        tcg_val = cpu_reg(s, rm);
5087    } else {
5088        uint64_t mask;
5089        switch (sz) {
5090        case 0:
5091            mask = 0xFF;
5092            break;
5093        case 1:
5094            mask = 0xFFFF;
5095            break;
5096        case 2:
5097            mask = 0xFFFFFFFF;
5098            break;
5099        default:
5100            g_assert_not_reached();
5101        }
5102        tcg_val = new_tmp_a64(s);
5103        tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5104    }
5105
5106    tcg_acc = cpu_reg(s, rn);
5107    tcg_bytes = tcg_const_i32(1 << sz);
5108
5109    if (crc32c) {
5110        gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5111    } else {
5112        gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5113    }
5114
5115    tcg_temp_free_i32(tcg_bytes);
5116}
5117
5118/* Data-processing (2 source)
5119 *   31   30  29 28             21 20  16 15    10 9    5 4    0
5120 * +----+---+---+-----------------+------+--------+------+------+
5121 * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
5122 * +----+---+---+-----------------+------+--------+------+------+
5123 */
5124static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
5125{
5126    unsigned int sf, rm, opcode, rn, rd;
5127    sf = extract32(insn, 31, 1);
5128    rm = extract32(insn, 16, 5);
5129    opcode = extract32(insn, 10, 6);
5130    rn = extract32(insn, 5, 5);
5131    rd = extract32(insn, 0, 5);
5132
5133    if (extract32(insn, 29, 1)) {
5134        unallocated_encoding(s);
5135        return;
5136    }
5137
5138    switch (opcode) {
5139    case 2: /* UDIV */
5140        handle_div(s, false, sf, rm, rn, rd);
5141        break;
5142    case 3: /* SDIV */
5143        handle_div(s, true, sf, rm, rn, rd);
5144        break;
5145    case 8: /* LSLV */
5146        handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
5147        break;
5148    case 9: /* LSRV */
5149        handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
5150        break;
5151    case 10: /* ASRV */
5152        handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
5153        break;
5154    case 11: /* RORV */
5155        handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
5156        break;
5157    case 12: /* PACGA */
5158        if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
5159            goto do_unallocated;
5160        }
5161        gen_helper_pacga(cpu_reg(s, rd), cpu_env,
5162                         cpu_reg(s, rn), cpu_reg_sp(s, rm));
5163        break;
5164    case 16:
5165    case 17:
5166    case 18:
5167    case 19:
5168    case 20:
5169    case 21:
5170    case 22:
5171    case 23: /* CRC32 */
5172    {
5173        int sz = extract32(opcode, 0, 2);
5174        bool crc32c = extract32(opcode, 2, 1);
5175        handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
5176        break;
5177    }
5178    default:
5179    do_unallocated:
5180        unallocated_encoding(s);
5181        break;
5182    }
5183}
5184
5185/*
5186 * Data processing - register
5187 *  31  30 29  28      25    21  20  16      10         0
5188 * +--+---+--+---+-------+-----+-------+-------+---------+
5189 * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
5190 * +--+---+--+---+-------+-----+-------+-------+---------+
5191 */
5192static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5193{
5194    int op0 = extract32(insn, 30, 1);
5195    int op1 = extract32(insn, 28, 1);
5196    int op2 = extract32(insn, 21, 4);
5197    int op3 = extract32(insn, 10, 6);
5198
5199    if (!op1) {
5200        if (op2 & 8) {
5201            if (op2 & 1) {
5202                /* Add/sub (extended register) */
5203                disas_add_sub_ext_reg(s, insn);
5204            } else {
5205                /* Add/sub (shifted register) */
5206                disas_add_sub_reg(s, insn);
5207            }
5208        } else {
5209            /* Logical (shifted register) */
5210            disas_logic_reg(s, insn);
5211        }
5212        return;
5213    }
5214
5215    switch (op2) {
5216    case 0x0:
5217        switch (op3) {
5218        case 0x00: /* Add/subtract (with carry) */
5219            disas_adc_sbc(s, insn);
5220            break;
5221
5222        case 0x01: /* Rotate right into flags */
5223        case 0x21:
5224            disas_rotate_right_into_flags(s, insn);
5225            break;
5226
5227        case 0x02: /* Evaluate into flags */
5228        case 0x12:
5229        case 0x22:
5230        case 0x32:
5231            disas_evaluate_into_flags(s, insn);
5232            break;
5233
5234        default:
5235            goto do_unallocated;
5236        }
5237        break;
5238
5239    case 0x2: /* Conditional compare */
5240        disas_cc(s, insn); /* both imm and reg forms */
5241        break;
5242
5243    case 0x4: /* Conditional select */
5244        disas_cond_select(s, insn);
5245        break;
5246
5247    case 0x6: /* Data-processing */
5248        if (op0) {    /* (1 source) */
5249            disas_data_proc_1src(s, insn);
5250        } else {      /* (2 source) */
5251            disas_data_proc_2src(s, insn);
5252        }
5253        break;
5254    case 0x8 ... 0xf: /* (3 source) */
5255        disas_data_proc_3src(s, insn);
5256        break;
5257
5258    default:
5259    do_unallocated:
5260        unallocated_encoding(s);
5261        break;
5262    }
5263}
5264
5265static void handle_fp_compare(DisasContext *s, int size,
5266                              unsigned int rn, unsigned int rm,
5267                              bool cmp_with_zero, bool signal_all_nans)
5268{
5269    TCGv_i64 tcg_flags = tcg_temp_new_i64();
5270    TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
5271
5272    if (size == MO_64) {
5273        TCGv_i64 tcg_vn, tcg_vm;
5274
5275        tcg_vn = read_fp_dreg(s, rn);
5276        if (cmp_with_zero) {
5277            tcg_vm = tcg_const_i64(0);
5278        } else {
5279            tcg_vm = read_fp_dreg(s, rm);
5280        }
5281        if (signal_all_nans) {
5282            gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5283        } else {
5284            gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5285        }
5286        tcg_temp_free_i64(tcg_vn);
5287        tcg_temp_free_i64(tcg_vm);
5288    } else {
5289        TCGv_i32 tcg_vn = tcg_temp_new_i32();
5290        TCGv_i32 tcg_vm = tcg_temp_new_i32();
5291
5292        read_vec_element_i32(s, tcg_vn, rn, 0, size);
5293        if (cmp_with_zero) {
5294            tcg_gen_movi_i32(tcg_vm, 0);
5295        } else {
5296            read_vec_element_i32(s, tcg_vm, rm, 0, size);
5297        }
5298
5299        switch (size) {
5300        case MO_32:
5301            if (signal_all_nans) {
5302                gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5303            } else {
5304                gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5305            }
5306            break;
5307        case MO_16:
5308            if (signal_all_nans) {
5309                gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5310            } else {
5311                gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5312            }
5313            break;
5314        default:
5315            g_assert_not_reached();
5316        }
5317
5318        tcg_temp_free_i32(tcg_vn);
5319        tcg_temp_free_i32(tcg_vm);
5320    }
5321
5322    tcg_temp_free_ptr(fpst);
5323
5324    gen_set_nzcv(tcg_flags);
5325
5326    tcg_temp_free_i64(tcg_flags);
5327}
5328
5329/* Floating point compare
5330 *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
5331 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5332 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
5333 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5334 */
5335static void disas_fp_compare(DisasContext *s, uint32_t insn)
5336{
5337    unsigned int mos, type, rm, op, rn, opc, op2r;
5338    int size;
5339
5340    mos = extract32(insn, 29, 3);
5341    type = extract32(insn, 22, 2);
5342    rm = extract32(insn, 16, 5);
5343    op = extract32(insn, 14, 2);
5344    rn = extract32(insn, 5, 5);
5345    opc = extract32(insn, 3, 2);
5346    op2r = extract32(insn, 0, 3);
5347
5348    if (mos || op || op2r) {
5349        unallocated_encoding(s);
5350        return;
5351    }
5352
5353    switch (type) {
5354    case 0:
5355        size = MO_32;
5356        break;
5357    case 1:
5358        size = MO_64;
5359        break;
5360    case 3:
5361        size = MO_16;
5362        if (dc_isar_feature(aa64_fp16, s)) {
5363            break;
5364        }
5365        /* fallthru */
5366    default:
5367        unallocated_encoding(s);
5368        return;
5369    }
5370
5371    if (!fp_access_check(s)) {
5372        return;
5373    }
5374
5375    handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
5376}
5377
5378/* Floating point conditional compare
5379 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
5380 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5381 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
5382 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5383 */
5384static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
5385{
5386    unsigned int mos, type, rm, cond, rn, op, nzcv;
5387    TCGv_i64 tcg_flags;
5388    TCGLabel *label_continue = NULL;
5389    int size;
5390
5391    mos = extract32(insn, 29, 3);
5392    type = extract32(insn, 22, 2);
5393    rm = extract32(insn, 16, 5);
5394    cond = extract32(insn, 12, 4);
5395    rn = extract32(insn, 5, 5);
5396    op = extract32(insn, 4, 1);
5397    nzcv = extract32(insn, 0, 4);
5398
5399    if (mos) {
5400        unallocated_encoding(s);
5401        return;
5402    }
5403
5404    switch (type) {
5405    case 0:
5406        size = MO_32;
5407        break;
5408    case 1:
5409        size = MO_64;
5410        break;
5411    case 3:
5412        size = MO_16;
5413        if (dc_isar_feature(aa64_fp16, s)) {
5414            break;
5415        }
5416        /* fallthru */
5417    default:
5418        unallocated_encoding(s);
5419        return;
5420    }
5421
5422    if (!fp_access_check(s)) {
5423        return;
5424    }
5425
5426    if (cond < 0x0e) { /* not always */
5427        TCGLabel *label_match = gen_new_label();
5428        label_continue = gen_new_label();
5429        arm_gen_test_cc(cond, label_match);
5430        /* nomatch: */
5431        tcg_flags = tcg_const_i64(nzcv << 28);
5432        gen_set_nzcv(tcg_flags);
5433        tcg_temp_free_i64(tcg_flags);
5434        tcg_gen_br(label_continue);
5435        gen_set_label(label_match);
5436    }
5437
5438    handle_fp_compare(s, size, rn, rm, false, op);
5439
5440    if (cond < 0x0e) {
5441        gen_set_label(label_continue);
5442    }
5443}
5444
5445/* Floating point conditional select
5446 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
5447 * +---+---+---+-----------+------+---+------+------+-----+------+------+
5448 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
5449 * +---+---+---+-----------+------+---+------+------+-----+------+------+
5450 */
5451static void disas_fp_csel(DisasContext *s, uint32_t insn)
5452{
5453    unsigned int mos, type, rm, cond, rn, rd;
5454    TCGv_i64 t_true, t_false, t_zero;
5455    DisasCompare64 c;
5456    MemOp sz;
5457
5458    mos = extract32(insn, 29, 3);
5459    type = extract32(insn, 22, 2);
5460    rm = extract32(insn, 16, 5);
5461    cond = extract32(insn, 12, 4);
5462    rn = extract32(insn, 5, 5);
5463    rd = extract32(insn, 0, 5);
5464
5465    if (mos) {
5466        unallocated_encoding(s);
5467        return;
5468    }
5469
5470    switch (type) {
5471    case 0:
5472        sz = MO_32;
5473        break;
5474    case 1:
5475        sz = MO_64;
5476        break;
5477    case 3:
5478        sz = MO_16;
5479        if (dc_isar_feature(aa64_fp16, s)) {
5480            break;
5481        }
5482        /* fallthru */
5483    default:
5484        unallocated_encoding(s);
5485        return;
5486    }
5487
5488    if (!fp_access_check(s)) {
5489        return;
5490    }
5491
5492    /* Zero extend sreg & hreg inputs to 64 bits now.  */
5493    t_true = tcg_temp_new_i64();
5494    t_false = tcg_temp_new_i64();
5495    read_vec_element(s, t_true, rn, 0, sz);
5496    read_vec_element(s, t_false, rm, 0, sz);
5497
5498    a64_test_cc(&c, cond);
5499    t_zero = tcg_const_i64(0);
5500    tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
5501    tcg_temp_free_i64(t_zero);
5502    tcg_temp_free_i64(t_false);
5503    a64_free_cc(&c);
5504
5505    /* Note that sregs & hregs write back zeros to the high bits,
5506       and we've already done the zero-extension.  */
5507    write_fp_dreg(s, rd, t_true);
5508    tcg_temp_free_i64(t_true);
5509}
5510
5511/* Floating-point data-processing (1 source) - half precision */
5512static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
5513{
5514    TCGv_ptr fpst = NULL;
5515    TCGv_i32 tcg_op = read_fp_hreg(s, rn);
5516    TCGv_i32 tcg_res = tcg_temp_new_i32();
5517
5518    switch (opcode) {
5519    case 0x0: /* FMOV */
5520        tcg_gen_mov_i32(tcg_res, tcg_op);
5521        break;
5522    case 0x1: /* FABS */
5523        tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
5524        break;
5525    case 0x2: /* FNEG */
5526        tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
5527        break;
5528    case 0x3: /* FSQRT */
5529        fpst = get_fpstatus_ptr(true);
5530        gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
5531        break;
5532    case 0x8: /* FRINTN */
5533    case 0x9: /* FRINTP */
5534    case 0xa: /* FRINTM */
5535    case 0xb: /* FRINTZ */
5536    case 0xc: /* FRINTA */
5537    {
5538        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
5539        fpst = get_fpstatus_ptr(true);
5540
5541        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5542        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5543
5544        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5545        tcg_temp_free_i32(tcg_rmode);
5546        break;
5547    }
5548    case 0xe: /* FRINTX */
5549        fpst = get_fpstatus_ptr(true);
5550        gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
5551        break;
5552    case 0xf: /* FRINTI */
5553        fpst = get_fpstatus_ptr(true);
5554        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5555        break;
5556    default:
5557        abort();
5558    }
5559
5560    write_fp_sreg(s, rd, tcg_res);
5561
5562    if (fpst) {
5563        tcg_temp_free_ptr(fpst);
5564    }
5565    tcg_temp_free_i32(tcg_op);
5566    tcg_temp_free_i32(tcg_res);
5567}
5568
5569/* Floating-point data-processing (1 source) - single precision */
5570static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
5571{
5572    void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
5573    TCGv_i32 tcg_op, tcg_res;
5574    TCGv_ptr fpst;
5575    int rmode = -1;
5576
5577    tcg_op = read_fp_sreg(s, rn);
5578    tcg_res = tcg_temp_new_i32();
5579
5580    switch (opcode) {
5581    case 0x0: /* FMOV */
5582        tcg_gen_mov_i32(tcg_res, tcg_op);
5583        goto done;
5584    case 0x1: /* FABS */
5585        gen_helper_vfp_abss(tcg_res, tcg_op);
5586        goto done;
5587    case 0x2: /* FNEG */
5588        gen_helper_vfp_negs(tcg_res, tcg_op);
5589        goto done;
5590    case 0x3: /* FSQRT */
5591        gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
5592        goto done;
5593    case 0x8: /* FRINTN */
5594    case 0x9: /* FRINTP */
5595    case 0xa: /* FRINTM */
5596    case 0xb: /* FRINTZ */
5597    case 0xc: /* FRINTA */
5598        rmode = arm_rmode_to_sf(opcode & 7);
5599        gen_fpst = gen_helper_rints;
5600        break;
5601    case 0xe: /* FRINTX */
5602        gen_fpst = gen_helper_rints_exact;
5603        break;
5604    case 0xf: /* FRINTI */
5605        gen_fpst = gen_helper_rints;
5606        break;
5607    case 0x10: /* FRINT32Z */
5608        rmode = float_round_to_zero;
5609        gen_fpst = gen_helper_frint32_s;
5610        break;
5611    case 0x11: /* FRINT32X */
5612        gen_fpst = gen_helper_frint32_s;
5613        break;
5614    case 0x12: /* FRINT64Z */
5615        rmode = float_round_to_zero;
5616        gen_fpst = gen_helper_frint64_s;
5617        break;
5618    case 0x13: /* FRINT64X */
5619        gen_fpst = gen_helper_frint64_s;
5620        break;
5621    default:
5622        g_assert_not_reached();
5623    }
5624
5625    fpst = get_fpstatus_ptr(false);
5626    if (rmode >= 0) {
5627        TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
5628        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5629        gen_fpst(tcg_res, tcg_op, fpst);
5630        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5631        tcg_temp_free_i32(tcg_rmode);
5632    } else {
5633        gen_fpst(tcg_res, tcg_op, fpst);
5634    }
5635    tcg_temp_free_ptr(fpst);
5636
5637 done:
5638    write_fp_sreg(s, rd, tcg_res);
5639    tcg_temp_free_i32(tcg_op);
5640    tcg_temp_free_i32(tcg_res);
5641}
5642
5643/* Floating-point data-processing (1 source) - double precision */
5644static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
5645{
5646    void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
5647    TCGv_i64 tcg_op, tcg_res;
5648    TCGv_ptr fpst;
5649    int rmode = -1;
5650
5651    switch (opcode) {
5652    case 0x0: /* FMOV */
5653        gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
5654        return;
5655    }
5656
5657    tcg_op = read_fp_dreg(s, rn);
5658    tcg_res = tcg_temp_new_i64();
5659
5660    switch (opcode) {
5661    case 0x1: /* FABS */
5662        gen_helper_vfp_absd(tcg_res, tcg_op);
5663        goto done;
5664    case 0x2: /* FNEG */
5665        gen_helper_vfp_negd(tcg_res, tcg_op);
5666        goto done;
5667    case 0x3: /* FSQRT */
5668        gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
5669        goto done;
5670    case 0x8: /* FRINTN */
5671    case 0x9: /* FRINTP */
5672    case 0xa: /* FRINTM */
5673    case 0xb: /* FRINTZ */
5674    case 0xc: /* FRINTA */
5675        rmode = arm_rmode_to_sf(opcode & 7);
5676        gen_fpst = gen_helper_rintd;
5677        break;
5678    case 0xe: /* FRINTX */
5679        gen_fpst = gen_helper_rintd_exact;
5680        break;
5681    case 0xf: /* FRINTI */
5682        gen_fpst = gen_helper_rintd;
5683        break;
5684    case 0x10: /* FRINT32Z */
5685        rmode = float_round_to_zero;
5686        gen_fpst = gen_helper_frint32_d;
5687        break;
5688    case 0x11: /* FRINT32X */
5689        gen_fpst = gen_helper_frint32_d;
5690        break;
5691    case 0x12: /* FRINT64Z */
5692        rmode = float_round_to_zero;
5693        gen_fpst = gen_helper_frint64_d;
5694        break;
5695    case 0x13: /* FRINT64X */
5696        gen_fpst = gen_helper_frint64_d;
5697        break;
5698    default:
5699        g_assert_not_reached();
5700    }
5701
5702    fpst = get_fpstatus_ptr(false);
5703    if (rmode >= 0) {
5704        TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
5705        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5706        gen_fpst(tcg_res, tcg_op, fpst);
5707        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5708        tcg_temp_free_i32(tcg_rmode);
5709    } else {
5710        gen_fpst(tcg_res, tcg_op, fpst);
5711    }
5712    tcg_temp_free_ptr(fpst);
5713
5714 done:
5715    write_fp_dreg(s, rd, tcg_res);
5716    tcg_temp_free_i64(tcg_op);
5717    tcg_temp_free_i64(tcg_res);
5718}
5719
5720static void handle_fp_fcvt(DisasContext *s, int opcode,
5721                           int rd, int rn, int dtype, int ntype)
5722{
5723    switch (ntype) {
5724    case 0x0:
5725    {
5726        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
5727        if (dtype == 1) {
5728            /* Single to double */
5729            TCGv_i64 tcg_rd = tcg_temp_new_i64();
5730            gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
5731            write_fp_dreg(s, rd, tcg_rd);
5732            tcg_temp_free_i64(tcg_rd);
5733        } else {
5734            /* Single to half */
5735            TCGv_i32 tcg_rd = tcg_temp_new_i32();
5736            TCGv_i32 ahp = get_ahp_flag();
5737            TCGv_ptr fpst = get_fpstatus_ptr(false);
5738
5739            gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5740            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5741            write_fp_sreg(s, rd, tcg_rd);
5742            tcg_temp_free_i32(tcg_rd);
5743            tcg_temp_free_i32(ahp);
5744            tcg_temp_free_ptr(fpst);
5745        }
5746        tcg_temp_free_i32(tcg_rn);
5747        break;
5748    }
5749    case 0x1:
5750    {
5751        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
5752        TCGv_i32 tcg_rd = tcg_temp_new_i32();
5753        if (dtype == 0) {
5754            /* Double to single */
5755            gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
5756        } else {
5757            TCGv_ptr fpst = get_fpstatus_ptr(false);
5758            TCGv_i32 ahp = get_ahp_flag();
5759            /* Double to half */
5760            gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5761            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5762            tcg_temp_free_ptr(fpst);
5763            tcg_temp_free_i32(ahp);
5764        }
5765        write_fp_sreg(s, rd, tcg_rd);
5766        tcg_temp_free_i32(tcg_rd);
5767        tcg_temp_free_i64(tcg_rn);
5768        break;
5769    }
5770    case 0x3:
5771    {
5772        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
5773        TCGv_ptr tcg_fpst = get_fpstatus_ptr(false);
5774        TCGv_i32 tcg_ahp = get_ahp_flag();
5775        tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
5776        if (dtype == 0) {
5777            /* Half to single */
5778            TCGv_i32 tcg_rd = tcg_temp_new_i32();
5779            gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
5780            write_fp_sreg(s, rd, tcg_rd);
5781            tcg_temp_free_ptr(tcg_fpst);
5782            tcg_temp_free_i32(tcg_ahp);
5783            tcg_temp_free_i32(tcg_rd);
5784        } else {
5785            /* Half to double */
5786            TCGv_i64 tcg_rd = tcg_temp_new_i64();
5787            gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
5788            write_fp_dreg(s, rd, tcg_rd);
5789            tcg_temp_free_i64(tcg_rd);
5790        }
5791        tcg_temp_free_i32(tcg_rn);
5792        break;
5793    }
5794    default:
5795        abort();
5796    }
5797}
5798
5799/* Floating point data-processing (1 source)
5800 *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
5801 * +---+---+---+-----------+------+---+--------+-----------+------+------+
5802 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
5803 * +---+---+---+-----------+------+---+--------+-----------+------+------+
5804 */
5805static void disas_fp_1src(DisasContext *s, uint32_t insn)
5806{
5807    int mos = extract32(insn, 29, 3);
5808    int type = extract32(insn, 22, 2);
5809    int opcode = extract32(insn, 15, 6);
5810    int rn = extract32(insn, 5, 5);
5811    int rd = extract32(insn, 0, 5);
5812
5813    if (mos) {
5814        unallocated_encoding(s);
5815        return;
5816    }
5817
5818    switch (opcode) {
5819    case 0x4: case 0x5: case 0x7:
5820    {
5821        /* FCVT between half, single and double precision */
5822        int dtype = extract32(opcode, 0, 2);
5823        if (type == 2 || dtype == type) {
5824            unallocated_encoding(s);
5825            return;
5826        }
5827        if (!fp_access_check(s)) {
5828            return;
5829        }
5830
5831        handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
5832        break;
5833    }
5834
5835    case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
5836        if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
5837            unallocated_encoding(s);
5838            return;
5839        }
5840        /* fall through */
5841    case 0x0 ... 0x3:
5842    case 0x8 ... 0xc:
5843    case 0xe ... 0xf:
5844        /* 32-to-32 and 64-to-64 ops */
5845        switch (type) {
5846        case 0:
5847            if (!fp_access_check(s)) {
5848                return;
5849            }
5850            handle_fp_1src_single(s, opcode, rd, rn);
5851            break;
5852        case 1:
5853            if (!fp_access_check(s)) {
5854                return;
5855            }
5856            handle_fp_1src_double(s, opcode, rd, rn);
5857            break;
5858        case 3:
5859            if (!dc_isar_feature(aa64_fp16, s)) {
5860                unallocated_encoding(s);
5861                return;
5862            }
5863
5864            if (!fp_access_check(s)) {
5865                return;
5866            }
5867            handle_fp_1src_half(s, opcode, rd, rn);
5868            break;
5869        default:
5870            unallocated_encoding(s);
5871        }
5872        break;
5873
5874    default:
5875        unallocated_encoding(s);
5876        break;
5877    }
5878}
5879
5880/* Floating-point data-processing (2 source) - single precision */
5881static void handle_fp_2src_single(DisasContext *s, int opcode,
5882                                  int rd, int rn, int rm)
5883{
5884    TCGv_i32 tcg_op1;
5885    TCGv_i32 tcg_op2;
5886    TCGv_i32 tcg_res;
5887    TCGv_ptr fpst;
5888
5889    tcg_res = tcg_temp_new_i32();
5890    fpst = get_fpstatus_ptr(false);
5891    tcg_op1 = read_fp_sreg(s, rn);
5892    tcg_op2 = read_fp_sreg(s, rm);
5893
5894    switch (opcode) {
5895    case 0x0: /* FMUL */
5896        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
5897        break;
5898    case 0x1: /* FDIV */
5899        gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
5900        break;
5901    case 0x2: /* FADD */
5902        gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
5903        break;
5904    case 0x3: /* FSUB */
5905        gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
5906        break;
5907    case 0x4: /* FMAX */
5908        gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
5909        break;
5910    case 0x5: /* FMIN */
5911        gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
5912        break;
5913    case 0x6: /* FMAXNM */
5914        gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
5915        break;
5916    case 0x7: /* FMINNM */
5917        gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
5918        break;
5919    case 0x8: /* FNMUL */
5920        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
5921        gen_helper_vfp_negs(tcg_res, tcg_res);
5922        break;
5923    }
5924
5925    write_fp_sreg(s, rd, tcg_res);
5926
5927    tcg_temp_free_ptr(fpst);
5928    tcg_temp_free_i32(tcg_op1);
5929    tcg_temp_free_i32(tcg_op2);
5930    tcg_temp_free_i32(tcg_res);
5931}
5932
5933/* Floating-point data-processing (2 source) - double precision */
5934static void handle_fp_2src_double(DisasContext *s, int opcode,
5935                                  int rd, int rn, int rm)
5936{
5937    TCGv_i64 tcg_op1;
5938    TCGv_i64 tcg_op2;
5939    TCGv_i64 tcg_res;
5940    TCGv_ptr fpst;
5941
5942    tcg_res = tcg_temp_new_i64();
5943    fpst = get_fpstatus_ptr(false);
5944    tcg_op1 = read_fp_dreg(s, rn);
5945    tcg_op2 = read_fp_dreg(s, rm);
5946
5947    switch (opcode) {
5948    case 0x0: /* FMUL */
5949        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5950        break;
5951    case 0x1: /* FDIV */
5952        gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
5953        break;
5954    case 0x2: /* FADD */
5955        gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
5956        break;
5957    case 0x3: /* FSUB */
5958        gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
5959        break;
5960    case 0x4: /* FMAX */
5961        gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
5962        break;
5963    case 0x5: /* FMIN */
5964        gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
5965        break;
5966    case 0x6: /* FMAXNM */
5967        gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5968        break;
5969    case 0x7: /* FMINNM */
5970        gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5971        break;
5972    case 0x8: /* FNMUL */
5973        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5974        gen_helper_vfp_negd(tcg_res, tcg_res);
5975        break;
5976    }
5977
5978    write_fp_dreg(s, rd, tcg_res);
5979
5980    tcg_temp_free_ptr(fpst);
5981    tcg_temp_free_i64(tcg_op1);
5982    tcg_temp_free_i64(tcg_op2);
5983    tcg_temp_free_i64(tcg_res);
5984}
5985
5986/* Floating-point data-processing (2 source) - half precision */
5987static void handle_fp_2src_half(DisasContext *s, int opcode,
5988                                int rd, int rn, int rm)
5989{
5990    TCGv_i32 tcg_op1;
5991    TCGv_i32 tcg_op2;
5992    TCGv_i32 tcg_res;
5993    TCGv_ptr fpst;
5994
5995    tcg_res = tcg_temp_new_i32();
5996    fpst = get_fpstatus_ptr(true);
5997    tcg_op1 = read_fp_hreg(s, rn);
5998    tcg_op2 = read_fp_hreg(s, rm);
5999
6000    switch (opcode) {
6001    case 0x0: /* FMUL */
6002        gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6003        break;
6004    case 0x1: /* FDIV */
6005        gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
6006        break;
6007    case 0x2: /* FADD */
6008        gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6009        break;
6010    case 0x3: /* FSUB */
6011        gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
6012        break;
6013    case 0x4: /* FMAX */
6014        gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6015        break;
6016    case 0x5: /* FMIN */
6017        gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6018        break;
6019    case 0x6: /* FMAXNM */
6020        gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6021        break;
6022    case 0x7: /* FMINNM */
6023        gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6024        break;
6025    case 0x8: /* FNMUL */
6026        gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6027        tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6028        break;
6029    default:
6030        g_assert_not_reached();
6031    }
6032
6033    write_fp_sreg(s, rd, tcg_res);
6034
6035    tcg_temp_free_ptr(fpst);
6036    tcg_temp_free_i32(tcg_op1);
6037    tcg_temp_free_i32(tcg_op2);
6038    tcg_temp_free_i32(tcg_res);
6039}
6040
6041/* Floating point data-processing (2 source)
6042 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6043 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6044 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
6045 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6046 */
6047static void disas_fp_2src(DisasContext *s, uint32_t insn)
6048{
6049    int mos = extract32(insn, 29, 3);
6050    int type = extract32(insn, 22, 2);
6051    int rd = extract32(insn, 0, 5);
6052    int rn = extract32(insn, 5, 5);
6053    int rm = extract32(insn, 16, 5);
6054    int opcode = extract32(insn, 12, 4);
6055
6056    if (opcode > 8 || mos) {
6057        unallocated_encoding(s);
6058        return;
6059    }
6060
6061    switch (type) {
6062    case 0:
6063        if (!fp_access_check(s)) {
6064            return;
6065        }
6066        handle_fp_2src_single(s, opcode, rd, rn, rm);
6067        break;
6068    case 1:
6069        if (!fp_access_check(s)) {
6070            return;
6071        }
6072        handle_fp_2src_double(s, opcode, rd, rn, rm);
6073        break;
6074    case 3:
6075        if (!dc_isar_feature(aa64_fp16, s)) {
6076            unallocated_encoding(s);
6077            return;
6078        }
6079        if (!fp_access_check(s)) {
6080            return;
6081        }
6082        handle_fp_2src_half(s, opcode, rd, rn, rm);
6083        break;
6084    default:
6085        unallocated_encoding(s);
6086    }
6087}
6088
6089/* Floating-point data-processing (3 source) - single precision */
6090static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6091                                  int rd, int rn, int rm, int ra)
6092{
6093    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6094    TCGv_i32 tcg_res = tcg_temp_new_i32();
6095    TCGv_ptr fpst = get_fpstatus_ptr(false);
6096
6097    tcg_op1 = read_fp_sreg(s, rn);
6098    tcg_op2 = read_fp_sreg(s, rm);
6099    tcg_op3 = read_fp_sreg(s, ra);
6100
6101    /* These are fused multiply-add, and must be done as one
6102     * floating point operation with no rounding between the
6103     * multiplication and addition steps.
6104     * NB that doing the negations here as separate steps is
6105     * correct : an input NaN should come out with its sign bit
6106     * flipped if it is a negated-input.
6107     */
6108    if (o1 == true) {
6109        gen_helper_vfp_negs(tcg_op3, tcg_op3);
6110    }
6111
6112    if (o0 != o1) {
6113        gen_helper_vfp_negs(tcg_op1, tcg_op1);
6114    }
6115
6116    gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6117
6118    write_fp_sreg(s, rd, tcg_res);
6119
6120    tcg_temp_free_ptr(fpst);
6121    tcg_temp_free_i32(tcg_op1);
6122    tcg_temp_free_i32(tcg_op2);
6123    tcg_temp_free_i32(tcg_op3);
6124    tcg_temp_free_i32(tcg_res);
6125}
6126
6127/* Floating-point data-processing (3 source) - double precision */
6128static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6129                                  int rd, int rn, int rm, int ra)
6130{
6131    TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6132    TCGv_i64 tcg_res = tcg_temp_new_i64();
6133    TCGv_ptr fpst = get_fpstatus_ptr(false);
6134
6135    tcg_op1 = read_fp_dreg(s, rn);
6136    tcg_op2 = read_fp_dreg(s, rm);
6137    tcg_op3 = read_fp_dreg(s, ra);
6138
6139    /* These are fused multiply-add, and must be done as one
6140     * floating point operation with no rounding between the
6141     * multiplication and addition steps.
6142     * NB that doing the negations here as separate steps is
6143     * correct : an input NaN should come out with its sign bit
6144     * flipped if it is a negated-input.
6145     */
6146    if (o1 == true) {
6147        gen_helper_vfp_negd(tcg_op3, tcg_op3);
6148    }
6149
6150    if (o0 != o1) {
6151        gen_helper_vfp_negd(tcg_op1, tcg_op1);
6152    }
6153
6154    gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6155
6156    write_fp_dreg(s, rd, tcg_res);
6157
6158    tcg_temp_free_ptr(fpst);
6159    tcg_temp_free_i64(tcg_op1);
6160    tcg_temp_free_i64(tcg_op2);
6161    tcg_temp_free_i64(tcg_op3);
6162    tcg_temp_free_i64(tcg_res);
6163}
6164
6165/* Floating-point data-processing (3 source) - half precision */
6166static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6167                                int rd, int rn, int rm, int ra)
6168{
6169    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6170    TCGv_i32 tcg_res = tcg_temp_new_i32();
6171    TCGv_ptr fpst = get_fpstatus_ptr(true);
6172
6173    tcg_op1 = read_fp_hreg(s, rn);
6174    tcg_op2 = read_fp_hreg(s, rm);
6175    tcg_op3 = read_fp_hreg(s, ra);
6176
6177    /* These are fused multiply-add, and must be done as one
6178     * floating point operation with no rounding between the
6179     * multiplication and addition steps.
6180     * NB that doing the negations here as separate steps is
6181     * correct : an input NaN should come out with its sign bit
6182     * flipped if it is a negated-input.
6183     */
6184    if (o1 == true) {
6185        tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
6186    }
6187
6188    if (o0 != o1) {
6189        tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6190    }
6191
6192    gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6193
6194    write_fp_sreg(s, rd, tcg_res);
6195
6196    tcg_temp_free_ptr(fpst);
6197    tcg_temp_free_i32(tcg_op1);
6198    tcg_temp_free_i32(tcg_op2);
6199    tcg_temp_free_i32(tcg_op3);
6200    tcg_temp_free_i32(tcg_res);
6201}
6202
6203/* Floating point data-processing (3 source)
6204 *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
6205 * +---+---+---+-----------+------+----+------+----+------+------+------+
6206 * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
6207 * +---+---+---+-----------+------+----+------+----+------+------+------+
6208 */
6209static void disas_fp_3src(DisasContext *s, uint32_t insn)
6210{
6211    int mos = extract32(insn, 29, 3);
6212    int type = extract32(insn, 22, 2);
6213    int rd = extract32(insn, 0, 5);
6214    int rn = extract32(insn, 5, 5);
6215    int ra = extract32(insn, 10, 5);
6216    int rm = extract32(insn, 16, 5);
6217    bool o0 = extract32(insn, 15, 1);
6218    bool o1 = extract32(insn, 21, 1);
6219
6220    if (mos) {
6221        unallocated_encoding(s);
6222        return;
6223    }
6224
6225    switch (type) {
6226    case 0:
6227        if (!fp_access_check(s)) {
6228            return;
6229        }
6230        handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6231        break;
6232    case 1:
6233        if (!fp_access_check(s)) {
6234            return;
6235        }
6236        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6237        break;
6238    case 3:
6239        if (!dc_isar_feature(aa64_fp16, s)) {
6240            unallocated_encoding(s);
6241            return;
6242        }
6243        if (!fp_access_check(s)) {
6244            return;
6245        }
6246        handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6247        break;
6248    default:
6249        unallocated_encoding(s);
6250    }
6251}
6252
6253/* Floating point immediate
6254 *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
6255 * +---+---+---+-----------+------+---+------------+-------+------+------+
6256 * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
6257 * +---+---+---+-----------+------+---+------------+-------+------+------+
6258 */
6259static void disas_fp_imm(DisasContext *s, uint32_t insn)
6260{
6261    int rd = extract32(insn, 0, 5);
6262    int imm5 = extract32(insn, 5, 5);
6263    int imm8 = extract32(insn, 13, 8);
6264    int type = extract32(insn, 22, 2);
6265    int mos = extract32(insn, 29, 3);
6266    uint64_t imm;
6267    TCGv_i64 tcg_res;
6268    MemOp sz;
6269
6270    if (mos || imm5) {
6271        unallocated_encoding(s);
6272        return;
6273    }
6274
6275    switch (type) {
6276    case 0:
6277        sz = MO_32;
6278        break;
6279    case 1:
6280        sz = MO_64;
6281        break;
6282    case 3:
6283        sz = MO_16;
6284        if (dc_isar_feature(aa64_fp16, s)) {
6285            break;
6286        }
6287        /* fallthru */
6288    default:
6289        unallocated_encoding(s);
6290        return;
6291    }
6292
6293    if (!fp_access_check(s)) {
6294        return;
6295    }
6296
6297    imm = vfp_expand_imm(sz, imm8);
6298
6299    tcg_res = tcg_const_i64(imm);
6300    write_fp_dreg(s, rd, tcg_res);
6301    tcg_temp_free_i64(tcg_res);
6302}
6303
6304/* Handle floating point <=> fixed point conversions. Note that we can
6305 * also deal with fp <=> integer conversions as a special case (scale == 64)
6306 * OPTME: consider handling that special case specially or at least skipping
6307 * the call to scalbn in the helpers for zero shifts.
6308 */
6309static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
6310                           bool itof, int rmode, int scale, int sf, int type)
6311{
6312    bool is_signed = !(opcode & 1);
6313    TCGv_ptr tcg_fpstatus;
6314    TCGv_i32 tcg_shift, tcg_single;
6315    TCGv_i64 tcg_double;
6316
6317    tcg_fpstatus = get_fpstatus_ptr(type == 3);
6318
6319    tcg_shift = tcg_const_i32(64 - scale);
6320
6321    if (itof) {
6322        TCGv_i64 tcg_int = cpu_reg(s, rn);
6323        if (!sf) {
6324            TCGv_i64 tcg_extend = new_tmp_a64(s);
6325
6326            if (is_signed) {
6327                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
6328            } else {
6329                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
6330            }
6331
6332            tcg_int = tcg_extend;
6333        }
6334
6335        switch (type) {
6336        case 1: /* float64 */
6337            tcg_double = tcg_temp_new_i64();
6338            if (is_signed) {
6339                gen_helper_vfp_sqtod(tcg_double, tcg_int,
6340                                     tcg_shift, tcg_fpstatus);
6341            } else {
6342                gen_helper_vfp_uqtod(tcg_double, tcg_int,
6343                                     tcg_shift, tcg_fpstatus);
6344            }
6345            write_fp_dreg(s, rd, tcg_double);
6346            tcg_temp_free_i64(tcg_double);
6347            break;
6348
6349        case 0: /* float32 */
6350            tcg_single = tcg_temp_new_i32();
6351            if (is_signed) {
6352                gen_helper_vfp_sqtos(tcg_single, tcg_int,
6353                                     tcg_shift, tcg_fpstatus);
6354            } else {
6355                gen_helper_vfp_uqtos(tcg_single, tcg_int,
6356                                     tcg_shift, tcg_fpstatus);
6357            }
6358            write_fp_sreg(s, rd, tcg_single);
6359            tcg_temp_free_i32(tcg_single);
6360            break;
6361
6362        case 3: /* float16 */
6363            tcg_single = tcg_temp_new_i32();
6364            if (is_signed) {
6365                gen_helper_vfp_sqtoh(tcg_single, tcg_int,
6366                                     tcg_shift, tcg_fpstatus);
6367            } else {
6368                gen_helper_vfp_uqtoh(tcg_single, tcg_int,
6369                                     tcg_shift, tcg_fpstatus);
6370            }
6371            write_fp_sreg(s, rd, tcg_single);
6372            tcg_temp_free_i32(tcg_single);
6373            break;
6374
6375        default:
6376            g_assert_not_reached();
6377        }
6378    } else {
6379        TCGv_i64 tcg_int = cpu_reg(s, rd);
6380        TCGv_i32 tcg_rmode;
6381
6382        if (extract32(opcode, 2, 1)) {
6383            /* There are too many rounding modes to all fit into rmode,
6384             * so FCVTA[US] is a special case.
6385             */
6386            rmode = FPROUNDING_TIEAWAY;
6387        }
6388
6389        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6390
6391        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
6392
6393        switch (type) {
6394        case 1: /* float64 */
6395            tcg_double = read_fp_dreg(s, rn);
6396            if (is_signed) {
6397                if (!sf) {
6398                    gen_helper_vfp_tosld(tcg_int, tcg_double,
6399                                         tcg_shift, tcg_fpstatus);
6400                } else {
6401                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
6402                                         tcg_shift, tcg_fpstatus);
6403                }
6404            } else {
6405                if (!sf) {
6406                    gen_helper_vfp_tould(tcg_int, tcg_double,
6407                                         tcg_shift, tcg_fpstatus);
6408                } else {
6409                    gen_helper_vfp_touqd(tcg_int, tcg_double,
6410                                         tcg_shift, tcg_fpstatus);
6411                }
6412            }
6413            if (!sf) {
6414                tcg_gen_ext32u_i64(tcg_int, tcg_int);
6415            }
6416            tcg_temp_free_i64(tcg_double);
6417            break;
6418
6419        case 0: /* float32 */
6420            tcg_single = read_fp_sreg(s, rn);
6421            if (sf) {
6422                if (is_signed) {
6423                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
6424                                         tcg_shift, tcg_fpstatus);
6425                } else {
6426                    gen_helper_vfp_touqs(tcg_int, tcg_single,
6427                                         tcg_shift, tcg_fpstatus);
6428                }
6429            } else {
6430                TCGv_i32 tcg_dest = tcg_temp_new_i32();
6431                if (is_signed) {
6432                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
6433                                         tcg_shift, tcg_fpstatus);
6434                } else {
6435                    gen_helper_vfp_touls(tcg_dest, tcg_single,
6436                                         tcg_shift, tcg_fpstatus);
6437                }
6438                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6439                tcg_temp_free_i32(tcg_dest);
6440            }
6441            tcg_temp_free_i32(tcg_single);
6442            break;
6443
6444        case 3: /* float16 */
6445            tcg_single = read_fp_sreg(s, rn);
6446            if (sf) {
6447                if (is_signed) {
6448                    gen_helper_vfp_tosqh(tcg_int, tcg_single,
6449                                         tcg_shift, tcg_fpstatus);
6450                } else {
6451                    gen_helper_vfp_touqh(tcg_int, tcg_single,
6452                                         tcg_shift, tcg_fpstatus);
6453                }
6454            } else {
6455                TCGv_i32 tcg_dest = tcg_temp_new_i32();
6456                if (is_signed) {
6457                    gen_helper_vfp_toslh(tcg_dest, tcg_single,
6458                                         tcg_shift, tcg_fpstatus);
6459                } else {
6460                    gen_helper_vfp_toulh(tcg_dest, tcg_single,
6461                                         tcg_shift, tcg_fpstatus);
6462                }
6463                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6464                tcg_temp_free_i32(tcg_dest);
6465            }
6466            tcg_temp_free_i32(tcg_single);
6467            break;
6468
6469        default:
6470            g_assert_not_reached();
6471        }
6472
6473        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
6474        tcg_temp_free_i32(tcg_rmode);
6475    }
6476
6477    tcg_temp_free_ptr(tcg_fpstatus);
6478    tcg_temp_free_i32(tcg_shift);
6479}
6480
6481/* Floating point <-> fixed point conversions
6482 *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
6483 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6484 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
6485 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6486 */
6487static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
6488{
6489    int rd = extract32(insn, 0, 5);
6490    int rn = extract32(insn, 5, 5);
6491    int scale = extract32(insn, 10, 6);
6492    int opcode = extract32(insn, 16, 3);
6493    int rmode = extract32(insn, 19, 2);
6494    int type = extract32(insn, 22, 2);
6495    bool sbit = extract32(insn, 29, 1);
6496    bool sf = extract32(insn, 31, 1);
6497    bool itof;
6498
6499    if (sbit || (!sf && scale < 32)) {
6500        unallocated_encoding(s);
6501        return;
6502    }
6503
6504    switch (type) {
6505    case 0: /* float32 */
6506    case 1: /* float64 */
6507        break;
6508    case 3: /* float16 */
6509        if (dc_isar_feature(aa64_fp16, s)) {
6510            break;
6511        }
6512        /* fallthru */
6513    default:
6514        unallocated_encoding(s);
6515        return;
6516    }
6517
6518    switch ((rmode << 3) | opcode) {
6519    case 0x2: /* SCVTF */
6520    case 0x3: /* UCVTF */
6521        itof = true;
6522        break;
6523    case 0x18: /* FCVTZS */
6524    case 0x19: /* FCVTZU */
6525        itof = false;
6526        break;
6527    default:
6528        unallocated_encoding(s);
6529        return;
6530    }
6531
6532    if (!fp_access_check(s)) {
6533        return;
6534    }
6535
6536    handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
6537}
6538
6539static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
6540{
6541    /* FMOV: gpr to or from float, double, or top half of quad fp reg,
6542     * without conversion.
6543     */
6544
6545    if (itof) {
6546        TCGv_i64 tcg_rn = cpu_reg(s, rn);
6547        TCGv_i64 tmp;
6548
6549        switch (type) {
6550        case 0:
6551            /* 32 bit */
6552            tmp = tcg_temp_new_i64();
6553            tcg_gen_ext32u_i64(tmp, tcg_rn);
6554            write_fp_dreg(s, rd, tmp);
6555            tcg_temp_free_i64(tmp);
6556            break;
6557        case 1:
6558            /* 64 bit */
6559            write_fp_dreg(s, rd, tcg_rn);
6560            break;
6561        case 2:
6562            /* 64 bit to top half. */
6563            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
6564            clear_vec_high(s, true, rd);
6565            break;
6566        case 3:
6567            /* 16 bit */
6568            tmp = tcg_temp_new_i64();
6569            tcg_gen_ext16u_i64(tmp, tcg_rn);
6570            write_fp_dreg(s, rd, tmp);
6571            tcg_temp_free_i64(tmp);
6572            break;
6573        default:
6574            g_assert_not_reached();
6575        }
6576    } else {
6577        TCGv_i64 tcg_rd = cpu_reg(s, rd);
6578
6579        switch (type) {
6580        case 0:
6581            /* 32 bit */
6582            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
6583            break;
6584        case 1:
6585            /* 64 bit */
6586            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
6587            break;
6588        case 2:
6589            /* 64 bits from top half */
6590            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
6591            break;
6592        case 3:
6593            /* 16 bit */
6594            tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
6595            break;
6596        default:
6597            g_assert_not_reached();
6598        }
6599    }
6600}
6601
6602static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
6603{
6604    TCGv_i64 t = read_fp_dreg(s, rn);
6605    TCGv_ptr fpstatus = get_fpstatus_ptr(false);
6606
6607    gen_helper_fjcvtzs(t, t, fpstatus);
6608
6609    tcg_temp_free_ptr(fpstatus);
6610
6611    tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
6612    tcg_gen_extrh_i64_i32(cpu_ZF, t);
6613    tcg_gen_movi_i32(cpu_CF, 0);
6614    tcg_gen_movi_i32(cpu_NF, 0);
6615    tcg_gen_movi_i32(cpu_VF, 0);
6616
6617    tcg_temp_free_i64(t);
6618}
6619
6620/* Floating point <-> integer conversions
6621 *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
6622 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6623 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
6624 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6625 */
6626static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
6627{
6628    int rd = extract32(insn, 0, 5);
6629    int rn = extract32(insn, 5, 5);
6630    int opcode = extract32(insn, 16, 3);
6631    int rmode = extract32(insn, 19, 2);
6632    int type = extract32(insn, 22, 2);
6633    bool sbit = extract32(insn, 29, 1);
6634    bool sf = extract32(insn, 31, 1);
6635    bool itof = false;
6636
6637    if (sbit) {
6638        goto do_unallocated;
6639    }
6640
6641    switch (opcode) {
6642    case 2: /* SCVTF */
6643    case 3: /* UCVTF */
6644        itof = true;
6645        /* fallthru */
6646    case 4: /* FCVTAS */
6647    case 5: /* FCVTAU */
6648        if (rmode != 0) {
6649            goto do_unallocated;
6650        }
6651        /* fallthru */
6652    case 0: /* FCVT[NPMZ]S */
6653    case 1: /* FCVT[NPMZ]U */
6654        switch (type) {
6655        case 0: /* float32 */
6656        case 1: /* float64 */
6657            break;
6658        case 3: /* float16 */
6659            if (!dc_isar_feature(aa64_fp16, s)) {
6660                goto do_unallocated;
6661            }
6662            break;
6663        default:
6664            goto do_unallocated;
6665        }
6666        if (!fp_access_check(s)) {
6667            return;
6668        }
6669        handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
6670        break;
6671
6672    default:
6673        switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
6674        case 0b01100110: /* FMOV half <-> 32-bit int */
6675        case 0b01100111:
6676        case 0b11100110: /* FMOV half <-> 64-bit int */
6677        case 0b11100111:
6678            if (!dc_isar_feature(aa64_fp16, s)) {
6679                goto do_unallocated;
6680            }
6681            /* fallthru */
6682        case 0b00000110: /* FMOV 32-bit */
6683        case 0b00000111:
6684        case 0b10100110: /* FMOV 64-bit */
6685        case 0b10100111:
6686        case 0b11001110: /* FMOV top half of 128-bit */
6687        case 0b11001111:
6688            if (!fp_access_check(s)) {
6689                return;
6690            }
6691            itof = opcode & 1;
6692            handle_fmov(s, rd, rn, type, itof);
6693            break;
6694
6695        case 0b00111110: /* FJCVTZS */
6696            if (!dc_isar_feature(aa64_jscvt, s)) {
6697                goto do_unallocated;
6698            } else if (fp_access_check(s)) {
6699                handle_fjcvtzs(s, rd, rn);
6700            }
6701            break;
6702
6703        default:
6704        do_unallocated:
6705            unallocated_encoding(s);
6706            return;
6707        }
6708        break;
6709    }
6710}
6711
6712/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
6713 *   31  30  29 28     25 24                          0
6714 * +---+---+---+---------+-----------------------------+
6715 * |   | 0 |   | 1 1 1 1 |                             |
6716 * +---+---+---+---------+-----------------------------+
6717 */
6718static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
6719{
6720    if (extract32(insn, 24, 1)) {
6721        /* Floating point data-processing (3 source) */
6722        disas_fp_3src(s, insn);
6723    } else if (extract32(insn, 21, 1) == 0) {
6724        /* Floating point to fixed point conversions */
6725        disas_fp_fixed_conv(s, insn);
6726    } else {
6727        switch (extract32(insn, 10, 2)) {
6728        case 1:
6729            /* Floating point conditional compare */
6730            disas_fp_ccomp(s, insn);
6731            break;
6732        case 2:
6733            /* Floating point data-processing (2 source) */
6734            disas_fp_2src(s, insn);
6735            break;
6736        case 3:
6737            /* Floating point conditional select */
6738            disas_fp_csel(s, insn);
6739            break;
6740        case 0:
6741            switch (ctz32(extract32(insn, 12, 4))) {
6742            case 0: /* [15:12] == xxx1 */
6743                /* Floating point immediate */
6744                disas_fp_imm(s, insn);
6745                break;
6746            case 1: /* [15:12] == xx10 */
6747                /* Floating point compare */
6748                disas_fp_compare(s, insn);
6749                break;
6750            case 2: /* [15:12] == x100 */
6751                /* Floating point data-processing (1 source) */
6752                disas_fp_1src(s, insn);
6753                break;
6754            case 3: /* [15:12] == 1000 */
6755                unallocated_encoding(s);
6756                break;
6757            default: /* [15:12] == 0000 */
6758                /* Floating point <-> integer conversions */
6759                disas_fp_int_conv(s, insn);
6760                break;
6761            }
6762            break;
6763        }
6764    }
6765}
6766
6767static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
6768                     int pos)
6769{
6770    /* Extract 64 bits from the middle of two concatenated 64 bit
6771     * vector register slices left:right. The extracted bits start
6772     * at 'pos' bits into the right (least significant) side.
6773     * We return the result in tcg_right, and guarantee not to
6774     * trash tcg_left.
6775     */
6776    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
6777    assert(pos > 0 && pos < 64);
6778
6779    tcg_gen_shri_i64(tcg_right, tcg_right, pos);
6780    tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
6781    tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
6782
6783    tcg_temp_free_i64(tcg_tmp);
6784}
6785
6786/* EXT
6787 *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
6788 * +---+---+-------------+-----+---+------+---+------+---+------+------+
6789 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
6790 * +---+---+-------------+-----+---+------+---+------+---+------+------+
6791 */
6792static void disas_simd_ext(DisasContext *s, uint32_t insn)
6793{
6794    int is_q = extract32(insn, 30, 1);
6795    int op2 = extract32(insn, 22, 2);
6796    int imm4 = extract32(insn, 11, 4);
6797    int rm = extract32(insn, 16, 5);
6798    int rn = extract32(insn, 5, 5);
6799    int rd = extract32(insn, 0, 5);
6800    int pos = imm4 << 3;
6801    TCGv_i64 tcg_resl, tcg_resh;
6802
6803    if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
6804        unallocated_encoding(s);
6805        return;
6806    }
6807
6808    if (!fp_access_check(s)) {
6809        return;
6810    }
6811
6812    tcg_resh = tcg_temp_new_i64();
6813    tcg_resl = tcg_temp_new_i64();
6814
6815    /* Vd gets bits starting at pos bits into Vm:Vn. This is
6816     * either extracting 128 bits from a 128:128 concatenation, or
6817     * extracting 64 bits from a 64:64 concatenation.
6818     */
6819    if (!is_q) {
6820        read_vec_element(s, tcg_resl, rn, 0, MO_64);
6821        if (pos != 0) {
6822            read_vec_element(s, tcg_resh, rm, 0, MO_64);
6823            do_ext64(s, tcg_resh, tcg_resl, pos);
6824        }
6825        tcg_gen_movi_i64(tcg_resh, 0);
6826    } else {
6827        TCGv_i64 tcg_hh;
6828        typedef struct {
6829            int reg;
6830            int elt;
6831        } EltPosns;
6832        EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
6833        EltPosns *elt = eltposns;
6834
6835        if (pos >= 64) {
6836            elt++;
6837            pos -= 64;
6838        }
6839
6840        read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
6841        elt++;
6842        read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
6843        elt++;
6844        if (pos != 0) {
6845            do_ext64(s, tcg_resh, tcg_resl, pos);
6846            tcg_hh = tcg_temp_new_i64();
6847            read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
6848            do_ext64(s, tcg_hh, tcg_resh, pos);
6849            tcg_temp_free_i64(tcg_hh);
6850        }
6851    }
6852
6853    write_vec_element(s, tcg_resl, rd, 0, MO_64);
6854    tcg_temp_free_i64(tcg_resl);
6855    write_vec_element(s, tcg_resh, rd, 1, MO_64);
6856    tcg_temp_free_i64(tcg_resh);
6857}
6858
6859/* TBL/TBX
6860 *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
6861 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
6862 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
6863 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
6864 */
6865static void disas_simd_tb(DisasContext *s, uint32_t insn)
6866{
6867    int op2 = extract32(insn, 22, 2);
6868    int is_q = extract32(insn, 30, 1);
6869    int rm = extract32(insn, 16, 5);
6870    int rn = extract32(insn, 5, 5);
6871    int rd = extract32(insn, 0, 5);
6872    int is_tblx = extract32(insn, 12, 1);
6873    int len = extract32(insn, 13, 2);
6874    TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
6875    TCGv_i32 tcg_regno, tcg_numregs;
6876
6877    if (op2 != 0) {
6878        unallocated_encoding(s);
6879        return;
6880    }
6881
6882    if (!fp_access_check(s)) {
6883        return;
6884    }
6885
6886    /* This does a table lookup: for every byte element in the input
6887     * we index into a table formed from up to four vector registers,
6888     * and then the output is the result of the lookups. Our helper
6889     * function does the lookup operation for a single 64 bit part of
6890     * the input.
6891     */
6892    tcg_resl = tcg_temp_new_i64();
6893    tcg_resh = tcg_temp_new_i64();
6894
6895    if (is_tblx) {
6896        read_vec_element(s, tcg_resl, rd, 0, MO_64);
6897    } else {
6898        tcg_gen_movi_i64(tcg_resl, 0);
6899    }
6900    if (is_tblx && is_q) {
6901        read_vec_element(s, tcg_resh, rd, 1, MO_64);
6902    } else {
6903        tcg_gen_movi_i64(tcg_resh, 0);
6904    }
6905
6906    tcg_idx = tcg_temp_new_i64();
6907    tcg_regno = tcg_const_i32(rn);
6908    tcg_numregs = tcg_const_i32(len + 1);
6909    read_vec_element(s, tcg_idx, rm, 0, MO_64);
6910    gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
6911                        tcg_regno, tcg_numregs);
6912    if (is_q) {
6913        read_vec_element(s, tcg_idx, rm, 1, MO_64);
6914        gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
6915                            tcg_regno, tcg_numregs);
6916    }
6917    tcg_temp_free_i64(tcg_idx);
6918    tcg_temp_free_i32(tcg_regno);
6919    tcg_temp_free_i32(tcg_numregs);
6920
6921    write_vec_element(s, tcg_resl, rd, 0, MO_64);
6922    tcg_temp_free_i64(tcg_resl);
6923    write_vec_element(s, tcg_resh, rd, 1, MO_64);
6924    tcg_temp_free_i64(tcg_resh);
6925}
6926
6927/* ZIP/UZP/TRN
6928 *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
6929 * +---+---+-------------+------+---+------+---+------------------+------+
6930 * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
6931 * +---+---+-------------+------+---+------+---+------------------+------+
6932 */
6933static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
6934{
6935    int rd = extract32(insn, 0, 5);
6936    int rn = extract32(insn, 5, 5);
6937    int rm = extract32(insn, 16, 5);
6938    int size = extract32(insn, 22, 2);
6939    /* opc field bits [1:0] indicate ZIP/UZP/TRN;
6940     * bit 2 indicates 1 vs 2 variant of the insn.
6941     */
6942    int opcode = extract32(insn, 12, 2);
6943    bool part = extract32(insn, 14, 1);
6944    bool is_q = extract32(insn, 30, 1);
6945    int esize = 8 << size;
6946    int i, ofs;
6947    int datasize = is_q ? 128 : 64;
6948    int elements = datasize / esize;
6949    TCGv_i64 tcg_res, tcg_resl, tcg_resh;
6950
6951    if (opcode == 0 || (size == 3 && !is_q)) {
6952        unallocated_encoding(s);
6953        return;
6954    }
6955
6956    if (!fp_access_check(s)) {
6957        return;
6958    }
6959
6960    tcg_resl = tcg_const_i64(0);
6961    tcg_resh = tcg_const_i64(0);
6962    tcg_res = tcg_temp_new_i64();
6963
6964    for (i = 0; i < elements; i++) {
6965        switch (opcode) {
6966        case 1: /* UZP1/2 */
6967        {
6968            int midpoint = elements / 2;
6969            if (i < midpoint) {
6970                read_vec_element(s, tcg_res, rn, 2 * i + part, size);
6971            } else {
6972                read_vec_element(s, tcg_res, rm,
6973                                 2 * (i - midpoint) + part, size);
6974            }
6975            break;
6976        }
6977        case 2: /* TRN1/2 */
6978            if (i & 1) {
6979                read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
6980            } else {
6981                read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
6982            }
6983            break;
6984        case 3: /* ZIP1/2 */
6985        {
6986            int base = part * elements / 2;
6987            if (i & 1) {
6988                read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
6989            } else {
6990                read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
6991            }
6992            break;
6993        }
6994        default:
6995            g_assert_not_reached();
6996        }
6997
6998        ofs = i * esize;
6999        if (ofs < 64) {
7000            tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
7001            tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
7002        } else {
7003            tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
7004            tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
7005        }
7006    }
7007
7008    tcg_temp_free_i64(tcg_res);
7009
7010    write_vec_element(s, tcg_resl, rd, 0, MO_64);
7011    tcg_temp_free_i64(tcg_resl);
7012    write_vec_element(s, tcg_resh, rd, 1, MO_64);
7013    tcg_temp_free_i64(tcg_resh);
7014}
7015
7016/*
7017 * do_reduction_op helper
7018 *
7019 * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7020 * important for correct NaN propagation that we do these
7021 * operations in exactly the order specified by the pseudocode.
7022 *
7023 * This is a recursive function, TCG temps should be freed by the
7024 * calling function once it is done with the values.
7025 */
7026static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
7027                                int esize, int size, int vmap, TCGv_ptr fpst)
7028{
7029    if (esize == size) {
7030        int element;
7031        MemOp msize = esize == 16 ? MO_16 : MO_32;
7032        TCGv_i32 tcg_elem;
7033
7034        /* We should have one register left here */
7035        assert(ctpop8(vmap) == 1);
7036        element = ctz32(vmap);
7037        assert(element < 8);
7038
7039        tcg_elem = tcg_temp_new_i32();
7040        read_vec_element_i32(s, tcg_elem, rn, element, msize);
7041        return tcg_elem;
7042    } else {
7043        int bits = size / 2;
7044        int shift = ctpop8(vmap) / 2;
7045        int vmap_lo = (vmap >> shift) & vmap;
7046        int vmap_hi = (vmap & ~vmap_lo);
7047        TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7048
7049        tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
7050        tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
7051        tcg_res = tcg_temp_new_i32();
7052
7053        switch (fpopcode) {
7054        case 0x0c: /* fmaxnmv half-precision */
7055            gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7056            break;
7057        case 0x0f: /* fmaxv half-precision */
7058            gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
7059            break;
7060        case 0x1c: /* fminnmv half-precision */
7061            gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7062            break;
7063        case 0x1f: /* fminv half-precision */
7064            gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
7065            break;
7066        case 0x2c: /* fmaxnmv */
7067            gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
7068            break;
7069        case 0x2f: /* fmaxv */
7070            gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
7071            break;
7072        case 0x3c: /* fminnmv */
7073            gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
7074            break;
7075        case 0x3f: /* fminv */
7076            gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
7077            break;
7078        default:
7079            g_assert_not_reached();
7080        }
7081
7082        tcg_temp_free_i32(tcg_hi);
7083        tcg_temp_free_i32(tcg_lo);
7084        return tcg_res;
7085    }
7086}
7087
7088/* AdvSIMD across lanes
7089 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7090 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7091 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7092 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7093 */
7094static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
7095{
7096    int rd = extract32(insn, 0, 5);
7097    int rn = extract32(insn, 5, 5);
7098    int size = extract32(insn, 22, 2);
7099    int opcode = extract32(insn, 12, 5);
7100    bool is_q = extract32(insn, 30, 1);
7101    bool is_u = extract32(insn, 29, 1);
7102    bool is_fp = false;
7103    bool is_min = false;
7104    int esize;
7105    int elements;
7106    int i;
7107    TCGv_i64 tcg_res, tcg_elt;
7108
7109    switch (opcode) {
7110    case 0x1b: /* ADDV */
7111        if (is_u) {
7112            unallocated_encoding(s);
7113            return;
7114        }
7115        /* fall through */
7116    case 0x3: /* SADDLV, UADDLV */
7117    case 0xa: /* SMAXV, UMAXV */
7118    case 0x1a: /* SMINV, UMINV */
7119        if (size == 3 || (size == 2 && !is_q)) {
7120            unallocated_encoding(s);
7121            return;
7122        }
7123        break;
7124    case 0xc: /* FMAXNMV, FMINNMV */
7125    case 0xf: /* FMAXV, FMINV */
7126        /* Bit 1 of size field encodes min vs max and the actual size
7127         * depends on the encoding of the U bit. If not set (and FP16
7128         * enabled) then we do half-precision float instead of single
7129         * precision.
7130         */
7131        is_min = extract32(size, 1, 1);
7132        is_fp = true;
7133        if (!is_u && dc_isar_feature(aa64_fp16, s)) {
7134            size = 1;
7135        } else if (!is_u || !is_q || extract32(size, 0, 1)) {
7136            unallocated_encoding(s);
7137            return;
7138        } else {
7139            size = 2;
7140        }
7141        break;
7142    default:
7143        unallocated_encoding(s);
7144        return;
7145    }
7146
7147    if (!fp_access_check(s)) {
7148        return;
7149    }
7150
7151    esize = 8 << size;
7152    elements = (is_q ? 128 : 64) / esize;
7153
7154    tcg_res = tcg_temp_new_i64();
7155    tcg_elt = tcg_temp_new_i64();
7156
7157    /* These instructions operate across all lanes of a vector
7158     * to produce a single result. We can guarantee that a 64
7159     * bit intermediate is sufficient:
7160     *  + for [US]ADDLV the maximum element size is 32 bits, and
7161     *    the result type is 64 bits
7162     *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
7163     *    same as the element size, which is 32 bits at most
7164     * For the integer operations we can choose to work at 64
7165     * or 32 bits and truncate at the end; for simplicity
7166     * we use 64 bits always. The floating point
7167     * ops do require 32 bit intermediates, though.
7168     */
7169    if (!is_fp) {
7170        read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
7171
7172        for (i = 1; i < elements; i++) {
7173            read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
7174
7175            switch (opcode) {
7176            case 0x03: /* SADDLV / UADDLV */
7177            case 0x1b: /* ADDV */
7178                tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
7179                break;
7180            case 0x0a: /* SMAXV / UMAXV */
7181                if (is_u) {
7182                    tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
7183                } else {
7184                    tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
7185                }
7186                break;
7187            case 0x1a: /* SMINV / UMINV */
7188                if (is_u) {
7189                    tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
7190                } else {
7191                    tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
7192                }
7193                break;
7194            default:
7195                g_assert_not_reached();
7196            }
7197
7198        }
7199    } else {
7200        /* Floating point vector reduction ops which work across 32
7201         * bit (single) or 16 bit (half-precision) intermediates.
7202         * Note that correct NaN propagation requires that we do these
7203         * operations in exactly the order specified by the pseudocode.
7204         */
7205        TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
7206        int fpopcode = opcode | is_min << 4 | is_u << 5;
7207        int vmap = (1 << elements) - 1;
7208        TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
7209                                             (is_q ? 128 : 64), vmap, fpst);
7210        tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
7211        tcg_temp_free_i32(tcg_res32);
7212        tcg_temp_free_ptr(fpst);
7213    }
7214
7215    tcg_temp_free_i64(tcg_elt);
7216
7217    /* Now truncate the result to the width required for the final output */
7218    if (opcode == 0x03) {
7219        /* SADDLV, UADDLV: result is 2*esize */
7220        size++;
7221    }
7222
7223    switch (size) {
7224    case 0:
7225        tcg_gen_ext8u_i64(tcg_res, tcg_res);
7226        break;
7227    case 1:
7228        tcg_gen_ext16u_i64(tcg_res, tcg_res);
7229        break;
7230    case 2:
7231        tcg_gen_ext32u_i64(tcg_res, tcg_res);
7232        break;
7233    case 3:
7234        break;
7235    default:
7236        g_assert_not_reached();
7237    }
7238
7239    write_fp_dreg(s, rd, tcg_res);
7240    tcg_temp_free_i64(tcg_res);
7241}
7242
7243/* DUP (Element, Vector)
7244 *
7245 *  31  30   29              21 20    16 15        10  9    5 4    0
7246 * +---+---+-------------------+--------+-------------+------+------+
7247 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7248 * +---+---+-------------------+--------+-------------+------+------+
7249 *
7250 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7251 */
7252static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7253                             int imm5)
7254{
7255    int size = ctz32(imm5);
7256    int index = imm5 >> (size + 1);
7257
7258    if (size > 3 || (size == 3 && !is_q)) {
7259        unallocated_encoding(s);
7260        return;
7261    }
7262
7263    if (!fp_access_check(s)) {
7264        return;
7265    }
7266
7267    tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7268                         vec_reg_offset(s, rn, index, size),
7269                         is_q ? 16 : 8, vec_full_reg_size(s));
7270}
7271
7272/* DUP (element, scalar)
7273 *  31                   21 20    16 15        10  9    5 4    0
7274 * +-----------------------+--------+-------------+------+------+
7275 * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7276 * +-----------------------+--------+-------------+------+------+
7277 */
7278static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7279                              int imm5)
7280{
7281    int size = ctz32(imm5);
7282    int index;
7283    TCGv_i64 tmp;
7284
7285    if (size > 3) {
7286        unallocated_encoding(s);
7287        return;
7288    }
7289
7290    if (!fp_access_check(s)) {
7291        return;
7292    }
7293
7294    index = imm5 >> (size + 1);
7295
7296    /* This instruction just extracts the specified element and
7297     * zero-extends it into the bottom of the destination register.
7298     */
7299    tmp = tcg_temp_new_i64();
7300    read_vec_element(s, tmp, rn, index, size);
7301    write_fp_dreg(s, rd, tmp);
7302    tcg_temp_free_i64(tmp);
7303}
7304
7305/* DUP (General)
7306 *
7307 *  31  30   29              21 20    16 15        10  9    5 4    0
7308 * +---+---+-------------------+--------+-------------+------+------+
7309 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
7310 * +---+---+-------------------+--------+-------------+------+------+
7311 *
7312 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7313 */
7314static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7315                             int imm5)
7316{
7317    int size = ctz32(imm5);
7318    uint32_t dofs, oprsz, maxsz;
7319
7320    if (size > 3 || ((size == 3) && !is_q)) {
7321        unallocated_encoding(s);
7322        return;
7323    }
7324
7325    if (!fp_access_check(s)) {
7326        return;
7327    }
7328
7329    dofs = vec_full_reg_offset(s, rd);
7330    oprsz = is_q ? 16 : 8;
7331    maxsz = vec_full_reg_size(s);
7332
7333    tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7334}
7335
7336/* INS (Element)
7337 *
7338 *  31                   21 20    16 15  14    11  10 9    5 4    0
7339 * +-----------------------+--------+------------+---+------+------+
7340 * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
7341 * +-----------------------+--------+------------+---+------+------+
7342 *
7343 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7344 * index: encoded in imm5<4:size+1>
7345 */
7346static void handle_simd_inse(DisasContext *s, int rd, int rn,
7347                             int imm4, int imm5)
7348{
7349    int size = ctz32(imm5);
7350    int src_index, dst_index;
7351    TCGv_i64 tmp;
7352
7353    if (size > 3) {
7354        unallocated_encoding(s);
7355        return;
7356    }
7357
7358    if (!fp_access_check(s)) {
7359        return;
7360    }
7361
7362    dst_index = extract32(imm5, 1+size, 5);
7363    src_index = extract32(imm4, size, 4);
7364
7365    tmp = tcg_temp_new_i64();
7366
7367    read_vec_element(s, tmp, rn, src_index, size);
7368    write_vec_element(s, tmp, rd, dst_index, size);
7369
7370    tcg_temp_free_i64(tmp);
7371}
7372
7373
7374/* INS (General)
7375 *
7376 *  31                   21 20    16 15        10  9    5 4    0
7377 * +-----------------------+--------+-------------+------+------+
7378 * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
7379 * +-----------------------+--------+-------------+------+------+
7380 *
7381 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7382 * index: encoded in imm5<4:size+1>
7383 */
7384static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
7385{
7386    int size = ctz32(imm5);
7387    int idx;
7388
7389    if (size > 3) {
7390        unallocated_encoding(s);
7391        return;
7392    }
7393
7394    if (!fp_access_check(s)) {
7395        return;
7396    }
7397
7398    idx = extract32(imm5, 1 + size, 4 - size);
7399    write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
7400}
7401
7402/*
7403 * UMOV (General)
7404 * SMOV (General)
7405 *
7406 *  31  30   29              21 20    16 15    12   10 9    5 4    0
7407 * +---+---+-------------------+--------+-------------+------+------+
7408 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
7409 * +---+---+-------------------+--------+-------------+------+------+
7410 *
7411 * U: unsigned when set
7412 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7413 */
7414static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
7415                                  int rn, int rd, int imm5)
7416{
7417    int size = ctz32(imm5);
7418    int element;
7419    TCGv_i64 tcg_rd;
7420
7421    /* Check for UnallocatedEncodings */
7422    if (is_signed) {
7423        if (size > 2 || (size == 2 && !is_q)) {
7424            unallocated_encoding(s);
7425            return;
7426        }
7427    } else {
7428        if (size > 3
7429            || (size < 3 && is_q)
7430            || (size == 3 && !is_q)) {
7431            unallocated_encoding(s);
7432            return;
7433        }
7434    }
7435
7436    if (!fp_access_check(s)) {
7437        return;
7438    }
7439
7440    element = extract32(imm5, 1+size, 4);
7441
7442    tcg_rd = cpu_reg(s, rd);
7443    read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
7444    if (is_signed && !is_q) {
7445        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7446    }
7447}
7448
7449/* AdvSIMD copy
7450 *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
7451 * +---+---+----+-----------------+------+---+------+---+------+------+
7452 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7453 * +---+---+----+-----------------+------+---+------+---+------+------+
7454 */
7455static void disas_simd_copy(DisasContext *s, uint32_t insn)
7456{
7457    int rd = extract32(insn, 0, 5);
7458    int rn = extract32(insn, 5, 5);
7459    int imm4 = extract32(insn, 11, 4);
7460    int op = extract32(insn, 29, 1);
7461    int is_q = extract32(insn, 30, 1);
7462    int imm5 = extract32(insn, 16, 5);
7463
7464    if (op) {
7465        if (is_q) {
7466            /* INS (element) */
7467            handle_simd_inse(s, rd, rn, imm4, imm5);
7468        } else {
7469            unallocated_encoding(s);
7470        }
7471    } else {
7472        switch (imm4) {
7473        case 0:
7474            /* DUP (element - vector) */
7475            handle_simd_dupe(s, is_q, rd, rn, imm5);
7476            break;
7477        case 1:
7478            /* DUP (general) */
7479            handle_simd_dupg(s, is_q, rd, rn, imm5);
7480            break;
7481        case 3:
7482            if (is_q) {
7483                /* INS (general) */
7484                handle_simd_insg(s, rd, rn, imm5);
7485            } else {
7486                unallocated_encoding(s);
7487            }
7488            break;
7489        case 5:
7490        case 7:
7491            /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
7492            handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
7493            break;
7494        default:
7495            unallocated_encoding(s);
7496            break;
7497        }
7498    }
7499}
7500
7501/* AdvSIMD modified immediate
7502 *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
7503 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7504 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
7505 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7506 *
7507 * There are a number of operations that can be carried out here:
7508 *   MOVI - move (shifted) imm into register
7509 *   MVNI - move inverted (shifted) imm into register
7510 *   ORR  - bitwise OR of (shifted) imm with register
7511 *   BIC  - bitwise clear of (shifted) imm with register
7512 * With ARMv8.2 we also have:
7513 *   FMOV half-precision
7514 */
7515static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
7516{
7517    int rd = extract32(insn, 0, 5);
7518    int cmode = extract32(insn, 12, 4);
7519    int cmode_3_1 = extract32(cmode, 1, 3);
7520    int cmode_0 = extract32(cmode, 0, 1);
7521    int o2 = extract32(insn, 11, 1);
7522    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
7523    bool is_neg = extract32(insn, 29, 1);
7524    bool is_q = extract32(insn, 30, 1);
7525    uint64_t imm = 0;
7526
7527    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
7528        /* Check for FMOV (vector, immediate) - half-precision */
7529        if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
7530            unallocated_encoding(s);
7531            return;
7532        }
7533    }
7534
7535    if (!fp_access_check(s)) {
7536        return;
7537    }
7538
7539    /* See AdvSIMDExpandImm() in ARM ARM */
7540    switch (cmode_3_1) {
7541    case 0: /* Replicate(Zeros(24):imm8, 2) */
7542    case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
7543    case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
7544    case 3: /* Replicate(imm8:Zeros(24), 2) */
7545    {
7546        int shift = cmode_3_1 * 8;
7547        imm = bitfield_replicate(abcdefgh << shift, 32);
7548        break;
7549    }
7550    case 4: /* Replicate(Zeros(8):imm8, 4) */
7551    case 5: /* Replicate(imm8:Zeros(8), 4) */
7552    {
7553        int shift = (cmode_3_1 & 0x1) * 8;
7554        imm = bitfield_replicate(abcdefgh << shift, 16);
7555        break;
7556    }
7557    case 6:
7558        if (cmode_0) {
7559            /* Replicate(Zeros(8):imm8:Ones(16), 2) */
7560            imm = (abcdefgh << 16) | 0xffff;
7561        } else {
7562            /* Replicate(Zeros(16):imm8:Ones(8), 2) */
7563            imm = (abcdefgh << 8) | 0xff;
7564        }
7565        imm = bitfield_replicate(imm, 32);
7566        break;
7567    case 7:
7568        if (!cmode_0 && !is_neg) {
7569            imm = bitfield_replicate(abcdefgh, 8);
7570        } else if (!cmode_0 && is_neg) {
7571            int i;
7572            imm = 0;
7573            for (i = 0; i < 8; i++) {
7574                if ((abcdefgh) & (1 << i)) {
7575                    imm |= 0xffULL << (i * 8);
7576                }
7577            }
7578        } else if (cmode_0) {
7579            if (is_neg) {
7580                imm = (abcdefgh & 0x3f) << 48;
7581                if (abcdefgh & 0x80) {
7582                    imm |= 0x8000000000000000ULL;
7583                }
7584                if (abcdefgh & 0x40) {
7585                    imm |= 0x3fc0000000000000ULL;
7586                } else {
7587                    imm |= 0x4000000000000000ULL;
7588                }
7589            } else {
7590                if (o2) {
7591                    /* FMOV (vector, immediate) - half-precision */
7592                    imm = vfp_expand_imm(MO_16, abcdefgh);
7593                    /* now duplicate across the lanes */
7594                    imm = bitfield_replicate(imm, 16);
7595                } else {
7596                    imm = (abcdefgh & 0x3f) << 19;
7597                    if (abcdefgh & 0x80) {
7598                        imm |= 0x80000000;
7599                    }
7600                    if (abcdefgh & 0x40) {
7601                        imm |= 0x3e000000;
7602                    } else {
7603                        imm |= 0x40000000;
7604                    }
7605                    imm |= (imm << 32);
7606                }
7607            }
7608        }
7609        break;
7610    default:
7611        fprintf(stderr, "%s: cmode_3_1: %x\n", __func__, cmode_3_1);
7612        g_assert_not_reached();
7613    }
7614
7615    if (cmode_3_1 != 7 && is_neg) {
7616        imm = ~imm;
7617    }
7618
7619    if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
7620        /* MOVI or MVNI, with MVNI negation handled above.  */
7621        tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), is_q ? 16 : 8,
7622                            vec_full_reg_size(s), imm);
7623    } else {
7624        /* ORR or BIC, with BIC negation to AND handled above.  */
7625        if (is_neg) {
7626            gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
7627        } else {
7628            gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
7629        }
7630    }
7631}
7632
7633/* AdvSIMD scalar copy
7634 *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
7635 * +-----+----+-----------------+------+---+------+---+------+------+
7636 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7637 * +-----+----+-----------------+------+---+------+---+------+------+
7638 */
7639static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
7640{
7641    int rd = extract32(insn, 0, 5);
7642    int rn = extract32(insn, 5, 5);
7643    int imm4 = extract32(insn, 11, 4);
7644    int imm5 = extract32(insn, 16, 5);
7645    int op = extract32(insn, 29, 1);
7646
7647    if (op != 0 || imm4 != 0) {
7648        unallocated_encoding(s);
7649        return;
7650    }
7651
7652    /* DUP (element, scalar) */
7653    handle_simd_dupes(s, rd, rn, imm5);
7654}
7655
7656/* AdvSIMD scalar pairwise
7657 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7658 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7659 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7660 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7661 */
7662static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
7663{
7664    int u = extract32(insn, 29, 1);
7665    int size = extract32(insn, 22, 2);
7666    int opcode = extract32(insn, 12, 5);
7667    int rn = extract32(insn, 5, 5);
7668    int rd = extract32(insn, 0, 5);
7669    TCGv_ptr fpst;
7670
7671    /* For some ops (the FP ones), size[1] is part of the encoding.
7672     * For ADDP strictly it is not but size[1] is always 1 for valid
7673     * encodings.
7674     */
7675    opcode |= (extract32(size, 1, 1) << 5);
7676
7677    switch (opcode) {
7678    case 0x3b: /* ADDP */
7679        if (u || size != 3) {
7680            unallocated_encoding(s);
7681            return;
7682        }
7683        if (!fp_access_check(s)) {
7684            return;
7685        }
7686
7687        fpst = NULL;
7688        break;
7689    case 0xc: /* FMAXNMP */
7690    case 0xd: /* FADDP */
7691    case 0xf: /* FMAXP */
7692    case 0x2c: /* FMINNMP */
7693    case 0x2f: /* FMINP */
7694        /* FP op, size[0] is 32 or 64 bit*/
7695        if (!u) {
7696            if (!dc_isar_feature(aa64_fp16, s)) {
7697                unallocated_encoding(s);
7698                return;
7699            } else {
7700                size = MO_16;
7701            }
7702        } else {
7703            size = extract32(size, 0, 1) ? MO_64 : MO_32;
7704        }
7705
7706        if (!fp_access_check(s)) {
7707            return;
7708        }
7709
7710        fpst = get_fpstatus_ptr(size == MO_16);
7711        break;
7712    default:
7713        unallocated_encoding(s);
7714        return;
7715    }
7716
7717    if (size == MO_64) {
7718        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7719        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7720        TCGv_i64 tcg_res = tcg_temp_new_i64();
7721
7722        read_vec_element(s, tcg_op1, rn, 0, MO_64);
7723        read_vec_element(s, tcg_op2, rn, 1, MO_64);
7724
7725        switch (opcode) {
7726        case 0x3b: /* ADDP */
7727            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
7728            break;
7729        case 0xc: /* FMAXNMP */
7730            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7731            break;
7732        case 0xd: /* FADDP */
7733            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7734            break;
7735        case 0xf: /* FMAXP */
7736            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7737            break;
7738        case 0x2c: /* FMINNMP */
7739            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7740            break;
7741        case 0x2f: /* FMINP */
7742            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7743            break;
7744        default:
7745            g_assert_not_reached();
7746        }
7747
7748        write_fp_dreg(s, rd, tcg_res);
7749
7750        tcg_temp_free_i64(tcg_op1);
7751        tcg_temp_free_i64(tcg_op2);
7752        tcg_temp_free_i64(tcg_res);
7753    } else {
7754        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7755        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7756        TCGv_i32 tcg_res = tcg_temp_new_i32();
7757
7758        read_vec_element_i32(s, tcg_op1, rn, 0, size);
7759        read_vec_element_i32(s, tcg_op2, rn, 1, size);
7760
7761        if (size == MO_16) {
7762            switch (opcode) {
7763            case 0xc: /* FMAXNMP */
7764                gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7765                break;
7766            case 0xd: /* FADDP */
7767                gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
7768                break;
7769            case 0xf: /* FMAXP */
7770                gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
7771                break;
7772            case 0x2c: /* FMINNMP */
7773                gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7774                break;
7775            case 0x2f: /* FMINP */
7776                gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
7777                break;
7778            default:
7779                g_assert_not_reached();
7780            }
7781        } else {
7782            switch (opcode) {
7783            case 0xc: /* FMAXNMP */
7784                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7785                break;
7786            case 0xd: /* FADDP */
7787                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7788                break;
7789            case 0xf: /* FMAXP */
7790                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7791                break;
7792            case 0x2c: /* FMINNMP */
7793                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7794                break;
7795            case 0x2f: /* FMINP */
7796                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7797                break;
7798            default:
7799                g_assert_not_reached();
7800            }
7801        }
7802
7803        write_fp_sreg(s, rd, tcg_res);
7804
7805        tcg_temp_free_i32(tcg_op1);
7806        tcg_temp_free_i32(tcg_op2);
7807        tcg_temp_free_i32(tcg_res);
7808    }
7809
7810    if (fpst) {
7811        tcg_temp_free_ptr(fpst);
7812    }
7813}
7814
7815/*
7816 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
7817 *
7818 * This code is handles the common shifting code and is used by both
7819 * the vector and scalar code.
7820 */
7821static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
7822                                    TCGv_i64 tcg_rnd, bool accumulate,
7823                                    bool is_u, int size, int shift)
7824{
7825    bool extended_result = false;
7826    bool round = tcg_rnd != NULL;
7827    int ext_lshift = 0;
7828    TCGv_i64 tcg_src_hi;
7829
7830    if (round && size == 3) {
7831        extended_result = true;
7832        ext_lshift = 64 - shift;
7833        tcg_src_hi = tcg_temp_new_i64();
7834    } else if (shift == 64) {
7835        if (!accumulate && is_u) {
7836            /* result is zero */
7837            tcg_gen_movi_i64(tcg_res, 0);
7838            return;
7839        }
7840    }
7841
7842    /* Deal with the rounding step */
7843    if (round) {
7844        if (extended_result) {
7845            TCGv_i64 tcg_zero = tcg_const_i64(0);
7846            if (!is_u) {
7847                /* take care of sign extending tcg_res */
7848                tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
7849                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7850                                 tcg_src, tcg_src_hi,
7851                                 tcg_rnd, tcg_zero);
7852            } else {
7853                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7854                                 tcg_src, tcg_zero,
7855                                 tcg_rnd, tcg_zero);
7856            }
7857            tcg_temp_free_i64(tcg_zero);
7858        } else {
7859            tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
7860        }
7861    }
7862
7863    /* Now do the shift right */
7864    if (round && extended_result) {
7865        /* extended case, >64 bit precision required */
7866        if (ext_lshift == 0) {
7867            /* special case, only high bits matter */
7868            tcg_gen_mov_i64(tcg_src, tcg_src_hi);
7869        } else {
7870            tcg_gen_shri_i64(tcg_src, tcg_src, shift);
7871            tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
7872            tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
7873        }
7874    } else {
7875        if (is_u) {
7876            if (shift == 64) {
7877                /* essentially shifting in 64 zeros */
7878                tcg_gen_movi_i64(tcg_src, 0);
7879            } else {
7880                tcg_gen_shri_i64(tcg_src, tcg_src, shift);
7881            }
7882        } else {
7883            if (shift == 64) {
7884                /* effectively extending the sign-bit */
7885                tcg_gen_sari_i64(tcg_src, tcg_src, 63);
7886            } else {
7887                tcg_gen_sari_i64(tcg_src, tcg_src, shift);
7888            }
7889        }
7890    }
7891
7892    if (accumulate) {
7893        tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
7894    } else {
7895        tcg_gen_mov_i64(tcg_res, tcg_src);
7896    }
7897
7898    if (extended_result) {
7899        tcg_temp_free_i64(tcg_src_hi);
7900    }
7901}
7902
7903/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
7904static void handle_scalar_simd_shri(DisasContext *s,
7905                                    bool is_u, int immh, int immb,
7906                                    int opcode, int rn, int rd)
7907{
7908    const int size = 3;
7909    int immhb = immh << 3 | immb;
7910    int shift = 2 * (8 << size) - immhb;
7911    bool accumulate = false;
7912    bool round = false;
7913    bool insert = false;
7914    TCGv_i64 tcg_rn;
7915    TCGv_i64 tcg_rd;
7916    TCGv_i64 tcg_round;
7917
7918    if (!extract32(immh, 3, 1)) {
7919        unallocated_encoding(s);
7920        return;
7921    }
7922
7923    if (!fp_access_check(s)) {
7924        return;
7925    }
7926
7927    switch (opcode) {
7928    case 0x02: /* SSRA / USRA (accumulate) */
7929        accumulate = true;
7930        break;
7931    case 0x04: /* SRSHR / URSHR (rounding) */
7932        round = true;
7933        break;
7934    case 0x06: /* SRSRA / URSRA (accum + rounding) */
7935        accumulate = round = true;
7936        break;
7937    case 0x08: /* SRI */
7938        insert = true;
7939        break;
7940    }
7941
7942    if (round) {
7943        uint64_t round_const = 1ULL << (shift - 1);
7944        tcg_round = tcg_const_i64(round_const);
7945    } else {
7946        tcg_round = NULL;
7947    }
7948
7949    tcg_rn = read_fp_dreg(s, rn);
7950    tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
7951
7952    if (insert) {
7953        /* shift count same as element size is valid but does nothing;
7954         * special case to avoid potential shift by 64.
7955         */
7956        int esize = 8 << size;
7957        if (shift != esize) {
7958            tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
7959            tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
7960        }
7961    } else {
7962        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
7963                                accumulate, is_u, size, shift);
7964    }
7965
7966    write_fp_dreg(s, rd, tcg_rd);
7967
7968    tcg_temp_free_i64(tcg_rn);
7969    tcg_temp_free_i64(tcg_rd);
7970    if (round) {
7971        tcg_temp_free_i64(tcg_round);
7972    }
7973}
7974
7975/* SHL/SLI - Scalar shift left */
7976static void handle_scalar_simd_shli(DisasContext *s, bool insert,
7977                                    int immh, int immb, int opcode,
7978                                    int rn, int rd)
7979{
7980    int size = 32 - clz32(immh) - 1;
7981    int immhb = immh << 3 | immb;
7982    int shift = immhb - (8 << size);
7983    TCGv_i64 tcg_rn = new_tmp_a64(s);
7984    TCGv_i64 tcg_rd = new_tmp_a64(s);
7985
7986    if (!extract32(immh, 3, 1)) {
7987        unallocated_encoding(s);
7988        return;
7989    }
7990
7991    if (!fp_access_check(s)) {
7992        return;
7993    }
7994
7995    tcg_rn = read_fp_dreg(s, rn);
7996    tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
7997
7998    if (insert) {
7999        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
8000    } else {
8001        tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
8002    }
8003
8004    write_fp_dreg(s, rd, tcg_rd);
8005
8006    tcg_temp_free_i64(tcg_rn);
8007    tcg_temp_free_i64(tcg_rd);
8008}
8009
8010/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
8011 * (signed/unsigned) narrowing */
8012static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
8013                                   bool is_u_shift, bool is_u_narrow,
8014                                   int immh, int immb, int opcode,
8015                                   int rn, int rd)
8016{
8017    int immhb = immh << 3 | immb;
8018    int size = 32 - clz32(immh) - 1;
8019    int esize = 8 << size;
8020    int shift = (2 * esize) - immhb;
8021    int elements = is_scalar ? 1 : (64 / esize);
8022    bool round = extract32(opcode, 0, 1);
8023    MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
8024    TCGv_i64 tcg_rn, tcg_rd, tcg_round;
8025    TCGv_i32 tcg_rd_narrowed;
8026    TCGv_i64 tcg_final;
8027
8028    static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
8029        { gen_helper_neon_narrow_sat_s8,
8030          gen_helper_neon_unarrow_sat8 },
8031        { gen_helper_neon_narrow_sat_s16,
8032          gen_helper_neon_unarrow_sat16 },
8033        { gen_helper_neon_narrow_sat_s32,
8034          gen_helper_neon_unarrow_sat32 },
8035        { NULL, NULL },
8036    };
8037    static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
8038        gen_helper_neon_narrow_sat_u8,
8039        gen_helper_neon_narrow_sat_u16,
8040        gen_helper_neon_narrow_sat_u32,
8041        NULL
8042    };
8043    NeonGenNarrowEnvFn *narrowfn;
8044
8045    int i;
8046
8047    assert(size < 4);
8048
8049    if (extract32(immh, 3, 1)) {
8050        unallocated_encoding(s);
8051        return;
8052    }
8053
8054    if (!fp_access_check(s)) {
8055        return;
8056    }
8057
8058    if (is_u_shift) {
8059        narrowfn = unsigned_narrow_fns[size];
8060    } else {
8061        narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
8062    }
8063
8064    tcg_rn = tcg_temp_new_i64();
8065    tcg_rd = tcg_temp_new_i64();
8066    tcg_rd_narrowed = tcg_temp_new_i32();
8067    tcg_final = tcg_const_i64(0);
8068
8069    if (round) {
8070        uint64_t round_const = 1ULL << (shift - 1);
8071        tcg_round = tcg_const_i64(round_const);
8072    } else {
8073        tcg_round = NULL;
8074    }
8075
8076    for (i = 0; i < elements; i++) {
8077        read_vec_element(s, tcg_rn, rn, i, ldop);
8078        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8079                                false, is_u_shift, size+1, shift);
8080        narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
8081        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
8082        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8083    }
8084
8085    if (!is_q) {
8086        write_vec_element(s, tcg_final, rd, 0, MO_64);
8087    } else {
8088        write_vec_element(s, tcg_final, rd, 1, MO_64);
8089    }
8090
8091    if (round) {
8092        tcg_temp_free_i64(tcg_round);
8093    }
8094    tcg_temp_free_i64(tcg_rn);
8095    tcg_temp_free_i64(tcg_rd);
8096    tcg_temp_free_i32(tcg_rd_narrowed);
8097    tcg_temp_free_i64(tcg_final);
8098
8099    clear_vec_high(s, is_q, rd);
8100}
8101
8102/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
8103static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
8104                             bool src_unsigned, bool dst_unsigned,
8105                             int immh, int immb, int rn, int rd)
8106{
8107    int immhb = immh << 3 | immb;
8108    int size = 32 - clz32(immh) - 1;
8109    int shift = immhb - (8 << size);
8110    int pass;
8111
8112    assert(immh != 0);
8113    assert(!(scalar && is_q));
8114
8115    if (!scalar) {
8116        if (!is_q && extract32(immh, 3, 1)) {
8117            unallocated_encoding(s);
8118            return;
8119        }
8120
8121        /* Since we use the variable-shift helpers we must
8122         * replicate the shift count into each element of
8123         * the tcg_shift value.
8124         */
8125        switch (size) {
8126        case 0:
8127            shift |= shift << 8;
8128            /* fall through */
8129        case 1:
8130            shift |= shift << 16;
8131            break;
8132        case 2:
8133        case 3:
8134            break;
8135        default:
8136            g_assert_not_reached();
8137        }
8138    }
8139
8140    if (!fp_access_check(s)) {
8141        return;
8142    }
8143
8144    if (size == 3) {
8145        TCGv_i64 tcg_shift = tcg_const_i64(shift);
8146        static NeonGenTwo64OpEnvFn * const fns[2][2] = {
8147            { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
8148            { NULL, gen_helper_neon_qshl_u64 },
8149        };
8150        NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
8151        int maxpass = is_q ? 2 : 1;
8152
8153        for (pass = 0; pass < maxpass; pass++) {
8154            TCGv_i64 tcg_op = tcg_temp_new_i64();
8155
8156            read_vec_element(s, tcg_op, rn, pass, MO_64);
8157            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8158            write_vec_element(s, tcg_op, rd, pass, MO_64);
8159
8160            tcg_temp_free_i64(tcg_op);
8161        }
8162        tcg_temp_free_i64(tcg_shift);
8163        clear_vec_high(s, is_q, rd);
8164    } else {
8165        TCGv_i32 tcg_shift = tcg_const_i32(shift);
8166        static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
8167            {
8168                { gen_helper_neon_qshl_s8,
8169                  gen_helper_neon_qshl_s16,
8170                  gen_helper_neon_qshl_s32 },
8171                { gen_helper_neon_qshlu_s8,
8172                  gen_helper_neon_qshlu_s16,
8173                  gen_helper_neon_qshlu_s32 }
8174            }, {
8175                { NULL, NULL, NULL },
8176                { gen_helper_neon_qshl_u8,
8177                  gen_helper_neon_qshl_u16,
8178                  gen_helper_neon_qshl_u32 }
8179            }
8180        };
8181        NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
8182        MemOp memop = scalar ? size : MO_32;
8183        int maxpass = scalar ? 1 : is_q ? 4 : 2;
8184
8185        for (pass = 0; pass < maxpass; pass++) {
8186            TCGv_i32 tcg_op = tcg_temp_new_i32();
8187
8188            read_vec_element_i32(s, tcg_op, rn, pass, memop);
8189            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8190            if (scalar) {
8191                switch (size) {
8192                case 0:
8193                    tcg_gen_ext8u_i32(tcg_op, tcg_op);
8194                    break;
8195                case 1:
8196                    tcg_gen_ext16u_i32(tcg_op, tcg_op);
8197                    break;
8198                case 2:
8199                    break;
8200                default:
8201                    g_assert_not_reached();
8202                }
8203                write_fp_sreg(s, rd, tcg_op);
8204            } else {
8205                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
8206            }
8207
8208            tcg_temp_free_i32(tcg_op);
8209        }
8210        tcg_temp_free_i32(tcg_shift);
8211
8212        if (!scalar) {
8213            clear_vec_high(s, is_q, rd);
8214        }
8215    }
8216}
8217
8218/* Common vector code for handling integer to FP conversion */
8219static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
8220                                   int elements, int is_signed,
8221                                   int fracbits, int size)
8222{
8223    TCGv_ptr tcg_fpst = get_fpstatus_ptr(size == MO_16);
8224    TCGv_i32 tcg_shift = NULL;
8225
8226    MemOp mop = size | (is_signed ? MO_SIGN : 0);
8227    int pass;
8228
8229    if (fracbits || size == MO_64) {
8230        tcg_shift = tcg_const_i32(fracbits);
8231    }
8232
8233    if (size == MO_64) {
8234        TCGv_i64 tcg_int64 = tcg_temp_new_i64();
8235        TCGv_i64 tcg_double = tcg_temp_new_i64();
8236
8237        for (pass = 0; pass < elements; pass++) {
8238            read_vec_element(s, tcg_int64, rn, pass, mop);
8239
8240            if (is_signed) {
8241                gen_helper_vfp_sqtod(tcg_double, tcg_int64,
8242                                     tcg_shift, tcg_fpst);
8243            } else {
8244                gen_helper_vfp_uqtod(tcg_double, tcg_int64,
8245                                     tcg_shift, tcg_fpst);
8246            }
8247            if (elements == 1) {
8248                write_fp_dreg(s, rd, tcg_double);
8249            } else {
8250                write_vec_element(s, tcg_double, rd, pass, MO_64);
8251            }
8252        }
8253
8254        tcg_temp_free_i64(tcg_int64);
8255        tcg_temp_free_i64(tcg_double);
8256
8257    } else {
8258        TCGv_i32 tcg_int32 = tcg_temp_new_i32();
8259        TCGv_i32 tcg_float = tcg_temp_new_i32();
8260
8261        for (pass = 0; pass < elements; pass++) {
8262            read_vec_element_i32(s, tcg_int32, rn, pass, mop);
8263
8264            switch (size) {
8265            case MO_32:
8266                if (fracbits) {
8267                    if (is_signed) {
8268                        gen_helper_vfp_sltos(tcg_float, tcg_int32,
8269                                             tcg_shift, tcg_fpst);
8270                    } else {
8271                        gen_helper_vfp_ultos(tcg_float, tcg_int32,
8272                                             tcg_shift, tcg_fpst);
8273                    }
8274                } else {
8275                    if (is_signed) {
8276                        gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
8277                    } else {
8278                        gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
8279                    }
8280                }
8281                break;
8282            case MO_16:
8283                if (fracbits) {
8284                    if (is_signed) {
8285                        gen_helper_vfp_sltoh(tcg_float, tcg_int32,
8286                                             tcg_shift, tcg_fpst);
8287                    } else {
8288                        gen_helper_vfp_ultoh(tcg_float, tcg_int32,
8289                                             tcg_shift, tcg_fpst);
8290                    }
8291                } else {
8292                    if (is_signed) {
8293                        gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
8294                    } else {
8295                        gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
8296                    }
8297                }
8298                break;
8299            default:
8300                g_assert_not_reached();
8301            }
8302
8303            if (elements == 1) {
8304                write_fp_sreg(s, rd, tcg_float);
8305            } else {
8306                write_vec_element_i32(s, tcg_float, rd, pass, size);
8307            }
8308        }
8309
8310        tcg_temp_free_i32(tcg_int32);
8311        tcg_temp_free_i32(tcg_float);
8312    }
8313
8314    tcg_temp_free_ptr(tcg_fpst);
8315    if (tcg_shift) {
8316        tcg_temp_free_i32(tcg_shift);
8317    }
8318
8319    clear_vec_high(s, elements << size == 16, rd);
8320}
8321
8322/* UCVTF/SCVTF - Integer to FP conversion */
8323static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8324                                         bool is_q, bool is_u,
8325                                         int immh, int immb, int opcode,
8326                                         int rn, int rd)
8327{
8328    int size, elements, fracbits;
8329    int immhb = immh << 3 | immb;
8330
8331    if (immh & 8) {
8332        size = MO_64;
8333        if (!is_scalar && !is_q) {
8334            unallocated_encoding(s);
8335            return;
8336        }
8337    } else if (immh & 4) {
8338        size = MO_32;
8339    } else if (immh & 2) {
8340        size = MO_16;
8341        if (!dc_isar_feature(aa64_fp16, s)) {
8342            unallocated_encoding(s);
8343            return;
8344        }
8345    } else {
8346        /* immh == 0 would be a failure of the decode logic */
8347        g_assert(immh == 1);
8348        unallocated_encoding(s);
8349        return;
8350    }
8351
8352    if (is_scalar) {
8353        elements = 1;
8354    } else {
8355        elements = (8 << is_q) >> size;
8356    }
8357    fracbits = (16 << size) - immhb;
8358
8359    if (!fp_access_check(s)) {
8360        return;
8361    }
8362
8363    handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8364}
8365
8366/* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
8367static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
8368                                         bool is_q, bool is_u,
8369                                         int immh, int immb, int rn, int rd)
8370{
8371    int immhb = immh << 3 | immb;
8372    int pass, size, fracbits;
8373    TCGv_ptr tcg_fpstatus;
8374    TCGv_i32 tcg_rmode, tcg_shift;
8375
8376    if (immh & 0x8) {
8377        size = MO_64;
8378        if (!is_scalar && !is_q) {
8379            unallocated_encoding(s);
8380            return;
8381        }
8382    } else if (immh & 0x4) {
8383        size = MO_32;
8384    } else if (immh & 0x2) {
8385        size = MO_16;
8386        if (!dc_isar_feature(aa64_fp16, s)) {
8387            unallocated_encoding(s);
8388            return;
8389        }
8390    } else {
8391        /* Should have split out AdvSIMD modified immediate earlier.  */
8392        assert(immh == 1);
8393        unallocated_encoding(s);
8394        return;
8395    }
8396
8397    if (!fp_access_check(s)) {
8398        return;
8399    }
8400
8401    assert(!(is_scalar && is_q));
8402
8403    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
8404    tcg_fpstatus = get_fpstatus_ptr(size == MO_16);
8405    gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
8406    fracbits = (16 << size) - immhb;
8407    tcg_shift = tcg_const_i32(fracbits);
8408
8409    if (size == MO_64) {
8410        int maxpass = is_scalar ? 1 : 2;
8411
8412        for (pass = 0; pass < maxpass; pass++) {
8413            TCGv_i64 tcg_op = tcg_temp_new_i64();
8414
8415            read_vec_element(s, tcg_op, rn, pass, MO_64);
8416            if (is_u) {
8417                gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8418            } else {
8419                gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8420            }
8421            write_vec_element(s, tcg_op, rd, pass, MO_64);
8422            tcg_temp_free_i64(tcg_op);
8423        }
8424        clear_vec_high(s, is_q, rd);
8425    } else {
8426        void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
8427        int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
8428
8429        switch (size) {
8430        case MO_16:
8431            if (is_u) {
8432                fn = gen_helper_vfp_touhh;
8433            } else {
8434                fn = gen_helper_vfp_toshh;
8435            }
8436            break;
8437        case MO_32:
8438            if (is_u) {
8439                fn = gen_helper_vfp_touls;
8440            } else {
8441                fn = gen_helper_vfp_tosls;
8442            }
8443            break;
8444        default:
8445            g_assert_not_reached();
8446        }
8447
8448        for (pass = 0; pass < maxpass; pass++) {
8449            TCGv_i32 tcg_op = tcg_temp_new_i32();
8450
8451            read_vec_element_i32(s, tcg_op, rn, pass, size);
8452            fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8453            if (is_scalar) {
8454                write_fp_sreg(s, rd, tcg_op);
8455            } else {
8456                write_vec_element_i32(s, tcg_op, rd, pass, size);
8457            }
8458            tcg_temp_free_i32(tcg_op);
8459        }
8460        if (!is_scalar) {
8461            clear_vec_high(s, is_q, rd);
8462        }
8463    }
8464
8465    tcg_temp_free_ptr(tcg_fpstatus);
8466    tcg_temp_free_i32(tcg_shift);
8467    gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
8468    tcg_temp_free_i32(tcg_rmode);
8469}
8470
8471/* AdvSIMD scalar shift by immediate
8472 *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
8473 * +-----+---+-------------+------+------+--------+---+------+------+
8474 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8475 * +-----+---+-------------+------+------+--------+---+------+------+
8476 *
8477 * This is the scalar version so it works on a fixed sized registers
8478 */
8479static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
8480{
8481    int rd = extract32(insn, 0, 5);
8482    int rn = extract32(insn, 5, 5);
8483    int opcode = extract32(insn, 11, 5);
8484    int immb = extract32(insn, 16, 3);
8485    int immh = extract32(insn, 19, 4);
8486    bool is_u = extract32(insn, 29, 1);
8487
8488    if (immh == 0) {
8489        unallocated_encoding(s);
8490        return;
8491    }
8492
8493    switch (opcode) {
8494    case 0x08: /* SRI */
8495        if (!is_u) {
8496            unallocated_encoding(s);
8497            return;
8498        }
8499        /* fall through */
8500    case 0x00: /* SSHR / USHR */
8501    case 0x02: /* SSRA / USRA */
8502    case 0x04: /* SRSHR / URSHR */
8503    case 0x06: /* SRSRA / URSRA */
8504        handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
8505        break;
8506    case 0x0a: /* SHL / SLI */
8507        handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
8508        break;
8509    case 0x1c: /* SCVTF, UCVTF */
8510        handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
8511                                     opcode, rn, rd);
8512        break;
8513    case 0x10: /* SQSHRUN, SQSHRUN2 */
8514    case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8515        if (!is_u) {
8516            unallocated_encoding(s);
8517            return;
8518        }
8519        handle_vec_simd_sqshrn(s, true, false, false, true,
8520                               immh, immb, opcode, rn, rd);
8521        break;
8522    case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
8523    case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8524        handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
8525                               immh, immb, opcode, rn, rd);
8526        break;
8527    case 0xc: /* SQSHLU */
8528        if (!is_u) {
8529            unallocated_encoding(s);
8530            return;
8531        }
8532        handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
8533        break;
8534    case 0xe: /* SQSHL, UQSHL */
8535        handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
8536        break;
8537    case 0x1f: /* FCVTZS, FCVTZU */
8538        handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
8539        break;
8540    default:
8541        unallocated_encoding(s);
8542        break;
8543    }
8544}
8545
8546/* AdvSIMD scalar three different
8547 *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8548 * +-----+---+-----------+------+---+------+--------+-----+------+------+
8549 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8550 * +-----+---+-----------+------+---+------+--------+-----+------+------+
8551 */
8552static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
8553{
8554    bool is_u = extract32(insn, 29, 1);
8555    int size = extract32(insn, 22, 2);
8556    int opcode = extract32(insn, 12, 4);
8557    int rm = extract32(insn, 16, 5);
8558    int rn = extract32(insn, 5, 5);
8559    int rd = extract32(insn, 0, 5);
8560
8561    if (is_u) {
8562        unallocated_encoding(s);
8563        return;
8564    }
8565
8566    switch (opcode) {
8567    case 0x9: /* SQDMLAL, SQDMLAL2 */
8568    case 0xb: /* SQDMLSL, SQDMLSL2 */
8569    case 0xd: /* SQDMULL, SQDMULL2 */
8570        if (size == 0 || size == 3) {
8571            unallocated_encoding(s);
8572            return;
8573        }
8574        break;
8575    default:
8576        unallocated_encoding(s);
8577        return;
8578    }
8579
8580    if (!fp_access_check(s)) {
8581        return;
8582    }
8583
8584    if (size == 2) {
8585        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8586        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8587        TCGv_i64 tcg_res = tcg_temp_new_i64();
8588
8589        read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
8590        read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
8591
8592        tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
8593        gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
8594
8595        switch (opcode) {
8596        case 0xd: /* SQDMULL, SQDMULL2 */
8597            break;
8598        case 0xb: /* SQDMLSL, SQDMLSL2 */
8599            tcg_gen_neg_i64(tcg_res, tcg_res);
8600            /* fall through */
8601        case 0x9: /* SQDMLAL, SQDMLAL2 */
8602            read_vec_element(s, tcg_op1, rd, 0, MO_64);
8603            gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
8604                                              tcg_res, tcg_op1);
8605            break;
8606        default:
8607            g_assert_not_reached();
8608        }
8609
8610        write_fp_dreg(s, rd, tcg_res);
8611
8612        tcg_temp_free_i64(tcg_op1);
8613        tcg_temp_free_i64(tcg_op2);
8614        tcg_temp_free_i64(tcg_res);
8615    } else {
8616        TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
8617        TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
8618        TCGv_i64 tcg_res = tcg_temp_new_i64();
8619
8620        gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
8621        gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
8622
8623        switch (opcode) {
8624        case 0xd: /* SQDMULL, SQDMULL2 */
8625            break;
8626        case 0xb: /* SQDMLSL, SQDMLSL2 */
8627            gen_helper_neon_negl_u32(tcg_res, tcg_res);
8628            /* fall through */
8629        case 0x9: /* SQDMLAL, SQDMLAL2 */
8630        {
8631            TCGv_i64 tcg_op3 = tcg_temp_new_i64();
8632            read_vec_element(s, tcg_op3, rd, 0, MO_32);
8633            gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
8634                                              tcg_res, tcg_op3);
8635            tcg_temp_free_i64(tcg_op3);
8636            break;
8637        }
8638        default:
8639            g_assert_not_reached();
8640        }
8641
8642        tcg_gen_ext32u_i64(tcg_res, tcg_res);
8643        write_fp_dreg(s, rd, tcg_res);
8644
8645        tcg_temp_free_i32(tcg_op1);
8646        tcg_temp_free_i32(tcg_op2);
8647        tcg_temp_free_i64(tcg_res);
8648    }
8649}
8650
8651static void handle_3same_64(DisasContext *s, int opcode, bool u,
8652                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
8653{
8654    /* Handle 64x64->64 opcodes which are shared between the scalar
8655     * and vector 3-same groups. We cover every opcode where size == 3
8656     * is valid in either the three-reg-same (integer, not pairwise)
8657     * or scalar-three-reg-same groups.
8658     */
8659    TCGCond cond;
8660
8661    switch (opcode) {
8662    case 0x1: /* SQADD */
8663        if (u) {
8664            gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8665        } else {
8666            gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8667        }
8668        break;
8669    case 0x5: /* SQSUB */
8670        if (u) {
8671            gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8672        } else {
8673            gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8674        }
8675        break;
8676    case 0x6: /* CMGT, CMHI */
8677        /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
8678         * We implement this using setcond (test) and then negating.
8679         */
8680        cond = u ? TCG_COND_GTU : TCG_COND_GT;
8681    do_cmop:
8682        tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
8683        tcg_gen_neg_i64(tcg_rd, tcg_rd);
8684        break;
8685    case 0x7: /* CMGE, CMHS */
8686        cond = u ? TCG_COND_GEU : TCG_COND_GE;
8687        goto do_cmop;
8688    case 0x11: /* CMTST, CMEQ */
8689        if (u) {
8690            cond = TCG_COND_EQ;
8691            goto do_cmop;
8692        }
8693        gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
8694        break;
8695    case 0x8: /* SSHL, USHL */
8696        if (u) {
8697            gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
8698        } else {
8699            gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
8700        }
8701        break;
8702    case 0x9: /* SQSHL, UQSHL */
8703        if (u) {
8704            gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8705        } else {
8706            gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8707        }
8708        break;
8709    case 0xa: /* SRSHL, URSHL */
8710        if (u) {
8711            gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
8712        } else {
8713            gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
8714        }
8715        break;
8716    case 0xb: /* SQRSHL, UQRSHL */
8717        if (u) {
8718            gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8719        } else {
8720            gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8721        }
8722        break;
8723    case 0x10: /* ADD, SUB */
8724        if (u) {
8725            tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
8726        } else {
8727            tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
8728        }
8729        break;
8730    default:
8731        g_assert_not_reached();
8732    }
8733}
8734
8735/* Handle the 3-same-operands float operations; shared by the scalar
8736 * and vector encodings. The caller must filter out any encodings
8737 * not allocated for the encoding it is dealing with.
8738 */
8739static void handle_3same_float(DisasContext *s, int size, int elements,
8740                               int fpopcode, int rd, int rn, int rm)
8741{
8742    int pass;
8743    TCGv_ptr fpst = get_fpstatus_ptr(false);
8744
8745    for (pass = 0; pass < elements; pass++) {
8746        if (size) {
8747            /* Double */
8748            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8749            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8750            TCGv_i64 tcg_res = tcg_temp_new_i64();
8751
8752            read_vec_element(s, tcg_op1, rn, pass, MO_64);
8753            read_vec_element(s, tcg_op2, rm, pass, MO_64);
8754
8755            switch (fpopcode) {
8756            case 0x39: /* FMLS */
8757                /* As usual for ARM, separate negation for fused multiply-add */
8758                gen_helper_vfp_negd(tcg_op1, tcg_op1);
8759                /* fall through */
8760            case 0x19: /* FMLA */
8761                read_vec_element(s, tcg_res, rd, pass, MO_64);
8762                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
8763                                       tcg_res, fpst);
8764                break;
8765            case 0x18: /* FMAXNM */
8766                gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8767                break;
8768            case 0x1a: /* FADD */
8769                gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8770                break;
8771            case 0x1b: /* FMULX */
8772                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
8773                break;
8774            case 0x1c: /* FCMEQ */
8775                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8776                break;
8777            case 0x1e: /* FMAX */
8778                gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8779                break;
8780            case 0x1f: /* FRECPS */
8781                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8782                break;
8783            case 0x38: /* FMINNM */
8784                gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8785                break;
8786            case 0x3a: /* FSUB */
8787                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8788                break;
8789            case 0x3e: /* FMIN */
8790                gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8791                break;
8792            case 0x3f: /* FRSQRTS */
8793                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8794                break;
8795            case 0x5b: /* FMUL */
8796                gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
8797                break;
8798            case 0x5c: /* FCMGE */
8799                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8800                break;
8801            case 0x5d: /* FACGE */
8802                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8803                break;
8804            case 0x5f: /* FDIV */
8805                gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
8806                break;
8807            case 0x7a: /* FABD */
8808                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8809                gen_helper_vfp_absd(tcg_res, tcg_res);
8810                break;
8811            case 0x7c: /* FCMGT */
8812                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8813                break;
8814            case 0x7d: /* FACGT */
8815                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8816                break;
8817            default:
8818                g_assert_not_reached();
8819            }
8820
8821            write_vec_element(s, tcg_res, rd, pass, MO_64);
8822
8823            tcg_temp_free_i64(tcg_res);
8824            tcg_temp_free_i64(tcg_op1);
8825            tcg_temp_free_i64(tcg_op2);
8826        } else {
8827            /* Single */
8828            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8829            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8830            TCGv_i32 tcg_res = tcg_temp_new_i32();
8831
8832            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
8833            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
8834
8835            switch (fpopcode) {
8836            case 0x39: /* FMLS */
8837                /* As usual for ARM, separate negation for fused multiply-add */
8838                gen_helper_vfp_negs(tcg_op1, tcg_op1);
8839                /* fall through */
8840            case 0x19: /* FMLA */
8841                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8842                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
8843                                       tcg_res, fpst);
8844                break;
8845            case 0x1a: /* FADD */
8846                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8847                break;
8848            case 0x1b: /* FMULX */
8849                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
8850                break;
8851            case 0x1c: /* FCMEQ */
8852                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8853                break;
8854            case 0x1e: /* FMAX */
8855                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8856                break;
8857            case 0x1f: /* FRECPS */
8858                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8859                break;
8860            case 0x18: /* FMAXNM */
8861                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8862                break;
8863            case 0x38: /* FMINNM */
8864                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8865                break;
8866            case 0x3a: /* FSUB */
8867                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
8868                break;
8869            case 0x3e: /* FMIN */
8870                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8871                break;
8872            case 0x3f: /* FRSQRTS */
8873                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8874                break;
8875            case 0x5b: /* FMUL */
8876                gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
8877                break;
8878            case 0x5c: /* FCMGE */
8879                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8880                break;
8881            case 0x5d: /* FACGE */
8882                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8883                break;
8884            case 0x5f: /* FDIV */
8885                gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
8886                break;
8887            case 0x7a: /* FABD */
8888                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
8889                gen_helper_vfp_abss(tcg_res, tcg_res);
8890                break;
8891            case 0x7c: /* FCMGT */
8892                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8893                break;
8894            case 0x7d: /* FACGT */
8895                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8896                break;
8897            default:
8898                g_assert_not_reached();
8899            }
8900
8901            if (elements == 1) {
8902                /* scalar single so clear high part */
8903                TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8904
8905                tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
8906                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
8907                tcg_temp_free_i64(tcg_tmp);
8908            } else {
8909                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8910            }
8911
8912            tcg_temp_free_i32(tcg_res);
8913            tcg_temp_free_i32(tcg_op1);
8914            tcg_temp_free_i32(tcg_op2);
8915        }
8916    }
8917
8918    tcg_temp_free_ptr(fpst);
8919
8920    clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
8921}
8922
8923/* AdvSIMD scalar three same
8924 *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
8925 * +-----+---+-----------+------+---+------+--------+---+------+------+
8926 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
8927 * +-----+---+-----------+------+---+------+--------+---+------+------+
8928 */
8929static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
8930{
8931    int rd = extract32(insn, 0, 5);
8932    int rn = extract32(insn, 5, 5);
8933    int opcode = extract32(insn, 11, 5);
8934    int rm = extract32(insn, 16, 5);
8935    int size = extract32(insn, 22, 2);
8936    bool u = extract32(insn, 29, 1);
8937    TCGv_i64 tcg_rd;
8938
8939    if (opcode >= 0x18) {
8940        /* Floating point: U, size[1] and opcode indicate operation */
8941        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
8942        switch (fpopcode) {
8943        case 0x1b: /* FMULX */
8944        case 0x1f: /* FRECPS */
8945        case 0x3f: /* FRSQRTS */
8946        case 0x5d: /* FACGE */
8947        case 0x7d: /* FACGT */
8948        case 0x1c: /* FCMEQ */
8949        case 0x5c: /* FCMGE */
8950        case 0x7c: /* FCMGT */
8951        case 0x7a: /* FABD */
8952            break;
8953        default:
8954            unallocated_encoding(s);
8955            return;
8956        }
8957
8958        if (!fp_access_check(s)) {
8959            return;
8960        }
8961
8962        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
8963        return;
8964    }
8965
8966    switch (opcode) {
8967    case 0x1: /* SQADD, UQADD */
8968    case 0x5: /* SQSUB, UQSUB */
8969    case 0x9: /* SQSHL, UQSHL */
8970    case 0xb: /* SQRSHL, UQRSHL */
8971        break;
8972    case 0x8: /* SSHL, USHL */
8973    case 0xa: /* SRSHL, URSHL */
8974    case 0x6: /* CMGT, CMHI */
8975    case 0x7: /* CMGE, CMHS */
8976    case 0x11: /* CMTST, CMEQ */
8977    case 0x10: /* ADD, SUB (vector) */
8978        if (size != 3) {
8979            unallocated_encoding(s);
8980            return;
8981        }
8982        break;
8983    case 0x16: /* SQDMULH, SQRDMULH (vector) */
8984        if (size != 1 && size != 2) {
8985            unallocated_encoding(s);
8986            return;
8987        }
8988        break;
8989    default:
8990        unallocated_encoding(s);
8991        return;
8992    }
8993
8994    if (!fp_access_check(s)) {
8995        return;
8996    }
8997
8998    tcg_rd = tcg_temp_new_i64();
8999
9000    if (size == 3) {
9001        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9002        TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
9003
9004        handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
9005        tcg_temp_free_i64(tcg_rn);
9006        tcg_temp_free_i64(tcg_rm);
9007    } else {
9008        /* Do a single operation on the lowest element in the vector.
9009         * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
9010         * no side effects for all these operations.
9011         * OPTME: special-purpose helpers would avoid doing some
9012         * unnecessary work in the helper for the 8 and 16 bit cases.
9013         */
9014        NeonGenTwoOpEnvFn *genenvfn;
9015        TCGv_i32 tcg_rn = tcg_temp_new_i32();
9016        TCGv_i32 tcg_rm = tcg_temp_new_i32();
9017        TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
9018
9019        read_vec_element_i32(s, tcg_rn, rn, 0, size);
9020        read_vec_element_i32(s, tcg_rm, rm, 0, size);
9021
9022        switch (opcode) {
9023        case 0x1: /* SQADD, UQADD */
9024        {
9025            static NeonGenTwoOpEnvFn * const fns[3][2] = {
9026                { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9027                { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9028                { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9029            };
9030            genenvfn = fns[size][u];
9031            break;
9032        }
9033        case 0x5: /* SQSUB, UQSUB */
9034        {
9035            static NeonGenTwoOpEnvFn * const fns[3][2] = {
9036                { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9037                { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9038                { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9039            };
9040            genenvfn = fns[size][u];
9041            break;
9042        }
9043        case 0x9: /* SQSHL, UQSHL */
9044        {
9045            static NeonGenTwoOpEnvFn * const fns[3][2] = {
9046                { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9047                { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9048                { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9049            };
9050            genenvfn = fns[size][u];
9051            break;
9052        }
9053        case 0xb: /* SQRSHL, UQRSHL */
9054        {
9055            static NeonGenTwoOpEnvFn * const fns[3][2] = {
9056                { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9057                { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9058                { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9059            };
9060            genenvfn = fns[size][u];
9061            break;
9062        }
9063        case 0x16: /* SQDMULH, SQRDMULH */
9064        {
9065            static NeonGenTwoOpEnvFn * const fns[2][2] = {
9066                { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9067                { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9068            };
9069            assert(size == 1 || size == 2);
9070            genenvfn = fns[size - 1][u];
9071            break;
9072        }
9073        default:
9074            g_assert_not_reached();
9075        }
9076
9077        genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
9078        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
9079        tcg_temp_free_i32(tcg_rd32);
9080        tcg_temp_free_i32(tcg_rn);
9081        tcg_temp_free_i32(tcg_rm);
9082    }
9083
9084    write_fp_dreg(s, rd, tcg_rd);
9085
9086    tcg_temp_free_i64(tcg_rd);
9087}
9088
9089/* AdvSIMD scalar three same FP16
9090 *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
9091 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9092 * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
9093 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9094 * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9095 * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9096 */
9097static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9098                                                  uint32_t insn)
9099{
9100    int rd = extract32(insn, 0, 5);
9101    int rn = extract32(insn, 5, 5);
9102    int opcode = extract32(insn, 11, 3);
9103    int rm = extract32(insn, 16, 5);
9104    bool u = extract32(insn, 29, 1);
9105    bool a = extract32(insn, 23, 1);
9106    int fpopcode = opcode | (a << 3) |  (u << 4);
9107    TCGv_ptr fpst;
9108    TCGv_i32 tcg_op1;
9109    TCGv_i32 tcg_op2;
9110    TCGv_i32 tcg_res;
9111
9112    switch (fpopcode) {
9113    case 0x03: /* FMULX */
9114    case 0x04: /* FCMEQ (reg) */
9115    case 0x07: /* FRECPS */
9116    case 0x0f: /* FRSQRTS */
9117    case 0x14: /* FCMGE (reg) */
9118    case 0x15: /* FACGE */
9119    case 0x1a: /* FABD */
9120    case 0x1c: /* FCMGT (reg) */
9121    case 0x1d: /* FACGT */
9122        break;
9123    default:
9124        unallocated_encoding(s);
9125        return;
9126    }
9127
9128    if (!dc_isar_feature(aa64_fp16, s)) {
9129        unallocated_encoding(s);
9130    }
9131
9132    if (!fp_access_check(s)) {
9133        return;
9134    }
9135
9136    fpst = get_fpstatus_ptr(true);
9137
9138    tcg_op1 = read_fp_hreg(s, rn);
9139    tcg_op2 = read_fp_hreg(s, rm);
9140    tcg_res = tcg_temp_new_i32();
9141
9142    switch (fpopcode) {
9143    case 0x03: /* FMULX */
9144        gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9145        break;
9146    case 0x04: /* FCMEQ (reg) */
9147        gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9148        break;
9149    case 0x07: /* FRECPS */
9150        gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9151        break;
9152    case 0x0f: /* FRSQRTS */
9153        gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9154        break;
9155    case 0x14: /* FCMGE (reg) */
9156        gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9157        break;
9158    case 0x15: /* FACGE */
9159        gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9160        break;
9161    case 0x1a: /* FABD */
9162        gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
9163        tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9164        break;
9165    case 0x1c: /* FCMGT (reg) */
9166        gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9167        break;
9168    case 0x1d: /* FACGT */
9169        gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9170        break;
9171    default:
9172        g_assert_not_reached();
9173    }
9174
9175    write_fp_sreg(s, rd, tcg_res);
9176
9177
9178    tcg_temp_free_i32(tcg_res);
9179    tcg_temp_free_i32(tcg_op1);
9180    tcg_temp_free_i32(tcg_op2);
9181    tcg_temp_free_ptr(fpst);
9182}
9183
9184/* AdvSIMD scalar three same extra
9185 *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
9186 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9187 * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
9188 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9189 */
9190static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
9191                                                   uint32_t insn)
9192{
9193    int rd = extract32(insn, 0, 5);
9194    int rn = extract32(insn, 5, 5);
9195    int opcode = extract32(insn, 11, 4);
9196    int rm = extract32(insn, 16, 5);
9197    int size = extract32(insn, 22, 2);
9198    bool u = extract32(insn, 29, 1);
9199    TCGv_i32 ele1, ele2, ele3;
9200    TCGv_i64 res;
9201    bool feature;
9202
9203    switch (u * 16 + opcode) {
9204    case 0x10: /* SQRDMLAH (vector) */
9205    case 0x11: /* SQRDMLSH (vector) */
9206        if (size != 1 && size != 2) {
9207            unallocated_encoding(s);
9208            return;
9209        }
9210        feature = dc_isar_feature(aa64_rdm, s);
9211        break;
9212    default:
9213        unallocated_encoding(s);
9214        return;
9215    }
9216    if (!feature) {
9217        unallocated_encoding(s);
9218        return;
9219    }
9220    if (!fp_access_check(s)) {
9221        return;
9222    }
9223
9224    /* Do a single operation on the lowest element in the vector.
9225     * We use the standard Neon helpers and rely on 0 OP 0 == 0
9226     * with no side effects for all these operations.
9227     * OPTME: special-purpose helpers would avoid doing some
9228     * unnecessary work in the helper for the 16 bit cases.
9229     */
9230    ele1 = tcg_temp_new_i32();
9231    ele2 = tcg_temp_new_i32();
9232    ele3 = tcg_temp_new_i32();
9233
9234    read_vec_element_i32(s, ele1, rn, 0, size);
9235    read_vec_element_i32(s, ele2, rm, 0, size);
9236    read_vec_element_i32(s, ele3, rd, 0, size);
9237
9238    switch (opcode) {
9239    case 0x0: /* SQRDMLAH */
9240        if (size == 1) {
9241            gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
9242        } else {
9243            gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
9244        }
9245        break;
9246    case 0x1: /* SQRDMLSH */
9247        if (size == 1) {
9248            gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
9249        } else {
9250            gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
9251        }
9252        break;
9253    default:
9254        g_assert_not_reached();
9255    }
9256    tcg_temp_free_i32(ele1);
9257    tcg_temp_free_i32(ele2);
9258
9259    res = tcg_temp_new_i64();
9260    tcg_gen_extu_i32_i64(res, ele3);
9261    tcg_temp_free_i32(ele3);
9262
9263    write_fp_dreg(s, rd, res);
9264    tcg_temp_free_i64(res);
9265}
9266
9267static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9268                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9269                            TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9270{
9271    /* Handle 64->64 opcodes which are shared between the scalar and
9272     * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9273     * is valid in either group and also the double-precision fp ops.
9274     * The caller only need provide tcg_rmode and tcg_fpstatus if the op
9275     * requires them.
9276     */
9277    TCGCond cond;
9278
9279    switch (opcode) {
9280    case 0x4: /* CLS, CLZ */
9281        if (u) {
9282            tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9283        } else {
9284            tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9285        }
9286        break;
9287    case 0x5: /* NOT */
9288        /* This opcode is shared with CNT and RBIT but we have earlier
9289         * enforced that size == 3 if and only if this is the NOT insn.
9290         */
9291        tcg_gen_not_i64(tcg_rd, tcg_rn);
9292        break;
9293    case 0x7: /* SQABS, SQNEG */
9294        if (u) {
9295            gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
9296        } else {
9297            gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
9298        }
9299        break;
9300    case 0xa: /* CMLT */
9301        /* 64 bit integer comparison against zero, result is
9302         * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
9303         * subtracting 1.
9304         */
9305        cond = TCG_COND_LT;
9306    do_cmop:
9307        tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
9308        tcg_gen_neg_i64(tcg_rd, tcg_rd);
9309        break;
9310    case 0x8: /* CMGT, CMGE */
9311        cond = u ? TCG_COND_GE : TCG_COND_GT;
9312        goto do_cmop;
9313    case 0x9: /* CMEQ, CMLE */
9314        cond = u ? TCG_COND_LE : TCG_COND_EQ;
9315        goto do_cmop;
9316    case 0xb: /* ABS, NEG */
9317        if (u) {
9318            tcg_gen_neg_i64(tcg_rd, tcg_rn);
9319        } else {
9320            tcg_gen_abs_i64(tcg_rd, tcg_rn);
9321        }
9322        break;
9323    case 0x2f: /* FABS */
9324        gen_helper_vfp_absd(tcg_rd, tcg_rn);
9325        break;
9326    case 0x6f: /* FNEG */
9327        gen_helper_vfp_negd(tcg_rd, tcg_rn);
9328        break;
9329    case 0x7f: /* FSQRT */
9330        gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
9331        break;
9332    case 0x1a: /* FCVTNS */
9333    case 0x1b: /* FCVTMS */
9334    case 0x1c: /* FCVTAS */
9335    case 0x3a: /* FCVTPS */
9336    case 0x3b: /* FCVTZS */
9337    {
9338        TCGv_i32 tcg_shift = tcg_const_i32(0);
9339        gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9340        tcg_temp_free_i32(tcg_shift);
9341        break;
9342    }
9343    case 0x5a: /* FCVTNU */
9344    case 0x5b: /* FCVTMU */
9345    case 0x5c: /* FCVTAU */
9346    case 0x7a: /* FCVTPU */
9347    case 0x7b: /* FCVTZU */
9348    {
9349        TCGv_i32 tcg_shift = tcg_const_i32(0);
9350        gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9351        tcg_temp_free_i32(tcg_shift);
9352        break;
9353    }
9354    case 0x18: /* FRINTN */
9355    case 0x19: /* FRINTM */
9356    case 0x38: /* FRINTP */
9357    case 0x39: /* FRINTZ */
9358    case 0x58: /* FRINTA */
9359    case 0x79: /* FRINTI */
9360        gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
9361        break;
9362    case 0x59: /* FRINTX */
9363        gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
9364        break;
9365    case 0x1e: /* FRINT32Z */
9366    case 0x5e: /* FRINT32X */
9367        gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
9368        break;
9369    case 0x1f: /* FRINT64Z */
9370    case 0x5f: /* FRINT64X */
9371        gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
9372        break;
9373    default:
9374        g_assert_not_reached();
9375    }
9376}
9377
9378static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
9379                                   bool is_scalar, bool is_u, bool is_q,
9380                                   int size, int rn, int rd)
9381{
9382    bool is_double = (size == MO_64);
9383    TCGv_ptr fpst;
9384
9385    if (!fp_access_check(s)) {
9386        return;
9387    }
9388
9389    fpst = get_fpstatus_ptr(size == MO_16);
9390
9391    if (is_double) {
9392        TCGv_i64 tcg_op = tcg_temp_new_i64();
9393        TCGv_i64 tcg_zero = tcg_const_i64(0);
9394        TCGv_i64 tcg_res = tcg_temp_new_i64();
9395        NeonGenTwoDoubleOPFn *genfn;
9396        bool swap = false;
9397        int pass;
9398
9399        switch (opcode) {
9400        case 0x2e: /* FCMLT (zero) */
9401            swap = true;
9402            /* fallthrough */
9403        case 0x2c: /* FCMGT (zero) */
9404            genfn = gen_helper_neon_cgt_f64;
9405            break;
9406        case 0x2d: /* FCMEQ (zero) */
9407            genfn = gen_helper_neon_ceq_f64;
9408            break;
9409        case 0x6d: /* FCMLE (zero) */
9410            swap = true;
9411            /* fall through */
9412        case 0x6c: /* FCMGE (zero) */
9413            genfn = gen_helper_neon_cge_f64;
9414            break;
9415        default:
9416            g_assert_not_reached();
9417        }
9418
9419        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9420            read_vec_element(s, tcg_op, rn, pass, MO_64);
9421            if (swap) {
9422                genfn(tcg_res, tcg_zero, tcg_op, fpst);
9423            } else {
9424                genfn(tcg_res, tcg_op, tcg_zero, fpst);
9425            }
9426            write_vec_element(s, tcg_res, rd, pass, MO_64);
9427        }
9428        tcg_temp_free_i64(tcg_res);
9429        tcg_temp_free_i64(tcg_zero);
9430        tcg_temp_free_i64(tcg_op);
9431
9432        clear_vec_high(s, !is_scalar, rd);
9433    } else {
9434        TCGv_i32 tcg_op = tcg_temp_new_i32();
9435        TCGv_i32 tcg_zero = tcg_const_i32(0);
9436        TCGv_i32 tcg_res = tcg_temp_new_i32();
9437        NeonGenTwoSingleOPFn *genfn;
9438        bool swap = false;
9439        int pass, maxpasses;
9440
9441        if (size == MO_16) {
9442            switch (opcode) {
9443            case 0x2e: /* FCMLT (zero) */
9444                swap = true;
9445                /* fall through */
9446            case 0x2c: /* FCMGT (zero) */
9447                genfn = gen_helper_advsimd_cgt_f16;
9448                break;
9449            case 0x2d: /* FCMEQ (zero) */
9450                genfn = gen_helper_advsimd_ceq_f16;
9451                break;
9452            case 0x6d: /* FCMLE (zero) */
9453                swap = true;
9454                /* fall through */
9455            case 0x6c: /* FCMGE (zero) */
9456                genfn = gen_helper_advsimd_cge_f16;
9457                break;
9458            default:
9459                g_assert_not_reached();
9460            }
9461        } else {
9462            switch (opcode) {
9463            case 0x2e: /* FCMLT (zero) */
9464                swap = true;
9465                /* fall through */
9466            case 0x2c: /* FCMGT (zero) */
9467                genfn = gen_helper_neon_cgt_f32;
9468                break;
9469            case 0x2d: /* FCMEQ (zero) */
9470                genfn = gen_helper_neon_ceq_f32;
9471                break;
9472            case 0x6d: /* FCMLE (zero) */
9473                swap = true;
9474                /* fall through */
9475            case 0x6c: /* FCMGE (zero) */
9476                genfn = gen_helper_neon_cge_f32;
9477                break;
9478            default:
9479                g_assert_not_reached();
9480            }
9481        }
9482
9483        if (is_scalar) {
9484            maxpasses = 1;
9485        } else {
9486            int vector_size = 8 << is_q;
9487            maxpasses = vector_size >> size;
9488        }
9489
9490        for (pass = 0; pass < maxpasses; pass++) {
9491            read_vec_element_i32(s, tcg_op, rn, pass, size);
9492            if (swap) {
9493                genfn(tcg_res, tcg_zero, tcg_op, fpst);
9494            } else {
9495                genfn(tcg_res, tcg_op, tcg_zero, fpst);
9496            }
9497            if (is_scalar) {
9498                write_fp_sreg(s, rd, tcg_res);
9499            } else {
9500                write_vec_element_i32(s, tcg_res, rd, pass, size);
9501            }
9502        }
9503        tcg_temp_free_i32(tcg_res);
9504        tcg_temp_free_i32(tcg_zero);
9505        tcg_temp_free_i32(tcg_op);
9506        if (!is_scalar) {
9507            clear_vec_high(s, is_q, rd);
9508        }
9509    }
9510
9511    tcg_temp_free_ptr(fpst);
9512}
9513
9514static void handle_2misc_reciprocal(DisasContext *s, int opcode,
9515                                    bool is_scalar, bool is_u, bool is_q,
9516                                    int size, int rn, int rd)
9517{
9518    bool is_double = (size == 3);
9519    TCGv_ptr fpst = get_fpstatus_ptr(false);
9520
9521    if (is_double) {
9522        TCGv_i64 tcg_op = tcg_temp_new_i64();
9523        TCGv_i64 tcg_res = tcg_temp_new_i64();
9524        int pass;
9525
9526        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9527            read_vec_element(s, tcg_op, rn, pass, MO_64);
9528            switch (opcode) {
9529            case 0x3d: /* FRECPE */
9530                gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
9531                break;
9532            case 0x3f: /* FRECPX */
9533                gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
9534                break;
9535            case 0x7d: /* FRSQRTE */
9536                gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
9537                break;
9538            default:
9539                g_assert_not_reached();
9540            }
9541            write_vec_element(s, tcg_res, rd, pass, MO_64);
9542        }
9543        tcg_temp_free_i64(tcg_res);
9544        tcg_temp_free_i64(tcg_op);
9545        clear_vec_high(s, !is_scalar, rd);
9546    } else {
9547        TCGv_i32 tcg_op = tcg_temp_new_i32();
9548        TCGv_i32 tcg_res = tcg_temp_new_i32();
9549        int pass, maxpasses;
9550
9551        if (is_scalar) {
9552            maxpasses = 1;
9553        } else {
9554            maxpasses = is_q ? 4 : 2;
9555        }
9556
9557        for (pass = 0; pass < maxpasses; pass++) {
9558            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
9559
9560            switch (opcode) {
9561            case 0x3c: /* URECPE */
9562                gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
9563                break;
9564            case 0x3d: /* FRECPE */
9565                gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
9566                break;
9567            case 0x3f: /* FRECPX */
9568                gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
9569                break;
9570            case 0x7d: /* FRSQRTE */
9571                gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
9572                break;
9573            default:
9574                g_assert_not_reached();
9575            }
9576
9577            if (is_scalar) {
9578                write_fp_sreg(s, rd, tcg_res);
9579            } else {
9580                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9581            }
9582        }
9583        tcg_temp_free_i32(tcg_res);
9584        tcg_temp_free_i32(tcg_op);
9585        if (!is_scalar) {
9586            clear_vec_high(s, is_q, rd);
9587        }
9588    }
9589    tcg_temp_free_ptr(fpst);
9590}
9591
9592static void handle_2misc_narrow(DisasContext *s, bool scalar,
9593                                int opcode, bool u, bool is_q,
9594                                int size, int rn, int rd)
9595{
9596    /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
9597     * in the source becomes a size element in the destination).
9598     */
9599    int pass;
9600    TCGv_i32 tcg_res[2];
9601    int destelt = is_q ? 2 : 0;
9602    int passes = scalar ? 1 : 2;
9603
9604    if (scalar) {
9605        tcg_res[1] = tcg_const_i32(0);
9606    }
9607
9608    for (pass = 0; pass < passes; pass++) {
9609        TCGv_i64 tcg_op = tcg_temp_new_i64();
9610        NeonGenNarrowFn *genfn = NULL;
9611        NeonGenNarrowEnvFn *genenvfn = NULL;
9612
9613        if (scalar) {
9614            read_vec_element(s, tcg_op, rn, pass, size + 1);
9615        } else {
9616            read_vec_element(s, tcg_op, rn, pass, MO_64);
9617        }
9618        tcg_res[pass] = tcg_temp_new_i32();
9619
9620        switch (opcode) {
9621        case 0x12: /* XTN, SQXTUN */
9622        {
9623            static NeonGenNarrowFn * const xtnfns[3] = {
9624                gen_helper_neon_narrow_u8,
9625                gen_helper_neon_narrow_u16,
9626                tcg_gen_extrl_i64_i32,
9627            };
9628            static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
9629                gen_helper_neon_unarrow_sat8,
9630                gen_helper_neon_unarrow_sat16,
9631                gen_helper_neon_unarrow_sat32,
9632            };
9633            if (u) {
9634                genenvfn = sqxtunfns[size];
9635            } else {
9636                genfn = xtnfns[size];
9637            }
9638            break;
9639        }
9640        case 0x14: /* SQXTN, UQXTN */
9641        {
9642            static NeonGenNarrowEnvFn * const fns[3][2] = {
9643                { gen_helper_neon_narrow_sat_s8,
9644                  gen_helper_neon_narrow_sat_u8 },
9645                { gen_helper_neon_narrow_sat_s16,
9646                  gen_helper_neon_narrow_sat_u16 },
9647                { gen_helper_neon_narrow_sat_s32,
9648                  gen_helper_neon_narrow_sat_u32 },
9649            };
9650            genenvfn = fns[size][u];
9651            break;
9652        }
9653        case 0x16: /* FCVTN, FCVTN2 */
9654            /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
9655            if (size == 2) {
9656                gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
9657            } else {
9658                TCGv_i32 tcg_lo = tcg_temp_new_i32();
9659                TCGv_i32 tcg_hi = tcg_temp_new_i32();
9660                TCGv_ptr fpst = get_fpstatus_ptr(false);
9661                TCGv_i32 ahp = get_ahp_flag();
9662
9663                tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
9664                gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9665                gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9666                tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
9667                tcg_temp_free_i32(tcg_lo);
9668                tcg_temp_free_i32(tcg_hi);
9669                tcg_temp_free_ptr(fpst);
9670                tcg_temp_free_i32(ahp);
9671            }
9672            break;
9673        case 0x56:  /* FCVTXN, FCVTXN2 */
9674            /* 64 bit to 32 bit float conversion
9675             * with von Neumann rounding (round to odd)
9676             */
9677            assert(size == 2);
9678            gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
9679            break;
9680        default:
9681            g_assert_not_reached();
9682        }
9683
9684        if (genfn) {
9685            genfn(tcg_res[pass], tcg_op);
9686        } else if (genenvfn) {
9687            genenvfn(tcg_res[pass], cpu_env, tcg_op);
9688        }
9689
9690        tcg_temp_free_i64(tcg_op);
9691    }
9692
9693    for (pass = 0; pass < 2; pass++) {
9694        write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
9695        tcg_temp_free_i32(tcg_res[pass]);
9696    }
9697    clear_vec_high(s, is_q, rd);
9698}
9699
9700/* Remaining saturating accumulating ops */
9701static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
9702                                bool is_q, int size, int rn, int rd)
9703{
9704    bool is_double = (size == 3);
9705
9706    if (is_double) {
9707        TCGv_i64 tcg_rn = tcg_temp_new_i64();
9708        TCGv_i64 tcg_rd = tcg_temp_new_i64();
9709        int pass;
9710
9711        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9712            read_vec_element(s, tcg_rn, rn, pass, MO_64);
9713            read_vec_element(s, tcg_rd, rd, pass, MO_64);
9714
9715            if (is_u) { /* USQADD */
9716                gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9717            } else { /* SUQADD */
9718                gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9719            }
9720            write_vec_element(s, tcg_rd, rd, pass, MO_64);
9721        }
9722        tcg_temp_free_i64(tcg_rd);
9723        tcg_temp_free_i64(tcg_rn);
9724        clear_vec_high(s, !is_scalar, rd);
9725    } else {
9726        TCGv_i32 tcg_rn = tcg_temp_new_i32();
9727        TCGv_i32 tcg_rd = tcg_temp_new_i32();
9728        int pass, maxpasses;
9729
9730        if (is_scalar) {
9731            maxpasses = 1;
9732        } else {
9733            maxpasses = is_q ? 4 : 2;
9734        }
9735
9736        for (pass = 0; pass < maxpasses; pass++) {
9737            if (is_scalar) {
9738                read_vec_element_i32(s, tcg_rn, rn, pass, size);
9739                read_vec_element_i32(s, tcg_rd, rd, pass, size);
9740            } else {
9741                read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
9742                read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9743            }
9744
9745            if (is_u) { /* USQADD */
9746                switch (size) {
9747                case 0:
9748                    gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9749                    break;
9750                case 1:
9751                    gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9752                    break;
9753                case 2:
9754                    gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9755                    break;
9756                default:
9757                    g_assert_not_reached();
9758                }
9759            } else { /* SUQADD */
9760                switch (size) {
9761                case 0:
9762                    gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9763                    break;
9764                case 1:
9765                    gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9766                    break;
9767                case 2:
9768                    gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9769                    break;
9770                default:
9771                    g_assert_not_reached();
9772                }
9773            }
9774
9775            if (is_scalar) {
9776                TCGv_i64 tcg_zero = tcg_const_i64(0);
9777                write_vec_element(s, tcg_zero, rd, 0, MO_64);
9778                tcg_temp_free_i64(tcg_zero);
9779            }
9780            write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9781        }
9782        tcg_temp_free_i32(tcg_rd);
9783        tcg_temp_free_i32(tcg_rn);
9784        clear_vec_high(s, is_q, rd);
9785    }
9786}
9787
9788/* AdvSIMD scalar two reg misc
9789 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9790 * +-----+---+-----------+------+-----------+--------+-----+------+------+
9791 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9792 * +-----+---+-----------+------+-----------+--------+-----+------+------+
9793 */
9794static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
9795{
9796    int rd = extract32(insn, 0, 5);
9797    int rn = extract32(insn, 5, 5);
9798    int opcode = extract32(insn, 12, 5);
9799    int size = extract32(insn, 22, 2);
9800    bool u = extract32(insn, 29, 1);
9801    bool is_fcvt = false;
9802    int rmode;
9803    TCGv_i32 tcg_rmode;
9804    TCGv_ptr tcg_fpstatus;
9805
9806    switch (opcode) {
9807    case 0x3: /* USQADD / SUQADD*/
9808        if (!fp_access_check(s)) {
9809            return;
9810        }
9811        handle_2misc_satacc(s, true, u, false, size, rn, rd);
9812        return;
9813    case 0x7: /* SQABS / SQNEG */
9814        break;
9815    case 0xa: /* CMLT */
9816        if (u) {
9817            unallocated_encoding(s);
9818            return;
9819        }
9820        /* fall through */
9821    case 0x8: /* CMGT, CMGE */
9822    case 0x9: /* CMEQ, CMLE */
9823    case 0xb: /* ABS, NEG */
9824        if (size != 3) {
9825            unallocated_encoding(s);
9826            return;
9827        }
9828        break;
9829    case 0x12: /* SQXTUN */
9830        if (!u) {
9831            unallocated_encoding(s);
9832            return;
9833        }
9834        /* fall through */
9835    case 0x14: /* SQXTN, UQXTN */
9836        if (size == 3) {
9837            unallocated_encoding(s);
9838            return;
9839        }
9840        if (!fp_access_check(s)) {
9841            return;
9842        }
9843        handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
9844        return;
9845    case 0xc ... 0xf:
9846    case 0x16 ... 0x1d:
9847    case 0x1f:
9848        /* Floating point: U, size[1] and opcode indicate operation;
9849         * size[0] indicates single or double precision.
9850         */
9851        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9852        size = extract32(size, 0, 1) ? 3 : 2;
9853        switch (opcode) {
9854        case 0x2c: /* FCMGT (zero) */
9855        case 0x2d: /* FCMEQ (zero) */
9856        case 0x2e: /* FCMLT (zero) */
9857        case 0x6c: /* FCMGE (zero) */
9858        case 0x6d: /* FCMLE (zero) */
9859            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
9860            return;
9861        case 0x1d: /* SCVTF */
9862        case 0x5d: /* UCVTF */
9863        {
9864            bool is_signed = (opcode == 0x1d);
9865            if (!fp_access_check(s)) {
9866                return;
9867            }
9868            handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
9869            return;
9870        }
9871        case 0x3d: /* FRECPE */
9872        case 0x3f: /* FRECPX */
9873        case 0x7d: /* FRSQRTE */
9874            if (!fp_access_check(s)) {
9875                return;
9876            }
9877            handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
9878            return;
9879        case 0x1a: /* FCVTNS */
9880        case 0x1b: /* FCVTMS */
9881        case 0x3a: /* FCVTPS */
9882        case 0x3b: /* FCVTZS */
9883        case 0x5a: /* FCVTNU */
9884        case 0x5b: /* FCVTMU */
9885        case 0x7a: /* FCVTPU */
9886        case 0x7b: /* FCVTZU */
9887            is_fcvt = true;
9888            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9889            break;
9890        case 0x1c: /* FCVTAS */
9891        case 0x5c: /* FCVTAU */
9892            /* TIEAWAY doesn't fit in the usual rounding mode encoding */
9893            is_fcvt = true;
9894            rmode = FPROUNDING_TIEAWAY;
9895            break;
9896        case 0x56: /* FCVTXN, FCVTXN2 */
9897            if (size == 2) {
9898                unallocated_encoding(s);
9899                return;
9900            }
9901            if (!fp_access_check(s)) {
9902                return;
9903            }
9904            handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
9905            return;
9906        default:
9907            unallocated_encoding(s);
9908            return;
9909        }
9910        break;
9911    default:
9912        unallocated_encoding(s);
9913        return;
9914    }
9915
9916    if (!fp_access_check(s)) {
9917        return;
9918    }
9919
9920    if (is_fcvt) {
9921        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
9922        tcg_fpstatus = get_fpstatus_ptr(false);
9923        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
9924    } else {
9925        tcg_rmode = NULL;
9926        tcg_fpstatus = NULL;
9927    }
9928
9929    if (size == 3) {
9930        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9931        TCGv_i64 tcg_rd = tcg_temp_new_i64();
9932
9933        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
9934        write_fp_dreg(s, rd, tcg_rd);
9935        tcg_temp_free_i64(tcg_rd);
9936        tcg_temp_free_i64(tcg_rn);
9937    } else {
9938        TCGv_i32 tcg_rn = tcg_temp_new_i32();
9939        TCGv_i32 tcg_rd = tcg_temp_new_i32();
9940
9941        read_vec_element_i32(s, tcg_rn, rn, 0, size);
9942
9943        switch (opcode) {
9944        case 0x7: /* SQABS, SQNEG */
9945        {
9946            NeonGenOneOpEnvFn *genfn;
9947            static NeonGenOneOpEnvFn * const fns[3][2] = {
9948                { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
9949                { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
9950                { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
9951            };
9952            genfn = fns[size][u];
9953            genfn(tcg_rd, cpu_env, tcg_rn);
9954            break;
9955        }
9956        case 0x1a: /* FCVTNS */
9957        case 0x1b: /* FCVTMS */
9958        case 0x1c: /* FCVTAS */
9959        case 0x3a: /* FCVTPS */
9960        case 0x3b: /* FCVTZS */
9961        {
9962            TCGv_i32 tcg_shift = tcg_const_i32(0);
9963            gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9964            tcg_temp_free_i32(tcg_shift);
9965            break;
9966        }
9967        case 0x5a: /* FCVTNU */
9968        case 0x5b: /* FCVTMU */
9969        case 0x5c: /* FCVTAU */
9970        case 0x7a: /* FCVTPU */
9971        case 0x7b: /* FCVTZU */
9972        {
9973            TCGv_i32 tcg_shift = tcg_const_i32(0);
9974            gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9975            tcg_temp_free_i32(tcg_shift);
9976            break;
9977        }
9978        default:
9979            g_assert_not_reached();
9980        }
9981
9982        write_fp_sreg(s, rd, tcg_rd);
9983        tcg_temp_free_i32(tcg_rd);
9984        tcg_temp_free_i32(tcg_rn);
9985    }
9986
9987    if (is_fcvt) {
9988        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
9989        tcg_temp_free_i32(tcg_rmode);
9990        tcg_temp_free_ptr(tcg_fpstatus);
9991    }
9992}
9993
9994/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
9995static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
9996                                 int immh, int immb, int opcode, int rn, int rd)
9997{
9998    int size = 32 - clz32(immh) - 1;
9999    int immhb = immh << 3 | immb;
10000    int shift = 2 * (8 << size) - immhb;
10001    bool accumulate = false;
10002    int dsize = is_q ? 128 : 64;
10003    int esize = 8 << size;
10004    int elements = dsize/esize;
10005    MemOp memop = size | (is_u ? 0 : MO_SIGN);
10006    TCGv_i64 tcg_rn = new_tmp_a64(s);
10007    TCGv_i64 tcg_rd = new_tmp_a64(s);
10008    TCGv_i64 tcg_round;
10009    uint64_t round_const;
10010    int i;
10011
10012    if (extract32(immh, 3, 1) && !is_q) {
10013        unallocated_encoding(s);
10014        return;
10015    }
10016    tcg_debug_assert(size <= 3);
10017
10018    if (!fp_access_check(s)) {
10019        return;
10020    }
10021
10022    switch (opcode) {
10023    case 0x02: /* SSRA / USRA (accumulate) */
10024        if (is_u) {
10025            /* Shift count same as element size produces zero to add.  */
10026            if (shift == 8 << size) {
10027                goto done;
10028            }
10029            gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]);
10030        } else {
10031            /* Shift count same as element size produces all sign to add.  */
10032            if (shift == 8 << size) {
10033                shift -= 1;
10034            }
10035            gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]);
10036        }
10037        return;
10038    case 0x08: /* SRI */
10039        /* Shift count same as element size is valid but does nothing.  */
10040        if (shift == 8 << size) {
10041            goto done;
10042        }
10043        gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]);
10044        return;
10045
10046    case 0x00: /* SSHR / USHR */
10047        if (is_u) {
10048            if (shift == 8 << size) {
10049                /* Shift count the same size as element size produces zero.  */
10050                tcg_gen_gvec_dup8i(vec_full_reg_offset(s, rd),
10051                                   is_q ? 16 : 8, vec_full_reg_size(s), 0);
10052            } else {
10053                gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size);
10054            }
10055        } else {
10056            /* Shift count the same size as element size produces all sign.  */
10057            if (shift == 8 << size) {
10058                shift -= 1;
10059            }
10060            gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_sari, size);
10061        }
10062        return;
10063
10064    case 0x04: /* SRSHR / URSHR (rounding) */
10065        break;
10066    case 0x06: /* SRSRA / URSRA (accum + rounding) */
10067        accumulate = true;
10068        break;
10069    default:
10070        g_assert_not_reached();
10071    }
10072
10073    round_const = 1ULL << (shift - 1);
10074    tcg_round = tcg_const_i64(round_const);
10075
10076    for (i = 0; i < elements; i++) {
10077        read_vec_element(s, tcg_rn, rn, i, memop);
10078        if (accumulate) {
10079            read_vec_element(s, tcg_rd, rd, i, memop);
10080        }
10081
10082        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10083                                accumulate, is_u, size, shift);
10084
10085        write_vec_element(s, tcg_rd, rd, i, size);
10086    }
10087    tcg_temp_free_i64(tcg_round);
10088
10089 done:
10090    clear_vec_high(s, is_q, rd);
10091}
10092
10093/* SHL/SLI - Vector shift left */
10094static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10095                                 int immh, int immb, int opcode, int rn, int rd)
10096{
10097    int size = 32 - clz32(immh) - 1;
10098    int immhb = immh << 3 | immb;
10099    int shift = immhb - (8 << size);
10100
10101    /* Range of size is limited by decode: immh is a non-zero 4 bit field */
10102    assert(size >= 0 && size <= 3);
10103
10104    if (extract32(immh, 3, 1) && !is_q) {
10105        unallocated_encoding(s);
10106        return;
10107    }
10108
10109    if (!fp_access_check(s)) {
10110        return;
10111    }
10112
10113    if (insert) {
10114        gen_gvec_op2i(s, is_q, rd, rn, shift, &sli_op[size]);
10115    } else {
10116        gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10117    }
10118}
10119
10120/* USHLL/SHLL - Vector shift left with widening */
10121static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
10122                                 int immh, int immb, int opcode, int rn, int rd)
10123{
10124    int size = 32 - clz32(immh) - 1;
10125    int immhb = immh << 3 | immb;
10126    int shift = immhb - (8 << size);
10127    int dsize = 64;
10128    int esize = 8 << size;
10129    int elements = dsize/esize;
10130    TCGv_i64 tcg_rn = new_tmp_a64(s);
10131    TCGv_i64 tcg_rd = new_tmp_a64(s);
10132    int i;
10133
10134    if (size >= 3) {
10135        unallocated_encoding(s);
10136        return;
10137    }
10138
10139    if (!fp_access_check(s)) {
10140        return;
10141    }
10142
10143    /* For the LL variants the store is larger than the load,
10144     * so if rd == rn we would overwrite parts of our input.
10145     * So load everything right now and use shifts in the main loop.
10146     */
10147    read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
10148
10149    for (i = 0; i < elements; i++) {
10150        tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
10151        ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
10152        tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
10153        write_vec_element(s, tcg_rd, rd, i, size + 1);
10154    }
10155}
10156
10157/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10158static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10159                                 int immh, int immb, int opcode, int rn, int rd)
10160{
10161    int immhb = immh << 3 | immb;
10162    int size = 32 - clz32(immh) - 1;
10163    int dsize = 64;
10164    int esize = 8 << size;
10165    int elements = dsize/esize;
10166    int shift = (2 * esize) - immhb;
10167    bool round = extract32(opcode, 0, 1);
10168    TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10169    TCGv_i64 tcg_round;
10170    int i;
10171
10172    if (extract32(immh, 3, 1)) {
10173        unallocated_encoding(s);
10174        return;
10175    }
10176
10177    if (!fp_access_check(s)) {
10178        return;
10179    }
10180
10181    tcg_rn = tcg_temp_new_i64();
10182    tcg_rd = tcg_temp_new_i64();
10183    tcg_final = tcg_temp_new_i64();
10184    read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
10185
10186    if (round) {
10187        uint64_t round_const = 1ULL << (shift - 1);
10188        tcg_round = tcg_const_i64(round_const);
10189    } else {
10190        tcg_round = NULL;
10191    }
10192
10193    for (i = 0; i < elements; i++) {
10194        read_vec_element(s, tcg_rn, rn, i, size+1);
10195        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10196                                false, true, size+1, shift);
10197
10198        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10199    }
10200
10201    if (!is_q) {
10202        write_vec_element(s, tcg_final, rd, 0, MO_64);
10203    } else {
10204        write_vec_element(s, tcg_final, rd, 1, MO_64);
10205    }
10206    if (round) {
10207        tcg_temp_free_i64(tcg_round);
10208    }
10209    tcg_temp_free_i64(tcg_rn);
10210    tcg_temp_free_i64(tcg_rd);
10211    tcg_temp_free_i64(tcg_final);
10212
10213    clear_vec_high(s, is_q, rd);
10214}
10215
10216
10217/* AdvSIMD shift by immediate
10218 *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
10219 * +---+---+---+-------------+------+------+--------+---+------+------+
10220 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10221 * +---+---+---+-------------+------+------+--------+---+------+------+
10222 */
10223static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10224{
10225    int rd = extract32(insn, 0, 5);
10226    int rn = extract32(insn, 5, 5);
10227    int opcode = extract32(insn, 11, 5);
10228    int immb = extract32(insn, 16, 3);
10229    int immh = extract32(insn, 19, 4);
10230    bool is_u = extract32(insn, 29, 1);
10231    bool is_q = extract32(insn, 30, 1);
10232
10233    switch (opcode) {
10234    case 0x08: /* SRI */
10235        if (!is_u) {
10236            unallocated_encoding(s);
10237            return;
10238        }
10239        /* fall through */
10240    case 0x00: /* SSHR / USHR */
10241    case 0x02: /* SSRA / USRA (accumulate) */
10242    case 0x04: /* SRSHR / URSHR (rounding) */
10243    case 0x06: /* SRSRA / URSRA (accum + rounding) */
10244        handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10245        break;
10246    case 0x0a: /* SHL / SLI */
10247        handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10248        break;
10249    case 0x10: /* SHRN */
10250    case 0x11: /* RSHRN / SQRSHRUN */
10251        if (is_u) {
10252            handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10253                                   opcode, rn, rd);
10254        } else {
10255            handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10256        }
10257        break;
10258    case 0x12: /* SQSHRN / UQSHRN */
10259    case 0x13: /* SQRSHRN / UQRSHRN */
10260        handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10261                               opcode, rn, rd);
10262        break;
10263    case 0x14: /* SSHLL / USHLL */
10264        handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10265        break;
10266    case 0x1c: /* SCVTF / UCVTF */
10267        handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10268                                     opcode, rn, rd);
10269        break;
10270    case 0xc: /* SQSHLU */
10271        if (!is_u) {
10272            unallocated_encoding(s);
10273            return;
10274        }
10275        handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10276        break;
10277    case 0xe: /* SQSHL, UQSHL */
10278        handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10279        break;
10280    case 0x1f: /* FCVTZS/ FCVTZU */
10281        handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10282        return;
10283    default:
10284        unallocated_encoding(s);
10285        return;
10286    }
10287}
10288
10289/* Generate code to do a "long" addition or subtraction, ie one done in
10290 * TCGv_i64 on vector lanes twice the width specified by size.
10291 */
10292static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
10293                          TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
10294{
10295    static NeonGenTwo64OpFn * const fns[3][2] = {
10296        { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
10297        { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
10298        { tcg_gen_add_i64, tcg_gen_sub_i64 },
10299    };
10300    NeonGenTwo64OpFn *genfn;
10301    assert(size < 3);
10302
10303    genfn = fns[size][is_sub];
10304    genfn(tcg_res, tcg_op1, tcg_op2);
10305}
10306
10307static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
10308                                int opcode, int rd, int rn, int rm)
10309{
10310    /* 3-reg-different widening insns: 64 x 64 -> 128 */
10311    TCGv_i64 tcg_res[2];
10312    int pass, accop;
10313
10314    tcg_res[0] = tcg_temp_new_i64();
10315    tcg_res[1] = tcg_temp_new_i64();
10316
10317    /* Does this op do an adding accumulate, a subtracting accumulate,
10318     * or no accumulate at all?
10319     */
10320    switch (opcode) {
10321    case 5:
10322    case 8:
10323    case 9:
10324        accop = 1;
10325        break;
10326    case 10:
10327    case 11:
10328        accop = -1;
10329        break;
10330    default:
10331        accop = 0;
10332        break;
10333    }
10334
10335    if (accop != 0) {
10336        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
10337        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
10338    }
10339
10340    /* size == 2 means two 32x32->64 operations; this is worth special
10341     * casing because we can generally handle it inline.
10342     */
10343    if (size == 2) {
10344        for (pass = 0; pass < 2; pass++) {
10345            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10346            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10347            TCGv_i64 tcg_passres;
10348            MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
10349
10350            int elt = pass + is_q * 2;
10351
10352            read_vec_element(s, tcg_op1, rn, elt, memop);
10353            read_vec_element(s, tcg_op2, rm, elt, memop);
10354
10355            if (accop == 0) {
10356                tcg_passres = tcg_res[pass];
10357            } else {
10358                tcg_passres = tcg_temp_new_i64();
10359            }
10360
10361            switch (opcode) {
10362            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10363                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
10364                break;
10365            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10366                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
10367                break;
10368            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10369            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10370            {
10371                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
10372                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
10373
10374                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
10375                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
10376                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
10377                                    tcg_passres,
10378                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
10379                tcg_temp_free_i64(tcg_tmp1);
10380                tcg_temp_free_i64(tcg_tmp2);
10381                break;
10382            }
10383            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10384            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10385            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10386                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10387                break;
10388            case 9: /* SQDMLAL, SQDMLAL2 */
10389            case 11: /* SQDMLSL, SQDMLSL2 */
10390            case 13: /* SQDMULL, SQDMULL2 */
10391                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10392                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10393                                                  tcg_passres, tcg_passres);
10394                break;
10395            default:
10396                g_assert_not_reached();
10397            }
10398
10399            if (opcode == 9 || opcode == 11) {
10400                /* saturating accumulate ops */
10401                if (accop < 0) {
10402                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
10403                }
10404                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10405                                                  tcg_res[pass], tcg_passres);
10406            } else if (accop > 0) {
10407                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10408            } else if (accop < 0) {
10409                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10410            }
10411
10412            if (accop != 0) {
10413                tcg_temp_free_i64(tcg_passres);
10414            }
10415
10416            tcg_temp_free_i64(tcg_op1);
10417            tcg_temp_free_i64(tcg_op2);
10418        }
10419    } else {
10420        /* size 0 or 1, generally helper functions */
10421        for (pass = 0; pass < 2; pass++) {
10422            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10423            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10424            TCGv_i64 tcg_passres;
10425            int elt = pass + is_q * 2;
10426
10427            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
10428            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
10429
10430            if (accop == 0) {
10431                tcg_passres = tcg_res[pass];
10432            } else {
10433                tcg_passres = tcg_temp_new_i64();
10434            }
10435
10436            switch (opcode) {
10437            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10438            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10439            {
10440                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
10441                static NeonGenWidenFn * const widenfns[2][2] = {
10442                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10443                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10444                };
10445                NeonGenWidenFn *widenfn = widenfns[size][is_u];
10446
10447                widenfn(tcg_op2_64, tcg_op2);
10448                widenfn(tcg_passres, tcg_op1);
10449                gen_neon_addl(size, (opcode == 2), tcg_passres,
10450                              tcg_passres, tcg_op2_64);
10451                tcg_temp_free_i64(tcg_op2_64);
10452                break;
10453            }
10454            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10455            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10456                if (size == 0) {
10457                    if (is_u) {
10458                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
10459                    } else {
10460                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
10461                    }
10462                } else {
10463                    if (is_u) {
10464                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
10465                    } else {
10466                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
10467                    }
10468                }
10469                break;
10470            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10471            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10472            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10473                if (size == 0) {
10474                    if (is_u) {
10475                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
10476                    } else {
10477                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
10478                    }
10479                } else {
10480                    if (is_u) {
10481                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
10482                    } else {
10483                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10484                    }
10485                }
10486                break;
10487            case 9: /* SQDMLAL, SQDMLAL2 */
10488            case 11: /* SQDMLSL, SQDMLSL2 */
10489            case 13: /* SQDMULL, SQDMULL2 */
10490                assert(size == 1);
10491                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10492                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10493                                                  tcg_passres, tcg_passres);
10494                break;
10495            case 14: /* PMULL */
10496                assert(size == 0);
10497                gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
10498                break;
10499            default:
10500                g_assert_not_reached();
10501            }
10502            tcg_temp_free_i32(tcg_op1);
10503            tcg_temp_free_i32(tcg_op2);
10504
10505            if (accop != 0) {
10506                if (opcode == 9 || opcode == 11) {
10507                    /* saturating accumulate ops */
10508                    if (accop < 0) {
10509                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10510                    }
10511                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10512                                                      tcg_res[pass],
10513                                                      tcg_passres);
10514                } else {
10515                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
10516                                  tcg_res[pass], tcg_passres);
10517                }
10518                tcg_temp_free_i64(tcg_passres);
10519            }
10520        }
10521    }
10522
10523    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
10524    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
10525    tcg_temp_free_i64(tcg_res[0]);
10526    tcg_temp_free_i64(tcg_res[1]);
10527}
10528
10529static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
10530                            int opcode, int rd, int rn, int rm)
10531{
10532    TCGv_i64 tcg_res[2];
10533    int part = is_q ? 2 : 0;
10534    int pass;
10535
10536    for (pass = 0; pass < 2; pass++) {
10537        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10538        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10539        TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
10540        static NeonGenWidenFn * const widenfns[3][2] = {
10541            { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10542            { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10543            { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
10544        };
10545        NeonGenWidenFn *widenfn = widenfns[size][is_u];
10546
10547        read_vec_element(s, tcg_op1, rn, pass, MO_64);
10548        read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
10549        widenfn(tcg_op2_wide, tcg_op2);
10550        tcg_temp_free_i32(tcg_op2);
10551        tcg_res[pass] = tcg_temp_new_i64();
10552        gen_neon_addl(size, (opcode == 3),
10553                      tcg_res[pass], tcg_op1, tcg_op2_wide);
10554        tcg_temp_free_i64(tcg_op1);
10555        tcg_temp_free_i64(tcg_op2_wide);
10556    }
10557
10558    for (pass = 0; pass < 2; pass++) {
10559        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10560        tcg_temp_free_i64(tcg_res[pass]);
10561    }
10562}
10563
10564static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
10565{
10566    tcg_gen_addi_i64(in, in, 1U << 31);
10567    tcg_gen_extrh_i64_i32(res, in);
10568}
10569
10570static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
10571                                 int opcode, int rd, int rn, int rm)
10572{
10573    TCGv_i32 tcg_res[2];
10574    int part = is_q ? 2 : 0;
10575    int pass;
10576
10577    for (pass = 0; pass < 2; pass++) {
10578        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10579        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10580        TCGv_i64 tcg_wideres = tcg_temp_new_i64();
10581        static NeonGenNarrowFn * const narrowfns[3][2] = {
10582            { gen_helper_neon_narrow_high_u8,
10583              gen_helper_neon_narrow_round_high_u8 },
10584            { gen_helper_neon_narrow_high_u16,
10585              gen_helper_neon_narrow_round_high_u16 },
10586            { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
10587        };
10588        NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
10589
10590        read_vec_element(s, tcg_op1, rn, pass, MO_64);
10591        read_vec_element(s, tcg_op2, rm, pass, MO_64);
10592
10593        gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
10594
10595        tcg_temp_free_i64(tcg_op1);
10596        tcg_temp_free_i64(tcg_op2);
10597
10598        tcg_res[pass] = tcg_temp_new_i32();
10599        gennarrow(tcg_res[pass], tcg_wideres);
10600        tcg_temp_free_i64(tcg_wideres);
10601    }
10602
10603    for (pass = 0; pass < 2; pass++) {
10604        write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10605        tcg_temp_free_i32(tcg_res[pass]);
10606    }
10607    clear_vec_high(s, is_q, rd);
10608}
10609
10610static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
10611{
10612    /* PMULL of 64 x 64 -> 128 is an odd special case because it
10613     * is the only three-reg-diff instruction which produces a
10614     * 128-bit wide result from a single operation. However since
10615     * it's possible to calculate the two halves more or less
10616     * separately we just use two helper calls.
10617     */
10618    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10619    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10620    TCGv_i64 tcg_res = tcg_temp_new_i64();
10621
10622    read_vec_element(s, tcg_op1, rn, is_q, MO_64);
10623    read_vec_element(s, tcg_op2, rm, is_q, MO_64);
10624    gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
10625    write_vec_element(s, tcg_res, rd, 0, MO_64);
10626    gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
10627    write_vec_element(s, tcg_res, rd, 1, MO_64);
10628
10629    tcg_temp_free_i64(tcg_op1);
10630    tcg_temp_free_i64(tcg_op2);
10631    tcg_temp_free_i64(tcg_res);
10632}
10633
10634/* AdvSIMD three different
10635 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
10636 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10637 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
10638 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10639 */
10640static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
10641{
10642    /* Instructions in this group fall into three basic classes
10643     * (in each case with the operation working on each element in
10644     * the input vectors):
10645     * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10646     *     128 bit input)
10647     * (2) wide 64 x 128 -> 128
10648     * (3) narrowing 128 x 128 -> 64
10649     * Here we do initial decode, catch unallocated cases and
10650     * dispatch to separate functions for each class.
10651     */
10652    int is_q = extract32(insn, 30, 1);
10653    int is_u = extract32(insn, 29, 1);
10654    int size = extract32(insn, 22, 2);
10655    int opcode = extract32(insn, 12, 4);
10656    int rm = extract32(insn, 16, 5);
10657    int rn = extract32(insn, 5, 5);
10658    int rd = extract32(insn, 0, 5);
10659
10660    switch (opcode) {
10661    case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10662    case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10663        /* 64 x 128 -> 128 */
10664        if (size == 3) {
10665            unallocated_encoding(s);
10666            return;
10667        }
10668        if (!fp_access_check(s)) {
10669            return;
10670        }
10671        handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
10672        break;
10673    case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10674    case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10675        /* 128 x 128 -> 64 */
10676        if (size == 3) {
10677            unallocated_encoding(s);
10678            return;
10679        }
10680        if (!fp_access_check(s)) {
10681            return;
10682        }
10683        handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
10684        break;
10685    case 14: /* PMULL, PMULL2 */
10686        if (is_u || size == 1 || size == 2) {
10687            unallocated_encoding(s);
10688            return;
10689        }
10690        if (size == 3) {
10691            if (!dc_isar_feature(aa64_pmull, s)) {
10692                unallocated_encoding(s);
10693                return;
10694            }
10695            if (!fp_access_check(s)) {
10696                return;
10697            }
10698            handle_pmull_64(s, is_q, rd, rn, rm);
10699            return;
10700        }
10701        goto is_widening;
10702    case 9: /* SQDMLAL, SQDMLAL2 */
10703    case 11: /* SQDMLSL, SQDMLSL2 */
10704    case 13: /* SQDMULL, SQDMULL2 */
10705        if (is_u || size == 0) {
10706            unallocated_encoding(s);
10707            return;
10708        }
10709        /* fall through */
10710    case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10711    case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10712    case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10713    case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10714    case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10715    case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10716    case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10717        /* 64 x 64 -> 128 */
10718        if (size == 3) {
10719            unallocated_encoding(s);
10720            return;
10721        }
10722    is_widening:
10723        if (!fp_access_check(s)) {
10724            return;
10725        }
10726
10727        handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
10728        break;
10729    default:
10730        /* opcode 15 not allocated */
10731        unallocated_encoding(s);
10732        break;
10733    }
10734}
10735
10736/* Logic op (opcode == 3) subgroup of C3.6.16. */
10737static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10738{
10739    int rd = extract32(insn, 0, 5);
10740    int rn = extract32(insn, 5, 5);
10741    int rm = extract32(insn, 16, 5);
10742    int size = extract32(insn, 22, 2);
10743    bool is_u = extract32(insn, 29, 1);
10744    bool is_q = extract32(insn, 30, 1);
10745
10746    if (!fp_access_check(s)) {
10747        return;
10748    }
10749
10750    switch (size + 4 * is_u) {
10751    case 0: /* AND */
10752        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10753        return;
10754    case 1: /* BIC */
10755        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10756        return;
10757    case 2: /* ORR */
10758        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10759        return;
10760    case 3: /* ORN */
10761        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10762        return;
10763    case 4: /* EOR */
10764        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10765        return;
10766
10767    case 5: /* BSL bitwise select */
10768        gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
10769        return;
10770    case 6: /* BIT, bitwise insert if true */
10771        gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
10772        return;
10773    case 7: /* BIF, bitwise insert if false */
10774        gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
10775        return;
10776
10777    default:
10778        g_assert_not_reached();
10779    }
10780}
10781
10782/* Pairwise op subgroup of C3.6.16.
10783 *
10784 * This is called directly or via the handle_3same_float for float pairwise
10785 * operations where the opcode and size are calculated differently.
10786 */
10787static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
10788                                   int size, int rn, int rm, int rd)
10789{
10790    TCGv_ptr fpst;
10791    int pass;
10792
10793    /* Floating point operations need fpst */
10794    if (opcode >= 0x58) {
10795        fpst = get_fpstatus_ptr(false);
10796    } else {
10797        fpst = NULL;
10798    }
10799
10800    if (!fp_access_check(s)) {
10801        return;
10802    }
10803
10804    /* These operations work on the concatenated rm:rn, with each pair of
10805     * adjacent elements being operated on to produce an element in the result.
10806     */
10807    if (size == 3) {
10808        TCGv_i64 tcg_res[2];
10809
10810        for (pass = 0; pass < 2; pass++) {
10811            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10812            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10813            int passreg = (pass == 0) ? rn : rm;
10814
10815            read_vec_element(s, tcg_op1, passreg, 0, MO_64);
10816            read_vec_element(s, tcg_op2, passreg, 1, MO_64);
10817            tcg_res[pass] = tcg_temp_new_i64();
10818
10819            switch (opcode) {
10820            case 0x17: /* ADDP */
10821                tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
10822                break;
10823            case 0x58: /* FMAXNMP */
10824                gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10825                break;
10826            case 0x5a: /* FADDP */
10827                gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10828                break;
10829            case 0x5e: /* FMAXP */
10830                gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10831                break;
10832            case 0x78: /* FMINNMP */
10833                gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10834                break;
10835            case 0x7e: /* FMINP */
10836                gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10837                break;
10838            default:
10839                g_assert_not_reached();
10840            }
10841
10842            tcg_temp_free_i64(tcg_op1);
10843            tcg_temp_free_i64(tcg_op2);
10844        }
10845
10846        for (pass = 0; pass < 2; pass++) {
10847            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10848            tcg_temp_free_i64(tcg_res[pass]);
10849        }
10850    } else {
10851        int maxpass = is_q ? 4 : 2;
10852        TCGv_i32 tcg_res[4];
10853
10854        for (pass = 0; pass < maxpass; pass++) {
10855            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10856            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10857            NeonGenTwoOpFn *genfn = NULL;
10858            int passreg = pass < (maxpass / 2) ? rn : rm;
10859            int passelt = (is_q && (pass & 1)) ? 2 : 0;
10860
10861            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
10862            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
10863            tcg_res[pass] = tcg_temp_new_i32();
10864
10865            switch (opcode) {
10866            case 0x17: /* ADDP */
10867            {
10868                static NeonGenTwoOpFn * const fns[3] = {
10869                    gen_helper_neon_padd_u8,
10870                    gen_helper_neon_padd_u16,
10871                    tcg_gen_add_i32,
10872                };
10873                genfn = fns[size];
10874                break;
10875            }
10876            case 0x14: /* SMAXP, UMAXP */
10877            {
10878                static NeonGenTwoOpFn * const fns[3][2] = {
10879                    { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
10880                    { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
10881                    { tcg_gen_smax_i32, tcg_gen_umax_i32 },
10882                };
10883                genfn = fns[size][u];
10884                break;
10885            }
10886            case 0x15: /* SMINP, UMINP */
10887            {
10888                static NeonGenTwoOpFn * const fns[3][2] = {
10889                    { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
10890                    { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
10891                    { tcg_gen_smin_i32, tcg_gen_umin_i32 },
10892                };
10893                genfn = fns[size][u];
10894                break;
10895            }
10896            /* The FP operations are all on single floats (32 bit) */
10897            case 0x58: /* FMAXNMP */
10898                gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10899                break;
10900            case 0x5a: /* FADDP */
10901                gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10902                break;
10903            case 0x5e: /* FMAXP */
10904                gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10905                break;
10906            case 0x78: /* FMINNMP */
10907                gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10908                break;
10909            case 0x7e: /* FMINP */
10910                gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10911                break;
10912            default:
10913                g_assert_not_reached();
10914            }
10915
10916            /* FP ops called directly, otherwise call now */
10917            if (genfn) {
10918                genfn(tcg_res[pass], tcg_op1, tcg_op2);
10919            }
10920
10921            tcg_temp_free_i32(tcg_op1);
10922            tcg_temp_free_i32(tcg_op2);
10923        }
10924
10925        for (pass = 0; pass < maxpass; pass++) {
10926            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10927            tcg_temp_free_i32(tcg_res[pass]);
10928        }
10929        clear_vec_high(s, is_q, rd);
10930    }
10931
10932    if (fpst) {
10933        tcg_temp_free_ptr(fpst);
10934    }
10935}
10936
10937/* Floating point op subgroup of C3.6.16. */
10938static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
10939{
10940    /* For floating point ops, the U, size[1] and opcode bits
10941     * together indicate the operation. size[0] indicates single
10942     * or double.
10943     */
10944    int fpopcode = extract32(insn, 11, 5)
10945        | (extract32(insn, 23, 1) << 5)
10946        | (extract32(insn, 29, 1) << 6);
10947    int is_q = extract32(insn, 30, 1);
10948    int size = extract32(insn, 22, 1);
10949    int rm = extract32(insn, 16, 5);
10950    int rn = extract32(insn, 5, 5);
10951    int rd = extract32(insn, 0, 5);
10952
10953    int datasize = is_q ? 128 : 64;
10954    int esize = 32 << size;
10955    int elements = datasize / esize;
10956
10957    if (size == 1 && !is_q) {
10958        unallocated_encoding(s);
10959        return;
10960    }
10961
10962    switch (fpopcode) {
10963    case 0x58: /* FMAXNMP */
10964    case 0x5a: /* FADDP */
10965    case 0x5e: /* FMAXP */
10966    case 0x78: /* FMINNMP */
10967    case 0x7e: /* FMINP */
10968        if (size && !is_q) {
10969            unallocated_encoding(s);
10970            return;
10971        }
10972        handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
10973                               rn, rm, rd);
10974        return;
10975    case 0x1b: /* FMULX */
10976    case 0x1f: /* FRECPS */
10977    case 0x3f: /* FRSQRTS */
10978    case 0x5d: /* FACGE */
10979    case 0x7d: /* FACGT */
10980    case 0x19: /* FMLA */
10981    case 0x39: /* FMLS */
10982    case 0x18: /* FMAXNM */
10983    case 0x1a: /* FADD */
10984    case 0x1c: /* FCMEQ */
10985    case 0x1e: /* FMAX */
10986    case 0x38: /* FMINNM */
10987    case 0x3a: /* FSUB */
10988    case 0x3e: /* FMIN */
10989    case 0x5b: /* FMUL */
10990    case 0x5c: /* FCMGE */
10991    case 0x5f: /* FDIV */
10992    case 0x7a: /* FABD */
10993    case 0x7c: /* FCMGT */
10994        if (!fp_access_check(s)) {
10995            return;
10996        }
10997        handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
10998        return;
10999
11000    case 0x1d: /* FMLAL  */
11001    case 0x3d: /* FMLSL  */
11002    case 0x59: /* FMLAL2 */
11003    case 0x79: /* FMLSL2 */
11004        if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
11005            unallocated_encoding(s);
11006            return;
11007        }
11008        if (fp_access_check(s)) {
11009            int is_s = extract32(insn, 23, 1);
11010            int is_2 = extract32(insn, 29, 1);
11011            int data = (is_2 << 1) | is_s;
11012            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
11013                               vec_full_reg_offset(s, rn),
11014                               vec_full_reg_offset(s, rm), cpu_env,
11015                               is_q ? 16 : 8, vec_full_reg_size(s),
11016                               data, gen_helper_gvec_fmlal_a64);
11017        }
11018        return;
11019
11020    default:
11021        unallocated_encoding(s);
11022        return;
11023    }
11024}
11025
11026/* Integer op subgroup of C3.6.16. */
11027static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
11028{
11029    int is_q = extract32(insn, 30, 1);
11030    int u = extract32(insn, 29, 1);
11031    int size = extract32(insn, 22, 2);
11032    int opcode = extract32(insn, 11, 5);
11033    int rm = extract32(insn, 16, 5);
11034    int rn = extract32(insn, 5, 5);
11035    int rd = extract32(insn, 0, 5);
11036    int pass;
11037    TCGCond cond;
11038
11039    switch (opcode) {
11040    case 0x13: /* MUL, PMUL */
11041        if (u && size != 0) {
11042            unallocated_encoding(s);
11043            return;
11044        }
11045        /* fall through */
11046    case 0x0: /* SHADD, UHADD */
11047    case 0x2: /* SRHADD, URHADD */
11048    case 0x4: /* SHSUB, UHSUB */
11049    case 0xc: /* SMAX, UMAX */
11050    case 0xd: /* SMIN, UMIN */
11051    case 0xe: /* SABD, UABD */
11052    case 0xf: /* SABA, UABA */
11053    case 0x12: /* MLA, MLS */
11054        if (size == 3) {
11055            unallocated_encoding(s);
11056            return;
11057        }
11058        break;
11059    case 0x16: /* SQDMULH, SQRDMULH */
11060        if (size == 0 || size == 3) {
11061            unallocated_encoding(s);
11062            return;
11063        }
11064        break;
11065    default:
11066        if (size == 3 && !is_q) {
11067            unallocated_encoding(s);
11068            return;
11069        }
11070        break;
11071    }
11072
11073    if (!fp_access_check(s)) {
11074        return;
11075    }
11076
11077    switch (opcode) {
11078    case 0x01: /* SQADD, UQADD */
11079        tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
11080                       offsetof(CPUARMState, vfp.qc),
11081                       vec_full_reg_offset(s, rn),
11082                       vec_full_reg_offset(s, rm),
11083                       is_q ? 16 : 8, vec_full_reg_size(s),
11084                       (u ? uqadd_op : sqadd_op) + size);
11085        return;
11086    case 0x05: /* SQSUB, UQSUB */
11087        tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
11088                       offsetof(CPUARMState, vfp.qc),
11089                       vec_full_reg_offset(s, rn),
11090                       vec_full_reg_offset(s, rm),
11091                       is_q ? 16 : 8, vec_full_reg_size(s),
11092                       (u ? uqsub_op : sqsub_op) + size);
11093        return;
11094    case 0x0c: /* SMAX, UMAX */
11095        if (u) {
11096            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
11097        } else {
11098            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
11099        }
11100        return;
11101    case 0x0d: /* SMIN, UMIN */
11102        if (u) {
11103            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
11104        } else {
11105            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
11106        }
11107        return;
11108    case 0x10: /* ADD, SUB */
11109        if (u) {
11110            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
11111        } else {
11112            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
11113        }
11114        return;
11115    case 0x13: /* MUL, PMUL */
11116        if (!u) { /* MUL */
11117            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
11118            return;
11119        }
11120        break;
11121    case 0x12: /* MLA, MLS */
11122        if (u) {
11123            gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
11124        } else {
11125            gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]);
11126        }
11127        return;
11128    case 0x11:
11129        if (!u) { /* CMTST */
11130            gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]);
11131            return;
11132        }
11133        /* else CMEQ */
11134        cond = TCG_COND_EQ;
11135        goto do_gvec_cmp;
11136    case 0x06: /* CMGT, CMHI */
11137        cond = u ? TCG_COND_GTU : TCG_COND_GT;
11138        goto do_gvec_cmp;
11139    case 0x07: /* CMGE, CMHS */
11140        cond = u ? TCG_COND_GEU : TCG_COND_GE;
11141    do_gvec_cmp:
11142        tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
11143                         vec_full_reg_offset(s, rn),
11144                         vec_full_reg_offset(s, rm),
11145                         is_q ? 16 : 8, vec_full_reg_size(s));
11146        return;
11147    }
11148
11149    if (size == 3) {
11150        assert(is_q);
11151        for (pass = 0; pass < 2; pass++) {
11152            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11153            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11154            TCGv_i64 tcg_res = tcg_temp_new_i64();
11155
11156            read_vec_element(s, tcg_op1, rn, pass, MO_64);
11157            read_vec_element(s, tcg_op2, rm, pass, MO_64);
11158
11159            handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
11160
11161            write_vec_element(s, tcg_res, rd, pass, MO_64);
11162
11163            tcg_temp_free_i64(tcg_res);
11164            tcg_temp_free_i64(tcg_op1);
11165            tcg_temp_free_i64(tcg_op2);
11166        }
11167    } else {
11168        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11169            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11170            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11171            TCGv_i32 tcg_res = tcg_temp_new_i32();
11172            NeonGenTwoOpFn *genfn = NULL;
11173            NeonGenTwoOpEnvFn *genenvfn = NULL;
11174
11175            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
11176            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
11177
11178            switch (opcode) {
11179            case 0x0: /* SHADD, UHADD */
11180            {
11181                static NeonGenTwoOpFn * const fns[3][2] = {
11182                    { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
11183                    { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
11184                    { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
11185                };
11186                genfn = fns[size][u];
11187                break;
11188            }
11189            case 0x2: /* SRHADD, URHADD */
11190            {
11191                static NeonGenTwoOpFn * const fns[3][2] = {
11192                    { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
11193                    { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
11194                    { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
11195                };
11196                genfn = fns[size][u];
11197                break;
11198            }
11199            case 0x4: /* SHSUB, UHSUB */
11200            {
11201                static NeonGenTwoOpFn * const fns[3][2] = {
11202                    { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
11203                    { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
11204                    { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
11205                };
11206                genfn = fns[size][u];
11207                break;
11208            }
11209            case 0x8: /* SSHL, USHL */
11210            {
11211                static NeonGenTwoOpFn * const fns[3][2] = {
11212                    { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
11213                    { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
11214                    { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
11215                };
11216                genfn = fns[size][u];
11217                break;
11218            }
11219            case 0x9: /* SQSHL, UQSHL */
11220            {
11221                static NeonGenTwoOpEnvFn * const fns[3][2] = {
11222                    { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
11223                    { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
11224                    { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
11225                };
11226                genenvfn = fns[size][u];
11227                break;
11228            }
11229            case 0xa: /* SRSHL, URSHL */
11230            {
11231                static NeonGenTwoOpFn * const fns[3][2] = {
11232                    { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
11233                    { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
11234                    { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
11235                };
11236                genfn = fns[size][u];
11237                break;
11238            }
11239            case 0xb: /* SQRSHL, UQRSHL */
11240            {
11241                static NeonGenTwoOpEnvFn * const fns[3][2] = {
11242                    { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
11243                    { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
11244                    { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
11245                };
11246                genenvfn = fns[size][u];
11247                break;
11248            }
11249            case 0xe: /* SABD, UABD */
11250            case 0xf: /* SABA, UABA */
11251            {
11252                static NeonGenTwoOpFn * const fns[3][2] = {
11253                    { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
11254                    { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
11255                    { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
11256                };
11257                genfn = fns[size][u];
11258                break;
11259            }
11260            case 0x13: /* MUL, PMUL */
11261                assert(u); /* PMUL */
11262                assert(size == 0);
11263                genfn = gen_helper_neon_mul_p8;
11264                break;
11265            case 0x16: /* SQDMULH, SQRDMULH */
11266            {
11267                static NeonGenTwoOpEnvFn * const fns[2][2] = {
11268                    { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
11269                    { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
11270                };
11271                assert(size == 1 || size == 2);
11272                genenvfn = fns[size - 1][u];
11273                break;
11274            }
11275            default:
11276                g_assert_not_reached();
11277            }
11278
11279            if (genenvfn) {
11280                genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
11281            } else {
11282                genfn(tcg_res, tcg_op1, tcg_op2);
11283            }
11284
11285            if (opcode == 0xf) {
11286                /* SABA, UABA: accumulating ops */
11287                static NeonGenTwoOpFn * const fns[3] = {
11288                    gen_helper_neon_add_u8,
11289                    gen_helper_neon_add_u16,
11290                    tcg_gen_add_i32,
11291                };
11292
11293                read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
11294                fns[size](tcg_res, tcg_op1, tcg_res);
11295            }
11296
11297            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11298
11299            tcg_temp_free_i32(tcg_res);
11300            tcg_temp_free_i32(tcg_op1);
11301            tcg_temp_free_i32(tcg_op2);
11302        }
11303    }
11304    clear_vec_high(s, is_q, rd);
11305}
11306
11307/* AdvSIMD three same
11308 *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
11309 * +---+---+---+-----------+------+---+------+--------+---+------+------+
11310 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
11311 * +---+---+---+-----------+------+---+------+--------+---+------+------+
11312 */
11313static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11314{
11315    int opcode = extract32(insn, 11, 5);
11316
11317    switch (opcode) {
11318    case 0x3: /* logic ops */
11319        disas_simd_3same_logic(s, insn);
11320        break;
11321    case 0x17: /* ADDP */
11322    case 0x14: /* SMAXP, UMAXP */
11323    case 0x15: /* SMINP, UMINP */
11324    {
11325        /* Pairwise operations */
11326        int is_q = extract32(insn, 30, 1);
11327        int u = extract32(insn, 29, 1);
11328        int size = extract32(insn, 22, 2);
11329        int rm = extract32(insn, 16, 5);
11330        int rn = extract32(insn, 5, 5);
11331        int rd = extract32(insn, 0, 5);
11332        if (opcode == 0x17) {
11333            if (u || (size == 3 && !is_q)) {
11334                unallocated_encoding(s);
11335                return;
11336            }
11337        } else {
11338            if (size == 3) {
11339                unallocated_encoding(s);
11340                return;
11341            }
11342        }
11343        handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11344        break;
11345    }
11346    case 0x18 ... 0x31:
11347        /* floating point ops, sz[1] and U are part of opcode */
11348        disas_simd_3same_float(s, insn);
11349        break;
11350    default:
11351        disas_simd_3same_int(s, insn);
11352        break;
11353    }
11354}
11355
11356/*
11357 * Advanced SIMD three same (ARMv8.2 FP16 variants)
11358 *
11359 *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
11360 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11361 * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
11362 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11363 *
11364 * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11365 * (register), FACGE, FABD, FCMGT (register) and FACGT.
11366 *
11367 */
11368static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11369{
11370    int opcode, fpopcode;
11371    int is_q, u, a, rm, rn, rd;
11372    int datasize, elements;
11373    int pass;
11374    TCGv_ptr fpst;
11375    bool pairwise = false;
11376
11377    if (!dc_isar_feature(aa64_fp16, s)) {
11378        unallocated_encoding(s);
11379        return;
11380    }
11381
11382    if (!fp_access_check(s)) {
11383        return;
11384    }
11385
11386    /* For these floating point ops, the U, a and opcode bits
11387     * together indicate the operation.
11388     */
11389    opcode = extract32(insn, 11, 3);
11390    u = extract32(insn, 29, 1);
11391    a = extract32(insn, 23, 1);
11392    is_q = extract32(insn, 30, 1);
11393    rm = extract32(insn, 16, 5);
11394    rn = extract32(insn, 5, 5);
11395    rd = extract32(insn, 0, 5);
11396
11397    fpopcode = opcode | (a << 3) |  (u << 4);
11398    datasize = is_q ? 128 : 64;
11399    elements = datasize / 16;
11400
11401    switch (fpopcode) {
11402    case 0x10: /* FMAXNMP */
11403    case 0x12: /* FADDP */
11404    case 0x16: /* FMAXP */
11405    case 0x18: /* FMINNMP */
11406    case 0x1e: /* FMINP */
11407        pairwise = true;
11408        break;
11409    }
11410
11411    fpst = get_fpstatus_ptr(true);
11412
11413    if (pairwise) {
11414        int maxpass = is_q ? 8 : 4;
11415        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11416        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11417        TCGv_i32 tcg_res[8];
11418
11419        for (pass = 0; pass < maxpass; pass++) {
11420            int passreg = pass < (maxpass / 2) ? rn : rm;
11421            int passelt = (pass << 1) & (maxpass - 1);
11422
11423            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
11424            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
11425            tcg_res[pass] = tcg_temp_new_i32();
11426
11427            switch (fpopcode) {
11428            case 0x10: /* FMAXNMP */
11429                gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
11430                                           fpst);
11431                break;
11432            case 0x12: /* FADDP */
11433                gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11434                break;
11435            case 0x16: /* FMAXP */
11436                gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11437                break;
11438            case 0x18: /* FMINNMP */
11439                gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
11440                                           fpst);
11441                break;
11442            case 0x1e: /* FMINP */
11443                gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11444                break;
11445            default:
11446                g_assert_not_reached();
11447            }
11448        }
11449
11450        for (pass = 0; pass < maxpass; pass++) {
11451            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
11452            tcg_temp_free_i32(tcg_res[pass]);
11453        }
11454
11455        tcg_temp_free_i32(tcg_op1);
11456        tcg_temp_free_i32(tcg_op2);
11457
11458    } else {
11459        for (pass = 0; pass < elements; pass++) {
11460            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11461            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11462            TCGv_i32 tcg_res = tcg_temp_new_i32();
11463
11464            read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
11465            read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
11466
11467            switch (fpopcode) {
11468            case 0x0: /* FMAXNM */
11469                gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11470                break;
11471            case 0x1: /* FMLA */
11472                read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11473                gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11474                                           fpst);
11475                break;
11476            case 0x2: /* FADD */
11477                gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
11478                break;
11479            case 0x3: /* FMULX */
11480                gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
11481                break;
11482            case 0x4: /* FCMEQ */
11483                gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11484                break;
11485            case 0x6: /* FMAX */
11486                gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
11487                break;
11488            case 0x7: /* FRECPS */
11489                gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11490                break;
11491            case 0x8: /* FMINNM */
11492                gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11493                break;
11494            case 0x9: /* FMLS */
11495                /* As usual for ARM, separate negation for fused multiply-add */
11496                tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
11497                read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11498                gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11499                                           fpst);
11500                break;
11501            case 0xa: /* FSUB */
11502                gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11503                break;
11504            case 0xe: /* FMIN */
11505                gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
11506                break;
11507            case 0xf: /* FRSQRTS */
11508                gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11509                break;
11510            case 0x13: /* FMUL */
11511                gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
11512                break;
11513            case 0x14: /* FCMGE */
11514                gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11515                break;
11516            case 0x15: /* FACGE */
11517                gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11518                break;
11519            case 0x17: /* FDIV */
11520                gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
11521                break;
11522            case 0x1a: /* FABD */
11523                gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11524                tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
11525                break;
11526            case 0x1c: /* FCMGT */
11527                gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11528                break;
11529            case 0x1d: /* FACGT */
11530                gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11531                break;
11532            default:
11533                fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n",
11534                        __func__, insn, fpopcode, s->pc_curr);
11535                g_assert_not_reached();
11536            }
11537
11538            write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11539            tcg_temp_free_i32(tcg_res);
11540            tcg_temp_free_i32(tcg_op1);
11541            tcg_temp_free_i32(tcg_op2);
11542        }
11543    }
11544
11545    tcg_temp_free_ptr(fpst);
11546
11547    clear_vec_high(s, is_q, rd);
11548}
11549
11550/* AdvSIMD three same extra
11551 *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
11552 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11553 * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
11554 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11555 */
11556static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
11557{
11558    int rd = extract32(insn, 0, 5);
11559    int rn = extract32(insn, 5, 5);
11560    int opcode = extract32(insn, 11, 4);
11561    int rm = extract32(insn, 16, 5);
11562    int size = extract32(insn, 22, 2);
11563    bool u = extract32(insn, 29, 1);
11564    bool is_q = extract32(insn, 30, 1);
11565    bool feature;
11566    int rot;
11567
11568    switch (u * 16 + opcode) {
11569    case 0x10: /* SQRDMLAH (vector) */
11570    case 0x11: /* SQRDMLSH (vector) */
11571        if (size != 1 && size != 2) {
11572            unallocated_encoding(s);
11573            return;
11574        }
11575        feature = dc_isar_feature(aa64_rdm, s);
11576        break;
11577    case 0x02: /* SDOT (vector) */
11578    case 0x12: /* UDOT (vector) */
11579        if (size != MO_32) {
11580            unallocated_encoding(s);
11581            return;
11582        }
11583        feature = dc_isar_feature(aa64_dp, s);
11584        break;
11585    case 0x18: /* FCMLA, #0 */
11586    case 0x19: /* FCMLA, #90 */
11587    case 0x1a: /* FCMLA, #180 */
11588    case 0x1b: /* FCMLA, #270 */
11589    case 0x1c: /* FCADD, #90 */
11590    case 0x1e: /* FCADD, #270 */
11591        if (size == 0
11592            || (size == 1 && !dc_isar_feature(aa64_fp16, s))
11593            || (size == 3 && !is_q)) {
11594            unallocated_encoding(s);
11595            return;
11596        }
11597        feature = dc_isar_feature(aa64_fcma, s);
11598        break;
11599    default:
11600        unallocated_encoding(s);
11601        return;
11602    }
11603    if (!feature) {
11604        unallocated_encoding(s);
11605        return;
11606    }
11607    if (!fp_access_check(s)) {
11608        return;
11609    }
11610
11611    switch (opcode) {
11612    case 0x0: /* SQRDMLAH (vector) */
11613        switch (size) {
11614        case 1:
11615            gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s16);
11616            break;
11617        case 2:
11618            gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s32);
11619            break;
11620        default:
11621            g_assert_not_reached();
11622        }
11623        return;
11624
11625    case 0x1: /* SQRDMLSH (vector) */
11626        switch (size) {
11627        case 1:
11628            gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s16);
11629            break;
11630        case 2:
11631            gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s32);
11632            break;
11633        default:
11634            g_assert_not_reached();
11635        }
11636        return;
11637
11638    case 0x2: /* SDOT / UDOT */
11639        gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0,
11640                         u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
11641        return;
11642
11643    case 0x8: /* FCMLA, #0 */
11644    case 0x9: /* FCMLA, #90 */
11645    case 0xa: /* FCMLA, #180 */
11646    case 0xb: /* FCMLA, #270 */
11647        rot = extract32(opcode, 0, 2);
11648        switch (size) {
11649        case 1:
11650            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, true, rot,
11651                              gen_helper_gvec_fcmlah);
11652            break;
11653        case 2:
11654            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
11655                              gen_helper_gvec_fcmlas);
11656            break;
11657        case 3:
11658            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
11659                              gen_helper_gvec_fcmlad);
11660            break;
11661        default:
11662            g_assert_not_reached();
11663        }
11664        return;
11665
11666    case 0xc: /* FCADD, #90 */
11667    case 0xe: /* FCADD, #270 */
11668        rot = extract32(opcode, 1, 1);
11669        switch (size) {
11670        case 1:
11671            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11672                              gen_helper_gvec_fcaddh);
11673            break;
11674        case 2:
11675            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11676                              gen_helper_gvec_fcadds);
11677            break;
11678        case 3:
11679            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11680                              gen_helper_gvec_fcaddd);
11681            break;
11682        default:
11683            g_assert_not_reached();
11684        }
11685        return;
11686
11687    default:
11688        g_assert_not_reached();
11689    }
11690}
11691
11692static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11693                                  int size, int rn, int rd)
11694{
11695    /* Handle 2-reg-misc ops which are widening (so each size element
11696     * in the source becomes a 2*size element in the destination.
11697     * The only instruction like this is FCVTL.
11698     */
11699    int pass;
11700
11701    if (size == 3) {
11702        /* 32 -> 64 bit fp conversion */
11703        TCGv_i64 tcg_res[2];
11704        int srcelt = is_q ? 2 : 0;
11705
11706        for (pass = 0; pass < 2; pass++) {
11707            TCGv_i32 tcg_op = tcg_temp_new_i32();
11708            tcg_res[pass] = tcg_temp_new_i64();
11709
11710            read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11711            gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
11712            tcg_temp_free_i32(tcg_op);
11713        }
11714        for (pass = 0; pass < 2; pass++) {
11715            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11716            tcg_temp_free_i64(tcg_res[pass]);
11717        }
11718    } else {
11719        /* 16 -> 32 bit fp conversion */
11720        int srcelt = is_q ? 4 : 0;
11721        TCGv_i32 tcg_res[4];
11722        TCGv_ptr fpst = get_fpstatus_ptr(false);
11723        TCGv_i32 ahp = get_ahp_flag();
11724
11725        for (pass = 0; pass < 4; pass++) {
11726            tcg_res[pass] = tcg_temp_new_i32();
11727
11728            read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11729            gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11730                                           fpst, ahp);
11731        }
11732        for (pass = 0; pass < 4; pass++) {
11733            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11734            tcg_temp_free_i32(tcg_res[pass]);
11735        }
11736
11737        tcg_temp_free_ptr(fpst);
11738        tcg_temp_free_i32(ahp);
11739    }
11740}
11741
11742static void handle_rev(DisasContext *s, int opcode, bool u,
11743                       bool is_q, int size, int rn, int rd)
11744{
11745    int op = (opcode << 1) | u;
11746    int opsz = op + size;
11747    int grp_size = 3 - opsz;
11748    int dsize = is_q ? 128 : 64;
11749    int i;
11750
11751    if (opsz >= 3) {
11752        unallocated_encoding(s);
11753        return;
11754    }
11755
11756    if (!fp_access_check(s)) {
11757        return;
11758    }
11759
11760    if (size == 0) {
11761        /* Special case bytes, use bswap op on each group of elements */
11762        int groups = dsize / (8 << grp_size);
11763
11764        for (i = 0; i < groups; i++) {
11765            TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11766
11767            read_vec_element(s, tcg_tmp, rn, i, grp_size);
11768            switch (grp_size) {
11769            case MO_16:
11770                tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
11771                break;
11772            case MO_32:
11773                tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
11774                break;
11775            case MO_64:
11776                tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11777                break;
11778            default:
11779                g_assert_not_reached();
11780            }
11781            write_vec_element(s, tcg_tmp, rd, i, grp_size);
11782            tcg_temp_free_i64(tcg_tmp);
11783        }
11784        clear_vec_high(s, is_q, rd);
11785    } else {
11786        int revmask = (1 << grp_size) - 1;
11787        int esize = 8 << size;
11788        int elements = dsize / esize;
11789        TCGv_i64 tcg_rn = tcg_temp_new_i64();
11790        TCGv_i64 tcg_rd = tcg_const_i64(0);
11791        TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
11792
11793        for (i = 0; i < elements; i++) {
11794            int e_rev = (i & 0xf) ^ revmask;
11795            int off = e_rev * esize;
11796            read_vec_element(s, tcg_rn, rn, i, size);
11797            if (off >= 64) {
11798                tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
11799                                    tcg_rn, off - 64, esize);
11800            } else {
11801                tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
11802            }
11803        }
11804        write_vec_element(s, tcg_rd, rd, 0, MO_64);
11805        write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
11806
11807        tcg_temp_free_i64(tcg_rd_hi);
11808        tcg_temp_free_i64(tcg_rd);
11809        tcg_temp_free_i64(tcg_rn);
11810    }
11811}
11812
11813static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11814                                  bool is_q, int size, int rn, int rd)
11815{
11816    /* Implement the pairwise operations from 2-misc:
11817     * SADDLP, UADDLP, SADALP, UADALP.
11818     * These all add pairs of elements in the input to produce a
11819     * double-width result element in the output (possibly accumulating).
11820     */
11821    bool accum = (opcode == 0x6);
11822    int maxpass = is_q ? 2 : 1;
11823    int pass;
11824    TCGv_i64 tcg_res[2];
11825
11826    if (size == 2) {
11827        /* 32 + 32 -> 64 op */
11828        MemOp memop = size + (u ? 0 : MO_SIGN);
11829
11830        for (pass = 0; pass < maxpass; pass++) {
11831            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11832            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11833
11834            tcg_res[pass] = tcg_temp_new_i64();
11835
11836            read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11837            read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11838            tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11839            if (accum) {
11840                read_vec_element(s, tcg_op1, rd, pass, MO_64);
11841                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11842            }
11843
11844            tcg_temp_free_i64(tcg_op1);
11845            tcg_temp_free_i64(tcg_op2);
11846        }
11847    } else {
11848        for (pass = 0; pass < maxpass; pass++) {
11849            TCGv_i64 tcg_op = tcg_temp_new_i64();
11850            NeonGenOneOpFn *genfn;
11851            static NeonGenOneOpFn * const fns[2][2] = {
11852                { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
11853                { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
11854            };
11855
11856            genfn = fns[size][u];
11857
11858            tcg_res[pass] = tcg_temp_new_i64();
11859
11860            read_vec_element(s, tcg_op, rn, pass, MO_64);
11861            genfn(tcg_res[pass], tcg_op);
11862
11863            if (accum) {
11864                read_vec_element(s, tcg_op, rd, pass, MO_64);
11865                if (size == 0) {
11866                    gen_helper_neon_addl_u16(tcg_res[pass],
11867                                             tcg_res[pass], tcg_op);
11868                } else {
11869                    gen_helper_neon_addl_u32(tcg_res[pass],
11870                                             tcg_res[pass], tcg_op);
11871                }
11872            }
11873            tcg_temp_free_i64(tcg_op);
11874        }
11875    }
11876    if (!is_q) {
11877        tcg_res[1] = tcg_const_i64(0);
11878    }
11879    for (pass = 0; pass < 2; pass++) {
11880        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11881        tcg_temp_free_i64(tcg_res[pass]);
11882    }
11883}
11884
11885static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11886{
11887    /* Implement SHLL and SHLL2 */
11888    int pass;
11889    int part = is_q ? 2 : 0;
11890    TCGv_i64 tcg_res[2];
11891
11892    for (pass = 0; pass < 2; pass++) {
11893        static NeonGenWidenFn * const widenfns[3] = {
11894            gen_helper_neon_widen_u8,
11895            gen_helper_neon_widen_u16,
11896            tcg_gen_extu_i32_i64,
11897        };
11898        NeonGenWidenFn *widenfn = widenfns[size];
11899        TCGv_i32 tcg_op = tcg_temp_new_i32();
11900
11901        read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11902        tcg_res[pass] = tcg_temp_new_i64();
11903        widenfn(tcg_res[pass], tcg_op);
11904        tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11905
11906        tcg_temp_free_i32(tcg_op);
11907    }
11908
11909    for (pass = 0; pass < 2; pass++) {
11910        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11911        tcg_temp_free_i64(tcg_res[pass]);
11912    }
11913}
11914
11915/* AdvSIMD two reg misc
11916 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
11917 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11918 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11919 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11920 */
11921static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11922{
11923    int size = extract32(insn, 22, 2);
11924    int opcode = extract32(insn, 12, 5);
11925    bool u = extract32(insn, 29, 1);
11926    bool is_q = extract32(insn, 30, 1);
11927    int rn = extract32(insn, 5, 5);
11928    int rd = extract32(insn, 0, 5);
11929    bool need_fpstatus = false;
11930    bool need_rmode = false;
11931    int rmode = -1;
11932    TCGv_i32 tcg_rmode;
11933    TCGv_ptr tcg_fpstatus;
11934
11935    switch (opcode) {
11936    case 0x0: /* REV64, REV32 */
11937    case 0x1: /* REV16 */
11938        handle_rev(s, opcode, u, is_q, size, rn, rd);
11939        return;
11940    case 0x5: /* CNT, NOT, RBIT */
11941        if (u && size == 0) {
11942            /* NOT */
11943            break;
11944        } else if (u && size == 1) {
11945            /* RBIT */
11946            break;
11947        } else if (!u && size == 0) {
11948            /* CNT */
11949            break;
11950        }
11951        unallocated_encoding(s);
11952        return;
11953    case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
11954    case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
11955        if (size == 3) {
11956            unallocated_encoding(s);
11957            return;
11958        }
11959        if (!fp_access_check(s)) {
11960            return;
11961        }
11962
11963        handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
11964        return;
11965    case 0x4: /* CLS, CLZ */
11966        if (size == 3) {
11967            unallocated_encoding(s);
11968            return;
11969        }
11970        break;
11971    case 0x2: /* SADDLP, UADDLP */
11972    case 0x6: /* SADALP, UADALP */
11973        if (size == 3) {
11974            unallocated_encoding(s);
11975            return;
11976        }
11977        if (!fp_access_check(s)) {
11978            return;
11979        }
11980        handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
11981        return;
11982    case 0x13: /* SHLL, SHLL2 */
11983        if (u == 0 || size == 3) {
11984            unallocated_encoding(s);
11985            return;
11986        }
11987        if (!fp_access_check(s)) {
11988            return;
11989        }
11990        handle_shll(s, is_q, size, rn, rd);
11991        return;
11992    case 0xa: /* CMLT */
11993        if (u == 1) {
11994            unallocated_encoding(s);
11995            return;
11996        }
11997        /* fall through */
11998    case 0x8: /* CMGT, CMGE */
11999    case 0x9: /* CMEQ, CMLE */
12000    case 0xb: /* ABS, NEG */
12001        if (size == 3 && !is_q) {
12002            unallocated_encoding(s);
12003            return;
12004        }
12005        break;
12006    case 0x3: /* SUQADD, USQADD */
12007        if (size == 3 && !is_q) {
12008            unallocated_encoding(s);
12009            return;
12010        }
12011        if (!fp_access_check(s)) {
12012            return;
12013        }
12014        handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
12015        return;
12016    case 0x7: /* SQABS, SQNEG */
12017        if (size == 3 && !is_q) {
12018            unallocated_encoding(s);
12019            return;
12020        }
12021        break;
12022    case 0xc ... 0xf:
12023    case 0x16 ... 0x1f:
12024    {
12025        /* Floating point: U, size[1] and opcode indicate operation;
12026         * size[0] indicates single or double precision.
12027         */
12028        int is_double = extract32(size, 0, 1);
12029        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
12030        size = is_double ? 3 : 2;
12031        switch (opcode) {
12032        case 0x2f: /* FABS */
12033        case 0x6f: /* FNEG */
12034            if (size == 3 && !is_q) {
12035                unallocated_encoding(s);
12036                return;
12037            }
12038            break;
12039        case 0x1d: /* SCVTF */
12040        case 0x5d: /* UCVTF */
12041        {
12042            bool is_signed = (opcode == 0x1d) ? true : false;
12043            int elements = is_double ? 2 : is_q ? 4 : 2;
12044            if (is_double && !is_q) {
12045                unallocated_encoding(s);
12046                return;
12047            }
12048            if (!fp_access_check(s)) {
12049                return;
12050            }
12051            handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
12052            return;
12053        }
12054        case 0x2c: /* FCMGT (zero) */
12055        case 0x2d: /* FCMEQ (zero) */
12056        case 0x2e: /* FCMLT (zero) */
12057        case 0x6c: /* FCMGE (zero) */
12058        case 0x6d: /* FCMLE (zero) */
12059            if (size == 3 && !is_q) {
12060                unallocated_encoding(s);
12061                return;
12062            }
12063            handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
12064            return;
12065        case 0x7f: /* FSQRT */
12066            if (size == 3 && !is_q) {
12067                unallocated_encoding(s);
12068                return;
12069            }
12070            break;
12071        case 0x1a: /* FCVTNS */
12072        case 0x1b: /* FCVTMS */
12073        case 0x3a: /* FCVTPS */
12074        case 0x3b: /* FCVTZS */
12075        case 0x5a: /* FCVTNU */
12076        case 0x5b: /* FCVTMU */
12077        case 0x7a: /* FCVTPU */
12078        case 0x7b: /* FCVTZU */
12079            need_fpstatus = true;
12080            need_rmode = true;
12081            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12082            if (size == 3 && !is_q) {
12083                unallocated_encoding(s);
12084                return;
12085            }
12086            break;
12087        case 0x5c: /* FCVTAU */
12088        case 0x1c: /* FCVTAS */
12089            need_fpstatus = true;
12090            need_rmode = true;
12091            rmode = FPROUNDING_TIEAWAY;
12092            if (size == 3 && !is_q) {
12093                unallocated_encoding(s);
12094                return;
12095            }
12096            break;
12097        case 0x3c: /* URECPE */
12098            if (size == 3) {
12099                unallocated_encoding(s);
12100                return;
12101            }
12102            /* fall through */
12103        case 0x3d: /* FRECPE */
12104        case 0x7d: /* FRSQRTE */
12105            if (size == 3 && !is_q) {
12106                unallocated_encoding(s);
12107                return;
12108            }
12109            if (!fp_access_check(s)) {
12110                return;
12111            }
12112            handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
12113            return;
12114        case 0x56: /* FCVTXN, FCVTXN2 */
12115            if (size == 2) {
12116                unallocated_encoding(s);
12117                return;
12118            }
12119            /* fall through */
12120        case 0x16: /* FCVTN, FCVTN2 */
12121            /* handle_2misc_narrow does a 2*size -> size operation, but these
12122             * instructions encode the source size rather than dest size.
12123             */
12124            if (!fp_access_check(s)) {
12125                return;
12126            }
12127            handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12128            return;
12129        case 0x17: /* FCVTL, FCVTL2 */
12130            if (!fp_access_check(s)) {
12131                return;
12132            }
12133            handle_2misc_widening(s, opcode, is_q, size, rn, rd);
12134            return;
12135        case 0x18: /* FRINTN */
12136        case 0x19: /* FRINTM */
12137        case 0x38: /* FRINTP */
12138        case 0x39: /* FRINTZ */
12139            need_rmode = true;
12140            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12141            /* fall through */
12142        case 0x59: /* FRINTX */
12143        case 0x79: /* FRINTI */
12144            need_fpstatus = true;
12145            if (size == 3 && !is_q) {
12146                unallocated_encoding(s);
12147                return;
12148            }
12149            break;
12150        case 0x58: /* FRINTA */
12151            need_rmode = true;
12152            rmode = FPROUNDING_TIEAWAY;
12153            need_fpstatus = true;
12154            if (size == 3 && !is_q) {
12155                unallocated_encoding(s);
12156                return;
12157            }
12158            break;
12159        case 0x7c: /* URSQRTE */
12160            if (size == 3) {
12161                unallocated_encoding(s);
12162                return;
12163            }
12164            need_fpstatus = true;
12165            break;
12166        case 0x1e: /* FRINT32Z */
12167        case 0x1f: /* FRINT64Z */
12168            need_rmode = true;
12169            rmode = FPROUNDING_ZERO;
12170            /* fall through */
12171        case 0x5e: /* FRINT32X */
12172        case 0x5f: /* FRINT64X */
12173            need_fpstatus = true;
12174            if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12175                unallocated_encoding(s);
12176                return;
12177            }
12178            break;
12179        default:
12180            unallocated_encoding(s);
12181            return;
12182        }
12183        break;
12184    }
12185    default:
12186        unallocated_encoding(s);
12187        return;
12188    }
12189
12190    if (!fp_access_check(s)) {
12191        return;
12192    }
12193
12194    if (need_fpstatus || need_rmode) {
12195        tcg_fpstatus = get_fpstatus_ptr(false);
12196    } else {
12197        tcg_fpstatus = NULL;
12198    }
12199    if (need_rmode) {
12200        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
12201        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12202    } else {
12203        tcg_rmode = NULL;
12204    }
12205
12206    switch (opcode) {
12207    case 0x5:
12208        if (u && size == 0) { /* NOT */
12209            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12210            return;
12211        }
12212        break;
12213    case 0xb:
12214        if (u) { /* ABS, NEG */
12215            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12216        } else {
12217            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12218        }
12219        return;
12220    }
12221
12222    if (size == 3) {
12223        /* All 64-bit element operations can be shared with scalar 2misc */
12224        int pass;
12225
12226        /* Coverity claims (size == 3 && !is_q) has been eliminated
12227         * from all paths leading to here.
12228         */
12229        tcg_debug_assert(is_q);
12230        for (pass = 0; pass < 2; pass++) {
12231            TCGv_i64 tcg_op = tcg_temp_new_i64();
12232            TCGv_i64 tcg_res = tcg_temp_new_i64();
12233
12234            read_vec_element(s, tcg_op, rn, pass, MO_64);
12235
12236            handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12237                            tcg_rmode, tcg_fpstatus);
12238
12239            write_vec_element(s, tcg_res, rd, pass, MO_64);
12240
12241            tcg_temp_free_i64(tcg_res);
12242            tcg_temp_free_i64(tcg_op);
12243        }
12244    } else {
12245        int pass;
12246
12247        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12248            TCGv_i32 tcg_op = tcg_temp_new_i32();
12249            TCGv_i32 tcg_res = tcg_temp_new_i32();
12250            TCGCond cond;
12251
12252            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12253
12254            if (size == 2) {
12255                /* Special cases for 32 bit elements */
12256                switch (opcode) {
12257                case 0xa: /* CMLT */
12258                    /* 32 bit integer comparison against zero, result is
12259                     * test ? (2^32 - 1) : 0. We implement via setcond(test)
12260                     * and inverting.
12261                     */
12262                    cond = TCG_COND_LT;
12263                do_cmop:
12264                    tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
12265                    tcg_gen_neg_i32(tcg_res, tcg_res);
12266                    break;
12267                case 0x8: /* CMGT, CMGE */
12268                    cond = u ? TCG_COND_GE : TCG_COND_GT;
12269                    goto do_cmop;
12270                case 0x9: /* CMEQ, CMLE */
12271                    cond = u ? TCG_COND_LE : TCG_COND_EQ;
12272                    goto do_cmop;
12273                case 0x4: /* CLS */
12274                    if (u) {
12275                        tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12276                    } else {
12277                        tcg_gen_clrsb_i32(tcg_res, tcg_op);
12278                    }
12279                    break;
12280                case 0x7: /* SQABS, SQNEG */
12281                    if (u) {
12282                        gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
12283                    } else {
12284                        gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
12285                    }
12286                    break;
12287                case 0x2f: /* FABS */
12288                    gen_helper_vfp_abss(tcg_res, tcg_op);
12289                    break;
12290                case 0x6f: /* FNEG */
12291                    gen_helper_vfp_negs(tcg_res, tcg_op);
12292                    break;
12293                case 0x7f: /* FSQRT */
12294                    gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
12295                    break;
12296                case 0x1a: /* FCVTNS */
12297                case 0x1b: /* FCVTMS */
12298                case 0x1c: /* FCVTAS */
12299                case 0x3a: /* FCVTPS */
12300                case 0x3b: /* FCVTZS */
12301                {
12302                    TCGv_i32 tcg_shift = tcg_const_i32(0);
12303                    gen_helper_vfp_tosls(tcg_res, tcg_op,
12304                                         tcg_shift, tcg_fpstatus);
12305                    tcg_temp_free_i32(tcg_shift);
12306                    break;
12307                }
12308                case 0x5a: /* FCVTNU */
12309                case 0x5b: /* FCVTMU */
12310                case 0x5c: /* FCVTAU */
12311                case 0x7a: /* FCVTPU */
12312                case 0x7b: /* FCVTZU */
12313                {
12314                    TCGv_i32 tcg_shift = tcg_const_i32(0);
12315                    gen_helper_vfp_touls(tcg_res, tcg_op,
12316                                         tcg_shift, tcg_fpstatus);
12317                    tcg_temp_free_i32(tcg_shift);
12318                    break;
12319                }
12320                case 0x18: /* FRINTN */
12321                case 0x19: /* FRINTM */
12322                case 0x38: /* FRINTP */
12323                case 0x39: /* FRINTZ */
12324                case 0x58: /* FRINTA */
12325                case 0x79: /* FRINTI */
12326                    gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12327                    break;
12328                case 0x59: /* FRINTX */
12329                    gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12330                    break;
12331                case 0x7c: /* URSQRTE */
12332                    gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
12333                    break;
12334                case 0x1e: /* FRINT32Z */
12335                case 0x5e: /* FRINT32X */
12336                    gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12337                    break;
12338                case 0x1f: /* FRINT64Z */
12339                case 0x5f: /* FRINT64X */
12340                    gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12341                    break;
12342                default:
12343                    g_assert_not_reached();
12344                }
12345            } else {
12346                /* Use helpers for 8 and 16 bit elements */
12347                switch (opcode) {
12348                case 0x5: /* CNT, RBIT */
12349                    /* For these two insns size is part of the opcode specifier
12350                     * (handled earlier); they always operate on byte elements.
12351                     */
12352                    if (u) {
12353                        gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12354                    } else {
12355                        gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12356                    }
12357                    break;
12358                case 0x7: /* SQABS, SQNEG */
12359                {
12360                    NeonGenOneOpEnvFn *genfn;
12361                    static NeonGenOneOpEnvFn * const fns[2][2] = {
12362                        { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12363                        { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12364                    };
12365                    genfn = fns[size][u];
12366                    genfn(tcg_res, cpu_env, tcg_op);
12367                    break;
12368                }
12369                case 0x8: /* CMGT, CMGE */
12370                case 0x9: /* CMEQ, CMLE */
12371                case 0xa: /* CMLT */
12372                {
12373                    static NeonGenTwoOpFn * const fns[3][2] = {
12374                        { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
12375                        { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
12376                        { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
12377                    };
12378                    NeonGenTwoOpFn *genfn;
12379                    int comp;
12380                    bool reverse;
12381                    TCGv_i32 tcg_zero = tcg_const_i32(0);
12382
12383                    /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
12384                    comp = (opcode - 0x8) * 2 + u;
12385                    /* ...but LE, LT are implemented as reverse GE, GT */
12386                    reverse = (comp > 2);
12387                    if (reverse) {
12388                        comp = 4 - comp;
12389                    }
12390                    genfn = fns[comp][size];
12391                    if (reverse) {
12392                        genfn(tcg_res, tcg_zero, tcg_op);
12393                    } else {
12394                        genfn(tcg_res, tcg_op, tcg_zero);
12395                    }
12396                    tcg_temp_free_i32(tcg_zero);
12397                    break;
12398                }
12399                case 0x4: /* CLS, CLZ */
12400                    if (u) {
12401                        if (size == 0) {
12402                            gen_helper_neon_clz_u8(tcg_res, tcg_op);
12403                        } else {
12404                            gen_helper_neon_clz_u16(tcg_res, tcg_op);
12405                        }
12406                    } else {
12407                        if (size == 0) {
12408                            gen_helper_neon_cls_s8(tcg_res, tcg_op);
12409                        } else {
12410                            gen_helper_neon_cls_s16(tcg_res, tcg_op);
12411                        }
12412                    }
12413                    break;
12414                default:
12415                    g_assert_not_reached();
12416                }
12417            }
12418
12419            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12420
12421            tcg_temp_free_i32(tcg_res);
12422            tcg_temp_free_i32(tcg_op);
12423        }
12424    }
12425    clear_vec_high(s, is_q, rd);
12426
12427    if (need_rmode) {
12428        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12429        tcg_temp_free_i32(tcg_rmode);
12430    }
12431    if (need_fpstatus) {
12432        tcg_temp_free_ptr(tcg_fpstatus);
12433    }
12434}
12435
12436/* AdvSIMD [scalar] two register miscellaneous (FP16)
12437 *
12438 *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
12439 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12440 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12441 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12442 *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12443 *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12444 *
12445 * This actually covers two groups where scalar access is governed by
12446 * bit 28. A bunch of the instructions (float to integral) only exist
12447 * in the vector form and are un-allocated for the scalar decode. Also
12448 * in the scalar decode Q is always 1.
12449 */
12450static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12451{
12452    int fpop, opcode, a, u;
12453    int rn, rd;
12454    bool is_q;
12455    bool is_scalar;
12456    bool only_in_vector = false;
12457
12458    int pass;
12459    TCGv_i32 tcg_rmode = NULL;
12460    TCGv_ptr tcg_fpstatus = NULL;
12461    bool need_rmode = false;
12462    bool need_fpst = true;
12463    int rmode;
12464
12465    if (!dc_isar_feature(aa64_fp16, s)) {
12466        unallocated_encoding(s);
12467        return;
12468    }
12469
12470    rd = extract32(insn, 0, 5);
12471    rn = extract32(insn, 5, 5);
12472
12473    a = extract32(insn, 23, 1);
12474    u = extract32(insn, 29, 1);
12475    is_scalar = extract32(insn, 28, 1);
12476    is_q = extract32(insn, 30, 1);
12477
12478    opcode = extract32(insn, 12, 5);
12479    fpop = deposit32(opcode, 5, 1, a);
12480    fpop = deposit32(fpop, 6, 1, u);
12481
12482    rd = extract32(insn, 0, 5);
12483    rn = extract32(insn, 5, 5);
12484
12485    switch (fpop) {
12486    case 0x1d: /* SCVTF */
12487    case 0x5d: /* UCVTF */
12488    {
12489        int elements;
12490
12491        if (is_scalar) {
12492            elements = 1;
12493        } else {
12494            elements = (is_q ? 8 : 4);
12495        }
12496
12497        if (!fp_access_check(s)) {
12498            return;
12499        }
12500        handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
12501        return;
12502    }
12503    break;
12504    case 0x2c: /* FCMGT (zero) */
12505    case 0x2d: /* FCMEQ (zero) */
12506    case 0x2e: /* FCMLT (zero) */
12507    case 0x6c: /* FCMGE (zero) */
12508    case 0x6d: /* FCMLE (zero) */
12509        handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
12510        return;
12511    case 0x3d: /* FRECPE */
12512    case 0x3f: /* FRECPX */
12513        break;
12514    case 0x18: /* FRINTN */
12515        need_rmode = true;
12516        only_in_vector = true;
12517        rmode = FPROUNDING_TIEEVEN;
12518        break;
12519    case 0x19: /* FRINTM */
12520        need_rmode = true;
12521        only_in_vector = true;
12522        rmode = FPROUNDING_NEGINF;
12523        break;
12524    case 0x38: /* FRINTP */
12525        need_rmode = true;
12526        only_in_vector = true;
12527        rmode = FPROUNDING_POSINF;
12528        break;
12529    case 0x39: /* FRINTZ */
12530        need_rmode = true;
12531        only_in_vector = true;
12532        rmode = FPROUNDING_ZERO;
12533        break;
12534    case 0x58: /* FRINTA */
12535        need_rmode = true;
12536        only_in_vector = true;
12537        rmode = FPROUNDING_TIEAWAY;
12538        break;
12539    case 0x59: /* FRINTX */
12540    case 0x79: /* FRINTI */
12541        only_in_vector = true;
12542        /* current rounding mode */
12543        break;
12544    case 0x1a: /* FCVTNS */
12545        need_rmode = true;
12546        rmode = FPROUNDING_TIEEVEN;
12547        break;
12548    case 0x1b: /* FCVTMS */
12549        need_rmode = true;
12550        rmode = FPROUNDING_NEGINF;
12551        break;
12552    case 0x1c: /* FCVTAS */
12553        need_rmode = true;
12554        rmode = FPROUNDING_TIEAWAY;
12555        break;
12556    case 0x3a: /* FCVTPS */
12557        need_rmode = true;
12558        rmode = FPROUNDING_POSINF;
12559        break;
12560    case 0x3b: /* FCVTZS */
12561        need_rmode = true;
12562        rmode = FPROUNDING_ZERO;
12563        break;
12564    case 0x5a: /* FCVTNU */
12565        need_rmode = true;
12566        rmode = FPROUNDING_TIEEVEN;
12567        break;
12568    case 0x5b: /* FCVTMU */
12569        need_rmode = true;
12570        rmode = FPROUNDING_NEGINF;
12571        break;
12572    case 0x5c: /* FCVTAU */
12573        need_rmode = true;
12574        rmode = FPROUNDING_TIEAWAY;
12575        break;
12576    case 0x7a: /* FCVTPU */
12577        need_rmode = true;
12578        rmode = FPROUNDING_POSINF;
12579        break;
12580    case 0x7b: /* FCVTZU */
12581        need_rmode = true;
12582        rmode = FPROUNDING_ZERO;
12583        break;
12584    case 0x2f: /* FABS */
12585    case 0x6f: /* FNEG */
12586        need_fpst = false;
12587        break;
12588    case 0x7d: /* FRSQRTE */
12589    case 0x7f: /* FSQRT (vector) */
12590        break;
12591    default:
12592        fprintf(stderr, "%s: insn %#04x fpop %#2x\n", __func__, insn, fpop);
12593        g_assert_not_reached();
12594    }
12595
12596
12597    /* Check additional constraints for the scalar encoding */
12598    if (is_scalar) {
12599        if (!is_q) {
12600            unallocated_encoding(s);
12601            return;
12602        }
12603        /* FRINTxx is only in the vector form */
12604        if (only_in_vector) {
12605            unallocated_encoding(s);
12606            return;
12607        }
12608    }
12609
12610    if (!fp_access_check(s)) {
12611        return;
12612    }
12613
12614    if (need_rmode || need_fpst) {
12615        tcg_fpstatus = get_fpstatus_ptr(true);
12616    }
12617
12618    if (need_rmode) {
12619        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
12620        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12621    }
12622
12623    if (is_scalar) {
12624        TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12625        TCGv_i32 tcg_res = tcg_temp_new_i32();
12626
12627        switch (fpop) {
12628        case 0x1a: /* FCVTNS */
12629        case 0x1b: /* FCVTMS */
12630        case 0x1c: /* FCVTAS */
12631        case 0x3a: /* FCVTPS */
12632        case 0x3b: /* FCVTZS */
12633            gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12634            break;
12635        case 0x3d: /* FRECPE */
12636            gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12637            break;
12638        case 0x3f: /* FRECPX */
12639            gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12640            break;
12641        case 0x5a: /* FCVTNU */
12642        case 0x5b: /* FCVTMU */
12643        case 0x5c: /* FCVTAU */
12644        case 0x7a: /* FCVTPU */
12645        case 0x7b: /* FCVTZU */
12646            gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12647            break;
12648        case 0x6f: /* FNEG */
12649            tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12650            break;
12651        case 0x7d: /* FRSQRTE */
12652            gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12653            break;
12654        default:
12655            g_assert_not_reached();
12656        }
12657
12658        /* limit any sign extension going on */
12659        tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12660        write_fp_sreg(s, rd, tcg_res);
12661
12662        tcg_temp_free_i32(tcg_res);
12663        tcg_temp_free_i32(tcg_op);
12664    } else {
12665        for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
12666            TCGv_i32 tcg_op = tcg_temp_new_i32();
12667            TCGv_i32 tcg_res = tcg_temp_new_i32();
12668
12669            read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12670
12671            switch (fpop) {
12672            case 0x1a: /* FCVTNS */
12673            case 0x1b: /* FCVTMS */
12674            case 0x1c: /* FCVTAS */
12675            case 0x3a: /* FCVTPS */
12676            case 0x3b: /* FCVTZS */
12677                gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12678                break;
12679            case 0x3d: /* FRECPE */
12680                gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12681                break;
12682            case 0x5a: /* FCVTNU */
12683            case 0x5b: /* FCVTMU */
12684            case 0x5c: /* FCVTAU */
12685            case 0x7a: /* FCVTPU */
12686            case 0x7b: /* FCVTZU */
12687                gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12688                break;
12689            case 0x18: /* FRINTN */
12690            case 0x19: /* FRINTM */
12691            case 0x38: /* FRINTP */
12692            case 0x39: /* FRINTZ */
12693            case 0x58: /* FRINTA */
12694            case 0x79: /* FRINTI */
12695                gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12696                break;
12697            case 0x59: /* FRINTX */
12698                gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12699                break;
12700            case 0x2f: /* FABS */
12701                tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12702                break;
12703            case 0x6f: /* FNEG */
12704                tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12705                break;
12706            case 0x7d: /* FRSQRTE */
12707                gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12708                break;
12709            case 0x7f: /* FSQRT */
12710                gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12711                break;
12712            default:
12713                g_assert_not_reached();
12714            }
12715
12716            write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12717
12718            tcg_temp_free_i32(tcg_res);
12719            tcg_temp_free_i32(tcg_op);
12720        }
12721
12722        clear_vec_high(s, is_q, rd);
12723    }
12724
12725    if (tcg_rmode) {
12726        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12727        tcg_temp_free_i32(tcg_rmode);
12728    }
12729
12730    if (tcg_fpstatus) {
12731        tcg_temp_free_ptr(tcg_fpstatus);
12732    }
12733}
12734
12735/* AdvSIMD scalar x indexed element
12736 *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12737 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12738 * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12739 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12740 * AdvSIMD vector x indexed element
12741 *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12742 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12743 * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12744 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12745 */
12746static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12747{
12748    /* This encoding has two kinds of instruction:
12749     *  normal, where we perform elt x idxelt => elt for each
12750     *     element in the vector
12751     *  long, where we perform elt x idxelt and generate a result of
12752     *     double the width of the input element
12753     * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
12754     */
12755    bool is_scalar = extract32(insn, 28, 1);
12756    bool is_q = extract32(insn, 30, 1);
12757    bool u = extract32(insn, 29, 1);
12758    int size = extract32(insn, 22, 2);
12759    int l = extract32(insn, 21, 1);
12760    int m = extract32(insn, 20, 1);
12761    /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12762    int rm = extract32(insn, 16, 4);
12763    int opcode = extract32(insn, 12, 4);
12764    int h = extract32(insn, 11, 1);
12765    int rn = extract32(insn, 5, 5);
12766    int rd = extract32(insn, 0, 5);
12767    bool is_long = false;
12768    int is_fp = 0;
12769    bool is_fp16 = false;
12770    int index;
12771    TCGv_ptr fpst;
12772
12773    switch (16 * u + opcode) {
12774    case 0x08: /* MUL */
12775    case 0x10: /* MLA */
12776    case 0x14: /* MLS */
12777        if (is_scalar) {
12778            unallocated_encoding(s);
12779            return;
12780        }
12781        break;
12782    case 0x02: /* SMLAL, SMLAL2 */
12783    case 0x12: /* UMLAL, UMLAL2 */
12784    case 0x06: /* SMLSL, SMLSL2 */
12785    case 0x16: /* UMLSL, UMLSL2 */
12786    case 0x0a: /* SMULL, SMULL2 */
12787    case 0x1a: /* UMULL, UMULL2 */
12788        if (is_scalar) {
12789            unallocated_encoding(s);
12790            return;
12791        }
12792        is_long = true;
12793        break;
12794    case 0x03: /* SQDMLAL, SQDMLAL2 */
12795    case 0x07: /* SQDMLSL, SQDMLSL2 */
12796    case 0x0b: /* SQDMULL, SQDMULL2 */
12797        is_long = true;
12798        break;
12799    case 0x0c: /* SQDMULH */
12800    case 0x0d: /* SQRDMULH */
12801        break;
12802    case 0x01: /* FMLA */
12803    case 0x05: /* FMLS */
12804    case 0x09: /* FMUL */
12805    case 0x19: /* FMULX */
12806        is_fp = 1;
12807        break;
12808    case 0x1d: /* SQRDMLAH */
12809    case 0x1f: /* SQRDMLSH */
12810        if (!dc_isar_feature(aa64_rdm, s)) {
12811            unallocated_encoding(s);
12812            return;
12813        }
12814        break;
12815    case 0x0e: /* SDOT */
12816    case 0x1e: /* UDOT */
12817        if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
12818            unallocated_encoding(s);
12819            return;
12820        }
12821        break;
12822    case 0x11: /* FCMLA #0 */
12823    case 0x13: /* FCMLA #90 */
12824    case 0x15: /* FCMLA #180 */
12825    case 0x17: /* FCMLA #270 */
12826        if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
12827            unallocated_encoding(s);
12828            return;
12829        }
12830        is_fp = 2;
12831        break;
12832    case 0x00: /* FMLAL */
12833    case 0x04: /* FMLSL */
12834    case 0x18: /* FMLAL2 */
12835    case 0x1c: /* FMLSL2 */
12836        if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
12837            unallocated_encoding(s);
12838            return;
12839        }
12840        size = MO_16;
12841        /* is_fp, but we pass cpu_env not fp_status.  */
12842        break;
12843    default:
12844        unallocated_encoding(s);
12845        return;
12846    }
12847
12848    switch (is_fp) {
12849    case 1: /* normal fp */
12850        /* convert insn encoded size to MemOp size */
12851        switch (size) {
12852        case 0: /* half-precision */
12853            size = MO_16;
12854            is_fp16 = true;
12855            break;
12856        case MO_32: /* single precision */
12857        case MO_64: /* double precision */
12858            break;
12859        default:
12860            unallocated_encoding(s);
12861            return;
12862        }
12863        break;
12864
12865    case 2: /* complex fp */
12866        /* Each indexable element is a complex pair.  */
12867        size += 1;
12868        switch (size) {
12869        case MO_32:
12870            if (h && !is_q) {
12871                unallocated_encoding(s);
12872                return;
12873            }
12874            is_fp16 = true;
12875            break;
12876        case MO_64:
12877            break;
12878        default:
12879            unallocated_encoding(s);
12880            return;
12881        }
12882        break;
12883
12884    default: /* integer */
12885        switch (size) {
12886        case MO_8:
12887        case MO_64:
12888            unallocated_encoding(s);
12889            return;
12890        }
12891        break;
12892    }
12893    if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
12894        unallocated_encoding(s);
12895        return;
12896    }
12897
12898    /* Given MemOp size, adjust register and indexing.  */
12899    switch (size) {
12900    case MO_16:
12901        index = h << 2 | l << 1 | m;
12902        break;
12903    case MO_32:
12904        index = h << 1 | l;
12905        rm |= m << 4;
12906        break;
12907    case MO_64:
12908        if (l || !is_q) {
12909            unallocated_encoding(s);
12910            return;
12911        }
12912        index = h;
12913        rm |= m << 4;
12914        break;
12915    default:
12916        g_assert_not_reached();
12917    }
12918
12919    if (!fp_access_check(s)) {
12920        return;
12921    }
12922
12923    if (is_fp) {
12924        fpst = get_fpstatus_ptr(is_fp16);
12925    } else {
12926        fpst = NULL;
12927    }
12928
12929    switch (16 * u + opcode) {
12930    case 0x0e: /* SDOT */
12931    case 0x1e: /* UDOT */
12932        gen_gvec_op3_ool(s, is_q, rd, rn, rm, index,
12933                         u ? gen_helper_gvec_udot_idx_b
12934                         : gen_helper_gvec_sdot_idx_b);
12935        return;
12936    case 0x11: /* FCMLA #0 */
12937    case 0x13: /* FCMLA #90 */
12938    case 0x15: /* FCMLA #180 */
12939    case 0x17: /* FCMLA #270 */
12940        {
12941            int rot = extract32(insn, 13, 2);
12942            int data = (index << 2) | rot;
12943            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12944                               vec_full_reg_offset(s, rn),
12945                               vec_full_reg_offset(s, rm), fpst,
12946                               is_q ? 16 : 8, vec_full_reg_size(s), data,
12947                               size == MO_64
12948                               ? gen_helper_gvec_fcmlas_idx
12949                               : gen_helper_gvec_fcmlah_idx);
12950            tcg_temp_free_ptr(fpst);
12951        }
12952        return;
12953
12954    case 0x00: /* FMLAL */
12955    case 0x04: /* FMLSL */
12956    case 0x18: /* FMLAL2 */
12957    case 0x1c: /* FMLSL2 */
12958        {
12959            int is_s = extract32(opcode, 2, 1);
12960            int is_2 = u;
12961            int data = (index << 2) | (is_2 << 1) | is_s;
12962            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12963                               vec_full_reg_offset(s, rn),
12964                               vec_full_reg_offset(s, rm), cpu_env,
12965                               is_q ? 16 : 8, vec_full_reg_size(s),
12966                               data, gen_helper_gvec_fmlal_idx_a64);
12967        }
12968        return;
12969    }
12970
12971    if (size == 3) {
12972        TCGv_i64 tcg_idx = tcg_temp_new_i64();
12973        int pass;
12974
12975        assert(is_fp && is_q && !is_long);
12976
12977        read_vec_element(s, tcg_idx, rm, index, MO_64);
12978
12979        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12980            TCGv_i64 tcg_op = tcg_temp_new_i64();
12981            TCGv_i64 tcg_res = tcg_temp_new_i64();
12982
12983            read_vec_element(s, tcg_op, rn, pass, MO_64);
12984
12985            switch (16 * u + opcode) {
12986            case 0x05: /* FMLS */
12987                /* As usual for ARM, separate negation for fused multiply-add */
12988                gen_helper_vfp_negd(tcg_op, tcg_op);
12989                /* fall through */
12990            case 0x01: /* FMLA */
12991                read_vec_element(s, tcg_res, rd, pass, MO_64);
12992                gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
12993                break;
12994            case 0x09: /* FMUL */
12995                gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
12996                break;
12997            case 0x19: /* FMULX */
12998                gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
12999                break;
13000            default:
13001                g_assert_not_reached();
13002            }
13003
13004            write_vec_element(s, tcg_res, rd, pass, MO_64);
13005            tcg_temp_free_i64(tcg_op);
13006            tcg_temp_free_i64(tcg_res);
13007        }
13008
13009        tcg_temp_free_i64(tcg_idx);
13010        clear_vec_high(s, !is_scalar, rd);
13011    } else if (!is_long) {
13012        /* 32 bit floating point, or 16 or 32 bit integer.
13013         * For the 16 bit scalar case we use the usual Neon helpers and
13014         * rely on the fact that 0 op 0 == 0 with no side effects.
13015         */
13016        TCGv_i32 tcg_idx = tcg_temp_new_i32();
13017        int pass, maxpasses;
13018
13019        if (is_scalar) {
13020            maxpasses = 1;
13021        } else {
13022            maxpasses = is_q ? 4 : 2;
13023        }
13024
13025        read_vec_element_i32(s, tcg_idx, rm, index, size);
13026
13027        if (size == 1 && !is_scalar) {
13028            /* The simplest way to handle the 16x16 indexed ops is to duplicate
13029             * the index into both halves of the 32 bit tcg_idx and then use
13030             * the usual Neon helpers.
13031             */
13032            tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13033        }
13034
13035        for (pass = 0; pass < maxpasses; pass++) {
13036            TCGv_i32 tcg_op = tcg_temp_new_i32();
13037            TCGv_i32 tcg_res = tcg_temp_new_i32();
13038
13039            read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
13040
13041            switch (16 * u + opcode) {
13042            case 0x08: /* MUL */
13043            case 0x10: /* MLA */
13044            case 0x14: /* MLS */
13045            {
13046                static NeonGenTwoOpFn * const fns[2][2] = {
13047                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
13048                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
13049                };
13050                NeonGenTwoOpFn *genfn;
13051                bool is_sub = opcode == 0x4;
13052
13053                if (size == 1) {
13054                    gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
13055                } else {
13056                    tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
13057                }
13058                if (opcode == 0x8) {
13059                    break;
13060                }
13061                read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
13062                genfn = fns[size - 1][is_sub];
13063                genfn(tcg_res, tcg_op, tcg_res);
13064                break;
13065            }
13066            case 0x05: /* FMLS */
13067            case 0x01: /* FMLA */
13068                read_vec_element_i32(s, tcg_res, rd, pass,
13069                                     is_scalar ? size : MO_32);
13070                switch (size) {
13071                case 1:
13072                    if (opcode == 0x5) {
13073                        /* As usual for ARM, separate negation for fused
13074                         * multiply-add */
13075                        tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
13076                    }
13077                    if (is_scalar) {
13078                        gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
13079                                                   tcg_res, fpst);
13080                    } else {
13081                        gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
13082                                                    tcg_res, fpst);
13083                    }
13084                    break;
13085                case 2:
13086                    if (opcode == 0x5) {
13087                        /* As usual for ARM, separate negation for
13088                         * fused multiply-add */
13089                        tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
13090                    }
13091                    gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
13092                                           tcg_res, fpst);
13093                    break;
13094                default:
13095                    g_assert_not_reached();
13096                }
13097                break;
13098            case 0x09: /* FMUL */
13099                switch (size) {
13100                case 1:
13101                    if (is_scalar) {
13102                        gen_helper_advsimd_mulh(tcg_res, tcg_op,
13103                                                tcg_idx, fpst);
13104                    } else {
13105                        gen_helper_advsimd_mul2h(tcg_res, tcg_op,
13106                                                 tcg_idx, fpst);
13107                    }
13108                    break;
13109                case 2:
13110                    gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
13111                    break;
13112                default:
13113                    g_assert_not_reached();
13114                }
13115                break;
13116            case 0x19: /* FMULX */
13117                switch (size) {
13118                case 1:
13119                    if (is_scalar) {
13120                        gen_helper_advsimd_mulxh(tcg_res, tcg_op,
13121                                                 tcg_idx, fpst);
13122                    } else {
13123                        gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
13124                                                  tcg_idx, fpst);
13125                    }
13126                    break;
13127                case 2:
13128                    gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
13129                    break;
13130                default:
13131                    g_assert_not_reached();
13132                }
13133                break;
13134            case 0x0c: /* SQDMULH */
13135                if (size == 1) {
13136                    gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
13137                                               tcg_op, tcg_idx);
13138                } else {
13139                    gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
13140                                               tcg_op, tcg_idx);
13141                }
13142                break;
13143            case 0x0d: /* SQRDMULH */
13144                if (size == 1) {
13145                    gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
13146                                                tcg_op, tcg_idx);
13147                } else {
13148                    gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
13149                                                tcg_op, tcg_idx);
13150                }
13151                break;
13152            case 0x1d: /* SQRDMLAH */
13153                read_vec_element_i32(s, tcg_res, rd, pass,
13154                                     is_scalar ? size : MO_32);
13155                if (size == 1) {
13156                    gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
13157                                                tcg_op, tcg_idx, tcg_res);
13158                } else {
13159                    gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
13160                                                tcg_op, tcg_idx, tcg_res);
13161                }
13162                break;
13163            case 0x1f: /* SQRDMLSH */
13164                read_vec_element_i32(s, tcg_res, rd, pass,
13165                                     is_scalar ? size : MO_32);
13166                if (size == 1) {
13167                    gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
13168                                                tcg_op, tcg_idx, tcg_res);
13169                } else {
13170                    gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
13171                                                tcg_op, tcg_idx, tcg_res);
13172                }
13173                break;
13174            default:
13175                g_assert_not_reached();
13176            }
13177
13178            if (is_scalar) {
13179                write_fp_sreg(s, rd, tcg_res);
13180            } else {
13181                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13182            }
13183
13184            tcg_temp_free_i32(tcg_op);
13185            tcg_temp_free_i32(tcg_res);
13186        }
13187
13188        tcg_temp_free_i32(tcg_idx);
13189        clear_vec_high(s, is_q, rd);
13190    } else {
13191        /* long ops: 16x16->32 or 32x32->64 */
13192        TCGv_i64 tcg_res[2];
13193        int pass;
13194        bool satop = extract32(opcode, 0, 1);
13195        MemOp memop = MO_32;
13196
13197        if (satop || !u) {
13198            memop |= MO_SIGN;
13199        }
13200
13201        if (size == 2) {
13202            TCGv_i64 tcg_idx = tcg_temp_new_i64();
13203
13204            read_vec_element(s, tcg_idx, rm, index, memop);
13205
13206            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13207                TCGv_i64 tcg_op = tcg_temp_new_i64();
13208                TCGv_i64 tcg_passres;
13209                int passelt;
13210
13211                if (is_scalar) {
13212                    passelt = 0;
13213                } else {
13214                    passelt = pass + (is_q * 2);
13215                }
13216
13217                read_vec_element(s, tcg_op, rn, passelt, memop);
13218
13219                tcg_res[pass] = tcg_temp_new_i64();
13220
13221                if (opcode == 0xa || opcode == 0xb) {
13222                    /* Non-accumulating ops */
13223                    tcg_passres = tcg_res[pass];
13224                } else {
13225                    tcg_passres = tcg_temp_new_i64();
13226                }
13227
13228                tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13229                tcg_temp_free_i64(tcg_op);
13230
13231                if (satop) {
13232                    /* saturating, doubling */
13233                    gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
13234                                                      tcg_passres, tcg_passres);
13235                }
13236
13237                if (opcode == 0xa || opcode == 0xb) {
13238                    continue;
13239                }
13240
13241                /* Accumulating op: handle accumulate step */
13242                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13243
13244                switch (opcode) {
13245                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13246                    tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13247                    break;
13248                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13249                    tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13250                    break;
13251                case 0x7: /* SQDMLSL, SQDMLSL2 */
13252                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
13253                    /* fall through */
13254                case 0x3: /* SQDMLAL, SQDMLAL2 */
13255                    gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
13256                                                      tcg_res[pass],
13257                                                      tcg_passres);
13258                    break;
13259                default:
13260                    g_assert_not_reached();
13261                }
13262                tcg_temp_free_i64(tcg_passres);
13263            }
13264            tcg_temp_free_i64(tcg_idx);
13265
13266            clear_vec_high(s, !is_scalar, rd);
13267        } else {
13268            TCGv_i32 tcg_idx = tcg_temp_new_i32();
13269
13270            assert(size == 1);
13271            read_vec_element_i32(s, tcg_idx, rm, index, size);
13272
13273            if (!is_scalar) {
13274                /* The simplest way to handle the 16x16 indexed ops is to
13275                 * duplicate the index into both halves of the 32 bit tcg_idx
13276                 * and then use the usual Neon helpers.
13277                 */
13278                tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13279            }
13280
13281            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13282                TCGv_i32 tcg_op = tcg_temp_new_i32();
13283                TCGv_i64 tcg_passres;
13284
13285                if (is_scalar) {
13286                    read_vec_element_i32(s, tcg_op, rn, pass, size);
13287                } else {
13288                    read_vec_element_i32(s, tcg_op, rn,
13289                                         pass + (is_q * 2), MO_32);
13290                }
13291
13292                tcg_res[pass] = tcg_temp_new_i64();
13293
13294                if (opcode == 0xa || opcode == 0xb) {
13295                    /* Non-accumulating ops */
13296                    tcg_passres = tcg_res[pass];
13297                } else {
13298                    tcg_passres = tcg_temp_new_i64();
13299                }
13300
13301                if (memop & MO_SIGN) {
13302                    gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13303                } else {
13304                    gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13305                }
13306                if (satop) {
13307                    gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
13308                                                      tcg_passres, tcg_passres);
13309                }
13310                tcg_temp_free_i32(tcg_op);
13311
13312                if (opcode == 0xa || opcode == 0xb) {
13313                    continue;
13314                }
13315
13316                /* Accumulating op: handle accumulate step */
13317                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13318
13319                switch (opcode) {
13320                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13321                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13322                                             tcg_passres);
13323                    break;
13324                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13325                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13326                                             tcg_passres);
13327                    break;
13328                case 0x7: /* SQDMLSL, SQDMLSL2 */
13329                    gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13330                    /* fall through */
13331                case 0x3: /* SQDMLAL, SQDMLAL2 */
13332                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
13333                                                      tcg_res[pass],
13334                                                      tcg_passres);
13335                    break;
13336                default:
13337                    g_assert_not_reached();
13338                }
13339                tcg_temp_free_i64(tcg_passres);
13340            }
13341            tcg_temp_free_i32(tcg_idx);
13342
13343            if (is_scalar) {
13344                tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13345            }
13346        }
13347
13348        if (is_scalar) {
13349            tcg_res[1] = tcg_const_i64(0);
13350        }
13351
13352        for (pass = 0; pass < 2; pass++) {
13353            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13354            tcg_temp_free_i64(tcg_res[pass]);
13355        }
13356    }
13357
13358    if (fpst) {
13359        tcg_temp_free_ptr(fpst);
13360    }
13361}
13362
13363/* Crypto AES
13364 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13365 * +-----------------+------+-----------+--------+-----+------+------+
13366 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13367 * +-----------------+------+-----------+--------+-----+------+------+
13368 */
13369static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13370{
13371    int size = extract32(insn, 22, 2);
13372    int opcode = extract32(insn, 12, 5);
13373    int rn = extract32(insn, 5, 5);
13374    int rd = extract32(insn, 0, 5);
13375    int decrypt;
13376    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13377    TCGv_i32 tcg_decrypt;
13378    CryptoThreeOpIntFn *genfn;
13379
13380    if (!dc_isar_feature(aa64_aes, s) || size != 0) {
13381        unallocated_encoding(s);
13382        return;
13383    }
13384
13385    switch (opcode) {
13386    case 0x4: /* AESE */
13387        decrypt = 0;
13388        genfn = gen_helper_crypto_aese;
13389        break;
13390    case 0x6: /* AESMC */
13391        decrypt = 0;
13392        genfn = gen_helper_crypto_aesmc;
13393        break;
13394    case 0x5: /* AESD */
13395        decrypt = 1;
13396        genfn = gen_helper_crypto_aese;
13397        break;
13398    case 0x7: /* AESIMC */
13399        decrypt = 1;
13400        genfn = gen_helper_crypto_aesmc;
13401        break;
13402    default:
13403        unallocated_encoding(s);
13404        return;
13405    }
13406
13407    if (!fp_access_check(s)) {
13408        return;
13409    }
13410
13411    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13412    tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13413    tcg_decrypt = tcg_const_i32(decrypt);
13414
13415    genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt);
13416
13417    tcg_temp_free_ptr(tcg_rd_ptr);
13418    tcg_temp_free_ptr(tcg_rn_ptr);
13419    tcg_temp_free_i32(tcg_decrypt);
13420}
13421
13422/* Crypto three-reg SHA
13423 *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
13424 * +-----------------+------+---+------+---+--------+-----+------+------+
13425 * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
13426 * +-----------------+------+---+------+---+--------+-----+------+------+
13427 */
13428static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13429{
13430    int size = extract32(insn, 22, 2);
13431    int opcode = extract32(insn, 12, 3);
13432    int rm = extract32(insn, 16, 5);
13433    int rn = extract32(insn, 5, 5);
13434    int rd = extract32(insn, 0, 5);
13435    CryptoThreeOpFn *genfn;
13436    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13437    bool feature;
13438
13439    if (size != 0) {
13440        unallocated_encoding(s);
13441        return;
13442    }
13443
13444    switch (opcode) {
13445    case 0: /* SHA1C */
13446    case 1: /* SHA1P */
13447    case 2: /* SHA1M */
13448    case 3: /* SHA1SU0 */
13449        genfn = NULL;
13450        feature = dc_isar_feature(aa64_sha1, s);
13451        break;
13452    case 4: /* SHA256H */
13453        genfn = gen_helper_crypto_sha256h;
13454        feature = dc_isar_feature(aa64_sha256, s);
13455        break;
13456    case 5: /* SHA256H2 */
13457        genfn = gen_helper_crypto_sha256h2;
13458        feature = dc_isar_feature(aa64_sha256, s);
13459        break;
13460    case 6: /* SHA256SU1 */
13461        genfn = gen_helper_crypto_sha256su1;
13462        feature = dc_isar_feature(aa64_sha256, s);
13463        break;
13464    default:
13465        unallocated_encoding(s);
13466        return;
13467    }
13468
13469    if (!feature) {
13470        unallocated_encoding(s);
13471        return;
13472    }
13473
13474    if (!fp_access_check(s)) {
13475        return;
13476    }
13477
13478    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13479    tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13480    tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13481
13482    if (genfn) {
13483        genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
13484    } else {
13485        TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
13486
13487        gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr,
13488                                    tcg_rm_ptr, tcg_opcode);
13489        tcg_temp_free_i32(tcg_opcode);
13490    }
13491
13492    tcg_temp_free_ptr(tcg_rd_ptr);
13493    tcg_temp_free_ptr(tcg_rn_ptr);
13494    tcg_temp_free_ptr(tcg_rm_ptr);
13495}
13496
13497/* Crypto two-reg SHA
13498 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13499 * +-----------------+------+-----------+--------+-----+------+------+
13500 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13501 * +-----------------+------+-----------+--------+-----+------+------+
13502 */
13503static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13504{
13505    int size = extract32(insn, 22, 2);
13506    int opcode = extract32(insn, 12, 5);
13507    int rn = extract32(insn, 5, 5);
13508    int rd = extract32(insn, 0, 5);
13509    CryptoTwoOpFn *genfn;
13510    bool feature;
13511    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13512
13513    if (size != 0) {
13514        unallocated_encoding(s);
13515        return;
13516    }
13517
13518    switch (opcode) {
13519    case 0: /* SHA1H */
13520        feature = dc_isar_feature(aa64_sha1, s);
13521        genfn = gen_helper_crypto_sha1h;
13522        break;
13523    case 1: /* SHA1SU1 */
13524        feature = dc_isar_feature(aa64_sha1, s);
13525        genfn = gen_helper_crypto_sha1su1;
13526        break;
13527    case 2: /* SHA256SU0 */
13528        feature = dc_isar_feature(aa64_sha256, s);
13529        genfn = gen_helper_crypto_sha256su0;
13530        break;
13531    default:
13532        unallocated_encoding(s);
13533        return;
13534    }
13535
13536    if (!feature) {
13537        unallocated_encoding(s);
13538        return;
13539    }
13540
13541    if (!fp_access_check(s)) {
13542        return;
13543    }
13544
13545    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13546    tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13547
13548    genfn(tcg_rd_ptr, tcg_rn_ptr);
13549
13550    tcg_temp_free_ptr(tcg_rd_ptr);
13551    tcg_temp_free_ptr(tcg_rn_ptr);
13552}
13553
13554/* Crypto three-reg SHA512
13555 *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
13556 * +-----------------------+------+---+---+-----+--------+------+------+
13557 * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
13558 * +-----------------------+------+---+---+-----+--------+------+------+
13559 */
13560static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13561{
13562    int opcode = extract32(insn, 10, 2);
13563    int o =  extract32(insn, 14, 1);
13564    int rm = extract32(insn, 16, 5);
13565    int rn = extract32(insn, 5, 5);
13566    int rd = extract32(insn, 0, 5);
13567    bool feature;
13568    CryptoThreeOpFn *genfn;
13569
13570    if (o == 0) {
13571        switch (opcode) {
13572        case 0: /* SHA512H */
13573            feature = dc_isar_feature(aa64_sha512, s);
13574            genfn = gen_helper_crypto_sha512h;
13575            break;
13576        case 1: /* SHA512H2 */
13577            feature = dc_isar_feature(aa64_sha512, s);
13578            genfn = gen_helper_crypto_sha512h2;
13579            break;
13580        case 2: /* SHA512SU1 */
13581            feature = dc_isar_feature(aa64_sha512, s);
13582            genfn = gen_helper_crypto_sha512su1;
13583            break;
13584        case 3: /* RAX1 */
13585            feature = dc_isar_feature(aa64_sha3, s);
13586            genfn = NULL;
13587            break;
13588        }
13589    } else {
13590        switch (opcode) {
13591        case 0: /* SM3PARTW1 */
13592            feature = dc_isar_feature(aa64_sm3, s);
13593            genfn = gen_helper_crypto_sm3partw1;
13594            break;
13595        case 1: /* SM3PARTW2 */
13596            feature = dc_isar_feature(aa64_sm3, s);
13597            genfn = gen_helper_crypto_sm3partw2;
13598            break;
13599        case 2: /* SM4EKEY */
13600            feature = dc_isar_feature(aa64_sm4, s);
13601            genfn = gen_helper_crypto_sm4ekey;
13602            break;
13603        default:
13604            unallocated_encoding(s);
13605            return;
13606        }
13607    }
13608
13609    if (!feature) {
13610        unallocated_encoding(s);
13611        return;
13612    }
13613
13614    if (!fp_access_check(s)) {
13615        return;
13616    }
13617
13618    if (genfn) {
13619        TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13620
13621        tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13622        tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13623        tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13624
13625        genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
13626
13627        tcg_temp_free_ptr(tcg_rd_ptr);
13628        tcg_temp_free_ptr(tcg_rn_ptr);
13629        tcg_temp_free_ptr(tcg_rm_ptr);
13630    } else {
13631        TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
13632        int pass;
13633
13634        tcg_op1 = tcg_temp_new_i64();
13635        tcg_op2 = tcg_temp_new_i64();
13636        tcg_res[0] = tcg_temp_new_i64();
13637        tcg_res[1] = tcg_temp_new_i64();
13638
13639        for (pass = 0; pass < 2; pass++) {
13640            read_vec_element(s, tcg_op1, rn, pass, MO_64);
13641            read_vec_element(s, tcg_op2, rm, pass, MO_64);
13642
13643            tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1);
13644            tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13645        }
13646        write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13647        write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13648
13649        tcg_temp_free_i64(tcg_op1);
13650        tcg_temp_free_i64(tcg_op2);
13651        tcg_temp_free_i64(tcg_res[0]);
13652        tcg_temp_free_i64(tcg_res[1]);
13653    }
13654}
13655
13656/* Crypto two-reg SHA512
13657 *  31                                     12  11  10  9    5 4    0
13658 * +-----------------------------------------+--------+------+------+
13659 * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
13660 * +-----------------------------------------+--------+------+------+
13661 */
13662static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13663{
13664    int opcode = extract32(insn, 10, 2);
13665    int rn = extract32(insn, 5, 5);
13666    int rd = extract32(insn, 0, 5);
13667    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13668    bool feature;
13669    CryptoTwoOpFn *genfn;
13670
13671    switch (opcode) {
13672    case 0: /* SHA512SU0 */
13673        feature = dc_isar_feature(aa64_sha512, s);
13674        genfn = gen_helper_crypto_sha512su0;
13675        break;
13676    case 1: /* SM4E */
13677        feature = dc_isar_feature(aa64_sm4, s);
13678        genfn = gen_helper_crypto_sm4e;
13679        break;
13680    default:
13681        unallocated_encoding(s);
13682        return;
13683    }
13684
13685    if (!feature) {
13686        unallocated_encoding(s);
13687        return;
13688    }
13689
13690    if (!fp_access_check(s)) {
13691        return;
13692    }
13693
13694    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13695    tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13696
13697    genfn(tcg_rd_ptr, tcg_rn_ptr);
13698
13699    tcg_temp_free_ptr(tcg_rd_ptr);
13700    tcg_temp_free_ptr(tcg_rn_ptr);
13701}
13702
13703/* Crypto four-register
13704 *  31               23 22 21 20  16 15  14  10 9    5 4    0
13705 * +-------------------+-----+------+---+------+------+------+
13706 * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
13707 * +-------------------+-----+------+---+------+------+------+
13708 */
13709static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13710{
13711    int op0 = extract32(insn, 21, 2);
13712    int rm = extract32(insn, 16, 5);
13713    int ra = extract32(insn, 10, 5);
13714    int rn = extract32(insn, 5, 5);
13715    int rd = extract32(insn, 0, 5);
13716    bool feature;
13717
13718    switch (op0) {
13719    case 0: /* EOR3 */
13720    case 1: /* BCAX */
13721        feature = dc_isar_feature(aa64_sha3, s);
13722        break;
13723    case 2: /* SM3SS1 */
13724        feature = dc_isar_feature(aa64_sm3, s);
13725        break;
13726    default:
13727        unallocated_encoding(s);
13728        return;
13729    }
13730
13731    if (!feature) {
13732        unallocated_encoding(s);
13733        return;
13734    }
13735
13736    if (!fp_access_check(s)) {
13737        return;
13738    }
13739
13740    if (op0 < 2) {
13741        TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13742        int pass;
13743
13744        tcg_op1 = tcg_temp_new_i64();
13745        tcg_op2 = tcg_temp_new_i64();
13746        tcg_op3 = tcg_temp_new_i64();
13747        tcg_res[0] = tcg_temp_new_i64();
13748        tcg_res[1] = tcg_temp_new_i64();
13749
13750        for (pass = 0; pass < 2; pass++) {
13751            read_vec_element(s, tcg_op1, rn, pass, MO_64);
13752            read_vec_element(s, tcg_op2, rm, pass, MO_64);
13753            read_vec_element(s, tcg_op3, ra, pass, MO_64);
13754
13755            if (op0 == 0) {
13756                /* EOR3 */
13757                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13758            } else {
13759                /* BCAX */
13760                tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13761            }
13762            tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13763        }
13764        write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13765        write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13766
13767        tcg_temp_free_i64(tcg_op1);
13768        tcg_temp_free_i64(tcg_op2);
13769        tcg_temp_free_i64(tcg_op3);
13770        tcg_temp_free_i64(tcg_res[0]);
13771        tcg_temp_free_i64(tcg_res[1]);
13772    } else {
13773        TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13774
13775        tcg_op1 = tcg_temp_new_i32();
13776        tcg_op2 = tcg_temp_new_i32();
13777        tcg_op3 = tcg_temp_new_i32();
13778        tcg_res = tcg_temp_new_i32();
13779        tcg_zero = tcg_const_i32(0);
13780
13781        read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13782        read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13783        read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13784
13785        tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13786        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13787        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13788        tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13789
13790        write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13791        write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13792        write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13793        write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13794
13795        tcg_temp_free_i32(tcg_op1);
13796        tcg_temp_free_i32(tcg_op2);
13797        tcg_temp_free_i32(tcg_op3);
13798        tcg_temp_free_i32(tcg_res);
13799        tcg_temp_free_i32(tcg_zero);
13800    }
13801}
13802
13803/* Crypto XAR
13804 *  31                   21 20  16 15    10 9    5 4    0
13805 * +-----------------------+------+--------+------+------+
13806 * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
13807 * +-----------------------+------+--------+------+------+
13808 */
13809static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13810{
13811    int rm = extract32(insn, 16, 5);
13812    int imm6 = extract32(insn, 10, 6);
13813    int rn = extract32(insn, 5, 5);
13814    int rd = extract32(insn, 0, 5);
13815    TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
13816    int pass;
13817
13818    if (!dc_isar_feature(aa64_sha3, s)) {
13819        unallocated_encoding(s);
13820        return;
13821    }
13822
13823    if (!fp_access_check(s)) {
13824        return;
13825    }
13826
13827    tcg_op1 = tcg_temp_new_i64();
13828    tcg_op2 = tcg_temp_new_i64();
13829    tcg_res[0] = tcg_temp_new_i64();
13830    tcg_res[1] = tcg_temp_new_i64();
13831
13832    for (pass = 0; pass < 2; pass++) {
13833        read_vec_element(s, tcg_op1, rn, pass, MO_64);
13834        read_vec_element(s, tcg_op2, rm, pass, MO_64);
13835
13836        tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
13837        tcg_gen_rotri_i64(tcg_res[pass], tcg_res[pass], imm6);
13838    }
13839    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13840    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13841
13842    tcg_temp_free_i64(tcg_op1);
13843    tcg_temp_free_i64(tcg_op2);
13844    tcg_temp_free_i64(tcg_res[0]);
13845    tcg_temp_free_i64(tcg_res[1]);
13846}
13847
13848/* Crypto three-reg imm2
13849 *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
13850 * +-----------------------+------+-----+------+--------+------+------+
13851 * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
13852 * +-----------------------+------+-----+------+--------+------+------+
13853 */
13854static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
13855{
13856    int opcode = extract32(insn, 10, 2);
13857    int imm2 = extract32(insn, 12, 2);
13858    int rm = extract32(insn, 16, 5);
13859    int rn = extract32(insn, 5, 5);
13860    int rd = extract32(insn, 0, 5);
13861    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13862    TCGv_i32 tcg_imm2, tcg_opcode;
13863
13864    if (!dc_isar_feature(aa64_sm3, s)) {
13865        unallocated_encoding(s);
13866        return;
13867    }
13868
13869    if (!fp_access_check(s)) {
13870        return;
13871    }
13872
13873    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13874    tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13875    tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13876    tcg_imm2   = tcg_const_i32(imm2);
13877    tcg_opcode = tcg_const_i32(opcode);
13878
13879    gen_helper_crypto_sm3tt(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2,
13880                            tcg_opcode);
13881
13882    tcg_temp_free_ptr(tcg_rd_ptr);
13883    tcg_temp_free_ptr(tcg_rn_ptr);
13884    tcg_temp_free_ptr(tcg_rm_ptr);
13885    tcg_temp_free_i32(tcg_imm2);
13886    tcg_temp_free_i32(tcg_opcode);
13887}
13888
13889/* C3.6 Data processing - SIMD, inc Crypto
13890 *
13891 * As the decode gets a little complex we are using a table based
13892 * approach for this part of the decode.
13893 */
13894static const AArch64DecodeTable data_proc_simd[] = {
13895    /* pattern  ,  mask     ,  fn                        */
13896    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
13897    { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
13898    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
13899    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
13900    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
13901    { 0x0e000400, 0x9fe08400, disas_simd_copy },
13902    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
13903    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
13904    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
13905    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
13906    { 0x0e000000, 0xbf208c00, disas_simd_tb },
13907    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
13908    { 0x2e000000, 0xbf208400, disas_simd_ext },
13909    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
13910    { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
13911    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
13912    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
13913    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
13914    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
13915    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
13916    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
13917    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
13918    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
13919    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
13920    { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
13921    { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
13922    { 0xce000000, 0xff808000, disas_crypto_four_reg },
13923    { 0xce800000, 0xffe00000, disas_crypto_xar },
13924    { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
13925    { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
13926    { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
13927    { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
13928    { 0x00000000, 0x00000000, NULL }
13929};
13930
13931static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13932{
13933    /* Note that this is called with all non-FP cases from
13934     * table C3-6 so it must UNDEF for entries not specifically
13935     * allocated to instructions in that table.
13936     */
13937    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13938    if (fn) {
13939        fn(s, insn);
13940    } else {
13941        unallocated_encoding(s);
13942    }
13943}
13944
13945/* C3.6 Data processing - SIMD and floating point */
13946static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13947{
13948    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13949        disas_data_proc_fp(s, insn);
13950    } else {
13951        /* SIMD, including crypto */
13952        disas_data_proc_simd(s, insn);
13953    }
13954}
13955
13956/**
13957 * is_guarded_page:
13958 * @env: The cpu environment
13959 * @s: The DisasContext
13960 *
13961 * Return true if the page is guarded.
13962 */
13963static bool is_guarded_page(CPUARMState *env, DisasContext *s)
13964{
13965#ifdef CONFIG_USER_ONLY
13966    return false;  /* FIXME */
13967#else
13968    uint64_t addr = s->base.pc_first;
13969    int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
13970    unsigned int index = tlb_index(env, mmu_idx, addr);
13971    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
13972
13973    /*
13974     * We test this immediately after reading an insn, which means
13975     * that any normal page must be in the TLB.  The only exception
13976     * would be for executing from flash or device memory, which
13977     * does not retain the TLB entry.
13978     *
13979     * FIXME: Assume false for those, for now.  We could use
13980     * arm_cpu_get_phys_page_attrs_debug to re-read the page
13981     * table entry even for that case.
13982     */
13983    return (tlb_hit(entry->addr_code, addr) &&
13984            env_tlb(env)->d[mmu_idx].iotlb[index].attrs.target_tlb_bit0);
13985#endif
13986}
13987
13988/**
13989 * btype_destination_ok:
13990 * @insn: The instruction at the branch destination
13991 * @bt: SCTLR_ELx.BT
13992 * @btype: PSTATE.BTYPE, and is non-zero
13993 *
13994 * On a guarded page, there are a limited number of insns
13995 * that may be present at the branch target:
13996 *   - branch target identifiers,
13997 *   - paciasp, pacibsp,
13998 *   - BRK insn
13999 *   - HLT insn
14000 * Anything else causes a Branch Target Exception.
14001 *
14002 * Return true if the branch is compatible, false to raise BTITRAP.
14003 */
14004static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
14005{
14006    if ((insn & 0xfffff01fu) == 0xd503201fu) {
14007        /* HINT space */
14008        switch (extract32(insn, 5, 7)) {
14009        case 0b011001: /* PACIASP */
14010        case 0b011011: /* PACIBSP */
14011            /*
14012             * If SCTLR_ELx.BT, then PACI*SP are not compatible
14013             * with btype == 3.  Otherwise all btype are ok.
14014             */
14015            return !bt || btype != 3;
14016        case 0b100000: /* BTI */
14017            /* Not compatible with any btype.  */
14018            return false;
14019        case 0b100010: /* BTI c */
14020            /* Not compatible with btype == 3 */
14021            return btype != 3;
14022        case 0b100100: /* BTI j */
14023            /* Not compatible with btype == 2 */
14024            return btype != 2;
14025        case 0b100110: /* BTI jc */
14026            /* Compatible with any btype.  */
14027            return true;
14028        }
14029    } else {
14030        switch (insn & 0xffe0001fu) {
14031        case 0xd4200000u: /* BRK */
14032        case 0xd4400000u: /* HLT */
14033            /* Give priority to the breakpoint exception.  */
14034            return true;
14035        }
14036    }
14037    return false;
14038}
14039
14040/* C3.1 A64 instruction index by encoding */
14041static void disas_a64_insn(CPUARMState *env, DisasContext *s)
14042{
14043    uint32_t insn;
14044
14045    s->pc_curr = s->base.pc_next;
14046    insn = arm_ldl_code(env, s->base.pc_next, s->sctlr_b);
14047    s->insn = insn;
14048    s->base.pc_next += 4;
14049
14050    s->fp_access_checked = false;
14051
14052    if (dc_isar_feature(aa64_bti, s)) {
14053        if (s->base.num_insns == 1) {
14054            /*
14055             * At the first insn of the TB, compute s->guarded_page.
14056             * We delayed computing this until successfully reading
14057             * the first insn of the TB, above.  This (mostly) ensures
14058             * that the softmmu tlb entry has been populated, and the
14059             * page table GP bit is available.
14060             *
14061             * Note that we need to compute this even if btype == 0,
14062             * because this value is used for BR instructions later
14063             * where ENV is not available.
14064             */
14065            s->guarded_page = is_guarded_page(env, s);
14066
14067            /* First insn can have btype set to non-zero.  */
14068            tcg_debug_assert(s->btype >= 0);
14069
14070            /*
14071             * Note that the Branch Target Exception has fairly high
14072             * priority -- below debugging exceptions but above most
14073             * everything else.  This allows us to handle this now
14074             * instead of waiting until the insn is otherwise decoded.
14075             */
14076            if (s->btype != 0
14077                && s->guarded_page
14078                && !btype_destination_ok(insn, s->bt, s->btype)) {
14079                gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
14080                                   syn_btitrap(s->btype),
14081                                   default_exception_el(s));
14082                return;
14083            }
14084        } else {
14085            /* Not the first insn: btype must be 0.  */
14086            tcg_debug_assert(s->btype == 0);
14087        }
14088    }
14089
14090    switch (extract32(insn, 25, 4)) {
14091    case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
14092        unallocated_encoding(s);
14093        break;
14094    case 0x2:
14095        if (!dc_isar_feature(aa64_sve, s) || !disas_sve(s, insn)) {
14096            unallocated_encoding(s);
14097        }
14098        break;
14099    case 0x8: case 0x9: /* Data processing - immediate */
14100        disas_data_proc_imm(s, insn);
14101        break;
14102    case 0xa: case 0xb: /* Branch, exception generation and system insns */
14103        disas_b_exc_sys(s, insn);
14104        break;
14105    case 0x4:
14106    case 0x6:
14107    case 0xc:
14108    case 0xe:      /* Loads and stores */
14109        disas_ldst(s, insn);
14110        break;
14111    case 0x5:
14112    case 0xd:      /* Data processing - register */
14113        disas_data_proc_reg(s, insn);
14114        break;
14115    case 0x7:
14116    case 0xf:      /* Data processing - SIMD and floating point */
14117        disas_data_proc_simd_fp(s, insn);
14118        break;
14119    default:
14120        assert(FALSE); /* all 15 cases should be handled above */
14121        break;
14122    }
14123
14124    /* if we allocated any temporaries, free them here */
14125    free_tmp_a64(s);
14126
14127    /*
14128     * After execution of most insns, btype is reset to 0.
14129     * Note that we set btype == -1 when the insn sets btype.
14130     */
14131    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
14132        reset_btype(s);
14133    }
14134}
14135
14136static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
14137                                          CPUState *cpu)
14138{
14139    DisasContext *dc = container_of(dcbase, DisasContext, base);
14140    CPUARMState *env = cpu->env_ptr;
14141    ARMCPU *arm_cpu = env_archcpu(env);
14142    uint32_t tb_flags = dc->base.tb->flags;
14143    int bound, core_mmu_idx;
14144
14145    dc->isar = &arm_cpu->isar;
14146    dc->condjmp = 0;
14147
14148    dc->aarch64 = 1;
14149    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
14150     * there is no secure EL1, so we route exceptions to EL3.
14151     */
14152    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
14153                               !arm_el_is_aa64(env, 3);
14154    dc->thumb = 0;
14155    dc->sctlr_b = 0;
14156    dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
14157    dc->condexec_mask = 0;
14158    dc->condexec_cond = 0;
14159    core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
14160    dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
14161    dc->tbii = FIELD_EX32(tb_flags, TBFLAG_A64, TBII);
14162    dc->tbid = FIELD_EX32(tb_flags, TBFLAG_A64, TBID);
14163    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
14164#if !defined(CONFIG_USER_ONLY)
14165    dc->user = (dc->current_el == 0);
14166#endif
14167    dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
14168    dc->sve_excp_el = FIELD_EX32(tb_flags, TBFLAG_A64, SVEEXC_EL);
14169    dc->sve_len = (FIELD_EX32(tb_flags, TBFLAG_A64, ZCR_LEN) + 1) * 16;
14170    dc->pauth_active = FIELD_EX32(tb_flags, TBFLAG_A64, PAUTH_ACTIVE);
14171    dc->bt = FIELD_EX32(tb_flags, TBFLAG_A64, BT);
14172    dc->btype = FIELD_EX32(tb_flags, TBFLAG_A64, BTYPE);
14173    dc->vec_len = 0;
14174    dc->vec_stride = 0;
14175    dc->cp_regs = arm_cpu->cp_regs;
14176    dc->features = env->features;
14177
14178    /* Single step state. The code-generation logic here is:
14179     *  SS_ACTIVE == 0:
14180     *   generate code with no special handling for single-stepping (except
14181     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
14182     *   this happens anyway because those changes are all system register or
14183     *   PSTATE writes).
14184     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
14185     *   emit code for one insn
14186     *   emit code to clear PSTATE.SS
14187     *   emit code to generate software step exception for completed step
14188     *   end TB (as usual for having generated an exception)
14189     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
14190     *   emit code to generate a software step exception
14191     *   end the TB
14192     */
14193    dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
14194    dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
14195    dc->is_ldex = false;
14196    dc->debug_target_el = FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
14197
14198    /* Bound the number of insns to execute to those left on the page.  */
14199    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
14200
14201    /* If architectural single step active, limit to 1.  */
14202    if (dc->ss_active) {
14203        bound = 1;
14204    }
14205    dc->base.max_insns = MIN(dc->base.max_insns, bound);
14206
14207    init_tmp_a64_array(dc);
14208}
14209
14210static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
14211{
14212}
14213
14214static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
14215{
14216    DisasContext *dc = container_of(dcbase, DisasContext, base);
14217
14218    tcg_gen_insn_start(dc->base.pc_next, 0, 0);
14219    dc->insn_start = tcg_last_op();
14220}
14221
14222static bool aarch64_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
14223                                        const CPUBreakpoint *bp)
14224{
14225    DisasContext *dc = container_of(dcbase, DisasContext, base);
14226
14227    if (bp->flags & BP_CPU) {
14228        gen_a64_set_pc_im(dc->base.pc_next);
14229        gen_helper_check_breakpoints(cpu_env);
14230        /* End the TB early; it likely won't be executed */
14231        dc->base.is_jmp = DISAS_TOO_MANY;
14232    } else {
14233        gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
14234        /* The address covered by the breakpoint must be
14235           included in [tb->pc, tb->pc + tb->size) in order
14236           to for it to be properly cleared -- thus we
14237           increment the PC here so that the logic setting
14238           tb->size below does the right thing.  */
14239        dc->base.pc_next += 4;
14240        dc->base.is_jmp = DISAS_NORETURN;
14241    }
14242
14243    return true;
14244}
14245
14246static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
14247{
14248    DisasContext *dc = container_of(dcbase, DisasContext, base);
14249    CPUARMState *env = cpu->env_ptr;
14250
14251    if (dc->ss_active && !dc->pstate_ss) {
14252        /* Singlestep state is Active-pending.
14253         * If we're in this state at the start of a TB then either
14254         *  a) we just took an exception to an EL which is being debugged
14255         *     and this is the first insn in the exception handler
14256         *  b) debug exceptions were masked and we just unmasked them
14257         *     without changing EL (eg by clearing PSTATE.D)
14258         * In either case we're going to take a swstep exception in the
14259         * "did not step an insn" case, and so the syndrome ISV and EX
14260         * bits should be zero.
14261         */
14262        assert(dc->base.num_insns == 1);
14263        gen_swstep_exception(dc, 0, 0);
14264        dc->base.is_jmp = DISAS_NORETURN;
14265    } else {
14266        disas_a64_insn(env, dc);
14267    }
14268
14269    translator_loop_temp_check(&dc->base);
14270}
14271
14272static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
14273{
14274    DisasContext *dc = container_of(dcbase, DisasContext, base);
14275
14276    if (unlikely(dc->base.singlestep_enabled || dc->ss_active)) {
14277        /* Note that this means single stepping WFI doesn't halt the CPU.
14278         * For conditional branch insns this is harmless unreachable code as
14279         * gen_goto_tb() has already handled emitting the debug exception
14280         * (and thus a tb-jump is not possible when singlestepping).
14281         */
14282        switch (dc->base.is_jmp) {
14283        default:
14284            gen_a64_set_pc_im(dc->base.pc_next);
14285            /* fall through */
14286        case DISAS_EXIT:
14287        case DISAS_JUMP:
14288            if (dc->base.singlestep_enabled) {
14289                gen_exception_internal(EXCP_DEBUG);
14290            } else {
14291                gen_step_complete_exception(dc);
14292            }
14293            break;
14294        case DISAS_NORETURN:
14295            break;
14296        }
14297    } else {
14298        switch (dc->base.is_jmp) {
14299        case DISAS_NEXT:
14300        case DISAS_TOO_MANY:
14301            gen_goto_tb(dc, 1, dc->base.pc_next);
14302            break;
14303        default:
14304        case DISAS_UPDATE:
14305            gen_a64_set_pc_im(dc->base.pc_next);
14306            /* fall through */
14307        case DISAS_EXIT:
14308            tcg_gen_exit_tb(NULL, 0);
14309            break;
14310        case DISAS_JUMP:
14311            tcg_gen_lookup_and_goto_ptr();
14312            break;
14313        case DISAS_NORETURN:
14314        case DISAS_SWI:
14315            break;
14316        case DISAS_WFE:
14317            gen_a64_set_pc_im(dc->base.pc_next);
14318            gen_helper_wfe(cpu_env);
14319            break;
14320        case DISAS_YIELD:
14321            gen_a64_set_pc_im(dc->base.pc_next);
14322            gen_helper_yield(cpu_env);
14323            break;
14324        case DISAS_WFI:
14325        {
14326            /* This is a special case because we don't want to just halt the CPU
14327             * if trying to debug across a WFI.
14328             */
14329            TCGv_i32 tmp = tcg_const_i32(4);
14330
14331            gen_a64_set_pc_im(dc->base.pc_next);
14332            gen_helper_wfi(cpu_env, tmp);
14333            tcg_temp_free_i32(tmp);
14334            /* The helper doesn't necessarily throw an exception, but we
14335             * must go back to the main loop to check for interrupts anyway.
14336             */
14337            tcg_gen_exit_tb(NULL, 0);
14338            break;
14339        }
14340        }
14341    }
14342}
14343
14344static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
14345                                      CPUState *cpu)
14346{
14347    DisasContext *dc = container_of(dcbase, DisasContext, base);
14348
14349    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
14350    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
14351}
14352
14353const TranslatorOps aarch64_translator_ops = {
14354    .init_disas_context = aarch64_tr_init_disas_context,
14355    .tb_start           = aarch64_tr_tb_start,
14356    .insn_start         = aarch64_tr_insn_start,
14357    .breakpoint_check   = aarch64_tr_breakpoint_check,
14358    .translate_insn     = aarch64_tr_translate_insn,
14359    .tb_stop            = aarch64_tr_tb_stop,
14360    .disas_log          = aarch64_tr_disas_log,
14361};
14362