qemu/target/arm/translate-a64.c
/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "qemu/host-utils.h"

#include "hw/semihosting/semihost.h"
#include "exec/gen-icount.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"

#include "trace-tcg.h"
#include "translate-a64.h"
#include "qemu/atomic128.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

static inline int get_a64_user_mem_index(DisasContext *s)
{
    /* Return the core mmu_idx to use for A64 "unprivileged load/store" insns:
     *  if EL1, access as if EL0; otherwise access at current EL
     */
    ARMMMUIdx useridx;

    switch (s->mmu_idx) {
    case ARMMMUIdx_S12NSE1:
        useridx = ARMMMUIdx_S12NSE0;
        break;
    case ARMMMUIdx_S1SE1:
        useridx = ARMMMUIdx_S1SE0;
        break;
    case ARMMMUIdx_S2NS:
        g_assert_not_reached();
    default:
        useridx = s->mmu_idx;
        break;
    }
    return arm_to_core_mmu_idx(useridx);
}
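
/*
 * Example (illustrative note, not from the original source): for an
 * unprivileged access such as "LDTR X0, [X1]" executed at EL1 in the
 * non-secure EL1&0 regime, s->mmu_idx is ARMMMUIdx_S12NSE1 and the
 * access is therefore performed with ARMMMUIdx_S12NSE0, i.e. with EL0
 * permissions; at EL2 or EL3 the insn simply uses the current mmu_idx.
 */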

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_st_i32(zero, cpu_env, offsetof(CPUARMState, btype));
        tcg_temp_free_i32(zero);
        s->btype = 0;
    }
}

static void set_btype(DisasContext *s, int val)
{
    TCGv_i32 tcg_val;

    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);

    tcg_val = tcg_const_i32(val);
    tcg_gen_st_i32(tcg_val, cpu_env, offsetof(CPUARMState, btype));
    tcg_temp_free_i32(tcg_val);
    s->btype = -1;
}

void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (s->current_el >= 2) {
        /* FIXME: ARMv8.1-VHE S2 translation regime.  */
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55.  */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        if (tbi != 3) {
            TCGv_i64 tcg_zero = tcg_const_i64(0);

            /*
             * The two TBI bits differ.
             * If tbi0, then !tbi1: only use the extension if positive.
             * If !tbi0, then tbi1: only use the extension if negative.
             */
            tcg_gen_movcond_i64(tbi == 1 ? TCG_COND_GE : TCG_COND_LT,
                                dst, dst, tcg_zero, dst, src);
            tcg_temp_free_i64(tcg_zero);
        }
    }
}
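
/*
 * Worked example (illustrative, not part of the original source):
 * with tbi == 2 (TBI1 set, TBI0 clear) only "negative" addresses,
 * i.e. those with bit 55 set, take the sign-extension:
 *
 *   src = 0x12ff_ffff_ffff_1234  (bit 55 set)   -> dst = 0xffff_ffff_ffff_1234
 *   src = 0x1200_0000_4000_0000  (bit 55 clear) -> dst = 0x1200_0000_4000_0000
 *
 * With tbi == 3 both cases take the extension, and the tag byte in
 * bits [63:56] is always replaced.
 */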

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
}

/*
 * Return a "clean" address for ADDR according to TBID.
 * This is always a fresh temporary, as we need to be able to
 * increment this independently of a dirty write-back address.
 */
static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = new_tmp_a64(s);
    gen_top_byte_ignore(s, clean, addr, s->tbid);
    return clean;
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /* Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.  */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);

    arm_free_cc(&c32);
}

static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}

static void gen_exception_internal(int excp)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);

    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_excp);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);
    TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
    TCGv_i32 tcg_el = tcg_const_i32(target_el);

    gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
                                       tcg_syn, tcg_el);
    tcg_temp_free_i32(tcg_el);
    tcg_temp_free_i32(tcg_syn);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_insn(DisasContext *s, int offset, int excp,
                               uint32_t syndrome, uint32_t target_el)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception(excp, syndrome, target_el);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, int offset,
                                    uint32_t syndrome)
{
    TCGv_i32 tcg_syn;

    gen_a64_set_pc_im(s->pc - offset);
    tcg_syn = tcg_const_i32(syndrome);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
    tcg_temp_free_i32(tcg_syn);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We have just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
                  default_exception_el(s));
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    /* No direct tb linking with singlestep (either QEMU's or the ARM
     * debug architecture kind) or deterministic I/O.
     */
    if (s->base.singlestep_enabled || s->ss_active ||
        (tb_cflags(s->base.tb) & CF_LAST_IO)) {
        return false;
    }

#ifndef CONFIG_USER_ONLY
    /* Only link tbs from inside the same guest page */
    if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
        return false;
    }
#endif

    return true;
}

static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    TranslationBlock *tb;

    tb = s->base.tb;
    if (use_goto_tb(s, n, dest)) {
        tcg_gen_goto_tb(n);
        gen_a64_set_pc_im(dest);
        tcg_gen_exit_tb(tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_set_pc_im(dest);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else if (s->base.singlestep_enabled) {
            gen_exception_internal(EXCP_DEBUG);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}

static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
#endif
    s->tmp_a64_count = 0;
}

static void free_tmp_a64(DisasContext *s)
{
    int i;
    for (i = 0; i < s->tmp_a64_count; i++) {
        tcg_temp_free_i64(s->tmp_a64[i]);
    }
    init_tmp_a64_array(s);
}

TCGv_i64 new_tmp_a64(DisasContext *s)
{
    assert(s->tmp_a64_count < TMP_A64_MAX);
    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
}

TCGv_i64 new_tmp_a64_zero(DisasContext *s)
{
    TCGv_i64 t = new_tmp_a64(s);
    tcg_gen_movi_i64(t, 0);
    return t;
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register in cases
 * where changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        return new_tmp_a64_zero(s);
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    if (!is_q) {
        TCGv_i64 tcg_zero = tcg_const_i64(0);
        tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8);
        tcg_temp_free_i64(tcg_zero);
    }
    if (vsz > 16) {
        tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0);
    }
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, cpu_env, ofs);
    clear_vec_high(s, false, reg);
}
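
/*
 * Usage note (illustrative): a scalar double write such as
 * write_fp_dreg(s, 0, v) stores 64 bits and then, via
 * clear_vec_high(s, false, 0), zeroes Q0 bits [127:64]; if SVE is
 * implemented with a vector length above 128 bits, the dup8i in
 * clear_vec_high also zeroes the Z0 bits beyond the Q register.
 */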

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
    tcg_temp_free_i64(tmp);
}

TCGv_ptr get_fpstatus_ptr(bool is_f16)
{
    TCGv_ptr statusptr = tcg_temp_new_ptr();
    int offset;

    /* In A64 all instructions (both FP and Neon) use the FPCR; there
     * is no equivalent of the A32 Neon "standard FPSCR value".
     * However half-precision operations operate under a different
     * FZ16 flag and use vfp.fp_status_f16 instead of vfp.fp_status.
     */
    if (is_f16) {
        offset = offsetof(CPUARMState, vfp.fp_status_f16);
    } else {
        offset = offsetof(CPUARMState, vfp.fp_status);
    }
    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
    return statusptr;
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an op descriptor.
 */
static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd,
                          int rn, int64_t imm, const GVecGen2i *gvec_op)
{
    tcg_gen_gvec_2i(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
                    is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op);
}

/* Expand a 3-operand AdvSIMD vector operation using an op descriptor.  */
static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
                         int rn, int rm, const GVecGen3 *gvec_op)
{
    tcg_gen_gvec_3(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
                   vec_full_reg_offset(s, rm), is_q ? 16 : 8,
                   vec_full_reg_size(s), gvec_op);
}

/* Expand a 3-operand operation using an out-of-line helper.  */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + env pointer operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, gen_helper_gvec_3_ptr *fn)
{
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), cpu_env,
                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = get_fpstatus_ptr(is_fp16);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    tcg_temp_free_ptr(fpst);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
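
/*
 * Worked example (illustrative): for result == 0xffffffff00000000
 * the extr above yields cpu_ZF = 0x00000000 (low half) and
 * cpu_NF = 0xffffffff (high half); OR-ing the halves leaves
 * cpu_ZF == 0xffffffff, i.e. non-zero, so the Z flag reads as clear
 * (the 64-bit result is not zero), while bit 31 of cpu_NF reflects
 * result bit 63 for the N flag.
 */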

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
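
/*
 * Note (illustrative): AArch64 subtraction sets C to NOT(borrow),
 * which is why gen_sub_CC uses TCG_COND_GEU above: for t0 = 5,
 * t1 = 3 the unsigned comparison 5 >= 3 gives C = 1 (no borrow),
 * while t0 = 3, t1 = 5 gives C = 0.
 */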

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);
    tcg_temp_free_i64(flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64();
        cf_64 = tcg_temp_new_i64();
        vf_64 = tcg_temp_new_i64();
        tmp = tcg_const_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32();
        t1_32 = tcg_temp_new_i32();
        tmp = tcg_const_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, int size, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    g_assert(size <= 3);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      size,
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s,
                             TCGv_i64 dest, TCGv_i64 tcg_addr,
                             int size, bool is_signed,
                             bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    TCGMemOp memop = s->be_data + size;

    g_assert(size <= 3);

    if (is_signed) {
        memop += MO_SIGN;
    }

    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && is_signed) {
        g_assert(size < 3);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      size,
                                      is_signed,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s,
                      TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
                     get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmp = tcg_temp_new_i64();
    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
    if (size < 4) {
        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
                            s->be_data + size);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_temp_free_i64(tmp);
}
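
/*
 * Endianness note (illustrative): for a 128-bit store on a
 * big-endian guest the high 64-bit half must come first in memory,
 * which is why do_fp_st above swaps tcg_addr and tcg_hiaddr when
 * s->be_data == MO_BE; do_fp_ld below mirrors the same swap.
 */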

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi;

    if (size < 4) {
        TCGMemOp memop = s->be_data + size;
        tmphi = tcg_const_i64(0);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));

    tcg_temp_free_i64(tmplo);
    tcg_temp_free_i64(tmphi);

    clear_vec_high(s, true, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, int size, TCGMemOp endian)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, size);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);

    tcg_temp_free_i64(tcg_tmp);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, int size, TCGMemOp endian)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
    write_vec_element(s, tcg_tmp, destidx, element, size);

    tcg_temp_free_i64(tcg_tmp);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static inline bool fp_access_check(DisasContext *s)
{
    assert(!s->fp_access_checked);
    s->fp_access_checked = true;

    if (!s->fp_excp_el) {
        return true;
    }

    gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
                       s->fp_excp_el);
    return false;
}

/* Check that SVE access is enabled.  If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 */
bool sve_access_check(DisasContext *s)
{
    if (s->sve_excp_el) {
        gen_exception_insn(s, 4, EXCP_UDEF, syn_sve_access_trap(),
                           s->sve_excp_el);
        return false;
    }
    return fp_access_check(s);
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}
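
/*
 * Worked example (illustrative): for "ADD X0, X1, W2, UXTW #2" the
 * decoder passes option = 0b010 (extsize 2, unsigned) and shift = 2,
 * so with X2 = 0xffff_ffff_8000_0004 this computes
 * tcg_out = ((uint64_t)(uint32_t)0x8000_0004) << 2 = 0x2_0000_0010.
 */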

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is
 * intended to be used when the relevant bits for decode are too
 * awkwardly placed and switch/if based logic would be confusing and
 * deeply nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0).
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}
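
/*
 * Usage sketch (illustrative; the table entry shown is hypothetical
 * here): a caller builds a zero-terminated table and dispatches on
 * it, e.g.:
 *
 *   static const AArch64DecodeTable table[] = {
 *       { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
 *       { 0x00000000, 0x00000000, NULL }
 *   };
 *   AArch64DecodeFn *fn = lookup_disas_fn(&table[0], insn);
 *   if (fn) {
 *       fn(s, insn);
 *   } else {
 *       unallocated_encoding(s);
 *   }
 */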

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

/* Unconditional branch (immediate)
 *   31  30       26 25                                  0
 * +----+-----------+-------------------------------------+
 * | op | 0 0 1 0 1 |                 imm26               |
 * +----+-----------+-------------------------------------+
 */
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
{
    uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;

    if (insn & (1U << 31)) {
        /* BL Branch with link */
        tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
    }

    /* B Branch / BL Branch with link */
    reset_btype(s);
    gen_goto_tb(s, 0, addr);
}
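
/*
 * Address arithmetic example (illustrative): s->pc has already been
 * advanced past this insn, so the "- 4" above rebases the offset to
 * the insn's own address. A BL at 0x1000 with imm26 == 0x10 stores
 * the return address 0x1004 in X30 and branches to
 * 0x1000 + 0x10 * 4 = 0x1040.
 */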

/* Compare and branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    label_match = gen_new_label();

    reset_btype(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);

    gen_goto_tb(s, 0, s->pc);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* Test and branch (immediate)
 *   31  30         25  24  23   19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int bit_pos, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
    label_match = gen_new_label();

    reset_btype(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);
    tcg_temp_free_i64(tcg_cmp);
    gen_goto_tb(s, 0, s->pc);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* Conditional branch (immediate)
 *  31           25  24  23                  5   4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int cond;
    uint64_t addr;

    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
    cond = extract32(insn, 0, 4);

    reset_btype(s);
    if (cond < 0x0e) {
        /* genuinely conditional branches */
        TCGLabel *label_match = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        gen_goto_tb(s, 0, s->pc);
        gen_set_label(label_match);
        gen_goto_tb(s, 1, addr);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, addr);
    }
}

/* HINT instruction group, including various allocated HINTs */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0b00000: /* NOP */
        break;
    case 0b00011: /* WFI */
        s->base.is_jmp = DISAS_WFI;
        break;
    case 0b00001: /* YIELD */
        /* When running in MTTCG we don't generate jumps to the yield and
         * WFE helpers as they won't affect the scheduling of other vCPUs.
         * If we wanted to model WFE/SEV more completely so that we don't
         * busy-spin unnecessarily we would need to do something more
         * involved.
         */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_YIELD;
        }
        break;
    case 0b00010: /* WFE */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_WFE;
        }
        break;
    case 0b00100: /* SEV */
    case 0b00101: /* SEVL */
        /* we treat all as NOP at least for now */
        break;
    case 0b00111: /* XPACLRI */
        if (s->pauth_active) {
            gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
        }
        break;
    case 0b01000: /* PACIA1716 */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01010: /* PACIB1716 */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01100: /* AUTIA1716 */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01110: /* AUTIB1716 */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b11000: /* PACIAZ */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11001: /* PACIASP */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11010: /* PACIBZ */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11011: /* PACIBSP */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11100: /* AUTIAZ */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11101: /* AUTIASP */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11110: /* AUTIBZ */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11111: /* AUTIBSP */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    default:
        /* default specified as NOP equivalent */
        break;
    }
}

static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}

/* CLREX, DSB, DMB, ISB */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGBar bar;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
        switch (crm & 3) {
        case 1: /* MBReqTypes_Reads */
            bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
            break;
        case 2: /* MBReqTypes_Writes */
            bar = TCG_BAR_SC | TCG_MO_ST_ST;
            break;
        default: /* MBReqTypes_All */
            bar = TCG_BAR_SC | TCG_MO_ALL;
            break;
        }
        tcg_gen_mb(bar);
        return;
    case 6: /* ISB */
        /* We need to break the TB after this insn to execute
         * self-modifying code correctly and also to take
         * any pending interrupts immediately.
         */
        reset_btype(s);
        gen_goto_tb(s, 0, s->pc);
        return;

    case 7: /* SB */
        if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
            goto do_unallocated;
        }
        /*
         * TODO: There is no speculation barrier opcode for TCG;
         * MB and end the TB instead.
         */
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
        gen_goto_tb(s, 0, s->pc);
        return;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }
}
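
/*
 * Barrier mapping example (illustrative): "DMB ISHLD" has
 * crm & 3 == 1 (MBReqTypes_Reads), so it becomes a TCG barrier
 * ordering loads against later loads and stores
 * (TCG_MO_LD_LD | TCG_MO_LD_ST), while "DMB ISHST" (crm & 3 == 2)
 * only orders stores against stores (TCG_MO_ST_ST).
 */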
1529
1530static void gen_xaflag(void)
1531{
1532    TCGv_i32 z = tcg_temp_new_i32();
1533
1534    tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1535
1536    /*
1537     * (!C & !Z) << 31
1538     * (!(C | Z)) << 31
1539     * ~((C | Z) << 31)
1540     * ~-(C | Z)
1541     * (C | Z) - 1
1542     */
1543    tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1544    tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1545
1546    /* !(Z & C) */
1547    tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1548    tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1549
1550    /* (!C & Z) << 31 -> -(Z & ~C) */
1551    tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1552    tcg_gen_neg_i32(cpu_VF, cpu_VF);
1553
1554    /* C | Z */
1555    tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1556
1557    tcg_temp_free_i32(z);
1558}
1559
1560static void gen_axflag(void)
1561{
1562    tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
1563    tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
1564
1565    /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1566    tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1567
1568    tcg_gen_movi_i32(cpu_NF, 0);
1569    tcg_gen_movi_i32(cpu_VF, 0);
1570}
1571
1572/* MSR (immediate) - move immediate to processor state field */
1573static void handle_msr_i(DisasContext *s, uint32_t insn,
1574                         unsigned int op1, unsigned int op2, unsigned int crm)
1575{
1576    TCGv_i32 t1;
1577    int op = op1 << 3 | op2;
1578
1579    /* End the TB by default, chaining is ok.  */
1580    s->base.is_jmp = DISAS_TOO_MANY;
1581
1582    switch (op) {
1583    case 0x00: /* CFINV */
1584        if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
1585            goto do_unallocated;
1586        }
1587        tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1588        s->base.is_jmp = DISAS_NEXT;
1589        break;
1590
1591    case 0x01: /* XAFlag */
1592        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1593            goto do_unallocated;
1594        }
1595        gen_xaflag();
1596        s->base.is_jmp = DISAS_NEXT;
1597        break;
1598
1599    case 0x02: /* AXFlag */
1600        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1601            goto do_unallocated;
1602        }
1603        gen_axflag();
1604        s->base.is_jmp = DISAS_NEXT;
1605        break;
1606
1607    case 0x05: /* SPSel */
1608        if (s->current_el == 0) {
1609            goto do_unallocated;
1610        }
1611        t1 = tcg_const_i32(crm & PSTATE_SP);
1612        gen_helper_msr_i_spsel(cpu_env, t1);
1613        tcg_temp_free_i32(t1);
1614        break;
1615
1616    case 0x1e: /* DAIFSet */
1617        t1 = tcg_const_i32(crm);
1618        gen_helper_msr_i_daifset(cpu_env, t1);
1619        tcg_temp_free_i32(t1);
1620        break;
1621
1622    case 0x1f: /* DAIFClear */
1623        t1 = tcg_const_i32(crm);
1624        gen_helper_msr_i_daifclear(cpu_env, t1);
1625        tcg_temp_free_i32(t1);
1626        /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
1627        s->base.is_jmp = DISAS_UPDATE;
1628        break;
1629
1630    default:
1631    do_unallocated:
1632        unallocated_encoding(s);
1633        return;
1634    }
1635}
1636
1637static void gen_get_nzcv(TCGv_i64 tcg_rt)
1638{
1639    TCGv_i32 tmp = tcg_temp_new_i32();
1640    TCGv_i32 nzcv = tcg_temp_new_i32();
1641
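        /*
         * QEMU stores the flags expanded: N is bit 31 of NF, Z is
         * "ZF == 0", C is bit 0 of CF, and V is bit 31 of VF.  Repack
         * them here into the architectural NZCV layout, bits 31..28.
         */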
1642    /* build bit 31, N */
1643    tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1644    /* build bit 30, Z */
1645    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1646    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1647    /* build bit 29, C */
1648    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1649    /* build bit 28, V */
1650    tcg_gen_shri_i32(tmp, cpu_VF, 31);
1651    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1652    /* generate result */
1653    tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1654
1655    tcg_temp_free_i32(nzcv);
1656    tcg_temp_free_i32(tmp);
1657}
1658
1659static void gen_set_nzcv(TCGv_i64 tcg_rt)
1660{
1661    TCGv_i32 nzcv = tcg_temp_new_i32();
1662
1663    /* take NZCV from R[t] */
1664    tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1665
1666    /* bit 31, N */
1667    tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1668    /* bit 30, Z */
1669    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1670    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1671    /* bit 29, C */
1672    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1673    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1674    /* bit 28, V */
1675    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1676    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
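        /* the left shift by 3 moves bit 28 (V) up into bit 31 of VF */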
1677    tcg_temp_free_i32(nzcv);
1678}
1679
1680/* MRS - move from system register
1681 * MSR (register) - move to system register
1682 * SYS
1683 * SYSL
1684 * These are all essentially the same insn in 'read' and 'write'
1685 * versions, with varying op0 fields.
1686 */
1687static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1688                       unsigned int op0, unsigned int op1, unsigned int op2,
1689                       unsigned int crn, unsigned int crm, unsigned int rt)
1690{
1691    const ARMCPRegInfo *ri;
1692    TCGv_i64 tcg_rt;
1693
1694    ri = get_arm_cp_reginfo(s->cp_regs,
1695                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1696                                               crn, crm, op0, op1, op2));
1697
1698    if (!ri) {
1699        /* Unknown register; this might be a guest error or a
1700         * feature that QEMU does not implement.
1701         */
1702        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1703                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1704                      isread ? "read" : "write", op0, op1, crn, crm, op2);
1705        unallocated_encoding(s);
1706        return;
1707    }
1708
1709    /* Check access permissions */
1710    if (!cp_access_ok(s->current_el, ri, isread)) {
1711        unallocated_encoding(s);
1712        return;
1713    }
1714
1715    if (ri->accessfn) {
1716        /* Emit code to perform further access permissions checks at
1717         * runtime; this may result in an exception.
1718         */
1719        TCGv_ptr tmpptr;
1720        TCGv_i32 tcg_syn, tcg_isread;
1721        uint32_t syndrome;
1722
1723        gen_a64_set_pc_im(s->pc - 4);
1724        tmpptr = tcg_const_ptr(ri);
1725        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1726        tcg_syn = tcg_const_i32(syndrome);
1727        tcg_isread = tcg_const_i32(isread);
1728        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1729        tcg_temp_free_ptr(tmpptr);
1730        tcg_temp_free_i32(tcg_syn);
1731        tcg_temp_free_i32(tcg_isread);
1732    }
1733
1734    /* Handle special cases first */
1735    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1736    case ARM_CP_NOP:
1737        return;
1738    case ARM_CP_NZCV:
1739        tcg_rt = cpu_reg(s, rt);
1740        if (isread) {
1741            gen_get_nzcv(tcg_rt);
1742        } else {
1743            gen_set_nzcv(tcg_rt);
1744        }
1745        return;
1746    case ARM_CP_CURRENTEL:
1747        /* Reads as the current EL value from pstate, which is
1748         * guaranteed to be constant by the tb flags.
1749         */
1750        tcg_rt = cpu_reg(s, rt);
1751        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1752        return;
1753    case ARM_CP_DC_ZVA:
1754        /* Writes clear the aligned block of memory which rt points into. */
1755        tcg_rt = cpu_reg(s, rt);
1756        gen_helper_dc_zva(cpu_env, tcg_rt);
1757        return;
1758    default:
1759        break;
1760    }
1761    if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
1762        return;
1763    } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
1764        return;
1765    }
1766
1767    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1768        gen_io_start();
1769    }
1770
1771    tcg_rt = cpu_reg(s, rt);
1772
1773    if (isread) {
1774        if (ri->type & ARM_CP_CONST) {
1775            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1776        } else if (ri->readfn) {
1777            TCGv_ptr tmpptr;
1778            tmpptr = tcg_const_ptr(ri);
1779            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1780            tcg_temp_free_ptr(tmpptr);
1781        } else {
1782            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1783        }
1784    } else {
1785        if (ri->type & ARM_CP_CONST) {
1786            /* If not forbidden by access permissions, treat as WI (write-ignored) */
1787            return;
1788        } else if (ri->writefn) {
1789            TCGv_ptr tmpptr;
1790            tmpptr = tcg_const_ptr(ri);
1791            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1792            tcg_temp_free_ptr(tmpptr);
1793        } else {
1794            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1795        }
1796    }
1797
1798    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1799        /* I/O operations must end the TB here (whether read or write) */
1800        gen_io_end();
1801        s->base.is_jmp = DISAS_UPDATE;
1802    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1803        /* We default to ending the TB on a coprocessor register write,
1804         * but allow this to be suppressed by the register definition
1805         * (usually only necessary to work around guest bugs).
1806         */
1807        s->base.is_jmp = DISAS_UPDATE;
1808    }
1809}
1810
1811/* System
1812 *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1813 * +---------------------+---+-----+-----+-------+-------+-----+------+
1814 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1815 * +---------------------+---+-----+-----+-------+-------+-----+------+
1816 */
1817static void disas_system(DisasContext *s, uint32_t insn)
1818{
1819    unsigned int l, op0, op1, crn, crm, op2, rt;
1820    l = extract32(insn, 21, 1);
1821    op0 = extract32(insn, 19, 2);
1822    op1 = extract32(insn, 16, 3);
1823    crn = extract32(insn, 12, 4);
1824    crm = extract32(insn, 8, 4);
1825    op2 = extract32(insn, 5, 3);
1826    rt = extract32(insn, 0, 5);
1827
1828    if (op0 == 0) {
1829        if (l || rt != 31) {
1830            unallocated_encoding(s);
1831            return;
1832        }
1833        switch (crn) {
1834        case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
1835            handle_hint(s, insn, op1, op2, crm);
1836            break;
1837        case 3: /* CLREX, DSB, DMB, ISB */
1838            handle_sync(s, insn, op1, op2, crm);
1839            break;
1840        case 4: /* MSR (immediate) */
1841            handle_msr_i(s, insn, op1, op2, crm);
1842            break;
1843        default:
1844            unallocated_encoding(s);
1845            break;
1846        }
1847        return;
1848    }
1849    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1850}
1851
1852/* Exception generation
1853 *
1854 *  31             24 23 21 20                     5 4   2 1  0
1855 * +-----------------+-----+------------------------+-----+----+
1856 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1857 * +-----------------+-----+------------------------+-----+----+
1858 */
1859static void disas_exc(DisasContext *s, uint32_t insn)
1860{
1861    int opc = extract32(insn, 21, 3);
1862    int op2_ll = extract32(insn, 0, 5);
1863    int imm16 = extract32(insn, 5, 16);
1864    TCGv_i32 tmp;
1865
1866    switch (opc) {
1867    case 0:
1868        /* For SVC, HVC and SMC we advance the single-step state
1869         * machine before taking the exception. This is architecturally
1870         * mandated, to ensure that single-stepping a system call
1871         * instruction works properly.
1872         */
1873        switch (op2_ll) {
1874        case 1:                                                     /* SVC */
1875            gen_ss_advance(s);
1876            gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1877                               default_exception_el(s));
1878            break;
1879        case 2:                                                     /* HVC */
1880            if (s->current_el == 0) {
1881                unallocated_encoding(s);
1882                break;
1883            }
1884            /* The pre-HVC helper handles the cases where HVC should be
1885             * treated as an undefined insn due to runtime configuration.
1886             */
1887            gen_a64_set_pc_im(s->pc - 4);
1888            gen_helper_pre_hvc(cpu_env);
1889            gen_ss_advance(s);
1890            gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1891            break;
1892        case 3:                                                     /* SMC */
1893            if (s->current_el == 0) {
1894                unallocated_encoding(s);
1895                break;
1896            }
1897            gen_a64_set_pc_im(s->pc - 4);
1898            tmp = tcg_const_i32(syn_aa64_smc(imm16));
1899            gen_helper_pre_smc(cpu_env, tmp);
1900            tcg_temp_free_i32(tmp);
1901            gen_ss_advance(s);
1902            gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1903            break;
1904        default:
1905            unallocated_encoding(s);
1906            break;
1907        }
1908        break;
1909    case 1:
1910        if (op2_ll != 0) {
1911            unallocated_encoding(s);
1912            break;
1913        }
1914        /* BRK */
1915        gen_exception_bkpt_insn(s, 4, syn_aa64_bkpt(imm16));
1916        break;
1917    case 2:
1918        if (op2_ll != 0) {
1919            unallocated_encoding(s);
1920            break;
1921        }
1922        /* HLT. This has two purposes.
1923         * Architecturally, it is an external halting debug instruction.
1924         * Since QEMU doesn't implement external debug, we treat it as
1925         * the architecture requires when halting debug is disabled: it UNDEFs.
1926         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1927         */
1928        if (semihosting_enabled() && imm16 == 0xf000) {
1929#ifndef CONFIG_USER_ONLY
1930            /* In system mode, don't allow userspace access to semihosting,
1931             * to provide some semblance of security (and for consistency
1932             * with our 32-bit semihosting).
1933             */
1934            if (s->current_el == 0) {
1935                unsupported_encoding(s, insn);
1936                break;
1937            }
1938#endif
1939            gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1940        } else {
1941            unsupported_encoding(s, insn);
1942        }
1943        break;
1944    case 5:
1945        if (op2_ll < 1 || op2_ll > 3) {
1946            unallocated_encoding(s);
1947            break;
1948        }
1949        /* DCPS1, DCPS2, DCPS3 */
1950        unsupported_encoding(s, insn);
1951        break;
1952    default:
1953        unallocated_encoding(s);
1954        break;
1955    }
1956}
1957
1958/* Unconditional branch (register)
1959 *  31           25 24   21 20   16 15   10 9    5 4     0
1960 * +---------------+-------+-------+-------+------+-------+
1961 * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1962 * +---------------+-------+-------+-------+------+-------+
1963 */
1964static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1965{
1966    unsigned int opc, op2, op3, rn, op4;
1967    unsigned btype_mod = 2;   /* 0: BR, 1: BLR, 2: other */
1968    TCGv_i64 dst;
1969    TCGv_i64 modifier;
1970
1971    opc = extract32(insn, 21, 4);
1972    op2 = extract32(insn, 16, 5);
1973    op3 = extract32(insn, 10, 6);
1974    rn = extract32(insn, 5, 5);
1975    op4 = extract32(insn, 0, 5);
1976
1977    if (op2 != 0x1f) {
1978        goto do_unallocated;
1979    }
1980
1981    switch (opc) {
1982    case 0: /* BR */
1983    case 1: /* BLR */
1984    case 2: /* RET */
1985        btype_mod = opc;
1986        switch (op3) {
1987        case 0:
1988            /* BR, BLR, RET */
1989            if (op4 != 0) {
1990                goto do_unallocated;
1991            }
1992            dst = cpu_reg(s, rn);
1993            break;
1994
1995        case 2:
1996        case 3:
1997            if (!dc_isar_feature(aa64_pauth, s)) {
1998                goto do_unallocated;
1999            }
2000            if (opc == 2) {
2001                /* RETAA, RETAB */
2002                if (rn != 0x1f || op4 != 0x1f) {
2003                    goto do_unallocated;
2004                }
2005                rn = 30;
2006                modifier = cpu_X[31];
2007            } else {
2008                /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */
2009                if (op4 != 0x1f) {
2010                    goto do_unallocated;
2011                }
2012                modifier = new_tmp_a64_zero(s);
2013            }
2014            if (s->pauth_active) {
2015                dst = new_tmp_a64(s);
2016                if (op3 == 2) {
2017                    gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2018                } else {
2019                    gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2020                }
2021            } else {
2022                dst = cpu_reg(s, rn);
2023            }
2024            break;
2025
2026        default:
2027            goto do_unallocated;
2028        }
2029        gen_a64_set_pc(s, dst);
2030        /* BLR also needs to load return address */
2031        if (opc == 1) {
2032            tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
2033        }
2034        break;
2035
2036    case 8: /* BRAA */
2037    case 9: /* BLRAA */
2038        if (!dc_isar_feature(aa64_pauth, s)) {
2039            goto do_unallocated;
2040        }
2041        if ((op3 & ~1) != 2) {
2042            goto do_unallocated;
2043        }
2044        btype_mod = opc & 1;
2045        if (s->pauth_active) {
2046            dst = new_tmp_a64(s);
2047            modifier = cpu_reg_sp(s, op4);
2048            if (op3 == 2) {
2049                gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2050            } else {
2051                gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2052            }
2053        } else {
2054            dst = cpu_reg(s, rn);
2055        }
2056        gen_a64_set_pc(s, dst);
2057        /* BLRAA also needs to load return address */
2058        if (opc == 9) {
2059            tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
2060        }
2061        break;
2062
2063    case 4: /* ERET */
2064        if (s->current_el == 0) {
2065            goto do_unallocated;
2066        }
2067        switch (op3) {
2068        case 0: /* ERET */
2069            if (op4 != 0) {
2070                goto do_unallocated;
2071            }
2072            dst = tcg_temp_new_i64();
2073            tcg_gen_ld_i64(dst, cpu_env,
2074                           offsetof(CPUARMState, elr_el[s->current_el]));
2075            break;
2076
2077        case 2: /* ERETAA */
2078        case 3: /* ERETAB */
2079            if (!dc_isar_feature(aa64_pauth, s)) {
2080                goto do_unallocated;
2081            }
2082            if (rn != 0x1f || op4 != 0x1f) {
2083                goto do_unallocated;
2084            }
2085            dst = tcg_temp_new_i64();
2086            tcg_gen_ld_i64(dst, cpu_env,
2087                           offsetof(CPUARMState, elr_el[s->current_el]));
2088            if (s->pauth_active) {
2089                modifier = cpu_X[31];
2090                if (op3 == 2) {
2091                    gen_helper_autia(dst, cpu_env, dst, modifier);
2092                } else {
2093                    gen_helper_autib(dst, cpu_env, dst, modifier);
2094                }
2095            }
2096            break;
2097
2098        default:
2099            goto do_unallocated;
2100        }
2101        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2102            gen_io_start();
2103        }
2104
2105        gen_helper_exception_return(cpu_env, dst);
2106        tcg_temp_free_i64(dst);
2107        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2108            gen_io_end();
2109        }
2110        /* Must exit loop to check unmasked IRQs */
2111        s->base.is_jmp = DISAS_EXIT;
2112        return;
2113
2114    case 5: /* DRPS */
2115        if (op3 != 0 || op4 != 0 || rn != 0x1f) {
2116            goto do_unallocated;
2117        } else {
2118            unsupported_encoding(s, insn);
2119        }
2120        return;
2121
2122    default:
2123    do_unallocated:
2124        unallocated_encoding(s);
2125        return;
2126    }
2127
2128    switch (btype_mod) {
2129    case 0: /* BR */
2130        if (dc_isar_feature(aa64_bti, s)) {
2131            /* BR to {x16,x17} or !guard -> 1, else 3.  */
2132            set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
2133        }
2134        break;
2135
2136    case 1: /* BLR */
2137        if (dc_isar_feature(aa64_bti, s)) {
2138            /* BLR sets BTYPE to 2, regardless of source guarded page.  */
2139            set_btype(s, 2);
2140        }
2141        break;
2142
2143    default: /* RET or none of the above.  */
2144        /* BTYPE will be set to 0 by normal end-of-insn processing.  */
2145        break;
2146    }
2147
2148    s->base.is_jmp = DISAS_JUMP;
2149}
2150
2151/* Branches, exception generating and system instructions */
2152static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
2153{
2154    switch (extract32(insn, 25, 7)) {
2155    case 0x0a: case 0x0b:
2156    case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
2157        disas_uncond_b_imm(s, insn);
2158        break;
2159    case 0x1a: case 0x5a: /* Compare & branch (immediate) */
2160        disas_comp_b_imm(s, insn);
2161        break;
2162    case 0x1b: case 0x5b: /* Test & branch (immediate) */
2163        disas_test_b_imm(s, insn);
2164        break;
2165    case 0x2a: /* Conditional branch (immediate) */
2166        disas_cond_b_imm(s, insn);
2167        break;
2168    case 0x6a: /* Exception generation / System */
2169        if (insn & (1 << 24)) {
2170            if (extract32(insn, 22, 2) == 0) {
2171                disas_system(s, insn);
2172            } else {
2173                unallocated_encoding(s);
2174            }
2175        } else {
2176            disas_exc(s, insn);
2177        }
2178        break;
2179    case 0x6b: /* Unconditional branch (register) */
2180        disas_uncond_b_reg(s, insn);
2181        break;
2182    default:
2183        unallocated_encoding(s);
2184        break;
2185    }
2186}
2187
2188/*
2189 * Load/Store exclusive instructions are implemented by remembering
2190 * the value/address loaded, and seeing if these are the same
2191 * when the store is performed. These are not the architecturally
2192 * mandated semantics, but they work for typical guest code sequences
2193 * and avoids having to monitor regular stores.
2194 *
2195 * The store exclusive uses the atomic cmpxchg primitives to avoid
2196 * races in multi-threaded linux-user and when MTTCG softmmu is
2197 * enabled.
2198 */
2199static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
2200                               TCGv_i64 addr, int size, bool is_pair)
2201{
2202    int idx = get_mem_index(s);
2203    TCGMemOp memop = s->be_data;
2204
2205    g_assert(size <= 3);
2206    if (is_pair) {
2207        g_assert(size >= 2);
2208        if (size == 2) {
2209            /* The pair must be single-copy atomic for the doubleword.  */
2210            memop |= MO_64 | MO_ALIGN;
2211            tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2212            if (s->be_data == MO_LE) {
2213                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2214                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2215            } else {
2216                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2217                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2218            }
2219        } else {
2220            /* The pair must be single-copy atomic for *each* doubleword,
2221               not the entire quadword; however, it must be quadword aligned.  */
2222            memop |= MO_64;
2223            tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
2224                                memop | MO_ALIGN_16);
2225
2226            TCGv_i64 addr2 = tcg_temp_new_i64();
2227            tcg_gen_addi_i64(addr2, addr, 8);
2228            tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
2229            tcg_temp_free_i64(addr2);
2230
2231            tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2232            tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2233        }
2234    } else {
2235        memop |= size | MO_ALIGN;
2236        tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2237        tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2238    }
2239    tcg_gen_mov_i64(cpu_exclusive_addr, addr);
2240}
2241
2242static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2243                                TCGv_i64 addr, int size, int is_pair)
2244{
2245    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2246     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2247     *     [addr] = {Rt};
2248     *     if (is_pair) {
2249     *         [addr + datasize] = {Rt2};
2250     *     }
2251     *     {Rd} = 0;
2252     * } else {
2253     *     {Rd} = 1;
2254     * }
2255     * env->exclusive_addr = -1;
2256     */
2257    TCGLabel *fail_label = gen_new_label();
2258    TCGLabel *done_label = gen_new_label();
2259    TCGv_i64 tmp;
2260
2261    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
2262
2263    tmp = tcg_temp_new_i64();
2264    if (is_pair) {
2265        if (size == 2) {
2266            if (s->be_data == MO_LE) {
2267                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2268            } else {
2269                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2270            }
2271            tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2272                                       cpu_exclusive_val, tmp,
2273                                       get_mem_index(s),
2274                                       MO_64 | MO_ALIGN | s->be_data);
2275            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2276        } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2277            if (!HAVE_CMPXCHG128) {
2278                gen_helper_exit_atomic(cpu_env);
2279                s->base.is_jmp = DISAS_NORETURN;
2280            } else if (s->be_data == MO_LE) {
2281                gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
2282                                                        cpu_exclusive_addr,
2283                                                        cpu_reg(s, rt),
2284                                                        cpu_reg(s, rt2));
2285            } else {
2286                gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
2287                                                        cpu_exclusive_addr,
2288                                                        cpu_reg(s, rt),
2289                                                        cpu_reg(s, rt2));
2290            }
2291        } else if (s->be_data == MO_LE) {
2292            gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
2293                                           cpu_reg(s, rt), cpu_reg(s, rt2));
2294        } else {
2295            gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
2296                                           cpu_reg(s, rt), cpu_reg(s, rt2));
2297        }
2298    } else {
2299        tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2300                                   cpu_reg(s, rt), get_mem_index(s),
2301                                   size | MO_ALIGN | s->be_data);
2302        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2303    }
2304    tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2305    tcg_temp_free_i64(tmp);
2306    tcg_gen_br(done_label);
2307
2308    gen_set_label(fail_label);
2309    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2310    gen_set_label(done_label);
2311    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2312}
2313
2314static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2315                                 int rn, int size)
2316{
2317    TCGv_i64 tcg_rs = cpu_reg(s, rs);
2318    TCGv_i64 tcg_rt = cpu_reg(s, rt);
2319    int memidx = get_mem_index(s);
2320    TCGv_i64 clean_addr;
2321
2322    if (rn == 31) {
2323        gen_check_sp_alignment(s);
2324    }
2325    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2326    tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
2327                               size | MO_ALIGN | s->be_data);
2328}
2329
2330static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2331                                      int rn, int size)
2332{
2333    TCGv_i64 s1 = cpu_reg(s, rs);
2334    TCGv_i64 s2 = cpu_reg(s, rs + 1);
2335    TCGv_i64 t1 = cpu_reg(s, rt);
2336    TCGv_i64 t2 = cpu_reg(s, rt + 1);
2337    TCGv_i64 clean_addr;
2338    int memidx = get_mem_index(s);
2339
2340    if (rn == 31) {
2341        gen_check_sp_alignment(s);
2342    }
2343    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2344
2345    if (size == 2) {
2346        TCGv_i64 cmp = tcg_temp_new_i64();
2347        TCGv_i64 val = tcg_temp_new_i64();
2348
2349        if (s->be_data == MO_LE) {
2350            tcg_gen_concat32_i64(val, t1, t2);
2351            tcg_gen_concat32_i64(cmp, s1, s2);
2352        } else {
2353            tcg_gen_concat32_i64(val, t2, t1);
2354            tcg_gen_concat32_i64(cmp, s2, s1);
2355        }
2356
2357        tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
2358                                   MO_64 | MO_ALIGN | s->be_data);
2359        tcg_temp_free_i64(val);
2360
2361        if (s->be_data == MO_LE) {
2362            tcg_gen_extr32_i64(s1, s2, cmp);
2363        } else {
2364            tcg_gen_extr32_i64(s2, s1, cmp);
2365        }
2366        tcg_temp_free_i64(cmp);
2367    } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2368        if (HAVE_CMPXCHG128) {
2369            TCGv_i32 tcg_rs = tcg_const_i32(rs);
2370            if (s->be_data == MO_LE) {
2371                gen_helper_casp_le_parallel(cpu_env, tcg_rs,
2372                                            clean_addr, t1, t2);
2373            } else {
2374                gen_helper_casp_be_parallel(cpu_env, tcg_rs,
2375                                            clean_addr, t1, t2);
2376            }
2377            tcg_temp_free_i32(tcg_rs);
2378        } else {
2379            gen_helper_exit_atomic(cpu_env);
2380            s->base.is_jmp = DISAS_NORETURN;
2381        }
2382    } else {
2383        TCGv_i64 d1 = tcg_temp_new_i64();
2384        TCGv_i64 d2 = tcg_temp_new_i64();
2385        TCGv_i64 a2 = tcg_temp_new_i64();
2386        TCGv_i64 c1 = tcg_temp_new_i64();
2387        TCGv_i64 c2 = tcg_temp_new_i64();
2388        TCGv_i64 zero = tcg_const_i64(0);
2389
2390        /* Load the two words, in memory order.  */
2391        tcg_gen_qemu_ld_i64(d1, clean_addr, memidx,
2392                            MO_64 | MO_ALIGN_16 | s->be_data);
2393        tcg_gen_addi_i64(a2, clean_addr, 8);
2394        tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data);
2395
2396        /* Compare the two words, also in memory order.  */
2397        tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
2398        tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
2399        tcg_gen_and_i64(c2, c2, c1);
2400
2401        /* If compare equal, write back new data, else write back old data.  */
2402        tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
2403        tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
2404        tcg_gen_qemu_st_i64(c1, clean_addr, memidx, MO_64 | s->be_data);
2405        tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
2406        tcg_temp_free_i64(a2);
2407        tcg_temp_free_i64(c1);
2408        tcg_temp_free_i64(c2);
2409        tcg_temp_free_i64(zero);
2410
2411        /* Write back the data from memory to Rs.  */
2412        tcg_gen_mov_i64(s1, d1);
2413        tcg_gen_mov_i64(s2, d2);
2414        tcg_temp_free_i64(d1);
2415        tcg_temp_free_i64(d2);
2416    }
2417}
2418
2419/* Compute the Sixty-Four bit (SF) register size indication. This logic
2420 * is derived from the ARMv8 specs for LDR (Shared decode for all encodings).
2421 */
2422static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
2423{
2424    int opc0 = extract32(opc, 0, 1);
2425    int regsize;
2426
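        /*
         * Illustration: LDRSW Xt (opc = 0b10) is signed with opc0 = 0,
         * so regsize = 64 and SF = 1; LDRSB Wt (opc = 0b11) has
         * opc0 = 1, so regsize = 32 and SF = 0.  Unsigned loads use a
         * 64-bit register only when size == 3.
         */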
2427    if (is_signed) {
2428        regsize = opc0 ? 32 : 64;
2429    } else {
2430        regsize = size == 3 ? 64 : 32;
2431    }
2432    return regsize == 64;
2433}
2434
2435/* Load/store exclusive
2436 *
2437 *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
2438 * +-----+-------------+----+---+----+------+----+-------+------+------+
2439 * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
2440 * +-----+-------------+----+---+----+------+----+-------+------+------+
2441 *
2442 *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2443 *   L: 0 -> store, 1 -> load
2444 *  o2: 0 -> exclusive, 1 -> not
2445 *  o1: 0 -> single register, 1 -> register pair
2446 *  o0: 1 -> load-acquire/store-release, 0 -> not
2447 */
2448static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2449{
2450    int rt = extract32(insn, 0, 5);
2451    int rn = extract32(insn, 5, 5);
2452    int rt2 = extract32(insn, 10, 5);
2453    int rs = extract32(insn, 16, 5);
2454    int is_lasr = extract32(insn, 15, 1);
2455    int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
2456    int size = extract32(insn, 30, 2);
2457    TCGv_i64 clean_addr;
2458
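        /*
         * The case labels below spell out the four composed bits; e.g.
         * LDAXR (o2=0 L=1 o1=0 o0=1) composes to 0x5, and STLLR
         * (o2=1 L=0 o1=0 o0=0) to 0x8.
         */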
2459    switch (o2_L_o1_o0) {
2460    case 0x0: /* STXR */
2461    case 0x1: /* STLXR */
2462        if (rn == 31) {
2463            gen_check_sp_alignment(s);
2464        }
2465        if (is_lasr) {
2466            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2467        }
2468        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2469        gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
2470        return;
2471
2472    case 0x4: /* LDXR */
2473    case 0x5: /* LDAXR */
2474        if (rn == 31) {
2475            gen_check_sp_alignment(s);
2476        }
2477        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2478        s->is_ldex = true;
2479        gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
2480        if (is_lasr) {
2481            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2482        }
2483        return;
2484
2485    case 0x8: /* STLLR */
2486        if (!dc_isar_feature(aa64_lor, s)) {
2487            break;
2488        }
2489        /* StoreLORelease is the same as Store-Release for QEMU.  */
2490        /* fall through */
2491    case 0x9: /* STLR */
2492        /* Generate ISS for non-exclusive accesses including LASR.  */
2493        if (rn == 31) {
2494            gen_check_sp_alignment(s);
2495        }
2496        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2497        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2498        do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt,
2499                  disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2500        return;
2501
2502    case 0xc: /* LDLAR */
2503        if (!dc_isar_feature(aa64_lor, s)) {
2504            break;
2505        }
2506        /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2507        /* fall through */
2508    case 0xd: /* LDAR */
2509        /* Generate ISS for non-exclusive accesses including LASR.  */
2510        if (rn == 31) {
2511            gen_check_sp_alignment(s);
2512        }
2513        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2514        do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt,
2515                  disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2516        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2517        return;
2518
2519    case 0x2: case 0x3: /* CASP / STXP */
2520        if (size & 2) { /* STXP / STLXP */
2521            if (rn == 31) {
2522                gen_check_sp_alignment(s);
2523            }
2524            if (is_lasr) {
2525                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2526            }
2527            clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2528            gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
2529            return;
2530        }
2531        if (rt2 == 31
2532            && ((rt | rs) & 1) == 0
2533            && dc_isar_feature(aa64_atomics, s)) {
2534            /* CASP / CASPL */
2535            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2536            return;
2537        }
2538        break;
2539
2540    case 0x6: case 0x7: /* CASPA / LDXP */
2541        if (size & 2) { /* LDXP / LDAXP */
2542            if (rn == 31) {
2543                gen_check_sp_alignment(s);
2544            }
2545            clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2546            s->is_ldex = true;
2547            gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
2548            if (is_lasr) {
2549                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2550            }
2551            return;
2552        }
2553        if (rt2 == 31
2554            && ((rt | rs) & 1) == 0
2555            && dc_isar_feature(aa64_atomics, s)) {
2556            /* CASPA / CASPAL */
2557            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2558            return;
2559        }
2560        break;
2561
2562    case 0xa: /* CAS */
2563    case 0xb: /* CASL */
2564    case 0xe: /* CASA */
2565    case 0xf: /* CASAL */
2566        if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
2567            gen_compare_and_swap(s, rs, rt, rn, size);
2568            return;
2569        }
2570        break;
2571    }
2572    unallocated_encoding(s);
2573}
2574
2575/*
2576 * Load register (literal)
2577 *
2578 *  31 30 29   27  26 25 24 23                5 4     0
2579 * +-----+-------+---+-----+-------------------+-------+
2580 * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2581 * +-----+-------+---+-----+-------------------+-------+
2582 *
2583 * V: 1 -> vector (simd/fp)
2584 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2585 *                   10 -> 32 bit signed, 11 -> prefetch
2586 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2587 */
2588static void disas_ld_lit(DisasContext *s, uint32_t insn)
2589{
2590    int rt = extract32(insn, 0, 5);
2591    int64_t imm = sextract32(insn, 5, 19) << 2;
2592    bool is_vector = extract32(insn, 26, 1);
2593    int opc = extract32(insn, 30, 2);
2594    bool is_signed = false;
2595    int size = 2;
2596    TCGv_i64 tcg_rt, clean_addr;
2597
2598    if (is_vector) {
2599        if (opc == 3) {
2600            unallocated_encoding(s);
2601            return;
2602        }
2603        size = 2 + opc;
2604        if (!fp_access_check(s)) {
2605            return;
2606        }
2607    } else {
2608        if (opc == 3) {
2609            /* PRFM (literal) : prefetch */
2610            return;
2611        }
2612        size = 2 + extract32(opc, 0, 1);
2613        is_signed = extract32(opc, 1, 1);
2614    }
2615
2616    tcg_rt = cpu_reg(s, rt);
2617
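        /* s->pc has already been advanced past this insn, so s->pc - 4
         * is the address of the literal load itself; imm19, scaled by 4
         * above, is applied relative to that.
         */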
2618    clean_addr = tcg_const_i64((s->pc - 4) + imm);
2619    if (is_vector) {
2620        do_fp_ld(s, rt, clean_addr, size);
2621    } else {
2622        /* Only unsigned 32bit loads target 32bit registers.  */
2623        bool iss_sf = opc != 0;
2624
2625        do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false,
2626                  true, rt, iss_sf, false);
2627    }
2628    tcg_temp_free_i64(clean_addr);
2629}
2630
2631/*
2632 * LDNP (Load Pair - non-temporal hint)
2633 * LDP (Load Pair - non vector)
2634 * LDPSW (Load Pair Signed Word - non vector)
2635 * STNP (Store Pair - non-temporal hint)
2636 * STP (Store Pair - non vector)
2637 * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2638 * LDP (Load Pair of SIMD&FP)
2639 * STNP (Store Pair of SIMD&FP - non-temporal hint)
2640 * STP (Store Pair of SIMD&FP)
2641 *
2642 *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2643 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2644 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2645 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2646 *
2647 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2648 *      LDPSW                    01
2649 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2650 *   V: 0 -> GPR, 1 -> Vector
2651 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2652 *      10 -> signed offset, 11 -> pre-index
2653 *   L: 0 -> Store 1 -> Load
2654 *
2655 * Rt, Rt2 = GPR or SIMD registers to be stored
2656 * Rn = general purpose register containing address
2657 * imm7 = signed offset (multiple of 4 or 8 depending on size)
2658 */
2659static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2660{
2661    int rt = extract32(insn, 0, 5);
2662    int rn = extract32(insn, 5, 5);
2663    int rt2 = extract32(insn, 10, 5);
2664    uint64_t offset = sextract64(insn, 15, 7);
2665    int index = extract32(insn, 23, 2);
2666    bool is_vector = extract32(insn, 26, 1);
2667    bool is_load = extract32(insn, 22, 1);
2668    int opc = extract32(insn, 30, 2);
2669
2670    bool is_signed = false;
2671    bool postindex = false;
2672    bool wback = false;
2673
2674    TCGv_i64 clean_addr, dirty_addr;
2675
2676    int size;
2677
2678    if (opc == 3) {
2679        unallocated_encoding(s);
2680        return;
2681    }
2682
2683    if (is_vector) {
2684        size = 2 + opc;
2685    } else {
2686        size = 2 + extract32(opc, 1, 1);
2687        is_signed = extract32(opc, 0, 1);
2688        if (!is_load && is_signed) {
2689            unallocated_encoding(s);
2690            return;
2691        }
2692    }
2693
2694    switch (index) {
2695    case 1: /* post-index */
2696        postindex = true;
2697        wback = true;
2698        break;
2699    case 0:
2700        /* signed offset with "non-temporal" hint. Since we don't emulate
2701         * caches we don't care about hints to the cache system about
2702         * data access patterns, and handle this identically to plain
2703         * signed offset.
2704         */
2705        if (is_signed) {
2706            /* There is no non-temporal-hint version of LDPSW */
2707            unallocated_encoding(s);
2708            return;
2709        }
2710        postindex = false;
2711        break;
2712    case 2: /* signed offset, rn not updated */
2713        postindex = false;
2714        break;
2715    case 3: /* pre-index */
2716        postindex = false;
2717        wback = true;
2718        break;
2719    }
2720
2721    if (is_vector && !fp_access_check(s)) {
2722        return;
2723    }
2724
2725    offset <<= size;
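        /* The imm7 offset is scaled by the access size; e.g. a 64-bit
         * "STP x0, x1, [sp, #-16]!" encodes imm7 = -2 with size = 3,
         * giving -2 << 3 = -16 here.
         */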
2726
2727    if (rn == 31) {
2728        gen_check_sp_alignment(s);
2729    }
2730
2731    dirty_addr = read_cpu_reg_sp(s, rn, 1);
2732    if (!postindex) {
2733        tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2734    }
2735    clean_addr = clean_data_tbi(s, dirty_addr);
2736
2737    if (is_vector) {
2738        if (is_load) {
2739            do_fp_ld(s, rt, clean_addr, size);
2740        } else {
2741            do_fp_st(s, rt, clean_addr, size);
2742        }
2743        tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2744        if (is_load) {
2745            do_fp_ld(s, rt2, clean_addr, size);
2746        } else {
2747            do_fp_st(s, rt2, clean_addr, size);
2748        }
2749    } else {
2750        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2751        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2752
2753        if (is_load) {
2754            TCGv_i64 tmp = tcg_temp_new_i64();
2755
2756            /* Do not modify tcg_rt before recognizing any exception
2757             * from the second load.
2758             */
2759            do_gpr_ld(s, tmp, clean_addr, size, is_signed, false,
2760                      false, 0, false, false);
2761            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2762            do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false,
2763                      false, 0, false, false);
2764
2765            tcg_gen_mov_i64(tcg_rt, tmp);
2766            tcg_temp_free_i64(tmp);
2767        } else {
2768            do_gpr_st(s, tcg_rt, clean_addr, size,
2769                      false, 0, false, false);
2770            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2771            do_gpr_st(s, tcg_rt2, clean_addr, size,
2772                      false, 0, false, false);
2773        }
2774    }
2775
2776    if (wback) {
2777        if (postindex) {
2778            tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2779        }
2780        tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
2781    }
2782}
2783
2784/*
2785 * Load/store (immediate post-indexed)
2786 * Load/store (immediate pre-indexed)
2787 * Load/store (unscaled immediate)
2788 *
2789 * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2790 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2791 * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2792 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2793 *
2794 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2795 *       10 -> unprivileged
2796 * V = 0 -> non-vector
2797 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2798 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2799 */
2800static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2801                                int opc,
2802                                int size,
2803                                int rt,
2804                                bool is_vector)
2805{
2806    int rn = extract32(insn, 5, 5);
2807    int imm9 = sextract32(insn, 12, 9);
2808    int idx = extract32(insn, 10, 2);
2809    bool is_signed = false;
2810    bool is_store = false;
2811    bool is_extended = false;
2812    bool is_unpriv = (idx == 2);
2813    bool iss_valid = !is_vector;
2814    bool post_index;
2815    bool writeback;
2816
2817    TCGv_i64 clean_addr, dirty_addr;
2818
2819    if (is_vector) {
2820        size |= (opc & 2) << 1;
2821        if (size > 4 || is_unpriv) {
2822            unallocated_encoding(s);
2823            return;
2824        }
2825        is_store = ((opc & 1) == 0);
2826        if (!fp_access_check(s)) {
2827            return;
2828        }
2829    } else {
2830        if (size == 3 && opc == 2) {
2831            /* PRFM - prefetch */
2832            if (idx != 0) {
2833                unallocated_encoding(s);
2834                return;
2835            }
2836            return;
2837        }
2838        if (opc == 3 && size > 1) {
2839            unallocated_encoding(s);
2840            return;
2841        }
2842        is_store = (opc == 0);
2843        is_signed = extract32(opc, 1, 1);
2844        is_extended = (size < 3) && extract32(opc, 0, 1);
2845    }
2846
2847    switch (idx) {
2848    case 0:
2849    case 2:
2850        post_index = false;
2851        writeback = false;
2852        break;
2853    case 1:
2854        post_index = true;
2855        writeback = true;
2856        break;
2857    case 3:
2858        post_index = false;
2859        writeback = true;
2860        break;
2861    default:
2862        g_assert_not_reached();
2863    }
2864
2865    if (rn == 31) {
2866        gen_check_sp_alignment(s);
2867    }
2868
2869    dirty_addr = read_cpu_reg_sp(s, rn, 1);
2870    if (!post_index) {
2871        tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
2872    }
2873    clean_addr = clean_data_tbi(s, dirty_addr);
2874
2875    if (is_vector) {
2876        if (is_store) {
2877            do_fp_st(s, rt, clean_addr, size);
2878        } else {
2879            do_fp_ld(s, rt, clean_addr, size);
2880        }
2881    } else {
2882        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2883        int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2884        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2885
2886        if (is_store) {
2887            do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
2888                             iss_valid, rt, iss_sf, false);
2889        } else {
2890            do_gpr_ld_memidx(s, tcg_rt, clean_addr, size,
2891                             is_signed, is_extended, memidx,
2892                             iss_valid, rt, iss_sf, false);
2893        }
2894    }
2895
2896    if (writeback) {
2897        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2898        if (post_index) {
2899            tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
2900        }
2901        tcg_gen_mov_i64(tcg_rn, dirty_addr);
2902    }
2903}
2904
2905/*
2906 * Load/store (register offset)
2907 *
2908 * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2909 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2910 * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2911 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2912 *
2913 * For non-vector:
2914 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2915 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2916 * For vector:
2917 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2918 *   opc<0>: 0 -> store, 1 -> load
2919 * V: 1 -> vector/simd
2920 * opt: extend encoding (see DecodeRegExtend)
2921 * S: if S=1 then scale (essentially index by sizeof(size))
2922 * Rt: register to transfer into/out of
2923 * Rn: address register or SP for base
2924 * Rm: offset register or ZR for offset
2925 */
2926static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2927                                   int opc,
2928                                   int size,
2929                                   int rt,
2930                                   bool is_vector)
2931{
2932    int rn = extract32(insn, 5, 5);
2933    int shift = extract32(insn, 12, 1);
2934    int rm = extract32(insn, 16, 5);
2935    int opt = extract32(insn, 13, 3);
2936    bool is_signed = false;
2937    bool is_store = false;
2938    bool is_extended = false;
2939
2940    TCGv_i64 tcg_rm, clean_addr, dirty_addr;
2941
2942    if (extract32(opt, 1, 1) == 0) {
2943        unallocated_encoding(s);
2944        return;
2945    }
2946
2947    if (is_vector) {
2948        size |= (opc & 2) << 1;
2949        if (size > 4) {
2950            unallocated_encoding(s);
2951            return;
2952        }
2953        is_store = !extract32(opc, 0, 1);
2954        if (!fp_access_check(s)) {
2955            return;
2956        }
2957    } else {
2958        if (size == 3 && opc == 2) {
2959            /* PRFM - prefetch */
2960            return;
2961        }
2962        if (opc == 3 && size > 1) {
2963            unallocated_encoding(s);
2964            return;
2965        }
2966        is_store = (opc == 0);
2967        is_signed = extract32(opc, 1, 1);
2968        is_extended = (size < 3) && extract32(opc, 0, 1);
2969    }
2970
2971    if (rn == 31) {
2972        gen_check_sp_alignment(s);
2973    }
2974    dirty_addr = read_cpu_reg_sp(s, rn, 1);
2975
2976    tcg_rm = read_cpu_reg(s, rm, 1);
2977    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
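        /* For example (per DecodeRegExtend), "LDR x0, [x1, w2, SXTW #3]"
         * has option = 0b110 and S = 1, so tcg_rm becomes the
         * sign-extended w2 shifted left by size (3).
         */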
2978
2979    tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
2980    clean_addr = clean_data_tbi(s, dirty_addr);
2981
2982    if (is_vector) {
2983        if (is_store) {
2984            do_fp_st(s, rt, clean_addr, size);
2985        } else {
2986            do_fp_ld(s, rt, clean_addr, size);
2987        }
2988    } else {
2989        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2990        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2991        if (is_store) {
2992            do_gpr_st(s, tcg_rt, clean_addr, size,
2993                      true, rt, iss_sf, false);
2994        } else {
2995            do_gpr_ld(s, tcg_rt, clean_addr, size,
2996                      is_signed, is_extended,
2997                      true, rt, iss_sf, false);
2998        }
2999    }
3000}
3001
3002/*
3003 * Load/store (unsigned immediate)
3004 *
3005 * 31 30 29   27  26 25 24 23 22 21        10 9     5 4    0
3006 * +----+-------+---+-----+-----+------------+-------+------+
3007 * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
3008 * +----+-------+---+-----+-----+------------+-------+------+
3009 *
3010 * For non-vector:
3011 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
3012 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3013 * For vector:
3014 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3015 *   opc<0>: 0 -> store, 1 -> load
3016 * Rn: base address register (inc SP)
3017 * Rt: target register
3018 */
3019static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
3020                                        int opc,
3021                                        int size,
3022                                        int rt,
3023                                        bool is_vector)
3024{
3025    int rn = extract32(insn, 5, 5);
3026    unsigned int imm12 = extract32(insn, 10, 12);
3027    unsigned int offset;
3028
3029    TCGv_i64 clean_addr, dirty_addr;
3030
3031    bool is_store;
3032    bool is_signed = false;
3033    bool is_extended = false;
3034
3035    if (is_vector) {
3036        size |= (opc & 2) << 1;
3037        if (size > 4) {
3038            unallocated_encoding(s);
3039            return;
3040        }
3041        is_store = !extract32(opc, 0, 1);
3042        if (!fp_access_check(s)) {
3043            return;
3044        }
3045    } else {
3046        if (size == 3 && opc == 2) {
3047            /* PRFM - prefetch */
3048            return;
3049        }
3050        if (opc == 3 && size > 1) {
3051            unallocated_encoding(s);
3052            return;
3053        }
3054        is_store = (opc == 0);
3055        is_signed = extract32(opc, 1, 1);
3056        is_extended = (size < 3) && extract32(opc, 0, 1);
3057    }
3058
3059    if (rn == 31) {
3060        gen_check_sp_alignment(s);
3061    }
3062    dirty_addr = read_cpu_reg_sp(s, rn, 1);
3063    offset = imm12 << size;
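        /* imm12 is an unsigned offset scaled by the access size, e.g.
         * "LDR x0, [x1, #8]" encodes imm12 = 1 with size = 3.
         */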
3064    tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3065    clean_addr = clean_data_tbi(s, dirty_addr);
3066
3067    if (is_vector) {
3068        if (is_store) {
3069            do_fp_st(s, rt, clean_addr, size);
3070        } else {
3071            do_fp_ld(s, rt, clean_addr, size);
3072        }
3073    } else {
3074        TCGv_i64 tcg_rt = cpu_reg(s, rt);
3075        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3076        if (is_store) {
3077            do_gpr_st(s, tcg_rt, clean_addr, size,
3078                      true, rt, iss_sf, false);
3079        } else {
3080            do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended,
3081                      true, rt, iss_sf, false);
3082        }
3083    }
3084}
3085
3086/* Atomic memory operations
3087 *
3088 *  31  30      27  26    24    22  21   16   15    12    10    5     0
3089 * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3090 * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn |  Rt |
3091 * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3092 *
3093 * Rt: the result register
3094 * Rn: base address or SP
3095 * Rs: the source register for the operation
3096 * V: vector flag (always 0 as of v8.3)
3097 * A: acquire flag
3098 * R: release flag
3099 */
3100static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
3101                              int size, int rt, bool is_vector)
3102{
3103    int rs = extract32(insn, 16, 5);
3104    int rn = extract32(insn, 5, 5);
3105    int o3_opc = extract32(insn, 12, 4);
3106    TCGv_i64 tcg_rs, clean_addr;
3107    AtomicThreeOpFn *fn;
3108
3109    if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
3110        unallocated_encoding(s);
3111        return;
3112    }
3113    switch (o3_opc) {
3114    case 000: /* LDADD */
3115        fn = tcg_gen_atomic_fetch_add_i64;
3116        break;
3117    case 001: /* LDCLR */
3118        fn = tcg_gen_atomic_fetch_and_i64;
3119        break;
3120    case 002: /* LDEOR */
3121        fn = tcg_gen_atomic_fetch_xor_i64;
3122        break;
3123    case 003: /* LDSET */
3124        fn = tcg_gen_atomic_fetch_or_i64;
3125        break;
3126    case 004: /* LDSMAX */
3127        fn = tcg_gen_atomic_fetch_smax_i64;
3128        break;
3129    case 005: /* LDSMIN */
3130        fn = tcg_gen_atomic_fetch_smin_i64;
3131        break;
3132    case 006: /* LDUMAX */
3133        fn = tcg_gen_atomic_fetch_umax_i64;
3134        break;
3135    case 007: /* LDUMIN */
3136        fn = tcg_gen_atomic_fetch_umin_i64;
3137        break;
3138    case 010: /* SWP */
3139        fn = tcg_gen_atomic_xchg_i64;
3140        break;
3141    default:
3142        unallocated_encoding(s);
3143        return;
3144    }
3145
3146    if (rn == 31) {
3147        gen_check_sp_alignment(s);
3148    }
3149    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
3150    tcg_rs = read_cpu_reg(s, rs, true);
3151
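        /* LDCLR is a fetch-and-BIC: memory is ANDed with ~Rs, so invert
         * the operand here before handing it to the fetch-and-AND
         * primitive.
         */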
3152    if (o3_opc == 1) { /* LDCLR */
3153        tcg_gen_not_i64(tcg_rs, tcg_rs);
3154    }
3155
3156    /* The tcg atomic primitives are all full barriers.  Therefore we
3157     * can ignore the Acquire and Release bits of this instruction.
3158     */
3159    fn(cpu_reg(s, rt), clean_addr, tcg_rs, get_mem_index(s),
3160       s->be_data | size | MO_ALIGN);
3161}
3162
3163/*
3164 * PAC memory operations
3165 *
3166 *  31  30      27  26    24    22  21       12  11  10    5     0
3167 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3168 * | size | 1 1 1 | V | 0 0 | M S | 1 |  imm9  | W | 1 | Rn |  Rt |
3169 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3170 *
3171 * Rt: the result register
3172 * Rn: base address or SP
3173 * V: vector flag (always 0 as of v8.3)
3174 * M: clear for key DA, set for key DB
3175 * W: pre-indexing flag
3176 * S: sign for imm9.
3177 */
3178static void disas_ldst_pac(DisasContext *s, uint32_t insn,
3179                           int size, int rt, bool is_vector)
3180{
3181    int rn = extract32(insn, 5, 5);
3182    bool is_wback = extract32(insn, 11, 1);
3183    bool use_key_a = !extract32(insn, 23, 1);
3184    int offset;
3185    TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3186
3187    if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
3188        unallocated_encoding(s);
3189        return;
3190    }
3191
3192    if (rn == 31) {
3193        gen_check_sp_alignment(s);
3194    }
3195    dirty_addr = read_cpu_reg_sp(s, rn, 1);
3196
3197    if (s->pauth_active) {
3198        if (use_key_a) {
3199            gen_helper_autda(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
3200        } else {
3201            gen_helper_autdb(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
3202        }
3203    }
3204
3205    /* Form the 10-bit signed, scaled offset.  */
3206    offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
3207    offset = sextract32(offset << size, 0, 10 + size);
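        /* size is always 3 here, so this is a signed multiple of 8 in
         * the range -4096 .. +4088.
         */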
3208    tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3209
3210    /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3211    clean_addr = clean_data_tbi(s, dirty_addr);
3212
3213    tcg_rt = cpu_reg(s, rt);
3214    do_gpr_ld(s, tcg_rt, clean_addr, size, /* is_signed */ false,
3215              /* extend */ false, /* iss_valid */ !is_wback,
3216              /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);
3217
3218    if (is_wback) {
3219        tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
3220    }
3221}
3222
3223/* Load/store register (all forms) */
3224static void disas_ldst_reg(DisasContext *s, uint32_t insn)
3225{
3226    int rt = extract32(insn, 0, 5);
3227    int opc = extract32(insn, 22, 2);
3228    bool is_vector = extract32(insn, 26, 1);
3229    int size = extract32(insn, 30, 2);
3230
3231    switch (extract32(insn, 24, 2)) {
3232    case 0:
3233        if (extract32(insn, 21, 1) == 0) {
3234            /* Load/store register (unscaled immediate)
3235             * Load/store immediate pre/post-indexed
3236             * Load/store register unprivileged
3237             */
3238            disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
3239            return;
3240        }
3241        switch (extract32(insn, 10, 2)) {
3242        case 0:
3243            disas_ldst_atomic(s, insn, size, rt, is_vector);
3244            return;
3245        case 2:
3246            disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
3247            return;
3248        default:
3249            disas_ldst_pac(s, insn, size, rt, is_vector);
3250            return;
3251        }
3252        break;
3253    case 1:
3254        disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
3255        return;
3256    }
3257    unallocated_encoding(s);
3258}
3259
3260/* AdvSIMD load/store multiple structures
3261 *
3262 *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
3263 * +---+---+---------------+---+-------------+--------+------+------+------+
3264 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
3265 * +---+---+---------------+---+-------------+--------+------+------+------+
3266 *
3267 * AdvSIMD load/store multiple structures (post-indexed)
3268 *
3269 *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
3270 * +---+---+---------------+---+---+---------+--------+------+------+------+
3271 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
3272 * +---+---+---------------+---+---+---------+--------+------+------+------+
3273 *
3274 * Rt: first (or only) SIMD&FP register to be transferred
3275 * Rn: base address or SP
3276 * Rm (post-index only): post-index register (when Rm != 31) or size-dependent #imm
3277 */
3278static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
3279{
3280    int rt = extract32(insn, 0, 5);
3281    int rn = extract32(insn, 5, 5);
3282    int rm = extract32(insn, 16, 5);
3283    int size = extract32(insn, 10, 2);
3284    int opcode = extract32(insn, 12, 4);
3285    bool is_store = !extract32(insn, 22, 1);
3286    bool is_postidx = extract32(insn, 23, 1);
3287    bool is_q = extract32(insn, 30, 1);
3288    TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3289    TCGMemOp endian = s->be_data;
3290
3291    int ebytes;   /* bytes per element */
3292    int elements; /* elements per vector */
3293    int rpt;    /* num iterations */
3294    int selem;  /* structure elements */
3295    int r;
3296
3297    if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
3298        unallocated_encoding(s);
3299        return;
3300    }
3301
3302    if (!is_postidx && rm != 0) {
3303        unallocated_encoding(s);
3304        return;
3305    }
3306
3307    /* From the shared decode logic */
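    /* The opcodes below correspond to LD4/ST4 (0x0), LD1/ST1 with four
     * registers (0x2), LD3/ST3 (0x4), LD1/ST1 with three registers (0x6),
     * LD1/ST1 with one register (0x7), LD2/ST2 (0x8) and LD1/ST1 with two
     * registers (0xa).
     */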
3308    switch (opcode) {
3309    case 0x0:
3310        rpt = 1;
3311        selem = 4;
3312        break;
3313    case 0x2:
3314        rpt = 4;
3315        selem = 1;
3316        break;
3317    case 0x4:
3318        rpt = 1;
3319        selem = 3;
3320        break;
3321    case 0x6:
3322        rpt = 3;
3323        selem = 1;
3324        break;
3325    case 0x7:
3326        rpt = 1;
3327        selem = 1;
3328        break;
3329    case 0x8:
3330        rpt = 1;
3331        selem = 2;
3332        break;
3333    case 0xa:
3334        rpt = 2;
3335        selem = 1;
3336        break;
3337    default:
3338        unallocated_encoding(s);
3339        return;
3340    }
3341
3342    if (size == 3 && !is_q && selem != 1) {
3343        /* reserved */
3344        unallocated_encoding(s);
3345        return;
3346    }
3347
3348    if (!fp_access_check(s)) {
3349        return;
3350    }
3351
3352    if (rn == 31) {
3353        gen_check_sp_alignment(s);
3354    }
3355
3356    /* For our purposes, bytes are always little-endian.  */
3357    if (size == 0) {
3358        endian = MO_LE;
3359    }
3360
3361    /* Consecutive little-endian elements from a single register
3362     * can be promoted to a larger little-endian operation.
3363     */
3364    if (selem == 1 && endian == MO_LE) {
3365        size = 3;
3366    }
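    /* For example, LD1 {v0.16b}, [x0] (byte elements, so always
     * little-endian here) is then performed as two 64-bit element loads
     * rather than sixteen byte loads.
     */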
3367    ebytes = 1 << size;
3368    elements = (is_q ? 16 : 8) / ebytes;
3369
3370    tcg_rn = cpu_reg_sp(s, rn);
3371    clean_addr = clean_data_tbi(s, tcg_rn);
3372    tcg_ebytes = tcg_const_i64(ebytes);
3373
3374    for (r = 0; r < rpt; r++) {
3375        int e;
3376        for (e = 0; e < elements; e++) {
3377            int xs;
3378            for (xs = 0; xs < selem; xs++) {
3379                int tt = (rt + r + xs) % 32;
3380                if (is_store) {
3381                    do_vec_st(s, tt, e, clean_addr, size, endian);
3382                } else {
3383                    do_vec_ld(s, tt, e, clean_addr, size, endian);
3384                }
3385                tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3386            }
3387        }
3388    }
3389    tcg_temp_free_i64(tcg_ebytes);
3390
3391    if (!is_store) {
3392        /* For non-quad operations, setting a slice of the low
3393         * 64 bits of the register clears the high 64 bits (in
3394         * the ARM ARM pseudocode this is implicit in the fact
3395         * that 'rval' is a 64 bit wide variable).
3396         * For quad operations, we might still need to zero the
3397         * high bits of SVE.
3398         */
3399        for (r = 0; r < rpt * selem; r++) {
3400            int tt = (rt + r) % 32;
3401            clear_vec_high(s, is_q, tt);
3402        }
3403    }
3404
3405    if (is_postidx) {
3406        if (rm == 31) {
3407            tcg_gen_addi_i64(tcg_rn, tcg_rn, rpt * elements * selem * ebytes);
3408        } else {
3409            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3410        }
3411    }
3412}
3413
3414/* AdvSIMD load/store single structure
3415 *
3416 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3417 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3418 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
3419 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3420 *
3421 * AdvSIMD load/store single structure (post-indexed)
3422 *
3423 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3424 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3425 * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
3426 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3427 *
3428 * Rt: first (or only) SIMD&FP register to be transferred
3429 * Rn: base address or SP
3430 * Rm (post-index only): post-index register (when Rm != 31) or size-dependent #imm
3431 * index = encoded in Q:S:size dependent on size
3432 *
3433 * lane_size = encoded in R, opc
3434 * transfer width = encoded in opc, S, size
3435 */
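/*
 * For example (illustrative): LD1 {v0.s}[2], [x0] loads a single 32-bit
 * lane, while LD1R {v0.4s}, [x0] takes the replicate path (scale == 3
 * below) and broadcasts one element to every lane.
 */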
3436static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
3437{
3438    int rt = extract32(insn, 0, 5);
3439    int rn = extract32(insn, 5, 5);
3440    int rm = extract32(insn, 16, 5);
3441    int size = extract32(insn, 10, 2);
3442    int S = extract32(insn, 12, 1);
3443    int opc = extract32(insn, 13, 3);
3444    int R = extract32(insn, 21, 1);
3445    int is_load = extract32(insn, 22, 1);
3446    int is_postidx = extract32(insn, 23, 1);
3447    int is_q = extract32(insn, 30, 1);
3448
3449    int scale = extract32(opc, 1, 2);
3450    int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
3451    bool replicate = false;
3452    int index = is_q << 3 | S << 2 | size;
3453    int ebytes, xs;
3454    TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3455
3456    if (extract32(insn, 31, 1)) {
3457        unallocated_encoding(s);
3458        return;
3459    }
3460    if (!is_postidx && rm != 0) {
3461        unallocated_encoding(s);
3462        return;
3463    }
3464
3465    switch (scale) {
3466    case 3:
3467        if (!is_load || S) {
3468            unallocated_encoding(s);
3469            return;
3470        }
3471        scale = size;
3472        replicate = true;
3473        break;
3474    case 0:
3475        break;
3476    case 1:
3477        if (extract32(size, 0, 1)) {
3478            unallocated_encoding(s);
3479            return;
3480        }
3481        index >>= 1;
3482        break;
3483    case 2:
3484        if (extract32(size, 1, 1)) {
3485            unallocated_encoding(s);
3486            return;
3487        }
3488        if (!extract32(size, 0, 1)) {
3489            index >>= 2;
3490        } else {
3491            if (S) {
3492                unallocated_encoding(s);
3493                return;
3494            }
3495            index >>= 3;
3496            scale = 3;
3497        }
3498        break;
3499    default:
3500        g_assert_not_reached();
3501    }
3502
3503    if (!fp_access_check(s)) {
3504        return;
3505    }
3506
3507    ebytes = 1 << scale;
3508
3509    if (rn == 31) {
3510        gen_check_sp_alignment(s);
3511    }
3512
3513    tcg_rn = cpu_reg_sp(s, rn);
3514    clean_addr = clean_data_tbi(s, tcg_rn);
3515    tcg_ebytes = tcg_const_i64(ebytes);
3516
3517    for (xs = 0; xs < selem; xs++) {
3518        if (replicate) {
3519            /* Load and replicate to all elements */
3520            TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3521
3522            tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr,
3523                                get_mem_index(s), s->be_data + scale);
3524            tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
3525                                 (is_q + 1) * 8, vec_full_reg_size(s),
3526                                 tcg_tmp);
3527            tcg_temp_free_i64(tcg_tmp);
3528        } else {
3529            /* Load/store one element per register */
3530            if (is_load) {
3531                do_vec_ld(s, rt, index, clean_addr, scale, s->be_data);
3532            } else {
3533                do_vec_st(s, rt, index, clean_addr, scale, s->be_data);
3534            }
3535        }
3536        tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3537        rt = (rt + 1) % 32;
3538    }
3539    tcg_temp_free_i64(tcg_ebytes);
3540
3541    if (is_postidx) {
3542        if (rm == 31) {
3543            tcg_gen_addi_i64(tcg_rn, tcg_rn, selem * ebytes);
3544        } else {
3545            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3546        }
3547    }
3548}
3549
3550/* Loads and stores */
3551static void disas_ldst(DisasContext *s, uint32_t insn)
3552{
3553    switch (extract32(insn, 24, 6)) {
3554    case 0x08: /* Load/store exclusive */
3555        disas_ldst_excl(s, insn);
3556        break;
3557    case 0x18: case 0x1c: /* Load register (literal) */
3558        disas_ld_lit(s, insn);
3559        break;
3560    case 0x28: case 0x29:
3561    case 0x2c: case 0x2d: /* Load/store pair (all forms) */
3562        disas_ldst_pair(s, insn);
3563        break;
3564    case 0x38: case 0x39:
3565    case 0x3c: case 0x3d: /* Load/store register (all forms) */
3566        disas_ldst_reg(s, insn);
3567        break;
3568    case 0x0c: /* AdvSIMD load/store multiple structures */
3569        disas_ldst_multiple_struct(s, insn);
3570        break;
3571    case 0x0d: /* AdvSIMD load/store single structure */
3572        disas_ldst_single_struct(s, insn);
3573        break;
3574    default:
3575        unallocated_encoding(s);
3576        break;
3577    }
3578}
3579
3580/* PC-rel. addressing
3581 *   31  30   29 28       24 23                5 4    0
3582 * +----+-------+-----------+-------------------+------+
3583 * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
3584 * +----+-------+-----------+-------------------+------+
3585 */
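/*
 * Worked example: for ADRP (op == 1) at address 0x401234 with
 * immhi:immlo == 1, base becomes 0x401000, the offset becomes 0x1000,
 * and Rd receives 0x402000.
 */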
3586static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
3587{
3588    unsigned int page, rd;
3589    uint64_t base;
3590    uint64_t offset;
3591
3592    page = extract32(insn, 31, 1);
3593    /* SignExtend(immhi:immlo) -> offset */
3594    offset = sextract64(insn, 5, 19);
3595    offset = offset << 2 | extract32(insn, 29, 2);
3596    rd = extract32(insn, 0, 5);
3597    base = s->pc - 4;
3598
3599    if (page) {
3600        /* ADRP (page based) */
3601        base &= ~0xfff;
3602        offset <<= 12;
3603    }
3604
3605    tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
3606}
3607
3608/*
3609 * Add/subtract (immediate)
3610 *
3611 *  31 30 29 28       24 23 22 21         10 9   5 4   0
3612 * +--+--+--+-----------+-----+-------------+-----+-----+
3613 * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
3614 * +--+--+--+-----------+-----+-------------+-----+-----+
3615 *
3616 *    sf: 0 -> 32bit, 1 -> 64bit
3617 *    op: 0 -> add  , 1 -> sub
3618 *     S: 1 -> set flags
3619 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
3620 */
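/*
 * For example, ADD x0, x1, #0x12, LSL #12 (shift == 01) adds 0x12000 to
 * x1; the S == 1 forms (ADDS/SUBS) also update NZCV below.
 */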
3621static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
3622{
3623    int rd = extract32(insn, 0, 5);
3624    int rn = extract32(insn, 5, 5);
3625    uint64_t imm = extract32(insn, 10, 12);
3626    int shift = extract32(insn, 22, 2);
3627    bool setflags = extract32(insn, 29, 1);
3628    bool sub_op = extract32(insn, 30, 1);
3629    bool is_64bit = extract32(insn, 31, 1);
3630
3631    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3632    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
3633    TCGv_i64 tcg_result;
3634
3635    switch (shift) {
3636    case 0x0:
3637        break;
3638    case 0x1:
3639        imm <<= 12;
3640        break;
3641    default:
3642        unallocated_encoding(s);
3643        return;
3644    }
3645
3646    tcg_result = tcg_temp_new_i64();
3647    if (!setflags) {
3648        if (sub_op) {
3649            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
3650        } else {
3651            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
3652        }
3653    } else {
3654        TCGv_i64 tcg_imm = tcg_const_i64(imm);
3655        if (sub_op) {
3656            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3657        } else {
3658            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3659        }
3660        tcg_temp_free_i64(tcg_imm);
3661    }
3662
3663    if (is_64bit) {
3664        tcg_gen_mov_i64(tcg_rd, tcg_result);
3665    } else {
3666        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3667    }
3668
3669    tcg_temp_free_i64(tcg_result);
3670}
3671
3672/* The input should be a value in the bottom e bits (with higher
3673 * bits zero); returns that value replicated into every element
3674 * of size e in a 64 bit integer.
3675 */
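/* For example, bitfield_replicate(0x3, 4) returns 0x3333333333333333. */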
3676static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
3677{
3678    assert(e != 0);
3679    while (e < 64) {
3680        mask |= mask << e;
3681        e *= 2;
3682    }
3683    return mask;
3684}
3685
3686/* Return a value with the bottom len bits set (where 0 < len <= 64) */
3687static inline uint64_t bitmask64(unsigned int length)
3688{
3689    assert(length > 0 && length <= 64);
3690    return ~0ULL >> (64 - length);
3691}
3692
3693/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
3694 * only require the wmask. Returns false if the imms/immr/immn are a reserved
3695 * value (ie should cause a guest UNDEF exception), and true if they are
3696 * valid, in which case the decoded bit pattern is written to result.
3697 */
3698bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
3699                            unsigned int imms, unsigned int immr)
3700{
3701    uint64_t mask;
3702    unsigned e, levels, s, r;
3703    int len;
3704
3705    assert(immn < 2 && imms < 64 && immr < 64);
3706
3707    /* The bit patterns we create here are 64 bit patterns which
3708     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
3709     * 64 bits each. Each element contains the same value: a run
3710     * of between 1 and e-1 non-zero bits, rotated within the
3711     * element by between 0 and e-1 bits.
3712     *
3713     * The element size and run length are encoded into immn (1 bit)
3714     * and imms (6 bits) as follows:
3715     * 64 bit elements: immn = 1, imms = <length of run - 1>
3716     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
3717     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
3718     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
3719     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
3720     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
3721     * Notice that immn = 0, imms = 11111x is the only combination
3722     * not covered by one of the above options; this is reserved.
3723     * Further, <length of run - 1> all-ones is a reserved pattern.
3724     *
3725     * In all cases the rotation is by immr % e (and immr is 6 bits).
3726     */
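
    /*
     * Worked example: immn = 0, imms = 0b100101, immr = 0b000010 selects
     * 16-bit elements with a run of six ones rotated right by two, so each
     * element is 0xc00f and the decoded mask is 0xc00fc00fc00fc00f.
     */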
3727
3728    /* First determine the element size */
3729    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
3730    if (len < 1) {
3731        /* This is the immn == 0, imms == 0b11111x case */
3732        return false;
3733    }
3734    e = 1 << len;
3735
3736    levels = e - 1;
3737    s = imms & levels;
3738    r = immr & levels;
3739
3740    if (s == levels) {
3741        /* <length of run - 1> mustn't be all-ones. */
3742        return false;
3743    }
3744
3745    /* Create the value of one element: s+1 set bits rotated
3746     * by r within the element (which is e bits wide)...
3747     */
3748    mask = bitmask64(s + 1);
3749    if (r) {
3750        mask = (mask >> r) | (mask << (e - r));
3751        mask &= bitmask64(e);
3752    }
3753    /* ...then replicate the element over the whole 64 bit value */
3754    mask = bitfield_replicate(mask, e);
3755    *result = mask;
3756    return true;
3757}
3758
3759/* Logical (immediate)
3760 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3761 * +----+-----+-------------+---+------+------+------+------+
3762 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
3763 * +----+-----+-------------+---+------+------+------+------+
3764 */
3765static void disas_logic_imm(DisasContext *s, uint32_t insn)
3766{
3767    unsigned int sf, opc, is_n, immr, imms, rn, rd;
3768    TCGv_i64 tcg_rd, tcg_rn;
3769    uint64_t wmask;
3770    bool is_and = false;
3771
3772    sf = extract32(insn, 31, 1);
3773    opc = extract32(insn, 29, 2);
3774    is_n = extract32(insn, 22, 1);
3775    immr = extract32(insn, 16, 6);
3776    imms = extract32(insn, 10, 6);
3777    rn = extract32(insn, 5, 5);
3778    rd = extract32(insn, 0, 5);
3779
3780    if (!sf && is_n) {
3781        unallocated_encoding(s);
3782        return;
3783    }
3784
3785    if (opc == 0x3) { /* ANDS */
3786        tcg_rd = cpu_reg(s, rd);
3787    } else {
3788        tcg_rd = cpu_reg_sp(s, rd);
3789    }
3790    tcg_rn = cpu_reg(s, rn);
3791
3792    if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3793        /* some immediate field values are reserved */
3794        unallocated_encoding(s);
3795        return;
3796    }
3797
3798    if (!sf) {
3799        wmask &= 0xffffffff;
3800    }
3801
3802    switch (opc) {
3803    case 0x3: /* ANDS */
3804    case 0x0: /* AND */
3805        tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3806        is_and = true;
3807        break;
3808    case 0x1: /* ORR */
3809        tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3810        break;
3811    case 0x2: /* EOR */
3812        tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3813        break;
3814    default:
3815        assert(FALSE); /* must handle all above */
3816        break;
3817    }
3818
3819    if (!sf && !is_and) {
3820        /* zero extend final result; we know we can skip this for AND
3821         * since the immediate had the high 32 bits clear.
3822         */
3823        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3824    }
3825
3826    if (opc == 3) { /* ANDS */
3827        gen_logic_CC(sf, tcg_rd);
3828    }
3829}
3830
3831/*
3832 * Move wide (immediate)
3833 *
3834 *  31 30 29 28         23 22 21 20             5 4    0
3835 * +--+-----+-------------+-----+----------------+------+
3836 * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
3837 * +--+-----+-------------+-----+----------------+------+
3838 *
3839 * sf: 0 -> 32 bit, 1 -> 64 bit
3840 * opc: 00 -> MOVN, 10 -> MOVZ, 11 -> MOVK
3841 * hw: shift/16 (0 or 16; 32 and 48 are valid only when sf is set)
3842 */
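/*
 * For example, MOVZ x0, #0x1234, LSL #16 sets x0 to 0x12340000, and a
 * following MOVK x0, #0x5678 deposits the low halfword, giving 0x12345678.
 */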
3843static void disas_movw_imm(DisasContext *s, uint32_t insn)
3844{
3845    int rd = extract32(insn, 0, 5);
3846    uint64_t imm = extract32(insn, 5, 16);
3847    int sf = extract32(insn, 31, 1);
3848    int opc = extract32(insn, 29, 2);
3849    int pos = extract32(insn, 21, 2) << 4;
3850    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3851    TCGv_i64 tcg_imm;
3852
3853    if (!sf && (pos >= 32)) {
3854        unallocated_encoding(s);
3855        return;
3856    }
3857
3858    switch (opc) {
3859    case 0: /* MOVN */
3860    case 2: /* MOVZ */
3861        imm <<= pos;
3862        if (opc == 0) {
3863            imm = ~imm;
3864        }
3865        if (!sf) {
3866            imm &= 0xffffffffu;
3867        }
3868        tcg_gen_movi_i64(tcg_rd, imm);
3869        break;
3870    case 3: /* MOVK */
3871        tcg_imm = tcg_const_i64(imm);
3872        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3873        tcg_temp_free_i64(tcg_imm);
3874        if (!sf) {
3875            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3876        }
3877        break;
3878    default:
3879        unallocated_encoding(s);
3880        break;
3881    }
3882}
3883
3884/* Bitfield
3885 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3886 * +----+-----+-------------+---+------+------+------+------+
3887 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
3888 * +----+-----+-------------+---+------+------+------+------+
3889 */
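/*
 * Worked example: UBFX w0, w1, #8, #4 is UBFM w0, w1, #8, #11 (ri = 8,
 * si = 11); si >= ri, so the extract path below copies w1<11:8> to
 * w0<3:0> with len = 4.
 */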
3890static void disas_bitfield(DisasContext *s, uint32_t insn)
3891{
3892    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3893    TCGv_i64 tcg_rd, tcg_tmp;
3894
3895    sf = extract32(insn, 31, 1);
3896    opc = extract32(insn, 29, 2);
3897    n = extract32(insn, 22, 1);
3898    ri = extract32(insn, 16, 6);
3899    si = extract32(insn, 10, 6);
3900    rn = extract32(insn, 5, 5);
3901    rd = extract32(insn, 0, 5);
3902    bitsize = sf ? 64 : 32;
3903
3904    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3905        unallocated_encoding(s);
3906        return;
3907    }
3908
3909    tcg_rd = cpu_reg(s, rd);
3910
3911    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3912       to be smaller than bitsize, we'll never reference data outside the
3913       low 32-bits anyway.  */
3914    tcg_tmp = read_cpu_reg(s, rn, 1);
3915
3916    /* Recognize simple(r) extractions.  */
3917    if (si >= ri) {
3918        /* Wd<s-r:0> = Wn<s:r> */
3919        len = (si - ri) + 1;
3920        if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
3921            tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
3922            goto done;
3923        } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
3924            tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
3925            return;
3926        }
3927        /* opc == 1, BFXIL: fall through to deposit */
3928        tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
3929        pos = 0;
3930    } else {
3931        /* Handle the ri > si case with a deposit
3932         * Wd<32+s-r,32-r> = Wn<s:0>
3933         */
3934        len = si + 1;
3935        pos = (bitsize - ri) & (bitsize - 1);
3936    }
3937
3938    if (opc == 0 && len < ri) {
3939        /* SBFM: sign extend the destination field from len to fill
3940           the balance of the word.  Let the deposit below insert all
3941           of those sign bits.  */
3942        tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
3943        len = ri;
3944    }
3945
3946    if (opc == 1) { /* BFM, BFXIL */
3947        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3948    } else {
3949        /* SBFM or UBFM: We start with zero, and we haven't modified
3950           any bits outside bitsize, therefore the zero-extension
3951           below is unneeded.  */
3952        tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
3953        return;
3954    }
3955
3956 done:
3957    if (!sf) { /* zero extend final result */
3958        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3959    }
3960}
3961
3962/* Extract
3963 *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3964 * +----+------+-------------+---+----+------+--------+------+------+
3965 * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3966 * +----+------+-------------+---+----+------+--------+------+------+
3967 */
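/*
 * For example, EXTR x0, x1, x2, #8 sets x0 to bits <71:8> of the
 * concatenation x1:x2; when Rn == Rm this is the ROR alias.
 */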
3968static void disas_extract(DisasContext *s, uint32_t insn)
3969{
3970    unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3971
3972    sf = extract32(insn, 31, 1);
3973    n = extract32(insn, 22, 1);
3974    rm = extract32(insn, 16, 5);
3975    imm = extract32(insn, 10, 6);
3976    rn = extract32(insn, 5, 5);
3977    rd = extract32(insn, 0, 5);
3978    op21 = extract32(insn, 29, 2);
3979    op0 = extract32(insn, 21, 1);
3980    bitsize = sf ? 64 : 32;
3981
3982    if (sf != n || op21 || op0 || imm >= bitsize) {
3983        unallocated_encoding(s);
3984    } else {
3985        TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3986
3987        tcg_rd = cpu_reg(s, rd);
3988
3989        if (unlikely(imm == 0)) {
3990            /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3991             * so an extract from bit 0 is a special case.
3992             */
3993            if (sf) {
3994                tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3995            } else {
3996                tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3997            }
3998        } else {
3999            tcg_rm = cpu_reg(s, rm);
4000            tcg_rn = cpu_reg(s, rn);
4001
4002            if (sf) {
4003                /* Specialization to ROR happens in EXTRACT2.  */
4004                tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, imm);
4005            } else {
4006                TCGv_i32 t0 = tcg_temp_new_i32();
4007
4008                tcg_gen_extrl_i64_i32(t0, tcg_rm);
4009                if (rm == rn) {
4010                    tcg_gen_rotri_i32(t0, t0, imm);
4011                } else {
4012                    TCGv_i32 t1 = tcg_temp_new_i32();
4013                    tcg_gen_extrl_i64_i32(t1, tcg_rn);
4014                    tcg_gen_extract2_i32(t0, t0, t1, imm);
4015                    tcg_temp_free_i32(t1);
4016                }
4017                tcg_gen_extu_i32_i64(tcg_rd, t0);
4018                tcg_temp_free_i32(t0);
4019            }
4020        }
4021    }
4022}
4023
4024/* Data processing - immediate */
4025static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
4026{
4027    switch (extract32(insn, 23, 6)) {
4028    case 0x20: case 0x21: /* PC-rel. addressing */
4029        disas_pc_rel_adr(s, insn);
4030        break;
4031    case 0x22: case 0x23: /* Add/subtract (immediate) */
4032        disas_add_sub_imm(s, insn);
4033        break;
4034    case 0x24: /* Logical (immediate) */
4035        disas_logic_imm(s, insn);
4036        break;
4037    case 0x25: /* Move wide (immediate) */
4038        disas_movw_imm(s, insn);
4039        break;
4040    case 0x26: /* Bitfield */
4041        disas_bitfield(s, insn);
4042        break;
4043    case 0x27: /* Extract */
4044        disas_extract(s, insn);
4045        break;
4046    default:
4047        unallocated_encoding(s);
4048        break;
4049    }
4050}
4051
4052/* Shift a TCGv src by TCGv shift_amount, put result in dst.
4053 * Note that it is the caller's responsibility to ensure that the
4054 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
4055 * mandated semantics for out of range shifts.
4056 */
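/* For example, the variable shift instructions (LSLV and friends) mask
 * the shift amount with (sf ? 63 : 31) before calling this.
 */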
4057static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4058                      enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4059{
4060    switch (shift_type) {
4061    case A64_SHIFT_TYPE_LSL:
4062        tcg_gen_shl_i64(dst, src, shift_amount);
4063        break;
4064    case A64_SHIFT_TYPE_LSR:
4065        tcg_gen_shr_i64(dst, src, shift_amount);
4066        break;
4067    case A64_SHIFT_TYPE_ASR:
4068        if (!sf) {
4069            tcg_gen_ext32s_i64(dst, src);
4070        }
4071        tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4072        break;
4073    case A64_SHIFT_TYPE_ROR:
4074        if (sf) {
4075            tcg_gen_rotr_i64(dst, src, shift_amount);
4076        } else {
4077            TCGv_i32 t0, t1;
4078            t0 = tcg_temp_new_i32();
4079            t1 = tcg_temp_new_i32();
4080            tcg_gen_extrl_i64_i32(t0, src);
4081            tcg_gen_extrl_i64_i32(t1, shift_amount);
4082            tcg_gen_rotr_i32(t0, t0, t1);
4083            tcg_gen_extu_i32_i64(dst, t0);
4084            tcg_temp_free_i32(t0);
4085            tcg_temp_free_i32(t1);
4086        }
4087        break;
4088    default:
4089        assert(FALSE); /* all shift types should be handled */
4090        break;
4091    }
4092
4093    if (!sf) { /* zero extend final result */
4094        tcg_gen_ext32u_i64(dst, dst);
4095    }
4096}
4097
4098/* Shift a TCGv src by immediate, put result in dst.
4099 * The shift amount must be in range (this should always be true as the
4100 * relevant instructions will UNDEF on bad shift immediates).
4101 */
4102static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4103                          enum a64_shift_type shift_type, unsigned int shift_i)
4104{
4105    assert(shift_i < (sf ? 64 : 32));
4106
4107    if (shift_i == 0) {
4108        tcg_gen_mov_i64(dst, src);
4109    } else {
4110        TCGv_i64 shift_const;
4111
4112        shift_const = tcg_const_i64(shift_i);
4113        shift_reg(dst, src, sf, shift_type, shift_const);
4114        tcg_temp_free_i64(shift_const);
4115    }
4116}
4117
4118/* Logical (shifted register)
4119 *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
4120 * +----+-----+-----------+-------+---+------+--------+------+------+
4121 * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
4122 * +----+-----+-----------+-------+---+------+--------+------+------+
4123 */
4124static void disas_logic_reg(DisasContext *s, uint32_t insn)
4125{
4126    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4127    unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4128
4129    sf = extract32(insn, 31, 1);
4130    opc = extract32(insn, 29, 2);
4131    shift_type = extract32(insn, 22, 2);
4132    invert = extract32(insn, 21, 1);
4133    rm = extract32(insn, 16, 5);
4134    shift_amount = extract32(insn, 10, 6);
4135    rn = extract32(insn, 5, 5);
4136    rd = extract32(insn, 0, 5);
4137
4138    if (!sf && (shift_amount & (1 << 5))) {
4139        unallocated_encoding(s);
4140        return;
4141    }
4142
4143    tcg_rd = cpu_reg(s, rd);
4144
4145    if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4146        /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
4147         * register-register MOV and MVN, so it is worth special casing.
4148         */
4149        tcg_rm = cpu_reg(s, rm);
4150        if (invert) {
4151            tcg_gen_not_i64(tcg_rd, tcg_rm);
4152            if (!sf) {
4153                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4154            }
4155        } else {
4156            if (sf) {
4157                tcg_gen_mov_i64(tcg_rd, tcg_rm);
4158            } else {
4159                tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4160            }
4161        }
4162        return;
4163    }
4164
4165    tcg_rm = read_cpu_reg(s, rm, sf);
4166
4167    if (shift_amount) {
4168        shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4169    }
4170
4171    tcg_rn = cpu_reg(s, rn);
4172
4173    switch (opc | (invert << 2)) {
4174    case 0: /* AND */
4175    case 3: /* ANDS */
4176        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4177        break;
4178    case 1: /* ORR */
4179        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4180        break;
4181    case 2: /* EOR */
4182        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4183        break;
4184    case 4: /* BIC */
4185    case 7: /* BICS */
4186        tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4187        break;
4188    case 5: /* ORN */
4189        tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4190        break;
4191    case 6: /* EON */
4192        tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4193        break;
4194    default:
4195        assert(FALSE);
4196        break;
4197    }
4198
4199    if (!sf) {
4200        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4201    }
4202
4203    if (opc == 3) {
4204        gen_logic_CC(sf, tcg_rd);
4205    }
4206}
4207
4208/*
4209 * Add/subtract (extended register)
4210 *
4211 *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
4212 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4213 * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
4214 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4215 *
4216 *  sf: 0 -> 32bit, 1 -> 64bit
4217 *  op: 0 -> add  , 1 -> sub
4218 *   S: 1 -> set flags
4219 * opt: 00
4220 * option: extension type (see DecodeRegExtend)
4221 * imm3: optional shift to Rm
4222 *
4223 * Rd = Rn + LSL(extend(Rm), amount)
4224 */
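/*
 * For example, ADD x0, sp, w1, UXTW #2 zero-extends w1 to 64 bits,
 * shifts it left by 2 and adds the result to sp.
 */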
4225static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4226{
4227    int rd = extract32(insn, 0, 5);
4228    int rn = extract32(insn, 5, 5);
4229    int imm3 = extract32(insn, 10, 3);
4230    int option = extract32(insn, 13, 3);
4231    int rm = extract32(insn, 16, 5);
4232    int opt = extract32(insn, 22, 2);
4233    bool setflags = extract32(insn, 29, 1);
4234    bool sub_op = extract32(insn, 30, 1);
4235    bool sf = extract32(insn, 31, 1);
4236
4237    TCGv_i64 tcg_rm, tcg_rn; /* temps */
4238    TCGv_i64 tcg_rd;
4239    TCGv_i64 tcg_result;
4240
4241    if (imm3 > 4 || opt != 0) {
4242        unallocated_encoding(s);
4243        return;
4244    }
4245
4246    /* non-flag setting ops may use SP */
4247    if (!setflags) {
4248        tcg_rd = cpu_reg_sp(s, rd);
4249    } else {
4250        tcg_rd = cpu_reg(s, rd);
4251    }
4252    tcg_rn = read_cpu_reg_sp(s, rn, sf);
4253
4254    tcg_rm = read_cpu_reg(s, rm, sf);
4255    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4256
4257    tcg_result = tcg_temp_new_i64();
4258
4259    if (!setflags) {
4260        if (sub_op) {
4261            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4262        } else {
4263            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4264        }
4265    } else {
4266        if (sub_op) {
4267            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4268        } else {
4269            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4270        }
4271    }
4272
4273    if (sf) {
4274        tcg_gen_mov_i64(tcg_rd, tcg_result);
4275    } else {
4276        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4277    }
4278
4279    tcg_temp_free_i64(tcg_result);
4280}
4281
4282/*
4283 * Add/subtract (shifted register)
4284 *
4285 *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4286 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4287 * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4288 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4289 *
4290 *    sf: 0 -> 32bit, 1 -> 64bit
4291 *    op: 0 -> add  , 1 -> sub
4292 *     S: 1 -> set flags
4293 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4294 *  imm6: Shift amount to apply to Rm before the add/sub
4295 */
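/*
 * For example, CMP x0, x1, LSR #4 is the alias SUBS xzr, x0, x1, LSR #4:
 * x1 >> 4 is subtracted from x0, the flags are set and the result is
 * discarded.
 */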
4296static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4297{
4298    int rd = extract32(insn, 0, 5);
4299    int rn = extract32(insn, 5, 5);
4300    int imm6 = extract32(insn, 10, 6);
4301    int rm = extract32(insn, 16, 5);
4302    int shift_type = extract32(insn, 22, 2);
4303    bool setflags = extract32(insn, 29, 1);
4304    bool sub_op = extract32(insn, 30, 1);
4305    bool sf = extract32(insn, 31, 1);
4306
4307    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4308    TCGv_i64 tcg_rn, tcg_rm;
4309    TCGv_i64 tcg_result;
4310
4311    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4312        unallocated_encoding(s);
4313        return;
4314    }
4315
4316    tcg_rn = read_cpu_reg(s, rn, sf);
4317    tcg_rm = read_cpu_reg(s, rm, sf);
4318
4319    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4320
4321    tcg_result = tcg_temp_new_i64();
4322
4323    if (!setflags) {
4324        if (sub_op) {
4325            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4326        } else {
4327            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4328        }
4329    } else {
4330        if (sub_op) {
4331            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4332        } else {
4333            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4334        }
4335    }
4336
4337    if (sf) {
4338        tcg_gen_mov_i64(tcg_rd, tcg_result);
4339    } else {
4340        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4341    }
4342
4343    tcg_temp_free_i64(tcg_result);
4344}
4345
4346/* Data-processing (3 source)
4347 *
4348 *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
4349 *  +--+------+-----------+------+------+----+------+------+------+
4350 *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4351 *  +--+------+-----------+------+------+----+------+------+------+
4352 */
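/*
 * For example, MUL x0, x1, x2 is the alias MADD x0, x1, x2, xzr; the
 * ra == 31 case below avoids the redundant add of zero.
 */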
4353static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4354{
4355    int rd = extract32(insn, 0, 5);
4356    int rn = extract32(insn, 5, 5);
4357    int ra = extract32(insn, 10, 5);
4358    int rm = extract32(insn, 16, 5);
4359    int op_id = (extract32(insn, 29, 3) << 4) |
4360        (extract32(insn, 21, 3) << 1) |
4361        extract32(insn, 15, 1);
4362    bool sf = extract32(insn, 31, 1);
4363    bool is_sub = extract32(op_id, 0, 1);
4364    bool is_high = extract32(op_id, 2, 1);
4365    bool is_signed = false;
4366    TCGv_i64 tcg_op1;
4367    TCGv_i64 tcg_op2;
4368    TCGv_i64 tcg_tmp;
4369
4370    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4371    switch (op_id) {
4372    case 0x42: /* SMADDL */
4373    case 0x43: /* SMSUBL */
4374    case 0x44: /* SMULH */
4375        is_signed = true;
4376        break;
4377    case 0x0: /* MADD (32bit) */
4378    case 0x1: /* MSUB (32bit) */
4379    case 0x40: /* MADD (64bit) */
4380    case 0x41: /* MSUB (64bit) */
4381    case 0x4a: /* UMADDL */
4382    case 0x4b: /* UMSUBL */
4383    case 0x4c: /* UMULH */
4384        break;
4385    default:
4386        unallocated_encoding(s);
4387        return;
4388    }
4389
4390    if (is_high) {
4391        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4392        TCGv_i64 tcg_rd = cpu_reg(s, rd);
4393        TCGv_i64 tcg_rn = cpu_reg(s, rn);
4394        TCGv_i64 tcg_rm = cpu_reg(s, rm);
4395
4396        if (is_signed) {
4397            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4398        } else {
4399            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4400        }
4401
4402        tcg_temp_free_i64(low_bits);
4403        return;
4404    }
4405
4406    tcg_op1 = tcg_temp_new_i64();
4407    tcg_op2 = tcg_temp_new_i64();
4408    tcg_tmp = tcg_temp_new_i64();
4409
4410    if (op_id < 0x42) {
4411        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4412        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4413    } else {
4414        if (is_signed) {
4415            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4416            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4417        } else {
4418            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
4419            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
4420        }
4421    }
4422
4423    if (ra == 31 && !is_sub) {
4424        /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4425        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
4426    } else {
4427        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
4428        if (is_sub) {
4429            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4430        } else {
4431            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4432        }
4433    }
4434
4435    if (!sf) {
4436        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
4437    }
4438
4439    tcg_temp_free_i64(tcg_op1);
4440    tcg_temp_free_i64(tcg_op2);
4441    tcg_temp_free_i64(tcg_tmp);
4442}
4443
4444/* Add/subtract (with carry)
4445 *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
4446 * +--+--+--+------------------------+------+-------------+------+-----+
4447 * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
4448 * +--+--+--+------------------------+------+-------------+------+-----+
4449 */
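/*
 * For example, SBC x0, x1, x2 computes x1 + NOT(x2) + C, which is why the
 * op == 1 path below inverts Rm before the shared gen_adc path.
 */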
4450
4451static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4452{
4453    unsigned int sf, op, setflags, rm, rn, rd;
4454    TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4455
4456    sf = extract32(insn, 31, 1);
4457    op = extract32(insn, 30, 1);
4458    setflags = extract32(insn, 29, 1);
4459    rm = extract32(insn, 16, 5);
4460    rn = extract32(insn, 5, 5);
4461    rd = extract32(insn, 0, 5);
4462
4463    tcg_rd = cpu_reg(s, rd);
4464    tcg_rn = cpu_reg(s, rn);
4465
4466    if (op) {
4467        tcg_y = new_tmp_a64(s);
4468        tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4469    } else {
4470        tcg_y = cpu_reg(s, rm);
4471    }
4472
4473    if (setflags) {
4474        gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4475    } else {
4476        gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4477    }
4478}
4479
4480/*
4481 * Rotate right into flags
4482 *  31 30 29                21       15          10      5  4      0
4483 * +--+--+--+-----------------+--------+-----------+------+--+------+
4484 * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
4485 * +--+--+--+-----------------+--------+-----------+------+--+------+
4486 */
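/*
 * Illustrative example: RMIF x0, #0, #8 copies bit 3 of x0 into N and
 * leaves Z, C and V untouched; in general bits <3:0> of ROR(Xn, imm6)
 * feed whichever of N, Z, C, V the mask selects.
 */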
4487static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
4488{
4489    int mask = extract32(insn, 0, 4);
4490    int o2 = extract32(insn, 4, 1);
4491    int rn = extract32(insn, 5, 5);
4492    int imm6 = extract32(insn, 15, 6);
4493    int sf_op_s = extract32(insn, 29, 3);
4494    TCGv_i64 tcg_rn;
4495    TCGv_i32 nzcv;
4496
4497    if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
4498        unallocated_encoding(s);
4499        return;
4500    }
4501
4502    tcg_rn = read_cpu_reg(s, rn, 1);
4503    tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
4504
4505    nzcv = tcg_temp_new_i32();
4506    tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
4507
4508    if (mask & 8) { /* N */
4509        tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
4510    }
4511    if (mask & 4) { /* Z */
4512        tcg_gen_not_i32(cpu_ZF, nzcv);
4513        tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
4514    }
4515    if (mask & 2) { /* C */
4516        tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
4517    }
4518    if (mask & 1) { /* V */
4519        tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
4520    }
4521
4522    tcg_temp_free_i32(nzcv);
4523}
4524
4525/*
4526 * Evaluate into flags
4527 *  31 30 29                21        15   14        10      5  4      0
4528 * +--+--+--+-----------------+---------+----+---------+------+--+------+
4529 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
4530 * +--+--+--+-----------------+---------+----+---------+------+--+------+
4531 */
4532static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
4533{
4534    int o3_mask = extract32(insn, 0, 5);
4535    int rn = extract32(insn, 5, 5);
4536    int o2 = extract32(insn, 15, 6);
4537    int sz = extract32(insn, 14, 1);
4538    int sf_op_s = extract32(insn, 29, 3);
4539    TCGv_i32 tmp;
4540    int shift;
4541
4542    if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
4543        !dc_isar_feature(aa64_condm_4, s)) {
4544        unallocated_encoding(s);
4545        return;
4546    }
4547    shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
4548
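    /* The low 8 or 16 bits of Rn are treated as a full width result:
     * shifting left by 24 (resp. 16) puts the operand's sign bit into
     * NF bit 31, ZF becomes zero exactly when the operand is, and after
     * the XOR below VF bit 31 is operand<8> ^ operand<7>
     * (resp. <16> ^ <15>).
     */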
4549    tmp = tcg_temp_new_i32();
4550    tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
4551    tcg_gen_shli_i32(cpu_NF, tmp, shift);
4552    tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
4553    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
4554    tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
4555    tcg_temp_free_i32(tmp);
4556}
4557
4558/* Conditional compare (immediate / register)
4559 *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
4560 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4561 * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
4562 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4563 *        [1]                             y                [0]       [0]
4564 */
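/*
 * For example, CCMP x0, #5, #4, EQ sets the flags from x0 - 5 when EQ
 * currently holds, and otherwise forces NZCV to 0b0100 (just Z set).
 */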
4565static void disas_cc(DisasContext *s, uint32_t insn)
4566{
4567    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
4568    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
4569    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
4570    DisasCompare c;
4571
4572    if (!extract32(insn, 29, 1)) {
4573        unallocated_encoding(s);
4574        return;
4575    }
4576    if (insn & (1 << 10 | 1 << 4)) {
4577        unallocated_encoding(s);
4578        return;
4579    }
4580    sf = extract32(insn, 31, 1);
4581    op = extract32(insn, 30, 1);
4582    is_imm = extract32(insn, 11, 1);
4583    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
4584    cond = extract32(insn, 12, 4);
4585    rn = extract32(insn, 5, 5);
4586    nzcv = extract32(insn, 0, 4);
4587
4588    /* Set T0 = !COND.  */
4589    tcg_t0 = tcg_temp_new_i32();
4590    arm_test_cc(&c, cond);
4591    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
4592    arm_free_cc(&c);
4593
4594    /* Load the arguments for the new comparison.  */
4595    if (is_imm) {
4596        tcg_y = new_tmp_a64(s);
4597        tcg_gen_movi_i64(tcg_y, y);
4598    } else {
4599        tcg_y = cpu_reg(s, y);
4600    }
4601    tcg_rn = cpu_reg(s, rn);
4602
4603    /* Set the flags for the new comparison.  */
4604    tcg_tmp = tcg_temp_new_i64();
4605    if (op) {
4606        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4607    } else {
4608        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4609    }
4610    tcg_temp_free_i64(tcg_tmp);
4611
4612    /* If COND was false, force the flags to #nzcv.  Compute two masks
4613     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
4614     * For tcg hosts that support ANDC, we can make do with just T1.
4615     * In either case, allow the tcg optimizer to delete any unused mask.
4616     */
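    /* E.g. when COND is false, T0 = 1, T1 = 0xffffffff and T2 = 0, so
     * OR-ing a flag register with T1 forces it on and AND-ing with T2
     * forces it off; when COND is true all three are no-ops.
     */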
4617    tcg_t1 = tcg_temp_new_i32();
4618    tcg_t2 = tcg_temp_new_i32();
4619    tcg_gen_neg_i32(tcg_t1, tcg_t0);
4620    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
4621
4622    if (nzcv & 8) { /* N */
4623        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
4624    } else {
4625        if (TCG_TARGET_HAS_andc_i32) {
4626            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
4627        } else {
4628            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
4629        }
4630    }
4631    if (nzcv & 4) { /* Z */
4632        if (TCG_TARGET_HAS_andc_i32) {
4633            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
4634        } else {
4635            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
4636        }
4637    } else {
4638        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
4639    }
4640    if (nzcv & 2) { /* C */
4641        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
4642    } else {
4643        if (TCG_TARGET_HAS_andc_i32) {
4644            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
4645        } else {
4646            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
4647        }
4648    }
4649    if (nzcv & 1) { /* V */
4650        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
4651    } else {
4652        if (TCG_TARGET_HAS_andc_i32) {
4653            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
4654        } else {
4655            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
4656        }
4657    }
4658    tcg_temp_free_i32(tcg_t0);
4659    tcg_temp_free_i32(tcg_t1);
4660    tcg_temp_free_i32(tcg_t2);
4661}
4662
4663/* Conditional select
4664 *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
4665 * +----+----+---+-----------------+------+------+-----+------+------+
4666 * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
4667 * +----+----+---+-----------------+------+------+-----+------+------+
4668 */
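/*
 * For example, CSET x0, EQ is the alias CSINC x0, xzr, xzr, NE and is
 * handled by the CSET/CSETM special case below.
 */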
4669static void disas_cond_select(DisasContext *s, uint32_t insn)
4670{
4671    unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
4672    TCGv_i64 tcg_rd, zero;
4673    DisasCompare64 c;
4674
4675    if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
4676        /* S == 1 or op2<1> == 1 */
4677        unallocated_encoding(s);
4678        return;
4679    }
4680    sf = extract32(insn, 31, 1);
4681    else_inv = extract32(insn, 30, 1);
4682    rm = extract32(insn, 16, 5);
4683    cond = extract32(insn, 12, 4);
4684    else_inc = extract32(insn, 10, 1);
4685    rn = extract32(insn, 5, 5);
4686    rd = extract32(insn, 0, 5);
4687
4688    tcg_rd = cpu_reg(s, rd);
4689
4690    a64_test_cc(&c, cond);
4691    zero = tcg_const_i64(0);
4692
4693    if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
4694        /* CSET & CSETM.  */
4695        tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
4696        if (else_inv) {
4697            tcg_gen_neg_i64(tcg_rd, tcg_rd);
4698        }
4699    } else {
4700        TCGv_i64 t_true = cpu_reg(s, rn);
4701        TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
4702        if (else_inv && else_inc) {
4703            tcg_gen_neg_i64(t_false, t_false);
4704        } else if (else_inv) {
4705            tcg_gen_not_i64(t_false, t_false);
4706        } else if (else_inc) {
4707            tcg_gen_addi_i64(t_false, t_false, 1);
4708        }
4709        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
4710    }
4711
4712    tcg_temp_free_i64(zero);
4713    a64_free_cc(&c);
4714
4715    if (!sf) {
4716        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4717    }
4718}
4719
4720static void handle_clz(DisasContext *s, unsigned int sf,
4721                       unsigned int rn, unsigned int rd)
4722{
4723    TCGv_i64 tcg_rd, tcg_rn;
4724    tcg_rd = cpu_reg(s, rd);
4725    tcg_rn = cpu_reg(s, rn);
4726
4727    if (sf) {
4728        tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
4729    } else {
4730        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4731        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4732        tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
4733        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4734        tcg_temp_free_i32(tcg_tmp32);
4735    }
4736}
4737
4738static void handle_cls(DisasContext *s, unsigned int sf,
4739                       unsigned int rn, unsigned int rd)
4740{
4741    TCGv_i64 tcg_rd, tcg_rn;
4742    tcg_rd = cpu_reg(s, rd);
4743    tcg_rn = cpu_reg(s, rn);
4744
4745    if (sf) {
4746        tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
4747    } else {
4748        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4749        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4750        tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
4751        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4752        tcg_temp_free_i32(tcg_tmp32);
4753    }
4754}
4755
4756static void handle_rbit(DisasContext *s, unsigned int sf,
4757                        unsigned int rn, unsigned int rd)
4758{
4759    TCGv_i64 tcg_rd, tcg_rn;
4760    tcg_rd = cpu_reg(s, rd);
4761    tcg_rn = cpu_reg(s, rn);
4762
4763    if (sf) {
4764        gen_helper_rbit64(tcg_rd, tcg_rn);
4765    } else {
4766        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4767        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4768        gen_helper_rbit(tcg_tmp32, tcg_tmp32);
4769        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4770        tcg_temp_free_i32(tcg_tmp32);
4771    }
4772}
4773
4774/* REV with sf==1, opcode==3 ("REV64") */
4775static void handle_rev64(DisasContext *s, unsigned int sf,
4776                         unsigned int rn, unsigned int rd)
4777{
4778    if (!sf) {
4779        unallocated_encoding(s);
4780        return;
4781    }
4782    tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
4783}
4784
4785/* REV with sf==0, opcode==2
4786 * REV32 (sf==1, opcode==2)
4787 */
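/* For example, REV32 x0, x1 byte-reverses each 32-bit half of x1
 * independently, while REV w0, w1 reverses the single 32-bit word.
 */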
4788static void handle_rev32(DisasContext *s, unsigned int sf,
4789                         unsigned int rn, unsigned int rd)
4790{
4791    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4792
4793    if (sf) {
4794        TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4795        TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4796
4797        /* bswap32_i64 requires zero high word */
4798        tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
4799        tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
4800        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4801        tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
4802        tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
4803
4804        tcg_temp_free_i64(tcg_tmp);
4805    } else {
4806        tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
4807        tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
4808    }
4809}
4810
4811/* REV16 (opcode==1) */
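/* Swaps the two bytes within each 16-bit halfword of Rn: the odd byte
 * lanes are shifted down, both lane sets are masked, and the even lanes
 * are shifted up before recombining.
 */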
4812static void handle_rev16(DisasContext *s, unsigned int sf,
4813                         unsigned int rn, unsigned int rd)
4814{
4815    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4816    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4817    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4818    TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
4819
4820    tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
4821    tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
4822    tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
4823    tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
4824    tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
4825
4826    tcg_temp_free_i64(mask);
4827    tcg_temp_free_i64(tcg_tmp);
4828}
4829
4830/* Data-processing (1 source)
4831 *   31  30  29  28             21 20     16 15    10 9    5 4    0
4832 * +----+---+---+-----------------+---------+--------+------+------+
4833 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
4834 * +----+---+---+-----------------+---------+--------+------+------+
4835 */
4836static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4837{
4838    unsigned int sf, opcode, opcode2, rn, rd;
4839    TCGv_i64 tcg_rd;
4840
4841    if (extract32(insn, 29, 1)) {
4842        unallocated_encoding(s);
4843        return;
4844    }
4845
4846    sf = extract32(insn, 31, 1);
4847    opcode = extract32(insn, 10, 6);
4848    opcode2 = extract32(insn, 16, 5);
4849    rn = extract32(insn, 5, 5);
4850    rd = extract32(insn, 0, 5);
4851
4852#define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))
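/* MAP packs SF into bit 0, the 6-bit opcode O1 into bits 6:1 and the
 * 5-bit opcode2 O2 into bits 11:7, giving each (sf, opcode2, opcode)
 * triple a unique case value.
 */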
4853
4854    switch (MAP(sf, opcode2, opcode)) {
4855    case MAP(0, 0x00, 0x00): /* RBIT */
4856    case MAP(1, 0x00, 0x00):
4857        handle_rbit(s, sf, rn, rd);
4858        break;
4859    case MAP(0, 0x00, 0x01): /* REV16 */
4860    case MAP(1, 0x00, 0x01):
4861        handle_rev16(s, sf, rn, rd);
4862        break;
4863    case MAP(0, 0x00, 0x02): /* REV/REV32 */
4864    case MAP(1, 0x00, 0x02):
4865        handle_rev32(s, sf, rn, rd);
4866        break;
4867    case MAP(1, 0x00, 0x03): /* REV64 */
4868        handle_rev64(s, sf, rn, rd);
4869        break;
4870    case MAP(0, 0x00, 0x04): /* CLZ */
4871    case MAP(1, 0x00, 0x04):
4872        handle_clz(s, sf, rn, rd);
4873        break;
4874    case MAP(0, 0x00, 0x05): /* CLS */
4875    case MAP(1, 0x00, 0x05):
4876        handle_cls(s, sf, rn, rd);
4877        break;
4878    case MAP(1, 0x01, 0x00): /* PACIA */
4879        if (s->pauth_active) {
4880            tcg_rd = cpu_reg(s, rd);
4881            gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4882        } else if (!dc_isar_feature(aa64_pauth, s)) {
4883            goto do_unallocated;
4884        }
4885        break;
4886    case MAP(1, 0x01, 0x01): /* PACIB */
4887        if (s->pauth_active) {
4888            tcg_rd = cpu_reg(s, rd);
4889            gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4890        } else if (!dc_isar_feature(aa64_pauth, s)) {
4891            goto do_unallocated;
4892        }
4893        break;
4894    case MAP(1, 0x01, 0x02): /* PACDA */
4895        if (s->pauth_active) {
4896            tcg_rd = cpu_reg(s, rd);
4897            gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4898        } else if (!dc_isar_feature(aa64_pauth, s)) {
4899            goto do_unallocated;
4900        }
4901        break;
4902    case MAP(1, 0x01, 0x03): /* PACDB */
4903        if (s->pauth_active) {
4904            tcg_rd = cpu_reg(s, rd);
4905            gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4906        } else if (!dc_isar_feature(aa64_pauth, s)) {
4907            goto do_unallocated;
4908        }
4909        break;
4910    case MAP(1, 0x01, 0x04): /* AUTIA */
4911        if (s->pauth_active) {
4912            tcg_rd = cpu_reg(s, rd);
4913            gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4914        } else if (!dc_isar_feature(aa64_pauth, s)) {
4915            goto do_unallocated;
4916        }
4917        break;
4918    case MAP(1, 0x01, 0x05): /* AUTIB */
4919        if (s->pauth_active) {
4920            tcg_rd = cpu_reg(s, rd);
4921            gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4922        } else if (!dc_isar_feature(aa64_pauth, s)) {
4923            goto do_unallocated;
4924        }
4925        break;
4926    case MAP(1, 0x01, 0x06): /* AUTDA */
4927        if (s->pauth_active) {
4928            tcg_rd = cpu_reg(s, rd);
4929            gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4930        } else if (!dc_isar_feature(aa64_pauth, s)) {
4931            goto do_unallocated;
4932        }
4933        break;
4934    case MAP(1, 0x01, 0x07): /* AUTDB */
4935        if (s->pauth_active) {
4936            tcg_rd = cpu_reg(s, rd);
4937            gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4938        } else if (!dc_isar_feature(aa64_pauth, s)) {
4939            goto do_unallocated;
4940        }
4941        break;
4942    case MAP(1, 0x01, 0x08): /* PACIZA */
4943        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4944            goto do_unallocated;
4945        } else if (s->pauth_active) {
4946            tcg_rd = cpu_reg(s, rd);
4947            gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4948        }
4949        break;
4950    case MAP(1, 0x01, 0x09): /* PACIZB */
4951        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4952            goto do_unallocated;
4953        } else if (s->pauth_active) {
4954            tcg_rd = cpu_reg(s, rd);
4955            gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4956        }
4957        break;
4958    case MAP(1, 0x01, 0x0a): /* PACDZA */
4959        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4960            goto do_unallocated;
4961        } else if (s->pauth_active) {
4962            tcg_rd = cpu_reg(s, rd);
4963            gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4964        }
4965        break;
4966    case MAP(1, 0x01, 0x0b): /* PACDZB */
4967        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4968            goto do_unallocated;
4969        } else if (s->pauth_active) {
4970            tcg_rd = cpu_reg(s, rd);
4971            gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4972        }
4973        break;
4974    case MAP(1, 0x01, 0x0c): /* AUTIZA */
4975        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4976            goto do_unallocated;
4977        } else if (s->pauth_active) {
4978            tcg_rd = cpu_reg(s, rd);
4979            gen_helper_autia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4980        }
4981        break;
4982    case MAP(1, 0x01, 0x0d): /* AUTIZB */
4983        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4984            goto do_unallocated;
4985        } else if (s->pauth_active) {
4986            tcg_rd = cpu_reg(s, rd);
4987            gen_helper_autib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4988        }
4989        break;
4990    case MAP(1, 0x01, 0x0e): /* AUTDZA */
4991        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4992            goto do_unallocated;
4993        } else if (s->pauth_active) {
4994            tcg_rd = cpu_reg(s, rd);
4995            gen_helper_autda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4996        }
4997        break;
4998    case MAP(1, 0x01, 0x0f): /* AUTDZB */
4999        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5000            goto do_unallocated;
5001        } else if (s->pauth_active) {
5002            tcg_rd = cpu_reg(s, rd);
5003            gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5004        }
5005        break;
5006    case MAP(1, 0x01, 0x10): /* XPACI */
5007        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5008            goto do_unallocated;
5009        } else if (s->pauth_active) {
5010            tcg_rd = cpu_reg(s, rd);
5011            gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
5012        }
5013        break;
5014    case MAP(1, 0x01, 0x11): /* XPACD */
5015        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5016            goto do_unallocated;
5017        } else if (s->pauth_active) {
5018            tcg_rd = cpu_reg(s, rd);
5019            gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
5020        }
5021        break;
5022    default:
5023    do_unallocated:
5024        unallocated_encoding(s);
5025        break;
5026    }
5027
5028#undef MAP
5029}
5030
5031static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5032                       unsigned int rm, unsigned int rn, unsigned int rd)
5033{
5034    TCGv_i64 tcg_n, tcg_m, tcg_rd;
5035    tcg_rd = cpu_reg(s, rd);
5036
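        /* For the 32-bit signed form, sign-extend the operands so the
         * 64-bit helper produces the correct 32-bit result; e.g.
         * INT32_MIN / -1 truncates back to 0x80000000 as SDIV requires.
         */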
5037    if (!sf && is_signed) {
5038        tcg_n = new_tmp_a64(s);
5039        tcg_m = new_tmp_a64(s);
5040        tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5041        tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5042    } else {
5043        tcg_n = read_cpu_reg(s, rn, sf);
5044        tcg_m = read_cpu_reg(s, rm, sf);
5045    }
5046
5047    if (is_signed) {
5048        gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5049    } else {
5050        gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5051    }
5052
5053    if (!sf) { /* zero extend final result */
5054        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5055    }
5056}
5057
5058/* LSLV, LSRV, ASRV, RORV */
5059static void handle_shift_reg(DisasContext *s,
5060                             enum a64_shift_type shift_type, unsigned int sf,
5061                             unsigned int rm, unsigned int rn, unsigned int rd)
5062{
5063    TCGv_i64 tcg_shift = tcg_temp_new_i64();
5064    TCGv_i64 tcg_rd = cpu_reg(s, rd);
5065    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5066
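        /* The shift amount is Rm taken modulo the register width. */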
5067    tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5068    shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5069    tcg_temp_free_i64(tcg_shift);
5070}
5071
5072/* CRC32[BHWX], CRC32C[BHWX] */
5073static void handle_crc32(DisasContext *s,
5074                         unsigned int sf, unsigned int sz, bool crc32c,
5075                         unsigned int rm, unsigned int rn, unsigned int rd)
5076{
5077    TCGv_i64 tcg_acc, tcg_val;
5078    TCGv_i32 tcg_bytes;
5079
5080    if (!dc_isar_feature(aa64_crc32, s)
5081        || (sf == 1 && sz != 3)
5082        || (sf == 0 && sz == 3)) {
5083        unallocated_encoding(s);
5084        return;
5085    }
5086
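        /* sz selects the operand width: 0 = byte, 1 = halfword,
         * 2 = word, 3 = doubleword; only the doubleword forms set sf.
         */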
5087    if (sz == 3) {
5088        tcg_val = cpu_reg(s, rm);
5089    } else {
5090        uint64_t mask;
5091        switch (sz) {
5092        case 0:
5093            mask = 0xFF;
5094            break;
5095        case 1:
5096            mask = 0xFFFF;
5097            break;
5098        case 2:
5099            mask = 0xFFFFFFFF;
5100            break;
5101        default:
5102            g_assert_not_reached();
5103        }
5104        tcg_val = new_tmp_a64(s);
5105        tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5106    }
5107
5108    tcg_acc = cpu_reg(s, rn);
5109    tcg_bytes = tcg_const_i32(1 << sz);
5110
5111    if (crc32c) {
5112        gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5113    } else {
5114        gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5115    }
5116
5117    tcg_temp_free_i32(tcg_bytes);
5118}
5119
5120/* Data-processing (2 source)
5121 *   31   30  29 28             21 20  16 15    10 9    5 4    0
5122 * +----+---+---+-----------------+------+--------+------+------+
5123 * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
5124 * +----+---+---+-----------------+------+--------+------+------+
5125 */
5126static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
5127{
5128    unsigned int sf, rm, opcode, rn, rd;
5129    sf = extract32(insn, 31, 1);
5130    rm = extract32(insn, 16, 5);
5131    opcode = extract32(insn, 10, 6);
5132    rn = extract32(insn, 5, 5);
5133    rd = extract32(insn, 0, 5);
5134
5135    if (extract32(insn, 29, 1)) {
5136        unallocated_encoding(s);
5137        return;
5138    }
5139
5140    switch (opcode) {
5141    case 2: /* UDIV */
5142        handle_div(s, false, sf, rm, rn, rd);
5143        break;
5144    case 3: /* SDIV */
5145        handle_div(s, true, sf, rm, rn, rd);
5146        break;
5147    case 8: /* LSLV */
5148        handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
5149        break;
5150    case 9: /* LSRV */
5151        handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
5152        break;
5153    case 10: /* ASRV */
5154        handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
5155        break;
5156    case 11: /* RORV */
5157        handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
5158        break;
5159    case 12: /* PACGA */
5160        if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
5161            goto do_unallocated;
5162        }
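            /* PACGA computes a PAC over Rn with the generic key and
             * writes it to the top 32 bits of Rd, zeroing the bottom
             * half.
             */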
5163        gen_helper_pacga(cpu_reg(s, rd), cpu_env,
5164                         cpu_reg(s, rn), cpu_reg_sp(s, rm));
5165        break;
5166    case 16:
5167    case 17:
5168    case 18:
5169    case 19:
5170    case 20:
5171    case 21:
5172    case 22:
5173    case 23: /* CRC32 */
5174    {
5175        int sz = extract32(opcode, 0, 2);
5176        bool crc32c = extract32(opcode, 2, 1);
5177        handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
5178        break;
5179    }
5180    default:
5181    do_unallocated:
5182        unallocated_encoding(s);
5183        break;
5184    }
5185}
5186
5187/*
5188 * Data processing - register
5189 *  31  30 29  28      25    21  20  16      10         0
5190 * +--+---+--+---+-------+-----+-------+-------+---------+
5191 * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
5192 * +--+---+--+---+-------+-----+-------+-------+---------+
5193 */
5194static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5195{
5196    int op0 = extract32(insn, 30, 1);
5197    int op1 = extract32(insn, 28, 1);
5198    int op2 = extract32(insn, 21, 4);
5199    int op3 = extract32(insn, 10, 6);
5200
5201    if (!op1) {
5202        if (op2 & 8) {
5203            if (op2 & 1) {
5204                /* Add/sub (extended register) */
5205                disas_add_sub_ext_reg(s, insn);
5206            } else {
5207                /* Add/sub (shifted register) */
5208                disas_add_sub_reg(s, insn);
5209            }
5210        } else {
5211            /* Logical (shifted register) */
5212            disas_logic_reg(s, insn);
5213        }
5214        return;
5215    }
5216
5217    switch (op2) {
5218    case 0x0:
5219        switch (op3) {
5220        case 0x00: /* Add/subtract (with carry) */
5221            disas_adc_sbc(s, insn);
5222            break;
5223
5224        case 0x01: /* Rotate right into flags */
5225        case 0x21:
5226            disas_rotate_right_into_flags(s, insn);
5227            break;
5228
5229        case 0x02: /* Evaluate into flags */
5230        case 0x12:
5231        case 0x22:
5232        case 0x32:
5233            disas_evaluate_into_flags(s, insn);
5234            break;
5235
5236        default:
5237            goto do_unallocated;
5238        }
5239        break;
5240
5241    case 0x2: /* Conditional compare */
5242        disas_cc(s, insn); /* both imm and reg forms */
5243        break;
5244
5245    case 0x4: /* Conditional select */
5246        disas_cond_select(s, insn);
5247        break;
5248
5249    case 0x6: /* Data-processing */
5250        if (op0) {    /* (1 source) */
5251            disas_data_proc_1src(s, insn);
5252        } else {      /* (2 source) */
5253            disas_data_proc_2src(s, insn);
5254        }
5255        break;
5256    case 0x8 ... 0xf: /* (3 source) */
5257        disas_data_proc_3src(s, insn);
5258        break;
5259
5260    default:
5261    do_unallocated:
5262        unallocated_encoding(s);
5263        break;
5264    }
5265}
5266
5267static void handle_fp_compare(DisasContext *s, int size,
5268                              unsigned int rn, unsigned int rm,
5269                              bool cmp_with_zero, bool signal_all_nans)
5270{
5271    TCGv_i64 tcg_flags = tcg_temp_new_i64();
5272    TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
5273
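        /* The compare helpers return the NZCV value in the layout
         * that gen_set_nzcv() expects.
         */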
5274    if (size == MO_64) {
5275        TCGv_i64 tcg_vn, tcg_vm;
5276
5277        tcg_vn = read_fp_dreg(s, rn);
5278        if (cmp_with_zero) {
5279            tcg_vm = tcg_const_i64(0);
5280        } else {
5281            tcg_vm = read_fp_dreg(s, rm);
5282        }
5283        if (signal_all_nans) {
5284            gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5285        } else {
5286            gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5287        }
5288        tcg_temp_free_i64(tcg_vn);
5289        tcg_temp_free_i64(tcg_vm);
5290    } else {
5291        TCGv_i32 tcg_vn = tcg_temp_new_i32();
5292        TCGv_i32 tcg_vm = tcg_temp_new_i32();
5293
5294        read_vec_element_i32(s, tcg_vn, rn, 0, size);
5295        if (cmp_with_zero) {
5296            tcg_gen_movi_i32(tcg_vm, 0);
5297        } else {
5298            read_vec_element_i32(s, tcg_vm, rm, 0, size);
5299        }
5300
5301        switch (size) {
5302        case MO_32:
5303            if (signal_all_nans) {
5304                gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5305            } else {
5306                gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5307            }
5308            break;
5309        case MO_16:
5310            if (signal_all_nans) {
5311                gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5312            } else {
5313                gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5314            }
5315            break;
5316        default:
5317            g_assert_not_reached();
5318        }
5319
5320        tcg_temp_free_i32(tcg_vn);
5321        tcg_temp_free_i32(tcg_vm);
5322    }
5323
5324    tcg_temp_free_ptr(fpst);
5325
5326    gen_set_nzcv(tcg_flags);
5327
5328    tcg_temp_free_i64(tcg_flags);
5329}
5330
5331/* Floating point compare
5332 *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
5333 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5334 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
5335 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5336 */
5337static void disas_fp_compare(DisasContext *s, uint32_t insn)
5338{
5339    unsigned int mos, type, rm, op, rn, opc, op2r;
5340    int size;
5341
5342    mos = extract32(insn, 29, 3);
5343    type = extract32(insn, 22, 2);
5344    rm = extract32(insn, 16, 5);
5345    op = extract32(insn, 14, 2);
5346    rn = extract32(insn, 5, 5);
5347    opc = extract32(insn, 3, 2);
5348    op2r = extract32(insn, 0, 3);
5349
5350    if (mos || op || op2r) {
5351        unallocated_encoding(s);
5352        return;
5353    }
5354
5355    switch (type) {
5356    case 0:
5357        size = MO_32;
5358        break;
5359    case 1:
5360        size = MO_64;
5361        break;
5362    case 3:
5363        size = MO_16;
5364        if (dc_isar_feature(aa64_fp16, s)) {
5365            break;
5366        }
5367        /* fallthru */
5368    default:
5369        unallocated_encoding(s);
5370        return;
5371    }
5372
5373    if (!fp_access_check(s)) {
5374        return;
5375    }
5376
5377    handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
5378}
5379
5380/* Floating point conditional compare
5381 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
5382 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5383 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
5384 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5385 */
5386static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
5387{
5388    unsigned int mos, type, rm, cond, rn, op, nzcv;
5389    TCGv_i64 tcg_flags;
5390    TCGLabel *label_continue = NULL;
5391    int size;
5392
5393    mos = extract32(insn, 29, 3);
5394    type = extract32(insn, 22, 2);
5395    rm = extract32(insn, 16, 5);
5396    cond = extract32(insn, 12, 4);
5397    rn = extract32(insn, 5, 5);
5398    op = extract32(insn, 4, 1);
5399    nzcv = extract32(insn, 0, 4);
5400
5401    if (mos) {
5402        unallocated_encoding(s);
5403        return;
5404    }
5405
5406    switch (type) {
5407    case 0:
5408        size = MO_32;
5409        break;
5410    case 1:
5411        size = MO_64;
5412        break;
5413    case 3:
5414        size = MO_16;
5415        if (dc_isar_feature(aa64_fp16, s)) {
5416            break;
5417        }
5418        /* fallthru */
5419    default:
5420        unallocated_encoding(s);
5421        return;
5422    }
5423
5424    if (!fp_access_check(s)) {
5425        return;
5426    }
5427
5428    if (cond < 0x0e) { /* not always */
5429        TCGLabel *label_match = gen_new_label();
5430        label_continue = gen_new_label();
5431        arm_gen_test_cc(cond, label_match);
5432        /* nomatch: */
5433        tcg_flags = tcg_const_i64(nzcv << 28);
5434        gen_set_nzcv(tcg_flags);
5435        tcg_temp_free_i64(tcg_flags);
5436        tcg_gen_br(label_continue);
5437        gen_set_label(label_match);
5438    }
5439
5440    handle_fp_compare(s, size, rn, rm, false, op);
5441
5442    if (cond < 0x0e) {
5443        gen_set_label(label_continue);
5444    }
5445}
5446
5447/* Floating point conditional select
5448 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
5449 * +---+---+---+-----------+------+---+------+------+-----+------+------+
5450 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
5451 * +---+---+---+-----------+------+---+------+------+-----+------+------+
5452 */
5453static void disas_fp_csel(DisasContext *s, uint32_t insn)
5454{
5455    unsigned int mos, type, rm, cond, rn, rd;
5456    TCGv_i64 t_true, t_false, t_zero;
5457    DisasCompare64 c;
5458    TCGMemOp sz;
5459
5460    mos = extract32(insn, 29, 3);
5461    type = extract32(insn, 22, 2);
5462    rm = extract32(insn, 16, 5);
5463    cond = extract32(insn, 12, 4);
5464    rn = extract32(insn, 5, 5);
5465    rd = extract32(insn, 0, 5);
5466
5467    if (mos) {
5468        unallocated_encoding(s);
5469        return;
5470    }
5471
5472    switch (type) {
5473    case 0:
5474        sz = MO_32;
5475        break;
5476    case 1:
5477        sz = MO_64;
5478        break;
5479    case 3:
5480        sz = MO_16;
5481        if (dc_isar_feature(aa64_fp16, s)) {
5482            break;
5483        }
5484        /* fallthru */
5485    default:
5486        unallocated_encoding(s);
5487        return;
5488    }
5489
5490    if (!fp_access_check(s)) {
5491        return;
5492    }
5493
5494    /* Zero extend sreg & hreg inputs to 64 bits now.  */
5495    t_true = tcg_temp_new_i64();
5496    t_false = tcg_temp_new_i64();
5497    read_vec_element(s, t_true, rn, 0, sz);
5498    read_vec_element(s, t_false, rm, 0, sz);
5499
5500    a64_test_cc(&c, cond);
5501    t_zero = tcg_const_i64(0);
5502    tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
5503    tcg_temp_free_i64(t_zero);
5504    tcg_temp_free_i64(t_false);
5505    a64_free_cc(&c);
5506
5507    /* Note that sregs & hregs write back zeros to the high bits,
5508     * and we've already done the zero-extension.  */
5509    write_fp_dreg(s, rd, t_true);
5510    tcg_temp_free_i64(t_true);
5511}
5512
5513/* Floating-point data-processing (1 source) - half precision */
5514static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
5515{
5516    TCGv_ptr fpst = NULL;
5517    TCGv_i32 tcg_op = read_fp_hreg(s, rn);
5518    TCGv_i32 tcg_res = tcg_temp_new_i32();
5519
5520    switch (opcode) {
5521    case 0x0: /* FMOV */
5522        tcg_gen_mov_i32(tcg_res, tcg_op);
5523        break;
5524    case 0x1: /* FABS */
5525        tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
5526        break;
5527    case 0x2: /* FNEG */
5528        tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
5529        break;
5530    case 0x3: /* FSQRT */
5531        fpst = get_fpstatus_ptr(true);
5532        gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
5533        break;
5534    case 0x8: /* FRINTN */
5535    case 0x9: /* FRINTP */
5536    case 0xa: /* FRINTM */
5537    case 0xb: /* FRINTZ */
5538    case 0xc: /* FRINTA */
5539    {
5540        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
5541        fpst = get_fpstatus_ptr(true);
5542
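            /* set_rmode hands the previous rounding mode back in
             * tcg_rmode, so the second call below restores it.
             */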
5543        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5544        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5545
5546        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5547        tcg_temp_free_i32(tcg_rmode);
5548        break;
5549    }
5550    case 0xe: /* FRINTX */
5551        fpst = get_fpstatus_ptr(true);
5552        gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
5553        break;
5554    case 0xf: /* FRINTI */
5555        fpst = get_fpstatus_ptr(true);
5556        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5557        break;
5558    default:
5559        g_assert_not_reached();
5560    }
5561
5562    write_fp_sreg(s, rd, tcg_res);
5563
5564    if (fpst) {
5565        tcg_temp_free_ptr(fpst);
5566    }
5567    tcg_temp_free_i32(tcg_op);
5568    tcg_temp_free_i32(tcg_res);
5569}
5570
5571/* Floating-point data-processing (1 source) - single precision */
5572static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
5573{
5574    void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
5575    TCGv_i32 tcg_op, tcg_res;
5576    TCGv_ptr fpst;
5577    int rmode = -1;
5578
5579    tcg_op = read_fp_sreg(s, rn);
5580    tcg_res = tcg_temp_new_i32();
5581
5582    switch (opcode) {
5583    case 0x0: /* FMOV */
5584        tcg_gen_mov_i32(tcg_res, tcg_op);
5585        goto done;
5586    case 0x1: /* FABS */
5587        gen_helper_vfp_abss(tcg_res, tcg_op);
5588        goto done;
5589    case 0x2: /* FNEG */
5590        gen_helper_vfp_negs(tcg_res, tcg_op);
5591        goto done;
5592    case 0x3: /* FSQRT */
5593        gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
5594        goto done;
5595    case 0x8: /* FRINTN */
5596    case 0x9: /* FRINTP */
5597    case 0xa: /* FRINTM */
5598    case 0xb: /* FRINTZ */
5599    case 0xc: /* FRINTA */
5600        rmode = arm_rmode_to_sf(opcode & 7);
5601        gen_fpst = gen_helper_rints;
5602        break;
5603    case 0xe: /* FRINTX */
5604        gen_fpst = gen_helper_rints_exact;
5605        break;
5606    case 0xf: /* FRINTI */
5607        gen_fpst = gen_helper_rints;
5608        break;
5609    case 0x10: /* FRINT32Z */
5610        rmode = float_round_to_zero;
5611        gen_fpst = gen_helper_frint32_s;
5612        break;
5613    case 0x11: /* FRINT32X */
5614        gen_fpst = gen_helper_frint32_s;
5615        break;
5616    case 0x12: /* FRINT64Z */
5617        rmode = float_round_to_zero;
5618        gen_fpst = gen_helper_frint64_s;
5619        break;
5620    case 0x13: /* FRINT64X */
5621        gen_fpst = gen_helper_frint64_s;
5622        break;
5623    default:
5624        g_assert_not_reached();
5625    }
5626
5627    fpst = get_fpstatus_ptr(false);
5628    if (rmode >= 0) {
5629        TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
5630        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5631        gen_fpst(tcg_res, tcg_op, fpst);
5632        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5633        tcg_temp_free_i32(tcg_rmode);
5634    } else {
5635        gen_fpst(tcg_res, tcg_op, fpst);
5636    }
5637    tcg_temp_free_ptr(fpst);
5638
5639 done:
5640    write_fp_sreg(s, rd, tcg_res);
5641    tcg_temp_free_i32(tcg_op);
5642    tcg_temp_free_i32(tcg_res);
5643}
5644
5645/* Floating-point data-processing (1 source) - double precision */
5646static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
5647{
5648    void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
5649    TCGv_i64 tcg_op, tcg_res;
5650    TCGv_ptr fpst;
5651    int rmode = -1;
5652
5653    switch (opcode) {
5654    case 0x0: /* FMOV */
5655        gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
5656        return;
5657    }
5658
5659    tcg_op = read_fp_dreg(s, rn);
5660    tcg_res = tcg_temp_new_i64();
5661
5662    switch (opcode) {
5663    case 0x1: /* FABS */
5664        gen_helper_vfp_absd(tcg_res, tcg_op);
5665        goto done;
5666    case 0x2: /* FNEG */
5667        gen_helper_vfp_negd(tcg_res, tcg_op);
5668        goto done;
5669    case 0x3: /* FSQRT */
5670        gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
5671        goto done;
5672    case 0x8: /* FRINTN */
5673    case 0x9: /* FRINTP */
5674    case 0xa: /* FRINTM */
5675    case 0xb: /* FRINTZ */
5676    case 0xc: /* FRINTA */
5677        rmode = arm_rmode_to_sf(opcode & 7);
5678        gen_fpst = gen_helper_rintd;
5679        break;
5680    case 0xe: /* FRINTX */
5681        gen_fpst = gen_helper_rintd_exact;
5682        break;
5683    case 0xf: /* FRINTI */
5684        gen_fpst = gen_helper_rintd;
5685        break;
5686    case 0x10: /* FRINT32Z */
5687        rmode = float_round_to_zero;
5688        gen_fpst = gen_helper_frint32_d;
5689        break;
5690    case 0x11: /* FRINT32X */
5691        gen_fpst = gen_helper_frint32_d;
5692        break;
5693    case 0x12: /* FRINT64Z */
5694        rmode = float_round_to_zero;
5695        gen_fpst = gen_helper_frint64_d;
5696        break;
5697    case 0x13: /* FRINT64X */
5698        gen_fpst = gen_helper_frint64_d;
5699        break;
5700    default:
5701        g_assert_not_reached();
5702    }
5703
5704    fpst = get_fpstatus_ptr(false);
5705    if (rmode >= 0) {
5706        TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
5707        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5708        gen_fpst(tcg_res, tcg_op, fpst);
5709        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5710        tcg_temp_free_i32(tcg_rmode);
5711    } else {
5712        gen_fpst(tcg_res, tcg_op, fpst);
5713    }
5714    tcg_temp_free_ptr(fpst);
5715
5716 done:
5717    write_fp_dreg(s, rd, tcg_res);
5718    tcg_temp_free_i64(tcg_op);
5719    tcg_temp_free_i64(tcg_res);
5720}
5721
5722static void handle_fp_fcvt(DisasContext *s, int opcode,
5723                           int rd, int rn, int dtype, int ntype)
5724{
5725    switch (ntype) {
5726    case 0x0:
5727    {
5728        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
5729        if (dtype == 1) {
5730            /* Single to double */
5731            TCGv_i64 tcg_rd = tcg_temp_new_i64();
5732            gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
5733            write_fp_dreg(s, rd, tcg_rd);
5734            tcg_temp_free_i64(tcg_rd);
5735        } else {
5736            /* Single to half */
5737            TCGv_i32 tcg_rd = tcg_temp_new_i32();
5738            TCGv_i32 ahp = get_ahp_flag();
5739            TCGv_ptr fpst = get_fpstatus_ptr(false);
5740
5741            gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5742            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5743            write_fp_sreg(s, rd, tcg_rd);
5744            tcg_temp_free_i32(tcg_rd);
5745            tcg_temp_free_i32(ahp);
5746            tcg_temp_free_ptr(fpst);
5747        }
5748        tcg_temp_free_i32(tcg_rn);
5749        break;
5750    }
5751    case 0x1:
5752    {
5753        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
5754        TCGv_i32 tcg_rd = tcg_temp_new_i32();
5755        if (dtype == 0) {
5756            /* Double to single */
5757            gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
5758        } else {
5759            TCGv_ptr fpst = get_fpstatus_ptr(false);
5760            TCGv_i32 ahp = get_ahp_flag();
5761            /* Double to half */
5762            gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5763            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5764            tcg_temp_free_ptr(fpst);
5765            tcg_temp_free_i32(ahp);
5766        }
5767        write_fp_sreg(s, rd, tcg_rd);
5768        tcg_temp_free_i32(tcg_rd);
5769        tcg_temp_free_i64(tcg_rn);
5770        break;
5771    }
5772    case 0x3:
5773    {
5774        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
5775        TCGv_ptr tcg_fpst = get_fpstatus_ptr(false);
5776        TCGv_i32 tcg_ahp = get_ahp_flag();
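            /* Only the low 16 bits of the S register hold the
             * half-precision input; mask off anything above them.
             */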
5777        tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
5778        if (dtype == 0) {
5779            /* Half to single */
5780            TCGv_i32 tcg_rd = tcg_temp_new_i32();
5781            gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
5782            write_fp_sreg(s, rd, tcg_rd);
5783            tcg_temp_free_ptr(tcg_fpst);
5784            tcg_temp_free_i32(tcg_ahp);
5785            tcg_temp_free_i32(tcg_rd);
5786        } else {
5787            /* Half to double */
5788            TCGv_i64 tcg_rd = tcg_temp_new_i64();
5789            gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
5790            write_fp_dreg(s, rd, tcg_rd);
5791            tcg_temp_free_i64(tcg_rd);
5792        }
5793        tcg_temp_free_i32(tcg_rn);
5794        break;
5795    }
5796    default:
5797        g_assert_not_reached();
5798    }
5799}
5800
5801/* Floating point data-processing (1 source)
5802 *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
5803 * +---+---+---+-----------+------+---+--------+-----------+------+------+
5804 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
5805 * +---+---+---+-----------+------+---+--------+-----------+------+------+
5806 */
5807static void disas_fp_1src(DisasContext *s, uint32_t insn)
5808{
5809    int mos = extract32(insn, 29, 3);
5810    int type = extract32(insn, 22, 2);
5811    int opcode = extract32(insn, 15, 6);
5812    int rn = extract32(insn, 5, 5);
5813    int rd = extract32(insn, 0, 5);
5814
5815    if (mos) {
5816        unallocated_encoding(s);
5817        return;
5818    }
5819
5820    switch (opcode) {
5821    case 0x4: case 0x5: case 0x7:
5822    {
5823        /* FCVT between half, single and double precision */
5824        int dtype = extract32(opcode, 0, 2);
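            /* type == 2 is reserved, and converting to the source
             * precision (dtype == type) is unallocated.
             */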
5825        if (type == 2 || dtype == type) {
5826            unallocated_encoding(s);
5827            return;
5828        }
5829        if (!fp_access_check(s)) {
5830            return;
5831        }
5832
5833        handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
5834        break;
5835    }
5836
5837    case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
5838        if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
5839            unallocated_encoding(s);
5840            return;
5841        }
5842        /* fall through */
5843    case 0x0 ... 0x3:
5844    case 0x8 ... 0xc:
5845    case 0xe ... 0xf:
5846        /* 32-to-32 and 64-to-64 ops */
5847        switch (type) {
5848        case 0:
5849            if (!fp_access_check(s)) {
5850                return;
5851            }
5852            handle_fp_1src_single(s, opcode, rd, rn);
5853            break;
5854        case 1:
5855            if (!fp_access_check(s)) {
5856                return;
5857            }
5858            handle_fp_1src_double(s, opcode, rd, rn);
5859            break;
5860        case 3:
5861            if (!dc_isar_feature(aa64_fp16, s)) {
5862                unallocated_encoding(s);
5863                return;
5864            }
5865
5866            if (!fp_access_check(s)) {
5867                return;
5868            }
5869            handle_fp_1src_half(s, opcode, rd, rn);
5870            break;
5871        default:
5872            unallocated_encoding(s);
5873        }
5874        break;
5875
5876    default:
5877        unallocated_encoding(s);
5878        break;
5879    }
5880}
5881
5882/* Floating-point data-processing (2 source) - single precision */
5883static void handle_fp_2src_single(DisasContext *s, int opcode,
5884                                  int rd, int rn, int rm)
5885{
5886    TCGv_i32 tcg_op1;
5887    TCGv_i32 tcg_op2;
5888    TCGv_i32 tcg_res;
5889    TCGv_ptr fpst;
5890
5891    tcg_res = tcg_temp_new_i32();
5892    fpst = get_fpstatus_ptr(false);
5893    tcg_op1 = read_fp_sreg(s, rn);
5894    tcg_op2 = read_fp_sreg(s, rm);
5895
5896    switch (opcode) {
5897    case 0x0: /* FMUL */
5898        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
5899        break;
5900    case 0x1: /* FDIV */
5901        gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
5902        break;
5903    case 0x2: /* FADD */
5904        gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
5905        break;
5906    case 0x3: /* FSUB */
5907        gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
5908        break;
5909    case 0x4: /* FMAX */
5910        gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
5911        break;
5912    case 0x5: /* FMIN */
5913        gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
5914        break;
5915    case 0x6: /* FMAXNM */
5916        gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
5917        break;
5918    case 0x7: /* FMINNM */
5919        gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
5920        break;
5921    case 0x8: /* FNMUL */
5922        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
5923        gen_helper_vfp_negs(tcg_res, tcg_res);
5924        break;
        default:
            g_assert_not_reached();
5925    }
5926
5927    write_fp_sreg(s, rd, tcg_res);
5928
5929    tcg_temp_free_ptr(fpst);
5930    tcg_temp_free_i32(tcg_op1);
5931    tcg_temp_free_i32(tcg_op2);
5932    tcg_temp_free_i32(tcg_res);
5933}
5934
5935/* Floating-point data-processing (2 source) - double precision */
5936static void handle_fp_2src_double(DisasContext *s, int opcode,
5937                                  int rd, int rn, int rm)
5938{
5939    TCGv_i64 tcg_op1;
5940    TCGv_i64 tcg_op2;
5941    TCGv_i64 tcg_res;
5942    TCGv_ptr fpst;
5943
5944    tcg_res = tcg_temp_new_i64();
5945    fpst = get_fpstatus_ptr(false);
5946    tcg_op1 = read_fp_dreg(s, rn);
5947    tcg_op2 = read_fp_dreg(s, rm);
5948
5949    switch (opcode) {
5950    case 0x0: /* FMUL */
5951        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5952        break;
5953    case 0x1: /* FDIV */
5954        gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
5955        break;
5956    case 0x2: /* FADD */
5957        gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
5958        break;
5959    case 0x3: /* FSUB */
5960        gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
5961        break;
5962    case 0x4: /* FMAX */
5963        gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
5964        break;
5965    case 0x5: /* FMIN */
5966        gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
5967        break;
5968    case 0x6: /* FMAXNM */
5969        gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5970        break;
5971    case 0x7: /* FMINNM */
5972        gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5973        break;
5974    case 0x8: /* FNMUL */
5975        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5976        gen_helper_vfp_negd(tcg_res, tcg_res);
5977        break;
        default:
            g_assert_not_reached();
5978    }
5979
5980    write_fp_dreg(s, rd, tcg_res);
5981
5982    tcg_temp_free_ptr(fpst);
5983    tcg_temp_free_i64(tcg_op1);
5984    tcg_temp_free_i64(tcg_op2);
5985    tcg_temp_free_i64(tcg_res);
5986}
5987
5988/* Floating-point data-processing (2 source) - half precision */
5989static void handle_fp_2src_half(DisasContext *s, int opcode,
5990                                int rd, int rn, int rm)
5991{
5992    TCGv_i32 tcg_op1;
5993    TCGv_i32 tcg_op2;
5994    TCGv_i32 tcg_res;
5995    TCGv_ptr fpst;
5996
5997    tcg_res = tcg_temp_new_i32();
5998    fpst = get_fpstatus_ptr(true);
5999    tcg_op1 = read_fp_hreg(s, rn);
6000    tcg_op2 = read_fp_hreg(s, rm);
6001
6002    switch (opcode) {
6003    case 0x0: /* FMUL */
6004        gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6005        break;
6006    case 0x1: /* FDIV */
6007        gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
6008        break;
6009    case 0x2: /* FADD */
6010        gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6011        break;
6012    case 0x3: /* FSUB */
6013        gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
6014        break;
6015    case 0x4: /* FMAX */
6016        gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6017        break;
6018    case 0x5: /* FMIN */
6019        gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6020        break;
6021    case 0x6: /* FMAXNM */
6022        gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6023        break;
6024    case 0x7: /* FMINNM */
6025        gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6026        break;
6027    case 0x8: /* FNMUL */
6028        gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
6029        tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
6030        break;
6031    default:
6032        g_assert_not_reached();
6033    }
6034
6035    write_fp_sreg(s, rd, tcg_res);
6036
6037    tcg_temp_free_ptr(fpst);
6038    tcg_temp_free_i32(tcg_op1);
6039    tcg_temp_free_i32(tcg_op2);
6040    tcg_temp_free_i32(tcg_res);
6041}
6042
6043/* Floating point data-processing (2 source)
6044 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6045 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6046 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
6047 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6048 */
6049static void disas_fp_2src(DisasContext *s, uint32_t insn)
6050{
6051    int mos = extract32(insn, 29, 3);
6052    int type = extract32(insn, 22, 2);
6053    int rd = extract32(insn, 0, 5);
6054    int rn = extract32(insn, 5, 5);
6055    int rm = extract32(insn, 16, 5);
6056    int opcode = extract32(insn, 12, 4);
6057
6058    if (opcode > 8 || mos) {
6059        unallocated_encoding(s);
6060        return;
6061    }
6062
6063    switch (type) {
6064    case 0:
6065        if (!fp_access_check(s)) {
6066            return;
6067        }
6068        handle_fp_2src_single(s, opcode, rd, rn, rm);
6069        break;
6070    case 1:
6071        if (!fp_access_check(s)) {
6072            return;
6073        }
6074        handle_fp_2src_double(s, opcode, rd, rn, rm);
6075        break;
6076    case 3:
6077        if (!dc_isar_feature(aa64_fp16, s)) {
6078            unallocated_encoding(s);
6079            return;
6080        }
6081        if (!fp_access_check(s)) {
6082            return;
6083        }
6084        handle_fp_2src_half(s, opcode, rd, rn, rm);
6085        break;
6086    default:
6087        unallocated_encoding(s);
6088    }
6089}
6090
6091/* Floating-point data-processing (3 source) - single precision */
6092static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
6093                                  int rd, int rn, int rm, int ra)
6094{
6095    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6096    TCGv_i32 tcg_res = tcg_temp_new_i32();
6097    TCGv_ptr fpst = get_fpstatus_ptr(false);
6098
6099    tcg_op1 = read_fp_sreg(s, rn);
6100    tcg_op2 = read_fp_sreg(s, rm);
6101    tcg_op3 = read_fp_sreg(s, ra);
6102
6103    /* These are fused multiply-add, and must be done as one
6104     * floating point operation with no rounding between the
6105     * multiplication and addition steps.
6106     * NB that doing the negations here as separate steps is
6107     * correct: an input NaN should come out with its sign bit
6108     * flipped if it is a negated input.
6109     */
6110    if (o1) {
6111        gen_helper_vfp_negs(tcg_op3, tcg_op3);
6112    }
6113
6114    if (o0 != o1) {
6115        gen_helper_vfp_negs(tcg_op1, tcg_op1);
6116    }
6117
6118    gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6119
6120    write_fp_sreg(s, rd, tcg_res);
6121
6122    tcg_temp_free_ptr(fpst);
6123    tcg_temp_free_i32(tcg_op1);
6124    tcg_temp_free_i32(tcg_op2);
6125    tcg_temp_free_i32(tcg_op3);
6126    tcg_temp_free_i32(tcg_res);
6127}
6128
6129/* Floating-point data-processing (3 source) - double precision */
6130static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
6131                                  int rd, int rn, int rm, int ra)
6132{
6133    TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
6134    TCGv_i64 tcg_res = tcg_temp_new_i64();
6135    TCGv_ptr fpst = get_fpstatus_ptr(false);
6136
6137    tcg_op1 = read_fp_dreg(s, rn);
6138    tcg_op2 = read_fp_dreg(s, rm);
6139    tcg_op3 = read_fp_dreg(s, ra);
6140
6141    /* These are fused multiply-add, and must be done as one
6142     * floating point operation with no rounding between the
6143     * multiplication and addition steps.
6144     * NB that doing the negations here as separate steps is
6145     * correct: an input NaN should come out with its sign bit
6146     * flipped if it is a negated input.
6147     */
6148    if (o1) {
6149        gen_helper_vfp_negd(tcg_op3, tcg_op3);
6150    }
6151
6152    if (o0 != o1) {
6153        gen_helper_vfp_negd(tcg_op1, tcg_op1);
6154    }
6155
6156    gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6157
6158    write_fp_dreg(s, rd, tcg_res);
6159
6160    tcg_temp_free_ptr(fpst);
6161    tcg_temp_free_i64(tcg_op1);
6162    tcg_temp_free_i64(tcg_op2);
6163    tcg_temp_free_i64(tcg_op3);
6164    tcg_temp_free_i64(tcg_res);
6165}
6166
6167/* Floating-point data-processing (3 source) - half precision */
6168static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
6169                                int rd, int rn, int rm, int ra)
6170{
6171    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
6172    TCGv_i32 tcg_res = tcg_temp_new_i32();
6173    TCGv_ptr fpst = get_fpstatus_ptr(true);
6174
6175    tcg_op1 = read_fp_hreg(s, rn);
6176    tcg_op2 = read_fp_hreg(s, rm);
6177    tcg_op3 = read_fp_hreg(s, ra);
6178
6179    /* These are fused multiply-add, and must be done as one
6180     * floating point operation with no rounding between the
6181     * multiplication and addition steps.
6182     * NB that doing the negations here as separate steps is
6183     * correct: an input NaN should come out with its sign bit
6184     * flipped if it is a negated input.
6185     */
6186    if (o1) {
6187        tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
6188    }
6189
6190    if (o0 != o1) {
6191        tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
6192    }
6193
6194    gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
6195
6196    write_fp_sreg(s, rd, tcg_res);
6197
6198    tcg_temp_free_ptr(fpst);
6199    tcg_temp_free_i32(tcg_op1);
6200    tcg_temp_free_i32(tcg_op2);
6201    tcg_temp_free_i32(tcg_op3);
6202    tcg_temp_free_i32(tcg_res);
6203}
6204
6205/* Floating point data-processing (3 source)
6206 *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
6207 * +---+---+---+-----------+------+----+------+----+------+------+------+
6208 * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
6209 * +---+---+---+-----------+------+----+------+----+------+------+------+
6210 */
6211static void disas_fp_3src(DisasContext *s, uint32_t insn)
6212{
6213    int mos = extract32(insn, 29, 3);
6214    int type = extract32(insn, 22, 2);
6215    int rd = extract32(insn, 0, 5);
6216    int rn = extract32(insn, 5, 5);
6217    int ra = extract32(insn, 10, 5);
6218    int rm = extract32(insn, 16, 5);
6219    bool o0 = extract32(insn, 15, 1);
6220    bool o1 = extract32(insn, 21, 1);
6221
6222    if (mos) {
6223        unallocated_encoding(s);
6224        return;
6225    }
6226
6227    switch (type) {
6228    case 0:
6229        if (!fp_access_check(s)) {
6230            return;
6231        }
6232        handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
6233        break;
6234    case 1:
6235        if (!fp_access_check(s)) {
6236            return;
6237        }
6238        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
6239        break;
6240    case 3:
6241        if (!dc_isar_feature(aa64_fp16, s)) {
6242            unallocated_encoding(s);
6243            return;
6244        }
6245        if (!fp_access_check(s)) {
6246            return;
6247        }
6248        handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
6249        break;
6250    default:
6251        unallocated_encoding(s);
6252    }
6253}
6254
6255/* Floating point immediate
6256 *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
6257 * +---+---+---+-----------+------+---+------------+-------+------+------+
6258 * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
6259 * +---+---+---+-----------+------+---+------------+-------+------+------+
6260 */
6261static void disas_fp_imm(DisasContext *s, uint32_t insn)
6262{
6263    int rd = extract32(insn, 0, 5);
6264    int imm5 = extract32(insn, 5, 5);
6265    int imm8 = extract32(insn, 13, 8);
6266    int type = extract32(insn, 22, 2);
6267    int mos = extract32(insn, 29, 3);
6268    uint64_t imm;
6269    TCGv_i64 tcg_res;
6270    TCGMemOp sz;
6271
6272    if (mos || imm5) {
6273        unallocated_encoding(s);
6274        return;
6275    }
6276
6277    switch (type) {
6278    case 0:
6279        sz = MO_32;
6280        break;
6281    case 1:
6282        sz = MO_64;
6283        break;
6284    case 3:
6285        sz = MO_16;
6286        if (dc_isar_feature(aa64_fp16, s)) {
6287            break;
6288        }
6289        /* fallthru */
6290    default:
6291        unallocated_encoding(s);
6292        return;
6293    }
6294
6295    if (!fp_access_check(s)) {
6296        return;
6297    }
6298
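        /* Expand imm8 to a full FP constant: a sign bit, an exponent
         * derived from the inverted-and-replicated top immediate bit,
         * and a 4-bit fraction (VFPExpandImm in the ARM ARM).
         */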
6299    imm = vfp_expand_imm(sz, imm8);
6300
6301    tcg_res = tcg_const_i64(imm);
6302    write_fp_dreg(s, rd, tcg_res);
6303    tcg_temp_free_i64(tcg_res);
6304}
6305
6306/* Handle floating point <=> fixed point conversions. Note that we can
6307 * also deal with fp <=> integer conversions as a special case (scale == 64).
6308 * OPTME: consider handling that special case specially, or at least skipping
6309 * the call to scalbn in the helpers for zero shifts.
6310 */
6311static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
6312                           bool itof, int rmode, int scale, int sf, int type)
6313{
6314    bool is_signed = !(opcode & 1);
6315    TCGv_ptr tcg_fpstatus;
6316    TCGv_i32 tcg_shift, tcg_single;
6317    TCGv_i64 tcg_double;
6318
6319    tcg_fpstatus = get_fpstatus_ptr(type == 3);
6320
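        /* For the plain fp <-> integer forms the caller passes
         * scale == 64, so the shift handed to the helpers is zero.
         */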
6321    tcg_shift = tcg_const_i32(64 - scale);
6322
6323    if (itof) {
6324        TCGv_i64 tcg_int = cpu_reg(s, rn);
6325        if (!sf) {
6326            TCGv_i64 tcg_extend = new_tmp_a64(s);
6327
6328            if (is_signed) {
6329                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
6330            } else {
6331                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
6332            }
6333
6334            tcg_int = tcg_extend;
6335        }
6336
6337        switch (type) {
6338        case 1: /* float64 */
6339            tcg_double = tcg_temp_new_i64();
6340            if (is_signed) {
6341                gen_helper_vfp_sqtod(tcg_double, tcg_int,
6342                                     tcg_shift, tcg_fpstatus);
6343            } else {
6344                gen_helper_vfp_uqtod(tcg_double, tcg_int,
6345                                     tcg_shift, tcg_fpstatus);
6346            }
6347            write_fp_dreg(s, rd, tcg_double);
6348            tcg_temp_free_i64(tcg_double);
6349            break;
6350
6351        case 0: /* float32 */
6352            tcg_single = tcg_temp_new_i32();
6353            if (is_signed) {
6354                gen_helper_vfp_sqtos(tcg_single, tcg_int,
6355                                     tcg_shift, tcg_fpstatus);
6356            } else {
6357                gen_helper_vfp_uqtos(tcg_single, tcg_int,
6358                                     tcg_shift, tcg_fpstatus);
6359            }
6360            write_fp_sreg(s, rd, tcg_single);
6361            tcg_temp_free_i32(tcg_single);
6362            break;
6363
6364        case 3: /* float16 */
6365            tcg_single = tcg_temp_new_i32();
6366            if (is_signed) {
6367                gen_helper_vfp_sqtoh(tcg_single, tcg_int,
6368                                     tcg_shift, tcg_fpstatus);
6369            } else {
6370                gen_helper_vfp_uqtoh(tcg_single, tcg_int,
6371                                     tcg_shift, tcg_fpstatus);
6372            }
6373            write_fp_sreg(s, rd, tcg_single);
6374            tcg_temp_free_i32(tcg_single);
6375            break;
6376
6377        default:
6378            g_assert_not_reached();
6379        }
6380    } else {
6381        TCGv_i64 tcg_int = cpu_reg(s, rd);
6382        TCGv_i32 tcg_rmode;
6383
6384        if (extract32(opcode, 2, 1)) {
6385            /* There are too many rounding modes to all fit into rmode,
6386             * so FCVTA[US] is a special case.
6387             */
6388            rmode = FPROUNDING_TIEAWAY;
6389        }
6390
6391        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6392
6393        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
6394
6395        switch (type) {
6396        case 1: /* float64 */
6397            tcg_double = read_fp_dreg(s, rn);
6398            if (is_signed) {
6399                if (!sf) {
6400                    gen_helper_vfp_tosld(tcg_int, tcg_double,
6401                                         tcg_shift, tcg_fpstatus);
6402                } else {
6403                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
6404                                         tcg_shift, tcg_fpstatus);
6405                }
6406            } else {
6407                if (!sf) {
6408                    gen_helper_vfp_tould(tcg_int, tcg_double,
6409                                         tcg_shift, tcg_fpstatus);
6410                } else {
6411                    gen_helper_vfp_touqd(tcg_int, tcg_double,
6412                                         tcg_shift, tcg_fpstatus);
6413                }
6414            }
6415            if (!sf) {
6416                tcg_gen_ext32u_i64(tcg_int, tcg_int);
6417            }
6418            tcg_temp_free_i64(tcg_double);
6419            break;
6420
6421        case 0: /* float32 */
6422            tcg_single = read_fp_sreg(s, rn);
6423            if (sf) {
6424                if (is_signed) {
6425                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
6426                                         tcg_shift, tcg_fpstatus);
6427                } else {
6428                    gen_helper_vfp_touqs(tcg_int, tcg_single,
6429                                         tcg_shift, tcg_fpstatus);
6430                }
6431            } else {
6432                TCGv_i32 tcg_dest = tcg_temp_new_i32();
6433                if (is_signed) {
6434                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
6435                                         tcg_shift, tcg_fpstatus);
6436                } else {
6437                    gen_helper_vfp_touls(tcg_dest, tcg_single,
6438                                         tcg_shift, tcg_fpstatus);
6439                }
6440                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6441                tcg_temp_free_i32(tcg_dest);
6442            }
6443            tcg_temp_free_i32(tcg_single);
6444            break;
6445
6446        case 3: /* float16 */
6447            tcg_single = read_fp_sreg(s, rn);
6448            if (sf) {
6449                if (is_signed) {
6450                    gen_helper_vfp_tosqh(tcg_int, tcg_single,
6451                                         tcg_shift, tcg_fpstatus);
6452                } else {
6453                    gen_helper_vfp_touqh(tcg_int, tcg_single,
6454                                         tcg_shift, tcg_fpstatus);
6455                }
6456            } else {
6457                TCGv_i32 tcg_dest = tcg_temp_new_i32();
6458                if (is_signed) {
6459                    gen_helper_vfp_toslh(tcg_dest, tcg_single,
6460                                         tcg_shift, tcg_fpstatus);
6461                } else {
6462                    gen_helper_vfp_toulh(tcg_dest, tcg_single,
6463                                         tcg_shift, tcg_fpstatus);
6464                }
6465                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
6466                tcg_temp_free_i32(tcg_dest);
6467            }
6468            tcg_temp_free_i32(tcg_single);
6469            break;
6470
6471        default:
6472            g_assert_not_reached();
6473        }
6474
6475        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
6476        tcg_temp_free_i32(tcg_rmode);
6477    }
6478
6479    tcg_temp_free_ptr(tcg_fpstatus);
6480    tcg_temp_free_i32(tcg_shift);
6481}
6482
6483/* Floating point <-> fixed point conversions
6484 *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
6485 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6486 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
6487 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
6488 */
6489static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
6490{
6491    int rd = extract32(insn, 0, 5);
6492    int rn = extract32(insn, 5, 5);
6493    int scale = extract32(insn, 10, 6);
6494    int opcode = extract32(insn, 16, 3);
6495    int rmode = extract32(insn, 19, 2);
6496    int type = extract32(insn, 22, 2);
6497    bool sbit = extract32(insn, 29, 1);
6498    bool sf = extract32(insn, 31, 1);
6499    bool itof;
6500
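        /* scale holds 64 - fbits; the 32-bit forms allow at most 32
         * fractional bits, so scale must be >= 32 when sf is clear.
         */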
6501    if (sbit || (!sf && scale < 32)) {
6502        unallocated_encoding(s);
6503        return;
6504    }
6505
6506    switch (type) {
6507    case 0: /* float32 */
6508    case 1: /* float64 */
6509        break;
6510    case 3: /* float16 */
6511        if (dc_isar_feature(aa64_fp16, s)) {
6512            break;
6513        }
6514        /* fallthru */
6515    default:
6516        unallocated_encoding(s);
6517        return;
6518    }
6519
6520    switch ((rmode << 3) | opcode) {
6521    case 0x2: /* SCVTF */
6522    case 0x3: /* UCVTF */
6523        itof = true;
6524        break;
6525    case 0x18: /* FCVTZS */
6526    case 0x19: /* FCVTZU */
6527        itof = false;
6528        break;
6529    default:
6530        unallocated_encoding(s);
6531        return;
6532    }
6533
6534    if (!fp_access_check(s)) {
6535        return;
6536    }
6537
6538    handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
6539}
6540
6541static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
6542{
6543    /* FMOV: gpr to or from float, double, or top half of quad fp reg,
6544     * without conversion.
6545     */
6546
6547    if (itof) {
6548        TCGv_i64 tcg_rn = cpu_reg(s, rn);
6549        TCGv_i64 tmp;
6550
6551        switch (type) {
6552        case 0:
6553            /* 32 bit */
6554            tmp = tcg_temp_new_i64();
6555            tcg_gen_ext32u_i64(tmp, tcg_rn);
6556            write_fp_dreg(s, rd, tmp);
6557            tcg_temp_free_i64(tmp);
6558            break;
6559        case 1:
6560            /* 64 bit */
6561            write_fp_dreg(s, rd, tcg_rn);
6562            break;
6563        case 2:
6564            /* 64 bit to top half. */
6565            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
6566            clear_vec_high(s, true, rd);
6567            break;
6568        case 3:
6569            /* 16 bit */
6570            tmp = tcg_temp_new_i64();
6571            tcg_gen_ext16u_i64(tmp, tcg_rn);
6572            write_fp_dreg(s, rd, tmp);
6573            tcg_temp_free_i64(tmp);
6574            break;
6575        default:
6576            g_assert_not_reached();
6577        }
6578    } else {
6579        TCGv_i64 tcg_rd = cpu_reg(s, rd);
6580
6581        switch (type) {
6582        case 0:
6583            /* 32 bit */
6584            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
6585            break;
6586        case 1:
6587            /* 64 bit */
6588            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
6589            break;
6590        case 2:
6591            /* 64 bits from top half */
6592            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
6593            break;
6594        case 3:
6595            /* 16 bit */
6596            tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
6597            break;
6598        default:
6599            g_assert_not_reached();
6600        }
6601    }
6602}
6603
6604static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
6605{
6606    TCGv_i64 t = read_fp_dreg(s, rn);
6607    TCGv_ptr fpstatus = get_fpstatus_ptr(false);
6608
6609    gen_helper_fjcvtzs(t, t, fpstatus);
6610
6611    tcg_temp_free_ptr(fpstatus);
6612
6613    tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
6614    tcg_gen_extrh_i64_i32(cpu_ZF, t);
6615    tcg_gen_movi_i32(cpu_CF, 0);
6616    tcg_gen_movi_i32(cpu_NF, 0);
6617    tcg_gen_movi_i32(cpu_VF, 0);
6618
6619    tcg_temp_free_i64(t);
6620}
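
    /*
     * Note on the flag handling above: the helper packs the 32-bit
     * conversion result into the low half of its 64-bit return value
     * and the value for cpu_ZF into the high half (QEMU represents
     * the Z flag as cpu_ZF == 0).  FJCVTZS architecturally sets Z
     * when the conversion was exact and in range, and clears N, C
     * and V, hence the three explicit moves of zero.
     */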
6621
6622/* Floating point <-> integer conversions
6623 *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
6624 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6625 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
6626 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
6627 */
6628static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
6629{
6630    int rd = extract32(insn, 0, 5);
6631    int rn = extract32(insn, 5, 5);
6632    int opcode = extract32(insn, 16, 3);
6633    int rmode = extract32(insn, 19, 2);
6634    int type = extract32(insn, 22, 2);
6635    bool sbit = extract32(insn, 29, 1);
6636    bool sf = extract32(insn, 31, 1);
6637    bool itof = false;
6638
6639    if (sbit) {
6640        goto do_unallocated;
6641    }
6642
6643    switch (opcode) {
6644    case 2: /* SCVTF */
6645    case 3: /* UCVTF */
6646        itof = true;
6647        /* fallthru */
6648    case 4: /* FCVTAS */
6649    case 5: /* FCVTAU */
6650        if (rmode != 0) {
6651            goto do_unallocated;
6652        }
6653        /* fallthru */
6654    case 0: /* FCVT[NPMZ]S */
6655    case 1: /* FCVT[NPMZ]U */
6656        switch (type) {
6657        case 0: /* float32 */
6658        case 1: /* float64 */
6659            break;
6660        case 3: /* float16 */
6661            if (!dc_isar_feature(aa64_fp16, s)) {
6662                goto do_unallocated;
6663            }
6664            break;
6665        default:
6666            goto do_unallocated;
6667        }
6668        if (!fp_access_check(s)) {
6669            return;
6670        }
6671        handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
6672        break;
6673
6674    default:
6675        switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
6676        case 0b01100110: /* FMOV half <-> 32-bit int */
6677        case 0b01100111:
6678        case 0b11100110: /* FMOV half <-> 64-bit int */
6679        case 0b11100111:
6680            if (!dc_isar_feature(aa64_fp16, s)) {
6681                goto do_unallocated;
6682            }
6683            /* fallthru */
6684        case 0b00000110: /* FMOV 32-bit */
6685        case 0b00000111:
6686        case 0b10100110: /* FMOV 64-bit */
6687        case 0b10100111:
6688        case 0b11001110: /* FMOV top half of 128-bit */
6689        case 0b11001111:
6690            if (!fp_access_check(s)) {
6691                return;
6692            }
6693            itof = opcode & 1;
6694            handle_fmov(s, rd, rn, type, itof);
6695            break;
6696
6697        case 0b00111110: /* FJCVTZS */
6698            if (!dc_isar_feature(aa64_jscvt, s)) {
6699                goto do_unallocated;
6700            } else if (fp_access_check(s)) {
6701                handle_fjcvtzs(s, rd, rn);
6702            }
6703            break;
6704
6705        default:
6706        do_unallocated:
6707            unallocated_encoding(s);
6708            return;
6709        }
6710        break;
6711    }
6712}
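
    /*
     * Worked example of the packed key in the default case above:
     * FMOV x0, v1.d[1] has sf = 1, type = 2, rmode = 1, opcode = 6,
     * so sf << 7 | type << 5 | rmode << 3 | opcode = 0b11001110,
     * the "top half of 128-bit" case; FMOV x0, d1 (sf = 1, type = 1,
     * rmode = 0, opcode = 6) packs to 0b10100110.
     */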
6713
6714/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
6715 *   31  30  29 28     25 24                          0
6716 * +---+---+---+---------+-----------------------------+
6717 * |   | 0 |   | 1 1 1 1 |                             |
6718 * +---+---+---+---------+-----------------------------+
6719 */
6720static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
6721{
6722    if (extract32(insn, 24, 1)) {
6723        /* Floating point data-processing (3 source) */
6724        disas_fp_3src(s, insn);
6725    } else if (extract32(insn, 21, 1) == 0) {
6726        /* Floating point to fixed point conversions */
6727        disas_fp_fixed_conv(s, insn);
6728    } else {
6729        switch (extract32(insn, 10, 2)) {
6730        case 1:
6731            /* Floating point conditional compare */
6732            disas_fp_ccomp(s, insn);
6733            break;
6734        case 2:
6735            /* Floating point data-processing (2 source) */
6736            disas_fp_2src(s, insn);
6737            break;
6738        case 3:
6739            /* Floating point conditional select */
6740            disas_fp_csel(s, insn);
6741            break;
6742        case 0:
6743            switch (ctz32(extract32(insn, 12, 4))) {
6744            case 0: /* [15:12] == xxx1 */
6745                /* Floating point immediate */
6746                disas_fp_imm(s, insn);
6747                break;
6748            case 1: /* [15:12] == xx10 */
6749                /* Floating point compare */
6750                disas_fp_compare(s, insn);
6751                break;
6752            case 2: /* [15:12] == x100 */
6753                /* Floating point data-processing (1 source) */
6754                disas_fp_1src(s, insn);
6755                break;
6756            case 3: /* [15:12] == 1000 */
6757                unallocated_encoding(s);
6758                break;
6759            default: /* [15:12] == 0000 */
6760                /* Floating point <-> integer conversions */
6761                disas_fp_int_conv(s, insn);
6762                break;
6763            }
6764            break;
6765        }
6766    }
6767}
6768
6769static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
6770                     int pos)
6771{
6772    /* Extract 64 bits from the middle of two concatenated 64 bit
6773     * vector register slices left:right. The extracted bits start
6774     * at 'pos' bits into the right (least significant) side.
6775     * We return the result in tcg_right, and guarantee not to
6776     * trash tcg_left.
6777     */
6778    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
6779    assert(pos > 0 && pos < 64);
6780
6781    tcg_gen_shri_i64(tcg_right, tcg_right, pos);
6782    tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
6783    tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
6784
6785    tcg_temp_free_i64(tcg_tmp);
6786}
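
    /*
     * For example, with pos = 24 the sequence above computes
     *   tcg_right = (right >> 24) | (left << 40)
     * i.e. the 64 bits that start 24 bits into the left:right pair.
     */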
6787
6788/* EXT
6789 *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
6790 * +---+---+-------------+-----+---+------+---+------+---+------+------+
6791 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
6792 * +---+---+-------------+-----+---+------+---+------+---+------+------+
6793 */
6794static void disas_simd_ext(DisasContext *s, uint32_t insn)
6795{
6796    int is_q = extract32(insn, 30, 1);
6797    int op2 = extract32(insn, 22, 2);
6798    int imm4 = extract32(insn, 11, 4);
6799    int rm = extract32(insn, 16, 5);
6800    int rn = extract32(insn, 5, 5);
6801    int rd = extract32(insn, 0, 5);
6802    int pos = imm4 << 3;
6803    TCGv_i64 tcg_resl, tcg_resh;
6804
6805    if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
6806        unallocated_encoding(s);
6807        return;
6808    }
6809
6810    if (!fp_access_check(s)) {
6811        return;
6812    }
6813
6814    tcg_resh = tcg_temp_new_i64();
6815    tcg_resl = tcg_temp_new_i64();
6816
6817    /* Vd gets bits starting at pos bits into Vm:Vn. This is
6818     * either extracting 128 bits from a 128:128 concatenation, or
6819     * extracting 64 bits from a 64:64 concatenation.
6820     */
6821    if (!is_q) {
6822        read_vec_element(s, tcg_resl, rn, 0, MO_64);
6823        if (pos != 0) {
6824            read_vec_element(s, tcg_resh, rm, 0, MO_64);
6825            do_ext64(s, tcg_resh, tcg_resl, pos);
6826        }
6827        tcg_gen_movi_i64(tcg_resh, 0);
6828    } else {
6829        TCGv_i64 tcg_hh;
6830        typedef struct {
6831            int reg;
6832            int elt;
6833        } EltPosns;
6834        EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
6835        EltPosns *elt = eltposns;
6836
6837        if (pos >= 64) {
6838            elt++;
6839            pos -= 64;
6840        }
6841
6842        read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
6843        elt++;
6844        read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
6845        elt++;
6846        if (pos != 0) {
6847            do_ext64(s, tcg_resh, tcg_resl, pos);
6848            tcg_hh = tcg_temp_new_i64();
6849            read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
6850            do_ext64(s, tcg_hh, tcg_resh, pos);
6851            tcg_temp_free_i64(tcg_hh);
6852        }
6853    }
6854
6855    write_vec_element(s, tcg_resl, rd, 0, MO_64);
6856    tcg_temp_free_i64(tcg_resl);
6857    write_vec_element(s, tcg_resh, rd, 1, MO_64);
6858    tcg_temp_free_i64(tcg_resh);
6859}
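
    /*
     * Worked example (illustrative): EXT v0.16b, v1.16b, v2.16b, #11
     * has imm4 = 11, so pos = 88.  Since pos >= 64 the walk starts
     * one slice in, at {rn, 1}, with pos reduced to 24, giving
     *   resl = Vn[1] >> 24 | Vm[0] << 40
     *   resh = Vm[0] >> 24 | Vm[1] << 40
     * i.e. bytes 11..26 of the Vm:Vn concatenation.
     */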
6860
6861/* TBL/TBX
6862 *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
6863 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
6864 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
6865 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
6866 */
6867static void disas_simd_tb(DisasContext *s, uint32_t insn)
6868{
6869    int op2 = extract32(insn, 22, 2);
6870    int is_q = extract32(insn, 30, 1);
6871    int rm = extract32(insn, 16, 5);
6872    int rn = extract32(insn, 5, 5);
6873    int rd = extract32(insn, 0, 5);
6874    int is_tblx = extract32(insn, 12, 1);
6875    int len = extract32(insn, 13, 2);
6876    TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
6877    TCGv_i32 tcg_regno, tcg_numregs;
6878
6879    if (op2 != 0) {
6880        unallocated_encoding(s);
6881        return;
6882    }
6883
6884    if (!fp_access_check(s)) {
6885        return;
6886    }
6887
6888    /* This does a table lookup: for every byte element in the input
6889     * we index into a table formed from up to four vector registers,
6890     * and then the output is the result of the lookups. Our helper
6891     * function does the lookup operation for a single 64 bit part of
6892     * the input.
6893     */
6894    tcg_resl = tcg_temp_new_i64();
6895    tcg_resh = tcg_temp_new_i64();
6896
6897    if (is_tblx) {
6898        read_vec_element(s, tcg_resl, rd, 0, MO_64);
6899    } else {
6900        tcg_gen_movi_i64(tcg_resl, 0);
6901    }
6902    if (is_tblx && is_q) {
6903        read_vec_element(s, tcg_resh, rd, 1, MO_64);
6904    } else {
6905        tcg_gen_movi_i64(tcg_resh, 0);
6906    }
6907
6908    tcg_idx = tcg_temp_new_i64();
6909    tcg_regno = tcg_const_i32(rn);
6910    tcg_numregs = tcg_const_i32(len + 1);
6911    read_vec_element(s, tcg_idx, rm, 0, MO_64);
6912    gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
6913                        tcg_regno, tcg_numregs);
6914    if (is_q) {
6915        read_vec_element(s, tcg_idx, rm, 1, MO_64);
6916        gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
6917                            tcg_regno, tcg_numregs);
6918    }
6919    tcg_temp_free_i64(tcg_idx);
6920    tcg_temp_free_i32(tcg_regno);
6921    tcg_temp_free_i32(tcg_numregs);
6922
6923    write_vec_element(s, tcg_resl, rd, 0, MO_64);
6924    tcg_temp_free_i64(tcg_resl);
6925    write_vec_element(s, tcg_resh, rd, 1, MO_64);
6926    tcg_temp_free_i64(tcg_resh);
6927}
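
    /*
     * Behavioural note: an index beyond the 16 * (len + 1) valid
     * table bytes produces zero for TBL but leaves the destination
     * byte unchanged for TBX, which is why the TBX path above seeds
     * tcg_resl/tcg_resh from Rd before invoking the helper.
     */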
6928
6929/* ZIP/UZP/TRN
6930 *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
6931 * +---+---+-------------+------+---+------+---+------------------+------+
6932 * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
6933 * +---+---+-------------+------+---+------+---+------------------+------+
6934 */
6935static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
6936{
6937    int rd = extract32(insn, 0, 5);
6938    int rn = extract32(insn, 5, 5);
6939    int rm = extract32(insn, 16, 5);
6940    int size = extract32(insn, 22, 2);
6941    /* opc field bits [1:0] indicate ZIP/UZP/TRN;
6942     * bit 2 indicates 1 vs 2 variant of the insn.
6943     */
6944    int opcode = extract32(insn, 12, 2);
6945    bool part = extract32(insn, 14, 1);
6946    bool is_q = extract32(insn, 30, 1);
6947    int esize = 8 << size;
6948    int i, ofs;
6949    int datasize = is_q ? 128 : 64;
6950    int elements = datasize / esize;
6951    TCGv_i64 tcg_res, tcg_resl, tcg_resh;
6952
6953    if (opcode == 0 || (size == 3 && !is_q)) {
6954        unallocated_encoding(s);
6955        return;
6956    }
6957
6958    if (!fp_access_check(s)) {
6959        return;
6960    }
6961
6962    tcg_resl = tcg_const_i64(0);
6963    tcg_resh = tcg_const_i64(0);
6964    tcg_res = tcg_temp_new_i64();
6965
6966    for (i = 0; i < elements; i++) {
6967        switch (opcode) {
6968        case 1: /* UZP1/2 */
6969        {
6970            int midpoint = elements / 2;
6971            if (i < midpoint) {
6972                read_vec_element(s, tcg_res, rn, 2 * i + part, size);
6973            } else {
6974                read_vec_element(s, tcg_res, rm,
6975                                 2 * (i - midpoint) + part, size);
6976            }
6977            break;
6978        }
6979        case 2: /* TRN1/2 */
6980            if (i & 1) {
6981                read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
6982            } else {
6983                read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
6984            }
6985            break;
6986        case 3: /* ZIP1/2 */
6987        {
6988            int base = part * elements / 2;
6989            if (i & 1) {
6990                read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
6991            } else {
6992                read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
6993            }
6994            break;
6995        }
6996        default:
6997            g_assert_not_reached();
6998        }
6999
7000        ofs = i * esize;
7001        if (ofs < 64) {
7002            tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
7003            tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
7004        } else {
7005            tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
7006            tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
7007        }
7008    }
7009
7010    tcg_temp_free_i64(tcg_res);
7011
7012    write_vec_element(s, tcg_resl, rd, 0, MO_64);
7013    tcg_temp_free_i64(tcg_resl);
7014    write_vec_element(s, tcg_resh, rd, 1, MO_64);
7015    tcg_temp_free_i64(tcg_resh);
7016}
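
    /*
     * Worked example of the element selection: ZIP1 v0.8b, v1.8b, v2.8b
     * (part = 0, elements = 8) assembles
     *   Vd[0] = Vn[0], Vd[1] = Vm[0], Vd[2] = Vn[1], Vd[3] = Vm[1], ...
     * while ZIP2 (part = 1) uses base = 4 and interleaves the high
     * halves of the two sources instead.
     */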
7017
7018/*
7019 * do_reduction_op helper
7020 *
7021 * This mirrors the Reduce() pseudocode in the ARM ARM. It is
7022 * important for correct NaN propagation that we do these
7023 * operations in exactly the order specified by the pseudocode.
7024 *
7025 * This is a recursive function; TCG temps should be freed by the
7026 * calling function once it is done with the values.
7027 */
7028static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
7029                                int esize, int size, int vmap, TCGv_ptr fpst)
7030{
7031    if (esize == size) {
7032        int element;
7033        TCGMemOp msize = esize == 16 ? MO_16 : MO_32;
7034        TCGv_i32 tcg_elem;
7035
7036        /* We should have one register left here */
7037        assert(ctpop8(vmap) == 1);
7038        element = ctz32(vmap);
7039        assert(element < 8);
7040
7041        tcg_elem = tcg_temp_new_i32();
7042        read_vec_element_i32(s, tcg_elem, rn, element, msize);
7043        return tcg_elem;
7044    } else {
7045        int bits = size / 2;
7046        int shift = ctpop8(vmap) / 2;
7047        int vmap_lo = (vmap >> shift) & vmap;
7048        int vmap_hi = (vmap & ~vmap_lo);
7049        TCGv_i32 tcg_hi, tcg_lo, tcg_res;
7050
7051        tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
7052        tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
7053        tcg_res = tcg_temp_new_i32();
7054
7055        switch (fpopcode) {
7056        case 0x0c: /* fmaxnmv half-precision */
7057            gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7058            break;
7059        case 0x0f: /* fmaxv half-precision */
7060            gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
7061            break;
7062        case 0x1c: /* fminnmv half-precision */
7063            gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
7064            break;
7065        case 0x1f: /* fminv half-precision */
7066            gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
7067            break;
7068        case 0x2c: /* fmaxnmv */
7069            gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
7070            break;
7071        case 0x2f: /* fmaxv */
7072            gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
7073            break;
7074        case 0x3c: /* fminnmv */
7075            gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
7076            break;
7077        case 0x3f: /* fminv */
7078            gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
7079            break;
7080        default:
7081            g_assert_not_reached();
7082        }
7083
7084        tcg_temp_free_i32(tcg_hi);
7085        tcg_temp_free_i32(tcg_lo);
7086        return tcg_res;
7087    }
7088}
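
    /*
     * Illustration of the vmap split: a 4-element reduction starts
     * with vmap = 0b1111, which divides into vmap_lo = 0b0011 and
     * vmap_hi = 0b1100; each half splits again into single elements,
     * so the tree evaluated is
     *   op(op(elt0, elt1), op(elt2, elt3))
     * matching the pairwise order of the Reduce() pseudocode.
     */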
7089
7090/* AdvSIMD across lanes
7091 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7092 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7093 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7094 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
7095 */
7096static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
7097{
7098    int rd = extract32(insn, 0, 5);
7099    int rn = extract32(insn, 5, 5);
7100    int size = extract32(insn, 22, 2);
7101    int opcode = extract32(insn, 12, 5);
7102    bool is_q = extract32(insn, 30, 1);
7103    bool is_u = extract32(insn, 29, 1);
7104    bool is_fp = false;
7105    bool is_min = false;
7106    int esize;
7107    int elements;
7108    int i;
7109    TCGv_i64 tcg_res, tcg_elt;
7110
7111    switch (opcode) {
7112    case 0x1b: /* ADDV */
7113        if (is_u) {
7114            unallocated_encoding(s);
7115            return;
7116        }
7117        /* fall through */
7118    case 0x3: /* SADDLV, UADDLV */
7119    case 0xa: /* SMAXV, UMAXV */
7120    case 0x1a: /* SMINV, UMINV */
7121        if (size == 3 || (size == 2 && !is_q)) {
7122            unallocated_encoding(s);
7123            return;
7124        }
7125        break;
7126    case 0xc: /* FMAXNMV, FMINNMV */
7127    case 0xf: /* FMAXV, FMINV */
7128        /* Bit 1 of the size field encodes min vs max, and the actual
7129         * size depends on the encoding of the U bit. If U is not set
7130         * (and FP16 is enabled) then we use half-precision float instead
7131         * of single precision.
7132         */
7133        is_min = extract32(size, 1, 1);
7134        is_fp = true;
7135        if (!is_u && dc_isar_feature(aa64_fp16, s)) {
7136            size = 1;
7137        } else if (!is_u || !is_q || extract32(size, 0, 1)) {
7138            unallocated_encoding(s);
7139            return;
7140        } else {
7141            size = 2;
7142        }
7143        break;
7144    default:
7145        unallocated_encoding(s);
7146        return;
7147    }
7148
7149    if (!fp_access_check(s)) {
7150        return;
7151    }
7152
7153    esize = 8 << size;
7154    elements = (is_q ? 128 : 64) / esize;
7155
7156    tcg_res = tcg_temp_new_i64();
7157    tcg_elt = tcg_temp_new_i64();
7158
7159    /* These instructions operate across all lanes of a vector
7160     * to produce a single result. We can guarantee that a 64
7161     * bit intermediate is sufficient:
7162     *  + for [US]ADDLV the maximum element size is 32 bits, and
7163     *    the result type is 64 bits
7164     *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
7165     *    same as the element size, which is 32 bits at most
7166     * For the integer operations we can choose to work at 64
7167     * or 32 bits and truncate at the end; for simplicity
7168     * we use 64 bits always. The floating point
7169     * ops do require 32 bit intermediates, though.
7170     */
7171    if (!is_fp) {
7172        read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
7173
7174        for (i = 1; i < elements; i++) {
7175            read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
7176
7177            switch (opcode) {
7178            case 0x03: /* SADDLV / UADDLV */
7179            case 0x1b: /* ADDV */
7180                tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
7181                break;
7182            case 0x0a: /* SMAXV / UMAXV */
7183                if (is_u) {
7184                    tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
7185                } else {
7186                    tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
7187                }
7188                break;
7189            case 0x1a: /* SMINV / UMINV */
7190                if (is_u) {
7191                    tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
7192                } else {
7193                    tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
7194                }
7195                break;
7196            default:
7197                g_assert_not_reached();
7198            }
7199
7200        }
7201    } else {
7202        /* Floating point vector reduction ops which work across 32
7203         * bit (single) or 16 bit (half-precision) intermediates.
7204         * Note that correct NaN propagation requires that we do these
7205         * operations in exactly the order specified by the pseudocode.
7206         */
7207        TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
7208        int fpopcode = opcode | is_min << 4 | is_u << 5;
7209        int vmap = (1 << elements) - 1;
7210        TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
7211                                             (is_q ? 128 : 64), vmap, fpst);
7212        tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
7213        tcg_temp_free_i32(tcg_res32);
7214        tcg_temp_free_ptr(fpst);
7215    }
7216
7217    tcg_temp_free_i64(tcg_elt);
7218
7219    /* Now truncate the result to the width required for the final output */
7220    if (opcode == 0x03) {
7221        /* SADDLV, UADDLV: result is 2*esize */
7222        size++;
7223    }
7224
7225    switch (size) {
7226    case 0:
7227        tcg_gen_ext8u_i64(tcg_res, tcg_res);
7228        break;
7229    case 1:
7230        tcg_gen_ext16u_i64(tcg_res, tcg_res);
7231        break;
7232    case 2:
7233        tcg_gen_ext32u_i64(tcg_res, tcg_res);
7234        break;
7235    case 3:
7236        break;
7237    default:
7238        g_assert_not_reached();
7239    }
7240
7241    write_fp_dreg(s, rd, tcg_res);
7242    tcg_temp_free_i64(tcg_res);
7243}
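
    /*
     * Truncation example: SADDLV h0, v1.8b sums eight signed bytes
     * into a 16-bit total, so size is bumped from 0 to 1 above and
     * the 64-bit intermediate is truncated with ext16u before being
     * written out.
     */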
7244
7245/* DUP (Element, Vector)
7246 *
7247 *  31  30   29              21 20    16 15        10  9    5 4    0
7248 * +---+---+-------------------+--------+-------------+------+------+
7249 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7250 * +---+---+-------------------+--------+-------------+------+------+
7251 *
7252 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7253 */
7254static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
7255                             int imm5)
7256{
7257    int size = ctz32(imm5);
7258    int index = imm5 >> (size + 1);
7259
7260    if (size > 3 || (size == 3 && !is_q)) {
7261        unallocated_encoding(s);
7262        return;
7263    }
7264
7265    if (!fp_access_check(s)) {
7266        return;
7267    }
7268
7269    tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
7270                         vec_reg_offset(s, rn, index, size),
7271                         is_q ? 16 : 8, vec_full_reg_size(s));
7272}
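
    /*
     * imm5 decode example: DUP v0.4h, v1.h[2] encodes imm5 = 0b01010,
     * so size = ctz32(imm5) = 1 (16-bit elements) and
     * index = imm5 >> (size + 1) = 2.
     */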
7273
7274/* DUP (element, scalar)
7275 *  31                   21 20    16 15        10  9    5 4    0
7276 * +-----------------------+--------+-------------+------+------+
7277 * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
7278 * +-----------------------+--------+-------------+------+------+
7279 */
7280static void handle_simd_dupes(DisasContext *s, int rd, int rn,
7281                              int imm5)
7282{
7283    int size = ctz32(imm5);
7284    int index;
7285    TCGv_i64 tmp;
7286
7287    if (size > 3) {
7288        unallocated_encoding(s);
7289        return;
7290    }
7291
7292    if (!fp_access_check(s)) {
7293        return;
7294    }
7295
7296    index = imm5 >> (size + 1);
7297
7298    /* This instruction just extracts the specified element and
7299     * zero-extends it into the bottom of the destination register.
7300     */
7301    tmp = tcg_temp_new_i64();
7302    read_vec_element(s, tmp, rn, index, size);
7303    write_fp_dreg(s, rd, tmp);
7304    tcg_temp_free_i64(tmp);
7305}
7306
7307/* DUP (General)
7308 *
7309 *  31  30   29              21 20    16 15        10  9    5 4    0
7310 * +---+---+-------------------+--------+-------------+------+------+
7311 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
7312 * +---+---+-------------------+--------+-------------+------+------+
7313 *
7314 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7315 */
7316static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
7317                             int imm5)
7318{
7319    int size = ctz32(imm5);
7320    uint32_t dofs, oprsz, maxsz;
7321
7322    if (size > 3 || ((size == 3) && !is_q)) {
7323        unallocated_encoding(s);
7324        return;
7325    }
7326
7327    if (!fp_access_check(s)) {
7328        return;
7329    }
7330
7331    dofs = vec_full_reg_offset(s, rd);
7332    oprsz = is_q ? 16 : 8;
7333    maxsz = vec_full_reg_size(s);
7334
7335    tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
7336}
7337
7338/* INS (Element)
7339 *
7340 *  31                   21 20    16 15  14    11  10 9    5 4    0
7341 * +-----------------------+--------+------------+---+------+------+
7342 * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
7343 * +-----------------------+--------+------------+---+------+------+
7344 *
7345 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7346 * index: encoded in imm5<4:size+1>
7347 */
7348static void handle_simd_inse(DisasContext *s, int rd, int rn,
7349                             int imm4, int imm5)
7350{
7351    int size = ctz32(imm5);
7352    int src_index, dst_index;
7353    TCGv_i64 tmp;
7354
7355    if (size > 3) {
7356        unallocated_encoding(s);
7357        return;
7358    }
7359
7360    if (!fp_access_check(s)) {
7361        return;
7362    }
7363
7364    dst_index = extract32(imm5, 1+size, 5);
7365    src_index = extract32(imm4, size, 4);
7366
7367    tmp = tcg_temp_new_i64();
7368
7369    read_vec_element(s, tmp, rn, src_index, size);
7370    write_vec_element(s, tmp, rd, dst_index, size);
7371
7372    tcg_temp_free_i64(tmp);
7373}
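
    /*
     * Decode example (illustrative): INS v0.s[3], v1.s[1] encodes
     * imm5 = 0b11100 and imm4 = 0b0100, so size = 2,
     * dst_index = imm5<4:3> = 3 and src_index = imm4<3:2> = 1.
     */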
7374
7375
7376/* INS (General)
7377 *
7378 *  31                   21 20    16 15        10  9    5 4    0
7379 * +-----------------------+--------+-------------+------+------+
7380 * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
7381 * +-----------------------+--------+-------------+------+------+
7382 *
7383 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7384 * index: encoded in imm5<4:size+1>
7385 */
7386static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
7387{
7388    int size = ctz32(imm5);
7389    int idx;
7390
7391    if (size > 3) {
7392        unallocated_encoding(s);
7393        return;
7394    }
7395
7396    if (!fp_access_check(s)) {
7397        return;
7398    }
7399
7400    idx = extract32(imm5, 1 + size, 4 - size);
7401    write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
7402}
7403
7404/*
7405 * UMOV (General)
7406 * SMOV (General)
7407 *
7408 *  31  30   29              21 20    16 15    12   10 9    5 4    0
7409 * +---+---+-------------------+--------+-------------+------+------+
7410 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
7411 * +---+---+-------------------+--------+-------------+------+------+
7412 *
7413 * U: unsigned when set
7414 * size: encoded in imm5 (see ARM ARM LowestSetBit())
7415 */
7416static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
7417                                  int rn, int rd, int imm5)
7418{
7419    int size = ctz32(imm5);
7420    int element;
7421    TCGv_i64 tcg_rd;
7422
7423    /* Check for UnallocatedEncodings */
7424    if (is_signed) {
7425        if (size > 2 || (size == 2 && !is_q)) {
7426            unallocated_encoding(s);
7427            return;
7428        }
7429    } else {
7430        if (size > 3
7431            || (size < 3 && is_q)
7432            || (size == 3 && !is_q)) {
7433            unallocated_encoding(s);
7434            return;
7435        }
7436    }
7437
7438    if (!fp_access_check(s)) {
7439        return;
7440    }
7441
7442    element = extract32(imm5, 1+size, 4);
7443
7444    tcg_rd = cpu_reg(s, rd);
7445    read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
7446    if (is_signed && !is_q) {
7447        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
7448    }
7449}
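
    /*
     * For example SMOV w0, v1.b[5] (is_q = 0) sign-extends byte 5
     * to 64 bits via MO_SIGN and then zero-extends the low 32 bits,
     * since a Wd write must clear the upper half of the X register.
     */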
7450
7451/* AdvSIMD copy
7452 *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
7453 * +---+---+----+-----------------+------+---+------+---+------+------+
7454 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7455 * +---+---+----+-----------------+------+---+------+---+------+------+
7456 */
7457static void disas_simd_copy(DisasContext *s, uint32_t insn)
7458{
7459    int rd = extract32(insn, 0, 5);
7460    int rn = extract32(insn, 5, 5);
7461    int imm4 = extract32(insn, 11, 4);
7462    int op = extract32(insn, 29, 1);
7463    int is_q = extract32(insn, 30, 1);
7464    int imm5 = extract32(insn, 16, 5);
7465
7466    if (op) {
7467        if (is_q) {
7468            /* INS (element) */
7469            handle_simd_inse(s, rd, rn, imm4, imm5);
7470        } else {
7471            unallocated_encoding(s);
7472        }
7473    } else {
7474        switch (imm4) {
7475        case 0:
7476            /* DUP (element - vector) */
7477            handle_simd_dupe(s, is_q, rd, rn, imm5);
7478            break;
7479        case 1:
7480            /* DUP (general) */
7481            handle_simd_dupg(s, is_q, rd, rn, imm5);
7482            break;
7483        case 3:
7484            if (is_q) {
7485                /* INS (general) */
7486                handle_simd_insg(s, rd, rn, imm5);
7487            } else {
7488                unallocated_encoding(s);
7489            }
7490            break;
7491        case 5:
7492        case 7:
7493            /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
7494            handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
7495            break;
7496        default:
7497            unallocated_encoding(s);
7498            break;
7499        }
7500    }
7501}
7502
7503/* AdvSIMD modified immediate
7504 *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
7505 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7506 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
7507 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
7508 *
7509 * There are a number of operations that can be carried out here:
7510 *   MOVI - move (shifted) imm into register
7511 *   MVNI - move inverted (shifted) imm into register
7512 *   ORR  - bitwise OR of (shifted) imm with register
7513 *   BIC  - bitwise clear of (shifted) imm with register
7514 * With ARMv8.2 we also have:
7515 *   FMOV half-precision
7516 */
7517static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
7518{
7519    int rd = extract32(insn, 0, 5);
7520    int cmode = extract32(insn, 12, 4);
7521    int cmode_3_1 = extract32(cmode, 1, 3);
7522    int cmode_0 = extract32(cmode, 0, 1);
7523    int o2 = extract32(insn, 11, 1);
7524    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
7525    bool is_neg = extract32(insn, 29, 1);
7526    bool is_q = extract32(insn, 30, 1);
7527    uint64_t imm = 0;
7528
7529    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
7530        /* Check for FMOV (vector, immediate) - half-precision */
7531        if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
7532            unallocated_encoding(s);
7533            return;
7534        }
7535    }
7536
7537    if (!fp_access_check(s)) {
7538        return;
7539    }
7540
7541    /* See AdvSIMDExpandImm() in ARM ARM */
7542    switch (cmode_3_1) {
7543    case 0: /* Replicate(Zeros(24):imm8, 2) */
7544    case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
7545    case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
7546    case 3: /* Replicate(imm8:Zeros(24), 2) */
7547    {
7548        int shift = cmode_3_1 * 8;
7549        imm = bitfield_replicate(abcdefgh << shift, 32);
7550        break;
7551    }
7552    case 4: /* Replicate(Zeros(8):imm8, 4) */
7553    case 5: /* Replicate(imm8:Zeros(8), 4) */
7554    {
7555        int shift = (cmode_3_1 & 0x1) * 8;
7556        imm = bitfield_replicate(abcdefgh << shift, 16);
7557        break;
7558    }
7559    case 6:
7560        if (cmode_0) {
7561            /* Replicate(Zeros(8):imm8:Ones(16), 2) */
7562            imm = (abcdefgh << 16) | 0xffff;
7563        } else {
7564            /* Replicate(Zeros(16):imm8:Ones(8), 2) */
7565            imm = (abcdefgh << 8) | 0xff;
7566        }
7567        imm = bitfield_replicate(imm, 32);
7568        break;
7569    case 7:
7570        if (!cmode_0 && !is_neg) {
7571            imm = bitfield_replicate(abcdefgh, 8);
7572        } else if (!cmode_0 && is_neg) {
7573            int i;
7574            imm = 0;
7575            for (i = 0; i < 8; i++) {
7576                if ((abcdefgh) & (1 << i)) {
7577                    imm |= 0xffULL << (i * 8);
7578                }
7579            }
7580        } else if (cmode_0) {
7581            if (is_neg) {
7582                imm = (abcdefgh & 0x3f) << 48;
7583                if (abcdefgh & 0x80) {
7584                    imm |= 0x8000000000000000ULL;
7585                }
7586                if (abcdefgh & 0x40) {
7587                    imm |= 0x3fc0000000000000ULL;
7588                } else {
7589                    imm |= 0x4000000000000000ULL;
7590                }
7591            } else {
7592                if (o2) {
7593                    /* FMOV (vector, immediate) - half-precision */
7594                    imm = vfp_expand_imm(MO_16, abcdefgh);
7595                    /* now duplicate across the lanes */
7596                    imm = bitfield_replicate(imm, 16);
7597                } else {
7598                    imm = (abcdefgh & 0x3f) << 19;
7599                    if (abcdefgh & 0x80) {
7600                        imm |= 0x80000000;
7601                    }
7602                    if (abcdefgh & 0x40) {
7603                        imm |= 0x3e000000;
7604                    } else {
7605                        imm |= 0x40000000;
7606                    }
7607                    imm |= (imm << 32);
7608                }
7609            }
7610        }
7611        break;
7612    default:
7613        fprintf(stderr, "%s: cmode_3_1: %x\n", __func__, cmode_3_1);
7614        g_assert_not_reached();
7615    }
7616
7617    if (cmode_3_1 != 7 && is_neg) {
7618        imm = ~imm;
7619    }
7620
7621    if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
7622        /* MOVI or MVNI, with MVNI negation handled above.  */
7623        tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), is_q ? 16 : 8,
7624                            vec_full_reg_size(s), imm);
7625    } else {
7626        /* ORR or BIC, with BIC negation to AND handled above.  */
7627        if (is_neg) {
7628            gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
7629        } else {
7630            gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
7631        }
7632    }
7633}
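
    /*
     * AdvSIMDExpandImm worked example: MOVI v0.4s, #0xab, lsl #16
     * has cmode = 0b0100 (cmode_3_1 = 2), so the expansion above is
     *   imm = bitfield_replicate(0xab << 16, 32) = 0x00ab000000ab0000
     * The final cmode test selects the ORR/BIC forms (cmode 1/3/5/7
     * for 32-bit lanes, 9/0xb for 16-bit lanes), with is_neg choosing
     * BIC over ORR.
     */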
7634
7635/* AdvSIMD scalar copy
7636 *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
7637 * +-----+----+-----------------+------+---+------+---+------+------+
7638 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
7639 * +-----+----+-----------------+------+---+------+---+------+------+
7640 */
7641static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
7642{
7643    int rd = extract32(insn, 0, 5);
7644    int rn = extract32(insn, 5, 5);
7645    int imm4 = extract32(insn, 11, 4);
7646    int imm5 = extract32(insn, 16, 5);
7647    int op = extract32(insn, 29, 1);
7648
7649    if (op != 0 || imm4 != 0) {
7650        unallocated_encoding(s);
7651        return;
7652    }
7653
7654    /* DUP (element, scalar) */
7655    handle_simd_dupes(s, rd, rn, imm5);
7656}
7657
7658/* AdvSIMD scalar pairwise
7659 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7660 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7661 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7662 * +-----+---+-----------+------+-----------+--------+-----+------+------+
7663 */
7664static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
7665{
7666    int u = extract32(insn, 29, 1);
7667    int size = extract32(insn, 22, 2);
7668    int opcode = extract32(insn, 12, 5);
7669    int rn = extract32(insn, 5, 5);
7670    int rd = extract32(insn, 0, 5);
7671    TCGv_ptr fpst;
7672
7673    /* For some ops (the FP ones), size[1] is part of the encoding.
7674     * For ADDP strictly it is not but size[1] is always 1 for valid
7675     * encodings.
7676     */
7677    opcode |= (extract32(size, 1, 1) << 5);
7678
7679    switch (opcode) {
7680    case 0x3b: /* ADDP */
7681        if (u || size != 3) {
7682            unallocated_encoding(s);
7683            return;
7684        }
7685        if (!fp_access_check(s)) {
7686            return;
7687        }
7688
7689        fpst = NULL;
7690        break;
7691    case 0xc: /* FMAXNMP */
7692    case 0xd: /* FADDP */
7693    case 0xf: /* FMAXP */
7694    case 0x2c: /* FMINNMP */
7695    case 0x2f: /* FMINP */
7696        /* FP op, size[0] is 32 or 64 bit */
7697        if (!u) {
7698            if (!dc_isar_feature(aa64_fp16, s)) {
7699                unallocated_encoding(s);
7700                return;
7701            } else {
7702                size = MO_16;
7703            }
7704        } else {
7705            size = extract32(size, 0, 1) ? MO_64 : MO_32;
7706        }
7707
7708        if (!fp_access_check(s)) {
7709            return;
7710        }
7711
7712        fpst = get_fpstatus_ptr(size == MO_16);
7713        break;
7714    default:
7715        unallocated_encoding(s);
7716        return;
7717    }
7718
7719    if (size == MO_64) {
7720        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7721        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7722        TCGv_i64 tcg_res = tcg_temp_new_i64();
7723
7724        read_vec_element(s, tcg_op1, rn, 0, MO_64);
7725        read_vec_element(s, tcg_op2, rn, 1, MO_64);
7726
7727        switch (opcode) {
7728        case 0x3b: /* ADDP */
7729            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
7730            break;
7731        case 0xc: /* FMAXNMP */
7732            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7733            break;
7734        case 0xd: /* FADDP */
7735            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7736            break;
7737        case 0xf: /* FMAXP */
7738            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7739            break;
7740        case 0x2c: /* FMINNMP */
7741            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7742            break;
7743        case 0x2f: /* FMINP */
7744            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7745            break;
7746        default:
7747            g_assert_not_reached();
7748        }
7749
7750        write_fp_dreg(s, rd, tcg_res);
7751
7752        tcg_temp_free_i64(tcg_op1);
7753        tcg_temp_free_i64(tcg_op2);
7754        tcg_temp_free_i64(tcg_res);
7755    } else {
7756        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7757        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7758        TCGv_i32 tcg_res = tcg_temp_new_i32();
7759
7760        read_vec_element_i32(s, tcg_op1, rn, 0, size);
7761        read_vec_element_i32(s, tcg_op2, rn, 1, size);
7762
7763        if (size == MO_16) {
7764            switch (opcode) {
7765            case 0xc: /* FMAXNMP */
7766                gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7767                break;
7768            case 0xd: /* FADDP */
7769                gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
7770                break;
7771            case 0xf: /* FMAXP */
7772                gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
7773                break;
7774            case 0x2c: /* FMINNMP */
7775                gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7776                break;
7777            case 0x2f: /* FMINP */
7778                gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
7779                break;
7780            default:
7781                g_assert_not_reached();
7782            }
7783        } else {
7784            switch (opcode) {
7785            case 0xc: /* FMAXNMP */
7786                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7787                break;
7788            case 0xd: /* FADDP */
7789                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7790                break;
7791            case 0xf: /* FMAXP */
7792                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7793                break;
7794            case 0x2c: /* FMINNMP */
7795                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7796                break;
7797            case 0x2f: /* FMINP */
7798                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7799                break;
7800            default:
7801                g_assert_not_reached();
7802            }
7803        }
7804
7805        write_fp_sreg(s, rd, tcg_res);
7806
7807        tcg_temp_free_i32(tcg_op1);
7808        tcg_temp_free_i32(tcg_op2);
7809        tcg_temp_free_i32(tcg_res);
7810    }
7811
7812    if (fpst) {
7813        tcg_temp_free_ptr(fpst);
7814    }
7815}
7816
7817/*
7818 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
7819 *
7820 * This handles the common shifting code and is used by both
7821 * the vector and scalar code.
7822 */
7823static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
7824                                    TCGv_i64 tcg_rnd, bool accumulate,
7825                                    bool is_u, int size, int shift)
7826{
7827    bool extended_result = false;
7828    bool round = tcg_rnd != NULL;
7829    int ext_lshift = 0;
7830    TCGv_i64 tcg_src_hi;
7831
7832    if (round && size == 3) {
7833        extended_result = true;
7834        ext_lshift = 64 - shift;
7835        tcg_src_hi = tcg_temp_new_i64();
7836    } else if (shift == 64) {
7837        if (!accumulate && is_u) {
7838            /* result is zero */
7839            tcg_gen_movi_i64(tcg_res, 0);
7840            return;
7841        }
7842    }
7843
7844    /* Deal with the rounding step */
7845    if (round) {
7846        if (extended_result) {
7847            TCGv_i64 tcg_zero = tcg_const_i64(0);
7848            if (!is_u) {
7849                /* take care of sign extending tcg_res */
7850                tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
7851                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7852                                 tcg_src, tcg_src_hi,
7853                                 tcg_rnd, tcg_zero);
7854            } else {
7855                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7856                                 tcg_src, tcg_zero,
7857                                 tcg_rnd, tcg_zero);
7858            }
7859            tcg_temp_free_i64(tcg_zero);
7860        } else {
7861            tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
7862        }
7863    }
7864
7865    /* Now do the shift right */
7866    if (round && extended_result) {
7867        /* extended case, >64 bit precision required */
7868        if (ext_lshift == 0) {
7869            /* special case, only high bits matter */
7870            tcg_gen_mov_i64(tcg_src, tcg_src_hi);
7871        } else {
7872            tcg_gen_shri_i64(tcg_src, tcg_src, shift);
7873            tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
7874            tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
7875        }
7876    } else {
7877        if (is_u) {
7878            if (shift == 64) {
7879                /* essentially shifting in 64 zeros */
7880                tcg_gen_movi_i64(tcg_src, 0);
7881            } else {
7882                tcg_gen_shri_i64(tcg_src, tcg_src, shift);
7883            }
7884        } else {
7885            if (shift == 64) {
7886                /* effectively extending the sign-bit */
7887                tcg_gen_sari_i64(tcg_src, tcg_src, 63);
7888            } else {
7889                tcg_gen_sari_i64(tcg_src, tcg_src, shift);
7890            }
7891        }
7892    }
7893
7894    if (accumulate) {
7895        tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
7896    } else {
7897        tcg_gen_mov_i64(tcg_res, tcg_src);
7898    }
7899
7900    if (extended_result) {
7901        tcg_temp_free_i64(tcg_src_hi);
7902    }
7903}
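
    /*
     * Rounding sketch: URSHR with size = 3 and shift = 8 computes
     * (src + (1 << 7)) >> 8, but since adding the rounding constant
     * can carry out of 64 bits, the code above uses tcg_gen_add2_i64
     * to do a 128-bit add of {src, 0} + {round_const, 0} and then
     * shifts the top limb's bits back in with ext_lshift = 56.
     */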
7904
7905/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
7906static void handle_scalar_simd_shri(DisasContext *s,
7907                                    bool is_u, int immh, int immb,
7908                                    int opcode, int rn, int rd)
7909{
7910    const int size = 3;
7911    int immhb = immh << 3 | immb;
7912    int shift = 2 * (8 << size) - immhb;
7913    bool accumulate = false;
7914    bool round = false;
7915    bool insert = false;
7916    TCGv_i64 tcg_rn;
7917    TCGv_i64 tcg_rd;
7918    TCGv_i64 tcg_round;
7919
7920    if (!extract32(immh, 3, 1)) {
7921        unallocated_encoding(s);
7922        return;
7923    }
7924
7925    if (!fp_access_check(s)) {
7926        return;
7927    }
7928
7929    switch (opcode) {
7930    case 0x02: /* SSRA / USRA (accumulate) */
7931        accumulate = true;
7932        break;
7933    case 0x04: /* SRSHR / URSHR (rounding) */
7934        round = true;
7935        break;
7936    case 0x06: /* SRSRA / URSRA (accum + rounding) */
7937        accumulate = round = true;
7938        break;
7939    case 0x08: /* SRI */
7940        insert = true;
7941        break;
7942    }
7943
7944    if (round) {
7945        uint64_t round_const = 1ULL << (shift - 1);
7946        tcg_round = tcg_const_i64(round_const);
7947    } else {
7948        tcg_round = NULL;
7949    }
7950
7951    tcg_rn = read_fp_dreg(s, rn);
7952    tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
7953
7954    if (insert) {
7955        /* A shift count equal to the element size is valid but does
7956         * nothing; special case it to avoid a potential shift by 64.
7957         */
7958        int esize = 8 << size;
7959        if (shift != esize) {
7960            tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
7961            tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
7962        }
7963    } else {
7964        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
7965                                accumulate, is_u, size, shift);
7966    }
7967
7968    write_fp_dreg(s, rd, tcg_rd);
7969
7970    tcg_temp_free_i64(tcg_rn);
7971    tcg_temp_free_i64(tcg_rd);
7972    if (round) {
7973        tcg_temp_free_i64(tcg_round);
7974    }
7975}
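
    /*
     * Shift decode example: the scalar forms require immh<3> == 1
     * (64-bit elements), so immh:immb lies in 64..127 and
     * shift = 128 - immhb covers 1..64; SSHR d0, d1, #1 therefore
     * encodes immh = 0b1111, immb = 0b111.
     */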
7976
7977/* SHL/SLI - Scalar shift left */
7978static void handle_scalar_simd_shli(DisasContext *s, bool insert,
7979                                    int immh, int immb, int opcode,
7980                                    int rn, int rd)
7981{
7982    int size = 32 - clz32(immh) - 1;
7983    int immhb = immh << 3 | immb;
7984    int shift = immhb - (8 << size);
7985    TCGv_i64 tcg_rn;
7986    TCGv_i64 tcg_rd;
7987
7988    if (!extract32(immh, 3, 1)) {
7989        unallocated_encoding(s);
7990        return;
7991    }
7992
7993    if (!fp_access_check(s)) {
7994        return;
7995    }
7996
7997    tcg_rn = read_fp_dreg(s, rn);
7998    tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
7999
8000    if (insert) {
8001        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
8002    } else {
8003        tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
8004    }
8005
8006    write_fp_dreg(s, rd, tcg_rd);
8007
8008    tcg_temp_free_i64(tcg_rn);
8009    tcg_temp_free_i64(tcg_rd);
8010}
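
    /*
     * Decode example: SHL d0, d1, #3 encodes immh = 0b1000 and
     * immb = 0b011, so size = 3 and shift = immhb - 64 = 3; for SLI
     * the deposit above preserves bits [shift-1:0] of Rd.
     */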
8011
8012/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
8013 * (signed/unsigned) narrowing */
8014static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
8015                                   bool is_u_shift, bool is_u_narrow,
8016                                   int immh, int immb, int opcode,
8017                                   int rn, int rd)
8018{
8019    int immhb = immh << 3 | immb;
8020    int size = 32 - clz32(immh) - 1;
8021    int esize = 8 << size;
8022    int shift = (2 * esize) - immhb;
8023    int elements = is_scalar ? 1 : (64 / esize);
8024    bool round = extract32(opcode, 0, 1);
8025    TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
8026    TCGv_i64 tcg_rn, tcg_rd, tcg_round;
8027    TCGv_i32 tcg_rd_narrowed;
8028    TCGv_i64 tcg_final;
8029
8030    static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
8031        { gen_helper_neon_narrow_sat_s8,
8032          gen_helper_neon_unarrow_sat8 },
8033        { gen_helper_neon_narrow_sat_s16,
8034          gen_helper_neon_unarrow_sat16 },
8035        { gen_helper_neon_narrow_sat_s32,
8036          gen_helper_neon_unarrow_sat32 },
8037        { NULL, NULL },
8038    };
8039    static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
8040        gen_helper_neon_narrow_sat_u8,
8041        gen_helper_neon_narrow_sat_u16,
8042        gen_helper_neon_narrow_sat_u32,
8043        NULL
8044    };
8045    NeonGenNarrowEnvFn *narrowfn;
8046
8047    int i;
8048
8049    assert(size < 4);
8050
8051    if (extract32(immh, 3, 1)) {
8052        unallocated_encoding(s);
8053        return;
8054    }
8055
8056    if (!fp_access_check(s)) {
8057        return;
8058    }
8059
8060    if (is_u_shift) {
8061        narrowfn = unsigned_narrow_fns[size];
8062    } else {
8063        narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
8064    }
8065
8066    tcg_rn = tcg_temp_new_i64();
8067    tcg_rd = tcg_temp_new_i64();
8068    tcg_rd_narrowed = tcg_temp_new_i32();
8069    tcg_final = tcg_const_i64(0);
8070
8071    if (round) {
8072        uint64_t round_const = 1ULL << (shift - 1);
8073        tcg_round = tcg_const_i64(round_const);
8074    } else {
8075        tcg_round = NULL;
8076    }
8077
8078    for (i = 0; i < elements; i++) {
8079        read_vec_element(s, tcg_rn, rn, i, ldop);
8080        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8081                                false, is_u_shift, size+1, shift);
8082        narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
8083        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
8084        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8085    }
8086
8087    if (!is_q) {
8088        write_vec_element(s, tcg_final, rd, 0, MO_64);
8089    } else {
8090        write_vec_element(s, tcg_final, rd, 1, MO_64);
8091    }
8092
8093    if (round) {
8094        tcg_temp_free_i64(tcg_round);
8095    }
8096    tcg_temp_free_i64(tcg_rn);
8097    tcg_temp_free_i64(tcg_rd);
8098    tcg_temp_free_i32(tcg_rd_narrowed);
8099    tcg_temp_free_i64(tcg_final);
8100
8101    clear_vec_high(s, is_q, rd);
8102}
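
    /*
     * Note on the result assembly above: each narrowed element is
     * deposited into tcg_final at bit offset esize * i, and the
     * finished 64-bit group lands in the low half of Rd for the
     * base insns or the high half for the "2" variants (is_q set),
     * e.g. SQSHRN2 v0.16b, v1.8h, #shift.
     */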
8103
8104/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
8105static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
8106                             bool src_unsigned, bool dst_unsigned,
8107                             int immh, int immb, int rn, int rd)
8108{
8109    int immhb = immh << 3 | immb;
8110    int size = 32 - clz32(immh) - 1;
8111    int shift = immhb - (8 << size);
8112    int pass;
8113
8114    assert(immh != 0);
8115    assert(!(scalar && is_q));
8116
8117    if (!scalar) {
8118        if (!is_q && extract32(immh, 3, 1)) {
8119            unallocated_encoding(s);
8120            return;
8121        }
8122
8123        /* Since we use the variable-shift helpers we must
8124         * replicate the shift count into each element of
8125         * the tcg_shift value.
8126         */
8127        switch (size) {
8128        case 0:
8129            shift |= shift << 8;
8130            /* fall through */
8131        case 1:
8132            shift |= shift << 16;
8133            break;
8134        case 2:
8135        case 3:
8136            break;
8137        default:
8138            g_assert_not_reached();
8139        }
8140    }
8141
8142    if (!fp_access_check(s)) {
8143        return;
8144    }
8145
8146    if (size == 3) {
8147        TCGv_i64 tcg_shift = tcg_const_i64(shift);
8148        static NeonGenTwo64OpEnvFn * const fns[2][2] = {
8149            { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
8150            { NULL, gen_helper_neon_qshl_u64 },
8151        };
8152        NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
8153        int maxpass = is_q ? 2 : 1;
8154
8155        for (pass = 0; pass < maxpass; pass++) {
8156            TCGv_i64 tcg_op = tcg_temp_new_i64();
8157
8158            read_vec_element(s, tcg_op, rn, pass, MO_64);
8159            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8160            write_vec_element(s, tcg_op, rd, pass, MO_64);
8161
8162            tcg_temp_free_i64(tcg_op);
8163        }
8164        tcg_temp_free_i64(tcg_shift);
8165        clear_vec_high(s, is_q, rd);
8166    } else {
8167        TCGv_i32 tcg_shift = tcg_const_i32(shift);
8168        static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
8169            {
8170                { gen_helper_neon_qshl_s8,
8171                  gen_helper_neon_qshl_s16,
8172                  gen_helper_neon_qshl_s32 },
8173                { gen_helper_neon_qshlu_s8,
8174                  gen_helper_neon_qshlu_s16,
8175                  gen_helper_neon_qshlu_s32 }
8176            }, {
8177                { NULL, NULL, NULL },
8178                { gen_helper_neon_qshl_u8,
8179                  gen_helper_neon_qshl_u16,
8180                  gen_helper_neon_qshl_u32 }
8181            }
8182        };
8183        NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
8184        TCGMemOp memop = scalar ? size : MO_32;
8185        int maxpass = scalar ? 1 : is_q ? 4 : 2;
8186
8187        for (pass = 0; pass < maxpass; pass++) {
8188            TCGv_i32 tcg_op = tcg_temp_new_i32();
8189
8190            read_vec_element_i32(s, tcg_op, rn, pass, memop);
8191            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
8192            if (scalar) {
8193                switch (size) {
8194                case 0:
8195                    tcg_gen_ext8u_i32(tcg_op, tcg_op);
8196                    break;
8197                case 1:
8198                    tcg_gen_ext16u_i32(tcg_op, tcg_op);
8199                    break;
8200                case 2:
8201                    break;
8202                default:
8203                    g_assert_not_reached();
8204                }
8205                write_fp_sreg(s, rd, tcg_op);
8206            } else {
8207                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
8208            }
8209
8210            tcg_temp_free_i32(tcg_op);
8211        }
8212        tcg_temp_free_i32(tcg_shift);
8213
8214        if (!scalar) {
8215            clear_vec_high(s, is_q, rd);
8216        }
8217    }
8218}
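
    /*
     * Example of the shift replication above: with size = 0 (bytes)
     * and shift = 3 the value handed to the helper becomes
     * 0x03030303, so the 32-bit variable-shift helper applies the
     * same count to each of the four byte lanes in a pass.
     */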
8219
8220/* Common vector code for handling integer to FP conversion */
8221static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
8222                                   int elements, int is_signed,
8223                                   int fracbits, int size)
8224{
8225    TCGv_ptr tcg_fpst = get_fpstatus_ptr(size == MO_16);
8226    TCGv_i32 tcg_shift = NULL;
8227
8228    TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
8229    int pass;
8230
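        /* Only some of the helpers below take a shift argument: the
         * 64-bit conversions always do, while the 32-bit and 16-bit
         * cases have separate fixed-point and plain variants. So
         * tcg_shift is allocated only when a path will consume it.
         */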
8231    if (fracbits || size == MO_64) {
8232        tcg_shift = tcg_const_i32(fracbits);
8233    }
8234
8235    if (size == MO_64) {
8236        TCGv_i64 tcg_int64 = tcg_temp_new_i64();
8237        TCGv_i64 tcg_double = tcg_temp_new_i64();
8238
8239        for (pass = 0; pass < elements; pass++) {
8240            read_vec_element(s, tcg_int64, rn, pass, mop);
8241
8242            if (is_signed) {
8243                gen_helper_vfp_sqtod(tcg_double, tcg_int64,
8244                                     tcg_shift, tcg_fpst);
8245            } else {
8246                gen_helper_vfp_uqtod(tcg_double, tcg_int64,
8247                                     tcg_shift, tcg_fpst);
8248            }
8249            if (elements == 1) {
8250                write_fp_dreg(s, rd, tcg_double);
8251            } else {
8252                write_vec_element(s, tcg_double, rd, pass, MO_64);
8253            }
8254        }
8255
8256        tcg_temp_free_i64(tcg_int64);
8257        tcg_temp_free_i64(tcg_double);
8258
8259    } else {
8260        TCGv_i32 tcg_int32 = tcg_temp_new_i32();
8261        TCGv_i32 tcg_float = tcg_temp_new_i32();
8262
8263        for (pass = 0; pass < elements; pass++) {
8264            read_vec_element_i32(s, tcg_int32, rn, pass, mop);
8265
8266            switch (size) {
8267            case MO_32:
8268                if (fracbits) {
8269                    if (is_signed) {
8270                        gen_helper_vfp_sltos(tcg_float, tcg_int32,
8271                                             tcg_shift, tcg_fpst);
8272                    } else {
8273                        gen_helper_vfp_ultos(tcg_float, tcg_int32,
8274                                             tcg_shift, tcg_fpst);
8275                    }
8276                } else {
8277                    if (is_signed) {
8278                        gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
8279                    } else {
8280                        gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
8281                    }
8282                }
8283                break;
8284            case MO_16:
8285                if (fracbits) {
8286                    if (is_signed) {
8287                        gen_helper_vfp_sltoh(tcg_float, tcg_int32,
8288                                             tcg_shift, tcg_fpst);
8289                    } else {
8290                        gen_helper_vfp_ultoh(tcg_float, tcg_int32,
8291                                             tcg_shift, tcg_fpst);
8292                    }
8293                } else {
8294                    if (is_signed) {
8295                        gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
8296                    } else {
8297                        gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
8298                    }
8299                }
8300                break;
8301            default:
8302                g_assert_not_reached();
8303            }
8304
8305            if (elements == 1) {
8306                write_fp_sreg(s, rd, tcg_float);
8307            } else {
8308                write_vec_element_i32(s, tcg_float, rd, pass, size);
8309            }
8310        }
8311
8312        tcg_temp_free_i32(tcg_int32);
8313        tcg_temp_free_i32(tcg_float);
8314    }
8315
8316    tcg_temp_free_ptr(tcg_fpst);
8317    if (tcg_shift) {
8318        tcg_temp_free_i32(tcg_shift);
8319    }
8320
8321    clear_vec_high(s, (elements << size) == 16, rd);
8322}
8323
8324/* UCVTF/SCVTF - Integer to FP conversion */
8325static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
8326                                         bool is_q, bool is_u,
8327                                         int immh, int immb, int opcode,
8328                                         int rn, int rd)
8329{
8330    int size, elements, fracbits;
8331    int immhb = immh << 3 | immb;
8332
8333    if (immh & 8) {
8334        size = MO_64;
8335        if (!is_scalar && !is_q) {
8336            unallocated_encoding(s);
8337            return;
8338        }
8339    } else if (immh & 4) {
8340        size = MO_32;
8341    } else if (immh & 2) {
8342        size = MO_16;
8343        if (!dc_isar_feature(aa64_fp16, s)) {
8344            unallocated_encoding(s);
8345            return;
8346        }
8347    } else {
8348        /* immh == 0 would be a failure of the decode logic */
8349        g_assert(immh == 1);
8350        unallocated_encoding(s);
8351        return;
8352    }
8353
8354    if (is_scalar) {
8355        elements = 1;
8356    } else {
8357        elements = (8 << is_q) >> size;
8358    }
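        /* immh:immb encodes 2 * esize - fracbits, so with esize being
         * 8 << size bits the fraction-bit count is (16 << size) - immhb.
         */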
8359    fracbits = (16 << size) - immhb;
8360
8361    if (!fp_access_check(s)) {
8362        return;
8363    }
8364
8365    handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8366}
8367
8368/* FCVTZS, FCVTZU - FP to fixed-point conversion */
8369static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
8370                                         bool is_q, bool is_u,
8371                                         int immh, int immb, int rn, int rd)
8372{
8373    int immhb = immh << 3 | immb;
8374    int pass, size, fracbits;
8375    TCGv_ptr tcg_fpstatus;
8376    TCGv_i32 tcg_rmode, tcg_shift;
8377
8378    if (immh & 0x8) {
8379        size = MO_64;
8380        if (!is_scalar && !is_q) {
8381            unallocated_encoding(s);
8382            return;
8383        }
8384    } else if (immh & 0x4) {
8385        size = MO_32;
8386    } else if (immh & 0x2) {
8387        size = MO_16;
8388        if (!dc_isar_feature(aa64_fp16, s)) {
8389            unallocated_encoding(s);
8390            return;
8391        }
8392    } else {
8393        /* Should have split out AdvSIMD modified immediate earlier.  */
8394        assert(immh == 1);
8395        unallocated_encoding(s);
8396        return;
8397    }
8398
8399    if (!fp_access_check(s)) {
8400        return;
8401    }
8402
8403    assert(!(is_scalar && is_q));
8404
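        /* These are all round-towards-zero conversions: force the
         * rounding mode around the helper calls and restore it at the
         * end of the function.
         */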
8405    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
8406    tcg_fpstatus = get_fpstatus_ptr(size == MO_16);
8407    gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
8408    fracbits = (16 << size) - immhb;
8409    tcg_shift = tcg_const_i32(fracbits);
8410
8411    if (size == MO_64) {
8412        int maxpass = is_scalar ? 1 : 2;
8413
8414        for (pass = 0; pass < maxpass; pass++) {
8415            TCGv_i64 tcg_op = tcg_temp_new_i64();
8416
8417            read_vec_element(s, tcg_op, rn, pass, MO_64);
8418            if (is_u) {
8419                gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8420            } else {
8421                gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8422            }
8423            write_vec_element(s, tcg_op, rd, pass, MO_64);
8424            tcg_temp_free_i64(tcg_op);
8425        }
8426        clear_vec_high(s, is_q, rd);
8427    } else {
8428        void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
8429        int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
8430
8431        switch (size) {
8432        case MO_16:
8433            if (is_u) {
8434                fn = gen_helper_vfp_touhh;
8435            } else {
8436                fn = gen_helper_vfp_toshh;
8437            }
8438            break;
8439        case MO_32:
8440            if (is_u) {
8441                fn = gen_helper_vfp_touls;
8442            } else {
8443                fn = gen_helper_vfp_tosls;
8444            }
8445            break;
8446        default:
8447            g_assert_not_reached();
8448        }
8449
8450        for (pass = 0; pass < maxpass; pass++) {
8451            TCGv_i32 tcg_op = tcg_temp_new_i32();
8452
8453            read_vec_element_i32(s, tcg_op, rn, pass, size);
8454            fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8455            if (is_scalar) {
8456                write_fp_sreg(s, rd, tcg_op);
8457            } else {
8458                write_vec_element_i32(s, tcg_op, rd, pass, size);
8459            }
8460            tcg_temp_free_i32(tcg_op);
8461        }
8462        if (!is_scalar) {
8463            clear_vec_high(s, is_q, rd);
8464        }
8465    }
8466
8467    gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
8468    tcg_temp_free_ptr(tcg_fpstatus);
8469    tcg_temp_free_i32(tcg_shift);
8470    tcg_temp_free_i32(tcg_rmode);
8471}
8472
8473/* AdvSIMD scalar shift by immediate
8474 *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
8475 * +-----+---+-------------+------+------+--------+---+------+------+
8476 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8477 * +-----+---+-------------+------+------+--------+---+------+------+
8478 *
8479 * This is the scalar version, so it works on fixed-size registers.
8480 */
8481static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
8482{
8483    int rd = extract32(insn, 0, 5);
8484    int rn = extract32(insn, 5, 5);
8485    int opcode = extract32(insn, 11, 5);
8486    int immb = extract32(insn, 16, 3);
8487    int immh = extract32(insn, 19, 4);
8488    bool is_u = extract32(insn, 29, 1);
8489
8490    if (immh == 0) {
8491        unallocated_encoding(s);
8492        return;
8493    }
8494
8495    switch (opcode) {
8496    case 0x08: /* SRI */
8497        if (!is_u) {
8498            unallocated_encoding(s);
8499            return;
8500        }
8501        /* fall through */
8502    case 0x00: /* SSHR / USHR */
8503    case 0x02: /* SSRA / USRA */
8504    case 0x04: /* SRSHR / URSHR */
8505    case 0x06: /* SRSRA / URSRA */
8506        handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
8507        break;
8508    case 0x0a: /* SHL / SLI */
8509        handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
8510        break;
8511    case 0x1c: /* SCVTF, UCVTF */
8512        handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
8513                                     opcode, rn, rd);
8514        break;
8515    case 0x10: /* SQSHRUN, SQSHRUN2 */
8516    case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8517        if (!is_u) {
8518            unallocated_encoding(s);
8519            return;
8520        }
8521        handle_vec_simd_sqshrn(s, true, false, false, true,
8522                               immh, immb, opcode, rn, rd);
8523        break;
8524    case 0x12: /* SQSHRN, SQSHRN2, UQSHRN, UQSHRN2 */
8525    case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8526        handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
8527                               immh, immb, opcode, rn, rd);
8528        break;
8529    case 0xc: /* SQSHLU */
8530        if (!is_u) {
8531            unallocated_encoding(s);
8532            return;
8533        }
8534        handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
8535        break;
8536    case 0xe: /* SQSHL, UQSHL */
8537        handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
8538        break;
8539    case 0x1f: /* FCVTZS, FCVTZU */
8540        handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
8541        break;
8542    default:
8543        unallocated_encoding(s);
8544        break;
8545    }
8546}
8547
8548/* AdvSIMD scalar three different
8549 *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8550 * +-----+---+-----------+------+---+------+--------+-----+------+------+
8551 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8552 * +-----+---+-----------+------+---+------+--------+-----+------+------+
8553 */
8554static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
8555{
8556    bool is_u = extract32(insn, 29, 1);
8557    int size = extract32(insn, 22, 2);
8558    int opcode = extract32(insn, 12, 4);
8559    int rm = extract32(insn, 16, 5);
8560    int rn = extract32(insn, 5, 5);
8561    int rd = extract32(insn, 0, 5);
8562
8563    if (is_u) {
8564        unallocated_encoding(s);
8565        return;
8566    }
8567
8568    switch (opcode) {
8569    case 0x9: /* SQDMLAL, SQDMLAL2 */
8570    case 0xb: /* SQDMLSL, SQDMLSL2 */
8571    case 0xd: /* SQDMULL, SQDMULL2 */
8572        if (size == 0 || size == 3) {
8573            unallocated_encoding(s);
8574            return;
8575        }
8576        break;
8577    default:
8578        unallocated_encoding(s);
8579        return;
8580    }
8581
8582    if (!fp_access_check(s)) {
8583        return;
8584    }
8585
8586    if (size == 2) {
8587        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8588        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8589        TCGv_i64 tcg_res = tcg_temp_new_i64();
8590
8591        read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
8592        read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
8593
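            /* The doubling in SQDMULL is done as a saturating add of
             * the product to itself: INT32_MIN * INT32_MIN is the only
             * product whose double overflows, saturating the result
             * and setting QC.
             */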
8594        tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
8595        gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
8596
8597        switch (opcode) {
8598        case 0xd: /* SQDMULL, SQDMULL2 */
8599            break;
8600        case 0xb: /* SQDMLSL, SQDMLSL2 */
8601            tcg_gen_neg_i64(tcg_res, tcg_res);
8602            /* fall through */
8603        case 0x9: /* SQDMLAL, SQDMLAL2 */
8604            read_vec_element(s, tcg_op1, rd, 0, MO_64);
8605            gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
8606                                              tcg_res, tcg_op1);
8607            break;
8608        default:
8609            g_assert_not_reached();
8610        }
8611
8612        write_fp_dreg(s, rd, tcg_res);
8613
8614        tcg_temp_free_i64(tcg_op1);
8615        tcg_temp_free_i64(tcg_op2);
8616        tcg_temp_free_i64(tcg_res);
8617    } else {
8618        TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
8619        TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
8620        TCGv_i64 tcg_res = tcg_temp_new_i64();
8621
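            /* As in the size == 2 case: widen, then double the product
             * via a saturating add to itself.
             */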
8622        gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
8623        gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
8624
8625        switch (opcode) {
8626        case 0xd: /* SQDMULL, SQDMULL2 */
8627            break;
8628        case 0xb: /* SQDMLSL, SQDMLSL2 */
8629            gen_helper_neon_negl_u32(tcg_res, tcg_res);
8630            /* fall through */
8631        case 0x9: /* SQDMLAL, SQDMLAL2 */
8632        {
8633            TCGv_i64 tcg_op3 = tcg_temp_new_i64();
8634            read_vec_element(s, tcg_op3, rd, 0, MO_32);
8635            gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
8636                                              tcg_res, tcg_op3);
8637            tcg_temp_free_i64(tcg_op3);
8638            break;
8639        }
8640        default:
8641            g_assert_not_reached();
8642        }
8643
8644        tcg_gen_ext32u_i64(tcg_res, tcg_res);
8645        write_fp_dreg(s, rd, tcg_res);
8646
8647        tcg_temp_free_i32(tcg_op1);
8648        tcg_temp_free_i32(tcg_op2);
8649        tcg_temp_free_i64(tcg_res);
8650    }
8651}
8652
8653static void handle_3same_64(DisasContext *s, int opcode, bool u,
8654                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
8655{
8656    /* Handle 64x64->64 opcodes which are shared between the scalar
8657     * and vector 3-same groups. We cover every opcode where size == 3
8658     * is valid in either the three-reg-same (integer, not pairwise)
8659     * or scalar-three-reg-same groups.
8660     */
8661    TCGCond cond;
8662
8663    switch (opcode) {
8664    case 0x1: /* SQADD */
8665        if (u) {
8666            gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8667        } else {
8668            gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8669        }
8670        break;
8671    case 0x5: /* SQSUB */
8672        if (u) {
8673            gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8674        } else {
8675            gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8676        }
8677        break;
8678    case 0x6: /* CMGT, CMHI */
8679        /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
8680         * We implement this using setcond (test) and then negating.
8681         */
8682        cond = u ? TCG_COND_GTU : TCG_COND_GT;
8683    do_cmop:
8684        tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
8685        tcg_gen_neg_i64(tcg_rd, tcg_rd);
8686        break;
8687    case 0x7: /* CMGE, CMHS */
8688        cond = u ? TCG_COND_GEU : TCG_COND_GE;
8689        goto do_cmop;
8690    case 0x11: /* CMTST, CMEQ */
8691        if (u) {
8692            cond = TCG_COND_EQ;
8693            goto do_cmop;
8694        }
8695        gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
8696        break;
8697    case 0x8: /* SSHL, USHL */
8698        if (u) {
8699            gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
8700        } else {
8701            gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
8702        }
8703        break;
8704    case 0x9: /* SQSHL, UQSHL */
8705        if (u) {
8706            gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8707        } else {
8708            gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8709        }
8710        break;
8711    case 0xa: /* SRSHL, URSHL */
8712        if (u) {
8713            gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
8714        } else {
8715            gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
8716        }
8717        break;
8718    case 0xb: /* SQRSHL, UQRSHL */
8719        if (u) {
8720            gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8721        } else {
8722            gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8723        }
8724        break;
8725    case 0x10: /* ADD, SUB */
8726        if (u) {
8727            tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
8728        } else {
8729            tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
8730        }
8731        break;
8732    default:
8733        g_assert_not_reached();
8734    }
8735}
8736
8737/* Handle the 3-same-operands float operations; shared by the scalar
8738 * and vector encodings. The caller must filter out any encodings
8739 * not allocated for the variant it is dealing with.
8740 */
8741static void handle_3same_float(DisasContext *s, int size, int elements,
8742                               int fpopcode, int rd, int rn, int rm)
8743{
8744    int pass;
8745    TCGv_ptr fpst = get_fpstatus_ptr(false);
8746
8747    for (pass = 0; pass < elements; pass++) {
8748        if (size) {
8749            /* Double */
8750            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8751            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8752            TCGv_i64 tcg_res = tcg_temp_new_i64();
8753
8754            read_vec_element(s, tcg_op1, rn, pass, MO_64);
8755            read_vec_element(s, tcg_op2, rm, pass, MO_64);
8756
8757            switch (fpopcode) {
8758            case 0x39: /* FMLS */
8759                /* As usual for ARM, separate negation for fused multiply-add */
8760                gen_helper_vfp_negd(tcg_op1, tcg_op1);
8761                /* fall through */
8762            case 0x19: /* FMLA */
8763                read_vec_element(s, tcg_res, rd, pass, MO_64);
8764                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
8765                                       tcg_res, fpst);
8766                break;
8767            case 0x18: /* FMAXNM */
8768                gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8769                break;
8770            case 0x1a: /* FADD */
8771                gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8772                break;
8773            case 0x1b: /* FMULX */
8774                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
8775                break;
8776            case 0x1c: /* FCMEQ */
8777                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8778                break;
8779            case 0x1e: /* FMAX */
8780                gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8781                break;
8782            case 0x1f: /* FRECPS */
8783                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8784                break;
8785            case 0x38: /* FMINNM */
8786                gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8787                break;
8788            case 0x3a: /* FSUB */
8789                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8790                break;
8791            case 0x3e: /* FMIN */
8792                gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8793                break;
8794            case 0x3f: /* FRSQRTS */
8795                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8796                break;
8797            case 0x5b: /* FMUL */
8798                gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
8799                break;
8800            case 0x5c: /* FCMGE */
8801                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8802                break;
8803            case 0x5d: /* FACGE */
8804                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8805                break;
8806            case 0x5f: /* FDIV */
8807                gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
8808                break;
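                /* FABD = |a - b|: no single helper, so subtract and
                 * then take the absolute value of the result.
                 */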
8809            case 0x7a: /* FABD */
8810                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8811                gen_helper_vfp_absd(tcg_res, tcg_res);
8812                break;
8813            case 0x7c: /* FCMGT */
8814                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8815                break;
8816            case 0x7d: /* FACGT */
8817                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8818                break;
8819            default:
8820                g_assert_not_reached();
8821            }
8822
8823            write_vec_element(s, tcg_res, rd, pass, MO_64);
8824
8825            tcg_temp_free_i64(tcg_res);
8826            tcg_temp_free_i64(tcg_op1);
8827            tcg_temp_free_i64(tcg_op2);
8828        } else {
8829            /* Single */
8830            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8831            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8832            TCGv_i32 tcg_res = tcg_temp_new_i32();
8833
8834            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
8835            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
8836
8837            switch (fpopcode) {
8838            case 0x39: /* FMLS */
8839                /* As usual for ARM, separate negation for fused multiply-add */
8840                gen_helper_vfp_negs(tcg_op1, tcg_op1);
8841                /* fall through */
8842            case 0x19: /* FMLA */
8843                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8844                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
8845                                       tcg_res, fpst);
8846                break;
8847            case 0x1a: /* FADD */
8848                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8849                break;
8850            case 0x1b: /* FMULX */
8851                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
8852                break;
8853            case 0x1c: /* FCMEQ */
8854                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8855                break;
8856            case 0x1e: /* FMAX */
8857                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8858                break;
8859            case 0x1f: /* FRECPS */
8860                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8861                break;
8862            case 0x18: /* FMAXNM */
8863                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8864                break;
8865            case 0x38: /* FMINNM */
8866                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8867                break;
8868            case 0x3a: /* FSUB */
8869                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
8870                break;
8871            case 0x3e: /* FMIN */
8872                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8873                break;
8874            case 0x3f: /* FRSQRTS */
8875                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8876                break;
8877            case 0x5b: /* FMUL */
8878                gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
8879                break;
8880            case 0x5c: /* FCMGE */
8881                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8882                break;
8883            case 0x5d: /* FACGE */
8884                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8885                break;
8886            case 0x5f: /* FDIV */
8887                gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
8888                break;
8889            case 0x7a: /* FABD */
8890                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
8891                gen_helper_vfp_abss(tcg_res, tcg_res);
8892                break;
8893            case 0x7c: /* FCMGT */
8894                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8895                break;
8896            case 0x7d: /* FACGT */
8897                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8898                break;
8899            default:
8900                g_assert_not_reached();
8901            }
8902
8903            if (elements == 1) {
8904                /* scalar single so clear high part */
8905                TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8906
8907                tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
8908                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
8909                tcg_temp_free_i64(tcg_tmp);
8910            } else {
8911                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8912            }
8913
8914            tcg_temp_free_i32(tcg_res);
8915            tcg_temp_free_i32(tcg_op1);
8916            tcg_temp_free_i32(tcg_op2);
8917        }
8918    }
8919
8920    tcg_temp_free_ptr(fpst);
8921
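        /* A total write of more than 8 bytes means this was a 128-bit
         * (Q) operation.
         */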
8922    clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
8923}
8924
8925/* AdvSIMD scalar three same
8926 *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
8927 * +-----+---+-----------+------+---+------+--------+---+------+------+
8928 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
8929 * +-----+---+-----------+------+---+------+--------+---+------+------+
8930 */
8931static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
8932{
8933    int rd = extract32(insn, 0, 5);
8934    int rn = extract32(insn, 5, 5);
8935    int opcode = extract32(insn, 11, 5);
8936    int rm = extract32(insn, 16, 5);
8937    int size = extract32(insn, 22, 2);
8938    bool u = extract32(insn, 29, 1);
8939    TCGv_i64 tcg_rd;
8940
8941    if (opcode >= 0x18) {
8942        /* Floating point: U, size[1] and opcode indicate operation */
8943        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
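            /* e.g. scalar FABD: opcode 0x1a with size[1] and U set
             * gives fpopcode 0x7a, matching the case below.
             */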
8944        switch (fpopcode) {
8945        case 0x1b: /* FMULX */
8946        case 0x1f: /* FRECPS */
8947        case 0x3f: /* FRSQRTS */
8948        case 0x5d: /* FACGE */
8949        case 0x7d: /* FACGT */
8950        case 0x1c: /* FCMEQ */
8951        case 0x5c: /* FCMGE */
8952        case 0x7c: /* FCMGT */
8953        case 0x7a: /* FABD */
8954            break;
8955        default:
8956            unallocated_encoding(s);
8957            return;
8958        }
8959
8960        if (!fp_access_check(s)) {
8961            return;
8962        }
8963
8964        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
8965        return;
8966    }
8967
8968    switch (opcode) {
8969    case 0x1: /* SQADD, UQADD */
8970    case 0x5: /* SQSUB, UQSUB */
8971    case 0x9: /* SQSHL, UQSHL */
8972    case 0xb: /* SQRSHL, UQRSHL */
8973        break;
8974    case 0x8: /* SSHL, USHL */
8975    case 0xa: /* SRSHL, URSHL */
8976    case 0x6: /* CMGT, CMHI */
8977    case 0x7: /* CMGE, CMHS */
8978    case 0x11: /* CMTST, CMEQ */
8979    case 0x10: /* ADD, SUB (vector) */
8980        if (size != 3) {
8981            unallocated_encoding(s);
8982            return;
8983        }
8984        break;
8985    case 0x16: /* SQDMULH, SQRDMULH (vector) */
8986        if (size != 1 && size != 2) {
8987            unallocated_encoding(s);
8988            return;
8989        }
8990        break;
8991    default:
8992        unallocated_encoding(s);
8993        return;
8994    }
8995
8996    if (!fp_access_check(s)) {
8997        return;
8998    }
8999
9000    tcg_rd = tcg_temp_new_i64();
9001
9002    if (size == 3) {
9003        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9004        TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
9005
9006        handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
9007        tcg_temp_free_i64(tcg_rn);
9008        tcg_temp_free_i64(tcg_rm);
9009    } else {
9010        /* Do a single operation on the lowest element in the vector.
9011         * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
9012         * no side effects for all these operations.
9013         * OPTME: special-purpose helpers would avoid doing some
9014         * unnecessary work in the helper for the 8 and 16 bit cases.
9015         */
9016        NeonGenTwoOpEnvFn *genenvfn;
9017        TCGv_i32 tcg_rn = tcg_temp_new_i32();
9018        TCGv_i32 tcg_rm = tcg_temp_new_i32();
9019        TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
9020
9021        read_vec_element_i32(s, tcg_rn, rn, 0, size);
9022        read_vec_element_i32(s, tcg_rm, rm, 0, size);
9023
9024        switch (opcode) {
9025        case 0x1: /* SQADD, UQADD */
9026        {
9027            static NeonGenTwoOpEnvFn * const fns[3][2] = {
9028                { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9029                { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9030                { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9031            };
9032            genenvfn = fns[size][u];
9033            break;
9034        }
9035        case 0x5: /* SQSUB, UQSUB */
9036        {
9037            static NeonGenTwoOpEnvFn * const fns[3][2] = {
9038                { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9039                { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9040                { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9041            };
9042            genenvfn = fns[size][u];
9043            break;
9044        }
9045        case 0x9: /* SQSHL, UQSHL */
9046        {
9047            static NeonGenTwoOpEnvFn * const fns[3][2] = {
9048                { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9049                { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9050                { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9051            };
9052            genenvfn = fns[size][u];
9053            break;
9054        }
9055        case 0xb: /* SQRSHL, UQRSHL */
9056        {
9057            static NeonGenTwoOpEnvFn * const fns[3][2] = {
9058                { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9059                { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9060                { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9061            };
9062            genenvfn = fns[size][u];
9063            break;
9064        }
9065        case 0x16: /* SQDMULH, SQRDMULH */
9066        {
9067            static NeonGenTwoOpEnvFn * const fns[2][2] = {
9068                { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9069                { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9070            };
9071            assert(size == 1 || size == 2);
9072            genenvfn = fns[size - 1][u];
9073            break;
9074        }
9075        default:
9076            g_assert_not_reached();
9077        }
9078
9079        genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
9080        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
9081        tcg_temp_free_i32(tcg_rd32);
9082        tcg_temp_free_i32(tcg_rn);
9083        tcg_temp_free_i32(tcg_rm);
9084    }
9085
9086    write_fp_dreg(s, rd, tcg_rd);
9087
9088    tcg_temp_free_i64(tcg_rd);
9089}
9090
9091/* AdvSIMD scalar three same FP16
9092 *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
9093 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9094 * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
9095 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
9096 * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
9097 * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
9098 */
9099static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
9100                                                  uint32_t insn)
9101{
9102    int rd = extract32(insn, 0, 5);
9103    int rn = extract32(insn, 5, 5);
9104    int opcode = extract32(insn, 11, 3);
9105    int rm = extract32(insn, 16, 5);
9106    bool u = extract32(insn, 29, 1);
9107    bool a = extract32(insn, 23, 1);
9108    int fpopcode = opcode | (a << 3) | (u << 4);
9109    TCGv_ptr fpst;
9110    TCGv_i32 tcg_op1;
9111    TCGv_i32 tcg_op2;
9112    TCGv_i32 tcg_res;
9113
9114    switch (fpopcode) {
9115    case 0x03: /* FMULX */
9116    case 0x04: /* FCMEQ (reg) */
9117    case 0x07: /* FRECPS */
9118    case 0x0f: /* FRSQRTS */
9119    case 0x14: /* FCMGE (reg) */
9120    case 0x15: /* FACGE */
9121    case 0x1a: /* FABD */
9122    case 0x1c: /* FCMGT (reg) */
9123    case 0x1d: /* FACGT */
9124        break;
9125    default:
9126        unallocated_encoding(s);
9127        return;
9128    }
9129
9130    if (!dc_isar_feature(aa64_fp16, s)) {
9131        unallocated_encoding(s);
            return;
9132    }
9133
9134    if (!fp_access_check(s)) {
9135        return;
9136    }
9137
9138    fpst = get_fpstatus_ptr(true);
9139
9140    tcg_op1 = read_fp_hreg(s, rn);
9141    tcg_op2 = read_fp_hreg(s, rm);
9142    tcg_res = tcg_temp_new_i32();
9143
9144    switch (fpopcode) {
9145    case 0x03: /* FMULX */
9146        gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
9147        break;
9148    case 0x04: /* FCMEQ (reg) */
9149        gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9150        break;
9151    case 0x07: /* FRECPS */
9152        gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9153        break;
9154    case 0x0f: /* FRSQRTS */
9155        gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9156        break;
9157    case 0x14: /* FCMGE (reg) */
9158        gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9159        break;
9160    case 0x15: /* FACGE */
9161        gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9162        break;
9163    case 0x1a: /* FABD */
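            /* Subtract, then clear the f16 sign bit (bit 15) to form
             * the absolute difference.
             */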
9164        gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
9165        tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
9166        break;
9167    case 0x1c: /* FCMGT (reg) */
9168        gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9169        break;
9170    case 0x1d: /* FACGT */
9171        gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
9172        break;
9173    default:
9174        g_assert_not_reached();
9175    }
9176
9177    write_fp_sreg(s, rd, tcg_res);
9178
9180    tcg_temp_free_i32(tcg_res);
9181    tcg_temp_free_i32(tcg_op1);
9182    tcg_temp_free_i32(tcg_op2);
9183    tcg_temp_free_ptr(fpst);
9184}
9185
9186/* AdvSIMD scalar three same extra
9187 *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
9188 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9189 * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
9190 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
9191 */
9192static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
9193                                                   uint32_t insn)
9194{
9195    int rd = extract32(insn, 0, 5);
9196    int rn = extract32(insn, 5, 5);
9197    int opcode = extract32(insn, 11, 4);
9198    int rm = extract32(insn, 16, 5);
9199    int size = extract32(insn, 22, 2);
9200    bool u = extract32(insn, 29, 1);
9201    TCGv_i32 ele1, ele2, ele3;
9202    TCGv_i64 res;
9203    bool feature;
9204
9205    switch (u * 16 + opcode) {
9206    case 0x10: /* SQRDMLAH (vector) */
9207    case 0x11: /* SQRDMLSH (vector) */
9208        if (size != 1 && size != 2) {
9209            unallocated_encoding(s);
9210            return;
9211        }
9212        feature = dc_isar_feature(aa64_rdm, s);
9213        break;
9214    default:
9215        unallocated_encoding(s);
9216        return;
9217    }
9218    if (!feature) {
9219        unallocated_encoding(s);
9220        return;
9221    }
9222    if (!fp_access_check(s)) {
9223        return;
9224    }
9225
9226    /* Do a single operation on the lowest element in the vector.
9227     * We use the standard Neon helpers and rely on 0 OP 0 == 0
9228     * with no side effects for all these operations.
9229     * OPTME: special-purpose helpers would avoid doing some
9230     * unnecessary work in the helper for the 16 bit cases.
9231     */
9232    ele1 = tcg_temp_new_i32();
9233    ele2 = tcg_temp_new_i32();
9234    ele3 = tcg_temp_new_i32();
9235
9236    read_vec_element_i32(s, ele1, rn, 0, size);
9237    read_vec_element_i32(s, ele2, rm, 0, size);
9238    read_vec_element_i32(s, ele3, rd, 0, size);
9239
9240    switch (opcode) {
9241    case 0x0: /* SQRDMLAH */
9242        if (size == 1) {
9243            gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
9244        } else {
9245            gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
9246        }
9247        break;
9248    case 0x1: /* SQRDMLSH */
9249        if (size == 1) {
9250            gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
9251        } else {
9252            gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
9253        }
9254        break;
9255    default:
9256        g_assert_not_reached();
9257    }
9258    tcg_temp_free_i32(ele1);
9259    tcg_temp_free_i32(ele2);
9260
9261    res = tcg_temp_new_i64();
9262    tcg_gen_extu_i32_i64(res, ele3);
9263    tcg_temp_free_i32(ele3);
9264
9265    write_fp_dreg(s, rd, res);
9266    tcg_temp_free_i64(res);
9267}
9268
9269static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9270                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9271                            TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9272{
9273    /* Handle 64->64 opcodes which are shared between the scalar and
9274     * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9275     * is valid in either group and also the double-precision fp ops.
9276     * The caller need only provide tcg_rmode and tcg_fpstatus if the op
9277     * requires them.
9278     */
9279    TCGCond cond;
9280
9281    switch (opcode) {
9282    case 0x4: /* CLS, CLZ */
9283        if (u) {
9284            tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9285        } else {
9286            tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9287        }
9288        break;
9289    case 0x5: /* NOT */
9290        /* This opcode is shared with CNT and RBIT but we have earlier
9291         * enforced that size == 3 if and only if this is the NOT insn.
9292         */
9293        tcg_gen_not_i64(tcg_rd, tcg_rn);
9294        break;
9295    case 0x7: /* SQABS, SQNEG */
9296        if (u) {
9297            gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
9298        } else {
9299            gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
9300        }
9301        break;
9302    case 0xa: /* CMLT */
9303        /* 64 bit integer comparison against zero, result is
9304         * test ? (2^64 - 1) : 0. We implement this using setcond (test)
9305         * and then negating, as in handle_3same_64 above.
9306         */
9307        cond = TCG_COND_LT;
9308    do_cmop:
9309        tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
9310        tcg_gen_neg_i64(tcg_rd, tcg_rd);
9311        break;
9312    case 0x8: /* CMGT, CMGE */
9313        cond = u ? TCG_COND_GE : TCG_COND_GT;
9314        goto do_cmop;
9315    case 0x9: /* CMEQ, CMLE */
9316        cond = u ? TCG_COND_LE : TCG_COND_EQ;
9317        goto do_cmop;
9318    case 0xb: /* ABS, NEG */
9319        if (u) {
9320            tcg_gen_neg_i64(tcg_rd, tcg_rn);
9321        } else {
9322            tcg_gen_abs_i64(tcg_rd, tcg_rn);
9323        }
9324        break;
9325    case 0x2f: /* FABS */
9326        gen_helper_vfp_absd(tcg_rd, tcg_rn);
9327        break;
9328    case 0x6f: /* FNEG */
9329        gen_helper_vfp_negd(tcg_rd, tcg_rn);
9330        break;
9331    case 0x7f: /* FSQRT */
9332        gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
9333        break;
9334    case 0x1a: /* FCVTNS */
9335    case 0x1b: /* FCVTMS */
9336    case 0x1c: /* FCVTAS */
9337    case 0x3a: /* FCVTPS */
9338    case 0x3b: /* FCVTZS */
9339    {
9340        TCGv_i32 tcg_shift = tcg_const_i32(0);
9341        gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9342        tcg_temp_free_i32(tcg_shift);
9343        break;
9344    }
9345    case 0x5a: /* FCVTNU */
9346    case 0x5b: /* FCVTMU */
9347    case 0x5c: /* FCVTAU */
9348    case 0x7a: /* FCVTPU */
9349    case 0x7b: /* FCVTZU */
9350    {
9351        TCGv_i32 tcg_shift = tcg_const_i32(0);
9352        gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9353        tcg_temp_free_i32(tcg_shift);
9354        break;
9355    }
9356    case 0x18: /* FRINTN */
9357    case 0x19: /* FRINTM */
9358    case 0x38: /* FRINTP */
9359    case 0x39: /* FRINTZ */
9360    case 0x58: /* FRINTA */
9361    case 0x79: /* FRINTI */
9362        gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
9363        break;
9364    case 0x59: /* FRINTX */
9365        gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
9366        break;
9367    case 0x1e: /* FRINT32Z */
9368    case 0x5e: /* FRINT32X */
9369        gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
9370        break;
9371    case 0x1f: /* FRINT64Z */
9372    case 0x5f: /* FRINT64X */
9373        gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
9374        break;
9375    default:
9376        g_assert_not_reached();
9377    }
9378}
9379
9380static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
9381                                   bool is_scalar, bool is_u, bool is_q,
9382                                   int size, int rn, int rd)
9383{
9384    bool is_double = (size == MO_64);
9385    TCGv_ptr fpst;
9386
9387    if (!fp_access_check(s)) {
9388        return;
9389    }
9390
9391    fpst = get_fpstatus_ptr(size == MO_16);
9392
9393    if (is_double) {
9394        TCGv_i64 tcg_op = tcg_temp_new_i64();
9395        TCGv_i64 tcg_zero = tcg_const_i64(0);
9396        TCGv_i64 tcg_res = tcg_temp_new_i64();
9397        NeonGenTwoDoubleOPFn *genfn;
9398        bool swap = false;
9399        int pass;
9400
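            /* FCMLT/FCMLE (zero) reuse the GT/GE helpers with the
             * operands swapped: 0 > x is x < 0, and 0 >= x is x <= 0.
             */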
9401        switch (opcode) {
9402        case 0x2e: /* FCMLT (zero) */
9403            swap = true;
9404            /* fall through */
9405        case 0x2c: /* FCMGT (zero) */
9406            genfn = gen_helper_neon_cgt_f64;
9407            break;
9408        case 0x2d: /* FCMEQ (zero) */
9409            genfn = gen_helper_neon_ceq_f64;
9410            break;
9411        case 0x6d: /* FCMLE (zero) */
9412            swap = true;
9413            /* fall through */
9414        case 0x6c: /* FCMGE (zero) */
9415            genfn = gen_helper_neon_cge_f64;
9416            break;
9417        default:
9418            g_assert_not_reached();
9419        }
9420
9421        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9422            read_vec_element(s, tcg_op, rn, pass, MO_64);
9423            if (swap) {
9424                genfn(tcg_res, tcg_zero, tcg_op, fpst);
9425            } else {
9426                genfn(tcg_res, tcg_op, tcg_zero, fpst);
9427            }
9428            write_vec_element(s, tcg_res, rd, pass, MO_64);
9429        }
9430        tcg_temp_free_i64(tcg_res);
9431        tcg_temp_free_i64(tcg_zero);
9432        tcg_temp_free_i64(tcg_op);
9433
9434        clear_vec_high(s, !is_scalar, rd);
9435    } else {
9436        TCGv_i32 tcg_op = tcg_temp_new_i32();
9437        TCGv_i32 tcg_zero = tcg_const_i32(0);
9438        TCGv_i32 tcg_res = tcg_temp_new_i32();
9439        NeonGenTwoSingleOPFn *genfn;
9440        bool swap = false;
9441        int pass, maxpasses;
9442
9443        if (size == MO_16) {
9444            switch (opcode) {
9445            case 0x2e: /* FCMLT (zero) */
9446                swap = true;
9447                /* fall through */
9448            case 0x2c: /* FCMGT (zero) */
9449                genfn = gen_helper_advsimd_cgt_f16;
9450                break;
9451            case 0x2d: /* FCMEQ (zero) */
9452                genfn = gen_helper_advsimd_ceq_f16;
9453                break;
9454            case 0x6d: /* FCMLE (zero) */
9455                swap = true;
9456                /* fall through */
9457            case 0x6c: /* FCMGE (zero) */
9458                genfn = gen_helper_advsimd_cge_f16;
9459                break;
9460            default:
9461                g_assert_not_reached();
9462            }
9463        } else {
9464            switch (opcode) {
9465            case 0x2e: /* FCMLT (zero) */
9466                swap = true;
9467                /* fall through */
9468            case 0x2c: /* FCMGT (zero) */
9469                genfn = gen_helper_neon_cgt_f32;
9470                break;
9471            case 0x2d: /* FCMEQ (zero) */
9472                genfn = gen_helper_neon_ceq_f32;
9473                break;
9474            case 0x6d: /* FCMLE (zero) */
9475                swap = true;
9476                /* fall through */
9477            case 0x6c: /* FCMGE (zero) */
9478                genfn = gen_helper_neon_cge_f32;
9479                break;
9480            default:
9481                g_assert_not_reached();
9482            }
9483        }
9484
9485        if (is_scalar) {
9486            maxpasses = 1;
9487        } else {
9488            int vector_size = 8 << is_q;
9489            maxpasses = vector_size >> size;
9490        }
9491
9492        for (pass = 0; pass < maxpasses; pass++) {
9493            read_vec_element_i32(s, tcg_op, rn, pass, size);
9494            if (swap) {
9495                genfn(tcg_res, tcg_zero, tcg_op, fpst);
9496            } else {
9497                genfn(tcg_res, tcg_op, tcg_zero, fpst);
9498            }
9499            if (is_scalar) {
9500                write_fp_sreg(s, rd, tcg_res);
9501            } else {
9502                write_vec_element_i32(s, tcg_res, rd, pass, size);
9503            }
9504        }
9505        tcg_temp_free_i32(tcg_res);
9506        tcg_temp_free_i32(tcg_zero);
9507        tcg_temp_free_i32(tcg_op);
9508        if (!is_scalar) {
9509            clear_vec_high(s, is_q, rd);
9510        }
9511    }
9512
9513    tcg_temp_free_ptr(fpst);
9514}
9515
9516static void handle_2misc_reciprocal(DisasContext *s, int opcode,
9517                                    bool is_scalar, bool is_u, bool is_q,
9518                                    int size, int rn, int rd)
9519{
9520    bool is_double = (size == 3);
9521    TCGv_ptr fpst = get_fpstatus_ptr(false);
9522
9523    if (is_double) {
9524        TCGv_i64 tcg_op = tcg_temp_new_i64();
9525        TCGv_i64 tcg_res = tcg_temp_new_i64();
9526        int pass;
9527
9528        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9529            read_vec_element(s, tcg_op, rn, pass, MO_64);
9530            switch (opcode) {
9531            case 0x3d: /* FRECPE */
9532                gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
9533                break;
9534            case 0x3f: /* FRECPX */
9535                gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
9536                break;
9537            case 0x7d: /* FRSQRTE */
9538                gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
9539                break;
9540            default:
9541                g_assert_not_reached();
9542            }
9543            write_vec_element(s, tcg_res, rd, pass, MO_64);
9544        }
9545        tcg_temp_free_i64(tcg_res);
9546        tcg_temp_free_i64(tcg_op);
9547        clear_vec_high(s, !is_scalar, rd);
9548    } else {
9549        TCGv_i32 tcg_op = tcg_temp_new_i32();
9550        TCGv_i32 tcg_res = tcg_temp_new_i32();
9551        int pass, maxpasses;
9552
9553        if (is_scalar) {
9554            maxpasses = 1;
9555        } else {
9556            maxpasses = is_q ? 4 : 2;
9557        }
9558
9559        for (pass = 0; pass < maxpasses; pass++) {
9560            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
9561
9562            switch (opcode) {
9563            case 0x3c: /* URECPE */
9564                gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
9565                break;
9566            case 0x3d: /* FRECPE */
9567                gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
9568                break;
9569            case 0x3f: /* FRECPX */
9570                gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
9571                break;
9572            case 0x7d: /* FRSQRTE */
9573                gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
9574                break;
9575            default:
9576                g_assert_not_reached();
9577            }
9578
9579            if (is_scalar) {
9580                write_fp_sreg(s, rd, tcg_res);
9581            } else {
9582                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9583            }
9584        }
9585        tcg_temp_free_i32(tcg_res);
9586        tcg_temp_free_i32(tcg_op);
9587        if (!is_scalar) {
9588            clear_vec_high(s, is_q, rd);
9589        }
9590    }
9591    tcg_temp_free_ptr(fpst);
9592}
9593
9594static void handle_2misc_narrow(DisasContext *s, bool scalar,
9595                                int opcode, bool u, bool is_q,
9596                                int size, int rn, int rd)
9597{
9598    /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
9599     * in the source becomes a size element in the destination).
9600     */
9601    int pass;
9602    TCGv_i32 tcg_res[2];
9603    int destelt = is_q ? 2 : 0;
9604    int passes = scalar ? 1 : 2;
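        /* The "2" variants (is_q) deposit the two 32-bit results into
         * elements 2 and 3, i.e. the upper half of Vd, leaving the
         * lower half intact.
         */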
9605
9606    if (scalar) {
9607        tcg_res[1] = tcg_const_i32(0);
9608    }
9609
9610    for (pass = 0; pass < passes; pass++) {
9611        TCGv_i64 tcg_op = tcg_temp_new_i64();
9612        NeonGenNarrowFn *genfn = NULL;
9613        NeonGenNarrowEnvFn *genenvfn = NULL;
9614
9615        if (scalar) {
9616            read_vec_element(s, tcg_op, rn, pass, size + 1);
9617        } else {
9618            read_vec_element(s, tcg_op, rn, pass, MO_64);
9619        }
9620        tcg_res[pass] = tcg_temp_new_i32();
9621
9622        switch (opcode) {
9623        case 0x12: /* XTN, SQXTUN */
9624        {
9625            static NeonGenNarrowFn * const xtnfns[3] = {
9626                gen_helper_neon_narrow_u8,
9627                gen_helper_neon_narrow_u16,
9628                tcg_gen_extrl_i64_i32,
9629            };
9630            static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
9631                gen_helper_neon_unarrow_sat8,
9632                gen_helper_neon_unarrow_sat16,
9633                gen_helper_neon_unarrow_sat32,
9634            };
9635            if (u) {
9636                genenvfn = sqxtunfns[size];
9637            } else {
9638                genfn = xtnfns[size];
9639            }
9640            break;
9641        }
9642        case 0x14: /* SQXTN, UQXTN */
9643        {
9644            static NeonGenNarrowEnvFn * const fns[3][2] = {
9645                { gen_helper_neon_narrow_sat_s8,
9646                  gen_helper_neon_narrow_sat_u8 },
9647                { gen_helper_neon_narrow_sat_s16,
9648                  gen_helper_neon_narrow_sat_u16 },
9649                { gen_helper_neon_narrow_sat_s32,
9650                  gen_helper_neon_narrow_sat_u32 },
9651            };
9652            genenvfn = fns[size][u];
9653            break;
9654        }
9655        case 0x16: /* FCVTN, FCVTN2 */
9656            /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
9657            if (size == 2) {
9658                gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
9659            } else {
9660                TCGv_i32 tcg_lo = tcg_temp_new_i32();
9661                TCGv_i32 tcg_hi = tcg_temp_new_i32();
9662                TCGv_ptr fpst = get_fpstatus_ptr(false);
9663                TCGv_i32 ahp = get_ahp_flag();
9664
9665                tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
9666                gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9667                gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9668                tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
9669                tcg_temp_free_i32(tcg_lo);
9670                tcg_temp_free_i32(tcg_hi);
9671                tcg_temp_free_ptr(fpst);
9672                tcg_temp_free_i32(ahp);
9673            }
9674            break;
9675        case 0x56:  /* FCVTXN, FCVTXN2 */
9676            /* 64 bit to 32 bit float conversion
9677             * with von Neumann rounding (round to odd)
9678             */
9679            assert(size == 2);
9680            gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
9681            break;
9682        default:
9683            g_assert_not_reached();
9684        }
9685
9686        if (genfn) {
9687            genfn(tcg_res[pass], tcg_op);
9688        } else if (genenvfn) {
9689            genenvfn(tcg_res[pass], cpu_env, tcg_op);
9690        }
9691
9692        tcg_temp_free_i64(tcg_op);
9693    }
9694
9695    for (pass = 0; pass < 2; pass++) {
9696        write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
9697        tcg_temp_free_i32(tcg_res[pass]);
9698    }
9699    clear_vec_high(s, is_q, rd);
9700}
9701
9702/* Remaining saturating accumulating ops */
9703static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
9704                                bool is_q, int size, int rn, int rd)
9705{
9706    bool is_double = (size == 3);
9707
9708    if (is_double) {
9709        TCGv_i64 tcg_rn = tcg_temp_new_i64();
9710        TCGv_i64 tcg_rd = tcg_temp_new_i64();
9711        int pass;
9712
9713        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9714            read_vec_element(s, tcg_rn, rn, pass, MO_64);
9715            read_vec_element(s, tcg_rd, rd, pass, MO_64);
9716
9717            if (is_u) { /* USQADD */
9718                gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9719            } else { /* SUQADD */
9720                gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9721            }
9722            write_vec_element(s, tcg_rd, rd, pass, MO_64);
9723        }
9724        tcg_temp_free_i64(tcg_rd);
9725        tcg_temp_free_i64(tcg_rn);
9726        clear_vec_high(s, !is_scalar, rd);
9727    } else {
9728        TCGv_i32 tcg_rn = tcg_temp_new_i32();
9729        TCGv_i32 tcg_rd = tcg_temp_new_i32();
9730        int pass, maxpasses;
9731
9732        if (is_scalar) {
9733            maxpasses = 1;
9734        } else {
9735            maxpasses = is_q ? 4 : 2;
9736        }
9737
9738        for (pass = 0; pass < maxpasses; pass++) {
9739            if (is_scalar) {
9740                read_vec_element_i32(s, tcg_rn, rn, pass, size);
9741                read_vec_element_i32(s, tcg_rd, rd, pass, size);
9742            } else {
9743                read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
9744                read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9745            }
9746
9747            if (is_u) { /* USQADD */
9748                switch (size) {
9749                case 0:
9750                    gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9751                    break;
9752                case 1:
9753                    gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9754                    break;
9755                case 2:
9756                    gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9757                    break;
9758                default:
9759                    g_assert_not_reached();
9760                }
9761            } else { /* SUQADD */
9762                switch (size) {
9763                case 0:
9764                    gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9765                    break;
9766                case 1:
9767                    gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9768                    break;
9769                case 2:
9770                    gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9771                    break;
9772                default:
9773                    g_assert_not_reached();
9774                }
9775            }
9776
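            /* For the scalar form, zero the whole low 64 bits of rd
             * first so that the narrower result written below ends up
             * zero-extended in the register.
             */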
9777            if (is_scalar) {
9778                TCGv_i64 tcg_zero = tcg_const_i64(0);
9779                write_vec_element(s, tcg_zero, rd, 0, MO_64);
9780                tcg_temp_free_i64(tcg_zero);
9781            }
9782            write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9783        }
9784        tcg_temp_free_i32(tcg_rd);
9785        tcg_temp_free_i32(tcg_rn);
9786        clear_vec_high(s, is_q, rd);
9787    }
9788}
9789
9790/* AdvSIMD scalar two reg misc
9791 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9792 * +-----+---+-----------+------+-----------+--------+-----+------+------+
9793 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9794 * +-----+---+-----------+------+-----------+--------+-----+------+------+
9795 */
9796static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
9797{
9798    int rd = extract32(insn, 0, 5);
9799    int rn = extract32(insn, 5, 5);
9800    int opcode = extract32(insn, 12, 5);
9801    int size = extract32(insn, 22, 2);
9802    bool u = extract32(insn, 29, 1);
9803    bool is_fcvt = false;
9804    int rmode;
9805    TCGv_i32 tcg_rmode;
9806    TCGv_ptr tcg_fpstatus;
9807
9808    switch (opcode) {
9809    case 0x3: /* USQADD / SUQADD */
9810        if (!fp_access_check(s)) {
9811            return;
9812        }
9813        handle_2misc_satacc(s, true, u, false, size, rn, rd);
9814        return;
9815    case 0x7: /* SQABS / SQNEG */
9816        break;
9817    case 0xa: /* CMLT */
9818        if (u) {
9819            unallocated_encoding(s);
9820            return;
9821        }
9822        /* fall through */
9823    case 0x8: /* CMGT, CMGE */
9824    case 0x9: /* CMEQ, CMLE */
9825    case 0xb: /* ABS, NEG */
9826        if (size != 3) {
9827            unallocated_encoding(s);
9828            return;
9829        }
9830        break;
9831    case 0x12: /* SQXTUN */
9832        if (!u) {
9833            unallocated_encoding(s);
9834            return;
9835        }
9836        /* fall through */
9837    case 0x14: /* SQXTN, UQXTN */
9838        if (size == 3) {
9839            unallocated_encoding(s);
9840            return;
9841        }
9842        if (!fp_access_check(s)) {
9843            return;
9844        }
9845        handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
9846        return;
9847    case 0xc ... 0xf:
9848    case 0x16 ... 0x1d:
9849    case 0x1f:
9850        /* Floating point: U, size[1] and opcode indicate operation;
9851         * size[0] indicates single or double precision.
9852         */
9853        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9854        size = extract32(size, 0, 1) ? 3 : 2;
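        /* e.g. FCMGT (zero) on a double: U=0, size=0b11, opcode=0xc
         * remaps to opcode 0x2c with size 3 (MO_64).
         */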
9855        switch (opcode) {
9856        case 0x2c: /* FCMGT (zero) */
9857        case 0x2d: /* FCMEQ (zero) */
9858        case 0x2e: /* FCMLT (zero) */
9859        case 0x6c: /* FCMGE (zero) */
9860        case 0x6d: /* FCMLE (zero) */
9861            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
9862            return;
9863        case 0x1d: /* SCVTF */
9864        case 0x5d: /* UCVTF */
9865        {
9866            bool is_signed = (opcode == 0x1d);
9867            if (!fp_access_check(s)) {
9868                return;
9869            }
9870            handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
9871            return;
9872        }
9873        case 0x3d: /* FRECPE */
9874        case 0x3f: /* FRECPX */
9875        case 0x7d: /* FRSQRTE */
9876            if (!fp_access_check(s)) {
9877                return;
9878            }
9879            handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
9880            return;
9881        case 0x1a: /* FCVTNS */
9882        case 0x1b: /* FCVTMS */
9883        case 0x3a: /* FCVTPS */
9884        case 0x3b: /* FCVTZS */
9885        case 0x5a: /* FCVTNU */
9886        case 0x5b: /* FCVTMU */
9887        case 0x7a: /* FCVTPU */
9888        case 0x7b: /* FCVTZU */
9889            is_fcvt = true;
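            /* Bits [5] and [0] of the remapped opcode select the rounding
             * mode: FCVTN* -> FPROUNDING_TIEEVEN (0), FCVTP* -> POSINF (1),
             * FCVTM* -> NEGINF (2), FCVTZ* -> ZERO (3).
             */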
9890            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9891            break;
9892        case 0x1c: /* FCVTAS */
9893        case 0x5c: /* FCVTAU */
9894            /* TIEAWAY doesn't fit in the usual rounding mode encoding */
9895            is_fcvt = true;
9896            rmode = FPROUNDING_TIEAWAY;
9897            break;
9898        case 0x56: /* FCVTXN, FCVTXN2 */
9899            if (size == 2) {
9900                unallocated_encoding(s);
9901                return;
9902            }
9903            if (!fp_access_check(s)) {
9904                return;
9905            }
9906            handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
9907            return;
9908        default:
9909            unallocated_encoding(s);
9910            return;
9911        }
9912        break;
9913    default:
9914        unallocated_encoding(s);
9915        return;
9916    }
9917
9918    if (!fp_access_check(s)) {
9919        return;
9920    }
9921
9922    if (is_fcvt) {
9923        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
9924        tcg_fpstatus = get_fpstatus_ptr(false);
9925        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
9926    } else {
9927        tcg_rmode = NULL;
9928        tcg_fpstatus = NULL;
9929    }
9930
9931    if (size == 3) {
9932        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
9933        TCGv_i64 tcg_rd = tcg_temp_new_i64();
9934
9935        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
9936        write_fp_dreg(s, rd, tcg_rd);
9937        tcg_temp_free_i64(tcg_rd);
9938        tcg_temp_free_i64(tcg_rn);
9939    } else {
9940        TCGv_i32 tcg_rn = tcg_temp_new_i32();
9941        TCGv_i32 tcg_rd = tcg_temp_new_i32();
9942
9943        read_vec_element_i32(s, tcg_rn, rn, 0, size);
9944
9945        switch (opcode) {
9946        case 0x7: /* SQABS, SQNEG */
9947        {
9948            NeonGenOneOpEnvFn *genfn;
9949            static NeonGenOneOpEnvFn * const fns[3][2] = {
9950                { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
9951                { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
9952                { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
9953            };
9954            genfn = fns[size][u];
9955            genfn(tcg_rd, cpu_env, tcg_rn);
9956            break;
9957        }
9958        case 0x1a: /* FCVTNS */
9959        case 0x1b: /* FCVTMS */
9960        case 0x1c: /* FCVTAS */
9961        case 0x3a: /* FCVTPS */
9962        case 0x3b: /* FCVTZS */
9963        {
9964            TCGv_i32 tcg_shift = tcg_const_i32(0);
9965            gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9966            tcg_temp_free_i32(tcg_shift);
9967            break;
9968        }
9969        case 0x5a: /* FCVTNU */
9970        case 0x5b: /* FCVTMU */
9971        case 0x5c: /* FCVTAU */
9972        case 0x7a: /* FCVTPU */
9973        case 0x7b: /* FCVTZU */
9974        {
9975            TCGv_i32 tcg_shift = tcg_const_i32(0);
9976            gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
9977            tcg_temp_free_i32(tcg_shift);
9978            break;
9979        }
9980        default:
9981            g_assert_not_reached();
9982        }
9983
9984        write_fp_sreg(s, rd, tcg_rd);
9985        tcg_temp_free_i32(tcg_rd);
9986        tcg_temp_free_i32(tcg_rn);
9987    }
9988
9989    if (is_fcvt) {
9990        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
9991        tcg_temp_free_i32(tcg_rmode);
9992        tcg_temp_free_ptr(tcg_fpstatus);
9993    }
9994}
9995
9996/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
9997static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
9998                                 int immh, int immb, int opcode, int rn, int rd)
9999{
10000    int size = 32 - clz32(immh) - 1;
10001    int immhb = immh << 3 | immb;
10002    int shift = 2 * (8 << size) - immhb;
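    /* Decode example: immh:immb = 0001:101 gives esize 8 (size 0) and
     * shift = 2 * 8 - 13 = 3; right shifts of 1..esize are encodable.
     */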
10003    bool accumulate = false;
10004    int dsize = is_q ? 128 : 64;
10005    int esize = 8 << size;
10006    int elements = dsize/esize;
10007    TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
10008    TCGv_i64 tcg_rn = new_tmp_a64(s);
10009    TCGv_i64 tcg_rd = new_tmp_a64(s);
10010    TCGv_i64 tcg_round;
10011    uint64_t round_const;
10012    int i;
10013
10014    if (extract32(immh, 3, 1) && !is_q) {
10015        unallocated_encoding(s);
10016        return;
10017    }
10018    tcg_debug_assert(size <= 3);
10019
10020    if (!fp_access_check(s)) {
10021        return;
10022    }
10023
10024    switch (opcode) {
10025    case 0x02: /* SSRA / USRA (accumulate) */
10026        if (is_u) {
10027            /* Shift count same as element size produces zero to add.  */
10028            if (shift == 8 << size) {
10029                goto done;
10030            }
10031            gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]);
10032        } else {
10033            /* Shift count same as element size produces all sign to add.  */
10034            if (shift == 8 << size) {
10035                shift -= 1;
10036            }
10037            gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]);
10038        }
10039        return;
10040    case 0x08: /* SRI */
10041        /* Shift count same as element size is valid but does nothing.  */
10042        if (shift == 8 << size) {
10043            goto done;
10044        }
10045        gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]);
10046        return;
10047
10048    case 0x00: /* SSHR / USHR */
10049        if (is_u) {
10050            if (shift == 8 << size) {
10051                /* Shift count the same size as element size produces zero.  */
10052                tcg_gen_gvec_dup8i(vec_full_reg_offset(s, rd),
10053                                   is_q ? 16 : 8, vec_full_reg_size(s), 0);
10054            } else {
10055                gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size);
10056            }
10057        } else {
10058            /* Shift count the same size as element size produces all sign.  */
10059            if (shift == 8 << size) {
10060                shift -= 1;
10061            }
10062            gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_sari, size);
10063        }
10064        return;
10065
10066    case 0x04: /* SRSHR / URSHR (rounding) */
10067        break;
10068    case 0x06: /* SRSRA / URSRA (accum + rounding) */
10069        accumulate = true;
10070        break;
10071    default:
10072        g_assert_not_reached();
10073    }
10074
10075    round_const = 1ULL << (shift - 1);
10076    tcg_round = tcg_const_i64(round_const);
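    /* handle_shri_with_rndacc adds 1 << (shift - 1), half the weight of
     * the discarded bits, to round the result to nearest.
     */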
10077
10078    for (i = 0; i < elements; i++) {
10079        read_vec_element(s, tcg_rn, rn, i, memop);
10080        if (accumulate) {
10081            read_vec_element(s, tcg_rd, rd, i, memop);
10082        }
10083
10084        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10085                                accumulate, is_u, size, shift);
10086
10087        write_vec_element(s, tcg_rd, rd, i, size);
10088    }
10089    tcg_temp_free_i64(tcg_round);
10090
10091 done:
10092    clear_vec_high(s, is_q, rd);
10093}
10094
10095/* SHL/SLI - Vector shift left */
10096static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
10097                                 int immh, int immb, int opcode, int rn, int rd)
10098{
10099    int size = 32 - clz32(immh) - 1;
10100    int immhb = immh << 3 | immb;
10101    int shift = immhb - (8 << size);
10102
10103    /* Range of size is limited by decode: immh is a non-zero 4 bit field */
10104    assert(size >= 0 && size <= 3);
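    /* Example: immh:immb = 0100:011 gives esize 32 (size 2) and a left
     * shift of 35 - 32 = 3; left shifts of 0..esize-1 are encodable.
     */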
10105
10106    if (extract32(immh, 3, 1) && !is_q) {
10107        unallocated_encoding(s);
10108        return;
10109    }
10110
10111    if (!fp_access_check(s)) {
10112        return;
10113    }
10114
10115    if (insert) {
10116        gen_gvec_op2i(s, is_q, rd, rn, shift, &sli_op[size]);
10117    } else {
10118        gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
10119    }
10120}
10121
10122/* SSHLL/USHLL - Vector shift left with widening */
10123static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
10124                                 int immh, int immb, int opcode, int rn, int rd)
10125{
10126    int size = 32 - clz32(immh) - 1;
10127    int immhb = immh << 3 | immb;
10128    int shift = immhb - (8 << size);
10129    int dsize = 64;
10130    int esize = 8 << size;
10131    int elements = dsize/esize;
10132    TCGv_i64 tcg_rn = new_tmp_a64(s);
10133    TCGv_i64 tcg_rd = new_tmp_a64(s);
10134    int i;
10135
10136    if (size >= 3) {
10137        unallocated_encoding(s);
10138        return;
10139    }
10140
10141    if (!fp_access_check(s)) {
10142        return;
10143    }
10144
10145    /* For the LL variants the store is wider than the load, so if
10146     * rd == rn we would overwrite parts of our input before consuming
10147     * them; load everything up front and use shifts in the main loop.
10148     */
10149    read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
10150
10151    for (i = 0; i < elements; i++) {
10152        tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
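        /* The option value size | (!is_u << 2) selects UXT{B,H,W} for
         * USHLL and SXT{B,H,W} for SSHLL in ext_and_shift_reg().
         */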
10153        ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
10154        tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
10155        write_vec_element(s, tcg_rd, rd, i, size + 1);
10156    }
10157}
10158
10159/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
10160static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
10161                                 int immh, int immb, int opcode, int rn, int rd)
10162{
10163    int immhb = immh << 3 | immb;
10164    int size = 32 - clz32(immh) - 1;
10165    int dsize = 64;
10166    int esize = 8 << size;
10167    int elements = dsize/esize;
10168    int shift = (2 * esize) - immhb;
10169    bool round = extract32(opcode, 0, 1);
10170    TCGv_i64 tcg_rn, tcg_rd, tcg_final;
10171    TCGv_i64 tcg_round;
10172    int i;
10173
10174    if (extract32(immh, 3, 1)) {
10175        unallocated_encoding(s);
10176        return;
10177    }
10178
10179    if (!fp_access_check(s)) {
10180        return;
10181    }
10182
10183    tcg_rn = tcg_temp_new_i64();
10184    tcg_rd = tcg_temp_new_i64();
10185    tcg_final = tcg_temp_new_i64();
10186    read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
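    /* The narrowed results are all deposited into tcg_final, which is
     * then written back as a single 64-bit element: the low half of rd
     * for SHRN/RSHRN, or the high half (preserving the existing low
     * half) for the "2" variants.
     */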
10187
10188    if (round) {
10189        uint64_t round_const = 1ULL << (shift - 1);
10190        tcg_round = tcg_const_i64(round_const);
10191    } else {
10192        tcg_round = NULL;
10193    }
10194
10195    for (i = 0; i < elements; i++) {
10196        read_vec_element(s, tcg_rn, rn, i, size+1);
10197        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
10198                                false, true, size+1, shift);
10199
10200        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
10201    }
10202
10203    if (!is_q) {
10204        write_vec_element(s, tcg_final, rd, 0, MO_64);
10205    } else {
10206        write_vec_element(s, tcg_final, rd, 1, MO_64);
10207    }
10208    if (round) {
10209        tcg_temp_free_i64(tcg_round);
10210    }
10211    tcg_temp_free_i64(tcg_rn);
10212    tcg_temp_free_i64(tcg_rd);
10213    tcg_temp_free_i64(tcg_final);
10214
10215    clear_vec_high(s, is_q, rd);
10216}
10217
10219/* AdvSIMD shift by immediate
10220 *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
10221 * +---+---+---+-------------+------+------+--------+---+------+------+
10222 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
10223 * +---+---+---+-------------+------+------+--------+---+------+------+
10224 */
10225static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
10226{
10227    int rd = extract32(insn, 0, 5);
10228    int rn = extract32(insn, 5, 5);
10229    int opcode = extract32(insn, 11, 5);
10230    int immb = extract32(insn, 16, 3);
10231    int immh = extract32(insn, 19, 4);
10232    bool is_u = extract32(insn, 29, 1);
10233    bool is_q = extract32(insn, 30, 1);
10234
10235    switch (opcode) {
10236    case 0x08: /* SRI */
10237        if (!is_u) {
10238            unallocated_encoding(s);
10239            return;
10240        }
10241        /* fall through */
10242    case 0x00: /* SSHR / USHR */
10243    case 0x02: /* SSRA / USRA (accumulate) */
10244    case 0x04: /* SRSHR / URSHR (rounding) */
10245    case 0x06: /* SRSRA / URSRA (accum + rounding) */
10246        handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
10247        break;
10248    case 0x0a: /* SHL / SLI */
10249        handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10250        break;
10251    case 0x10: /* SHRN / SQSHRUN */
10252    case 0x11: /* RSHRN / SQRSHRUN */
10253        if (is_u) {
10254            handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
10255                                   opcode, rn, rd);
10256        } else {
10257            handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
10258        }
10259        break;
10260    case 0x12: /* SQSHRN / UQSHRN */
10261    case 0x13: /* SQRSHRN / UQRSHRN */
10262        handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
10263                               opcode, rn, rd);
10264        break;
10265    case 0x14: /* SSHLL / USHLL */
10266        handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
10267        break;
10268    case 0x1c: /* SCVTF / UCVTF */
10269        handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
10270                                     opcode, rn, rd);
10271        break;
10272    case 0xc: /* SQSHLU */
10273        if (!is_u) {
10274            unallocated_encoding(s);
10275            return;
10276        }
10277        handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
10278        break;
10279    case 0xe: /* SQSHL, UQSHL */
10280        handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
10281        break;
10282    case 0x1f: /* FCVTZS/ FCVTZU */
10283        handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
10284        return;
10285    default:
10286        unallocated_encoding(s);
10287        return;
10288    }
10289}
10290
10291/* Generate code to do a "long" addition or subtraction, i.e. one done in
10292 * TCGv_i64 on vector lanes twice the width specified by size.
10293 */
10294static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
10295                          TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
10296{
10297    static NeonGenTwo64OpFn * const fns[3][2] = {
10298        { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
10299        { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
10300        { tcg_gen_add_i64, tcg_gen_sub_i64 },
10301    };
10302    NeonGenTwo64OpFn *genfn;
10303    assert(size < 3);
10304
10305    genfn = fns[size][is_sub];
10306    genfn(tcg_res, tcg_op1, tcg_op2);
10307}
10308
10309static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
10310                                int opcode, int rd, int rn, int rm)
10311{
10312    /* 3-reg-different widening insns: 64 x 64 -> 128 */
10313    TCGv_i64 tcg_res[2];
10314    int pass, accop;
10315
10316    tcg_res[0] = tcg_temp_new_i64();
10317    tcg_res[1] = tcg_temp_new_i64();
10318
10319    /* Does this op do an adding accumulate, a subtracting accumulate,
10320     * or no accumulate at all?
10321     */
10322    switch (opcode) {
10323    case 5:
10324    case 8:
10325    case 9:
10326        accop = 1;
10327        break;
10328    case 10:
10329    case 11:
10330        accop = -1;
10331        break;
10332    default:
10333        accop = 0;
10334        break;
10335    }
10336
10337    if (accop != 0) {
10338        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
10339        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
10340    }
10341
10342    /* size == 2 means two 32x32->64 operations; this is worth special
10343     * casing because we can generally handle it inline.
10344     */
10345    if (size == 2) {
10346        for (pass = 0; pass < 2; pass++) {
10347            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10348            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10349            TCGv_i64 tcg_passres;
10350            TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
10351
10352            int elt = pass + is_q * 2;
10353
10354            read_vec_element(s, tcg_op1, rn, elt, memop);
10355            read_vec_element(s, tcg_op2, rm, elt, memop);
10356
10357            if (accop == 0) {
10358                tcg_passres = tcg_res[pass];
10359            } else {
10360                tcg_passres = tcg_temp_new_i64();
10361            }
10362
10363            switch (opcode) {
10364            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10365                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
10366                break;
10367            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10368                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
10369                break;
10370            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10371            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10372            {
10373                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
10374                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
10375
10376                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
10377                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
10378                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
10379                                    tcg_passres,
10380                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
10381                tcg_temp_free_i64(tcg_tmp1);
10382                tcg_temp_free_i64(tcg_tmp2);
10383                break;
10384            }
10385            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10386            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10387            case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10388                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
10389                break;
10390            case 9: /* SQDMLAL, SQDMLAL2 */
10391            case 11: /* SQDMLSL, SQDMLSL2 */
10392            case 13: /* SQDMULL, SQDMULL2 */
10393                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
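                /* The saturating addition of the product to itself
                 * implements the "doubling" of SQDMULL, setting QC on
                 * overflow.
                 */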
10394                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10395                                                  tcg_passres, tcg_passres);
10396                break;
10397            default:
10398                g_assert_not_reached();
10399            }
10400
10401            if (opcode == 9 || opcode == 11) {
10402                /* saturating accumulate ops */
10403                if (accop < 0) {
10404                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
10405                }
10406                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10407                                                  tcg_res[pass], tcg_passres);
10408            } else if (accop > 0) {
10409                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10410            } else if (accop < 0) {
10411                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10412            }
10413
10414            if (accop != 0) {
10415                tcg_temp_free_i64(tcg_passres);
10416            }
10417
10418            tcg_temp_free_i64(tcg_op1);
10419            tcg_temp_free_i64(tcg_op2);
10420        }
10421    } else {
10422        /* size 0 or 1, generally helper functions */
10423        for (pass = 0; pass < 2; pass++) {
10424            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10425            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10426            TCGv_i64 tcg_passres;
10427            int elt = pass + is_q * 2;
10428
10429            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
10430            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
10431
10432            if (accop == 0) {
10433                tcg_passres = tcg_res[pass];
10434            } else {
10435                tcg_passres = tcg_temp_new_i64();
10436            }
10437
10438            switch (opcode) {
10439            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10440            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10441            {
10442                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
10443                static NeonGenWidenFn * const widenfns[2][2] = {
10444                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10445                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10446                };
10447                NeonGenWidenFn *widenfn = widenfns[size][is_u];
10448
10449                widenfn(tcg_op2_64, tcg_op2);
10450                widenfn(tcg_passres, tcg_op1);
10451                gen_neon_addl(size, (opcode == 2), tcg_passres,
10452                              tcg_passres, tcg_op2_64);
10453                tcg_temp_free_i64(tcg_op2_64);
10454                break;
10455            }
10456            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10457            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10458                if (size == 0) {
10459                    if (is_u) {
10460                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
10461                    } else {
10462                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
10463                    }
10464                } else {
10465                    if (is_u) {
10466                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
10467                    } else {
10468                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
10469                    }
10470                }
10471                break;
10472            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10473            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10474            case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10475                if (size == 0) {
10476                    if (is_u) {
10477                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
10478                    } else {
10479                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
10480                    }
10481                } else {
10482                    if (is_u) {
10483                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
10484                    } else {
10485                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10486                    }
10487                }
10488                break;
10489            case 9: /* SQDMLAL, SQDMLAL2 */
10490            case 11: /* SQDMLSL, SQDMLSL2 */
10491            case 13: /* SQDMULL, SQDMULL2 */
10492                assert(size == 1);
10493                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10494                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10495                                                  tcg_passres, tcg_passres);
10496                break;
10497            case 14: /* PMULL */
10498                assert(size == 0);
10499                gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
10500                break;
10501            default:
10502                g_assert_not_reached();
10503            }
10504            tcg_temp_free_i32(tcg_op1);
10505            tcg_temp_free_i32(tcg_op2);
10506
10507            if (accop != 0) {
10508                if (opcode == 9 || opcode == 11) {
10509                    /* saturating accumulate ops */
10510                    if (accop < 0) {
10511                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10512                    }
10513                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10514                                                      tcg_res[pass],
10515                                                      tcg_passres);
10516                } else {
10517                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
10518                                  tcg_res[pass], tcg_passres);
10519                }
10520                tcg_temp_free_i64(tcg_passres);
10521            }
10522        }
10523    }
10524
10525    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
10526    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
10527    tcg_temp_free_i64(tcg_res[0]);
10528    tcg_temp_free_i64(tcg_res[1]);
10529}
10530
10531static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
10532                            int opcode, int rd, int rn, int rm)
10533{
10534    TCGv_i64 tcg_res[2];
10535    int part = is_q ? 2 : 0;
10536    int pass;
10537
10538    for (pass = 0; pass < 2; pass++) {
10539        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10540        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10541        TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
10542        static NeonGenWidenFn * const widenfns[3][2] = {
10543            { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10544            { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10545            { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
10546        };
10547        NeonGenWidenFn *widenfn = widenfns[size][is_u];
10548
10549        read_vec_element(s, tcg_op1, rn, pass, MO_64);
10550        read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
10551        widenfn(tcg_op2_wide, tcg_op2);
10552        tcg_temp_free_i32(tcg_op2);
10553        tcg_res[pass] = tcg_temp_new_i64();
10554        gen_neon_addl(size, (opcode == 3),
10555                      tcg_res[pass], tcg_op1, tcg_op2_wide);
10556        tcg_temp_free_i64(tcg_op1);
10557        tcg_temp_free_i64(tcg_op2_wide);
10558    }
10559
10560    for (pass = 0; pass < 2; pass++) {
10561        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10562        tcg_temp_free_i64(tcg_res[pass]);
10563    }
10564}
10565
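/* Narrow to the high 32 bits with round-to-nearest: 1 << 31 is half the
 * weight of the 32 discarded low bits.
 */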
10566static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
10567{
10568    tcg_gen_addi_i64(in, in, 1U << 31);
10569    tcg_gen_extrh_i64_i32(res, in);
10570}
10571
10572static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
10573                                 int opcode, int rd, int rn, int rm)
10574{
10575    TCGv_i32 tcg_res[2];
10576    int part = is_q ? 2 : 0;
10577    int pass;
10578
10579    for (pass = 0; pass < 2; pass++) {
10580        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10581        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10582        TCGv_i64 tcg_wideres = tcg_temp_new_i64();
10583        static NeonGenNarrowFn * const narrowfns[3][2] = {
10584            { gen_helper_neon_narrow_high_u8,
10585              gen_helper_neon_narrow_round_high_u8 },
10586            { gen_helper_neon_narrow_high_u16,
10587              gen_helper_neon_narrow_round_high_u16 },
10588            { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
10589        };
10590        NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
10591
10592        read_vec_element(s, tcg_op1, rn, pass, MO_64);
10593        read_vec_element(s, tcg_op2, rm, pass, MO_64);
10594
10595        gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
10596
10597        tcg_temp_free_i64(tcg_op1);
10598        tcg_temp_free_i64(tcg_op2);
10599
10600        tcg_res[pass] = tcg_temp_new_i32();
10601        gennarrow(tcg_res[pass], tcg_wideres);
10602        tcg_temp_free_i64(tcg_wideres);
10603    }
10604
10605    for (pass = 0; pass < 2; pass++) {
10606        write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10607        tcg_temp_free_i32(tcg_res[pass]);
10608    }
10609    clear_vec_high(s, is_q, rd);
10610}
10611
10612static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
10613{
10614    /* PMULL of 64 x 64 -> 128 is an odd special case because it
10615     * is the only three-reg-diff instruction which produces a
10616     * 128-bit wide result from a single operation. However since
10617     * it's possible to calculate the two halves more or less
10618     * separately we just use two helper calls.
10619     */
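    /* Each helper computes one 64-bit half of the 128-bit carry-less
     * (polynomial over GF(2)) product.
     */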
10620    TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10621    TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10622    TCGv_i64 tcg_res = tcg_temp_new_i64();
10623
10624    read_vec_element(s, tcg_op1, rn, is_q, MO_64);
10625    read_vec_element(s, tcg_op2, rm, is_q, MO_64);
10626    gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
10627    write_vec_element(s, tcg_res, rd, 0, MO_64);
10628    gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
10629    write_vec_element(s, tcg_res, rd, 1, MO_64);
10630
10631    tcg_temp_free_i64(tcg_op1);
10632    tcg_temp_free_i64(tcg_op2);
10633    tcg_temp_free_i64(tcg_res);
10634}
10635
10636/* AdvSIMD three different
10637 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
10638 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10639 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
10640 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10641 */
10642static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
10643{
10644    /* Instructions in this group fall into three basic classes
10645     * (in each case with the operation working on each element in
10646     * the input vectors):
10647     * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10648     *     128 bit input)
10649     * (2) wide 64 x 128 -> 128
10650     * (3) narrowing 128 x 128 -> 64
10651     * Here we do initial decode, catch unallocated cases and
10652     * dispatch to separate functions for each class.
10653     */
10654    int is_q = extract32(insn, 30, 1);
10655    int is_u = extract32(insn, 29, 1);
10656    int size = extract32(insn, 22, 2);
10657    int opcode = extract32(insn, 12, 4);
10658    int rm = extract32(insn, 16, 5);
10659    int rn = extract32(insn, 5, 5);
10660    int rd = extract32(insn, 0, 5);
10661
10662    switch (opcode) {
10663    case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10664    case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10665        /* 64 x 128 -> 128 */
10666        if (size == 3) {
10667            unallocated_encoding(s);
10668            return;
10669        }
10670        if (!fp_access_check(s)) {
10671            return;
10672        }
10673        handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
10674        break;
10675    case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10676    case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10677        /* 128 x 128 -> 64 */
10678        if (size == 3) {
10679            unallocated_encoding(s);
10680            return;
10681        }
10682        if (!fp_access_check(s)) {
10683            return;
10684        }
10685        handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
10686        break;
10687    case 14: /* PMULL, PMULL2 */
10688        if (is_u || size == 1 || size == 2) {
10689            unallocated_encoding(s);
10690            return;
10691        }
10692        if (size == 3) {
10693            if (!dc_isar_feature(aa64_pmull, s)) {
10694                unallocated_encoding(s);
10695                return;
10696            }
10697            if (!fp_access_check(s)) {
10698                return;
10699            }
10700            handle_pmull_64(s, is_q, rd, rn, rm);
10701            return;
10702        }
10703        goto is_widening;
10704    case 9: /* SQDMLAL, SQDMLAL2 */
10705    case 11: /* SQDMLSL, SQDMLSL2 */
10706    case 13: /* SQDMULL, SQDMULL2 */
10707        if (is_u || size == 0) {
10708            unallocated_encoding(s);
10709            return;
10710        }
10711        /* fall through */
10712    case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10713    case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10714    case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10715    case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10716    case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10717    case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10718    case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10719        /* 64 x 64 -> 128 */
10720        if (size == 3) {
10721            unallocated_encoding(s);
10722            return;
10723        }
10724    is_widening:
10725        if (!fp_access_check(s)) {
10726            return;
10727        }
10728
10729        handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
10730        break;
10731    default:
10732        /* opcode 15 not allocated */
10733        unallocated_encoding(s);
10734        break;
10735    }
10736}
10737
10738/* Logic op (opcode == 3) subgroup of C3.6.16. */
10739static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10740{
10741    int rd = extract32(insn, 0, 5);
10742    int rn = extract32(insn, 5, 5);
10743    int rm = extract32(insn, 16, 5);
10744    int size = extract32(insn, 22, 2);
10745    bool is_u = extract32(insn, 29, 1);
10746    bool is_q = extract32(insn, 30, 1);
10747
10748    if (!fp_access_check(s)) {
10749        return;
10750    }
10751
10752    switch (size + 4 * is_u) {
10753    case 0: /* AND */
10754        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10755        return;
10756    case 1: /* BIC */
10757        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10758        return;
10759    case 2: /* ORR */
10760        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10761        return;
10762    case 3: /* ORN */
10763        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10764        return;
10765    case 4: /* EOR */
10766        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10767        return;
10768
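    /* BSL, BIT and BIF are all forms of bitsel(mask, if_true, if_false);
     * only the roles of the three source registers differ.
     */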
10769    case 5: /* BSL bitwise select */
10770        gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
10771        return;
10772    case 6: /* BIT, bitwise insert if true */
10773        gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
10774        return;
10775    case 7: /* BIF, bitwise insert if false */
10776        gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
10777        return;
10778
10779    default:
10780        g_assert_not_reached();
10781    }
10782}
10783
10784/* Pairwise op subgroup of C3.6.16.
10785 *
10786 * This is called directly for the integer ops, and from
10787 * disas_simd_3same_float for the float pairwise operations, where the
10788 * opcode and size are calculated differently. */
10789static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
10790                                   int size, int rn, int rm, int rd)
10791{
10792    TCGv_ptr fpst;
10793    int pass;
10794
10795    if (!fp_access_check(s)) {
10796        return;
10797    }
10798
10799    /* Floating point operations need fpst; allocate it only once the
10800     * FP access check has succeeded, so the temp cannot leak. */
10801    if (opcode >= 0x58) {
10802        fpst = get_fpstatus_ptr(false);
10803    } else {
10804        fpst = NULL;
10805    }
10805
10806    /* These operations work on the concatenated rm:rn, with each pair of
10807     * adjacent elements being operated on to produce an element in the result.
10808     */
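    /* e.g. ADDP with size == 3: rd[0] = rn[1] + rn[0] and
     * rd[1] = rm[1] + rm[0].  Results are buffered in tcg_res, so rd
     * may alias rn or rm.
     */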
10809    if (size == 3) {
10810        TCGv_i64 tcg_res[2];
10811
10812        for (pass = 0; pass < 2; pass++) {
10813            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10814            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10815            int passreg = (pass == 0) ? rn : rm;
10816
10817            read_vec_element(s, tcg_op1, passreg, 0, MO_64);
10818            read_vec_element(s, tcg_op2, passreg, 1, MO_64);
10819            tcg_res[pass] = tcg_temp_new_i64();
10820
10821            switch (opcode) {
10822            case 0x17: /* ADDP */
10823                tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
10824                break;
10825            case 0x58: /* FMAXNMP */
10826                gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10827                break;
10828            case 0x5a: /* FADDP */
10829                gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10830                break;
10831            case 0x5e: /* FMAXP */
10832                gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10833                break;
10834            case 0x78: /* FMINNMP */
10835                gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10836                break;
10837            case 0x7e: /* FMINP */
10838                gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10839                break;
10840            default:
10841                g_assert_not_reached();
10842            }
10843
10844            tcg_temp_free_i64(tcg_op1);
10845            tcg_temp_free_i64(tcg_op2);
10846        }
10847
10848        for (pass = 0; pass < 2; pass++) {
10849            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10850            tcg_temp_free_i64(tcg_res[pass]);
10851        }
10852    } else {
10853        int maxpass = is_q ? 4 : 2;
10854        TCGv_i32 tcg_res[4];
10855
10856        for (pass = 0; pass < maxpass; pass++) {
10857            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10858            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10859            NeonGenTwoOpFn *genfn = NULL;
10860            int passreg = pass < (maxpass / 2) ? rn : rm;
10861            int passelt = (is_q && (pass & 1)) ? 2 : 0;
10862
10863            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
10864            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
10865            tcg_res[pass] = tcg_temp_new_i32();
10866
10867            switch (opcode) {
10868            case 0x17: /* ADDP */
10869            {
10870                static NeonGenTwoOpFn * const fns[3] = {
10871                    gen_helper_neon_padd_u8,
10872                    gen_helper_neon_padd_u16,
10873                    tcg_gen_add_i32,
10874                };
10875                genfn = fns[size];
10876                break;
10877            }
10878            case 0x14: /* SMAXP, UMAXP */
10879            {
10880                static NeonGenTwoOpFn * const fns[3][2] = {
10881                    { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
10882                    { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
10883                    { tcg_gen_smax_i32, tcg_gen_umax_i32 },
10884                };
10885                genfn = fns[size][u];
10886                break;
10887            }
10888            case 0x15: /* SMINP, UMINP */
10889            {
10890                static NeonGenTwoOpFn * const fns[3][2] = {
10891                    { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
10892                    { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
10893                    { tcg_gen_smin_i32, tcg_gen_umin_i32 },
10894                };
10895                genfn = fns[size][u];
10896                break;
10897            }
10898            /* The FP operations are all on single floats (32-bit) */
10899            case 0x58: /* FMAXNMP */
10900                gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10901                break;
10902            case 0x5a: /* FADDP */
10903                gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10904                break;
10905            case 0x5e: /* FMAXP */
10906                gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10907                break;
10908            case 0x78: /* FMINNMP */
10909                gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10910                break;
10911            case 0x7e: /* FMINP */
10912                gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10913                break;
10914            default:
10915                g_assert_not_reached();
10916            }
10917
10918            /* FP ops were generated inline above; call the integer op now */
10919            if (genfn) {
10920                genfn(tcg_res[pass], tcg_op1, tcg_op2);
10921            }
10922
10923            tcg_temp_free_i32(tcg_op1);
10924            tcg_temp_free_i32(tcg_op2);
10925        }
10926
10927        for (pass = 0; pass < maxpass; pass++) {
10928            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10929            tcg_temp_free_i32(tcg_res[pass]);
10930        }
10931        clear_vec_high(s, is_q, rd);
10932    }
10933
10934    if (fpst) {
10935        tcg_temp_free_ptr(fpst);
10936    }
10937}
10938
10939/* Floating point op subgroup of C3.6.16. */
10940static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
10941{
10942    /* For floating point ops, the U, size[1] and opcode bits
10943     * together indicate the operation. size[0] indicates single
10944     * or double.
10945     */
10946    int fpopcode = extract32(insn, 11, 5)
10947        | (extract32(insn, 23, 1) << 5)
10948        | (extract32(insn, 29, 1) << 6);
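    /* e.g. FADD (U=0, size[1]=0, opcode 0x1a) stays 0x1a, while
     * FABD (U=1, size[1]=1, opcode 0x1a) becomes 0x7a.
     */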
10949    int is_q = extract32(insn, 30, 1);
10950    int size = extract32(insn, 22, 1);
10951    int rm = extract32(insn, 16, 5);
10952    int rn = extract32(insn, 5, 5);
10953    int rd = extract32(insn, 0, 5);
10954
10955    int datasize = is_q ? 128 : 64;
10956    int esize = 32 << size;
10957    int elements = datasize / esize;
10958
10959    if (size == 1 && !is_q) {
10960        unallocated_encoding(s);
10961        return;
10962    }
10963
10964    switch (fpopcode) {
10965    case 0x58: /* FMAXNMP */
10966    case 0x5a: /* FADDP */
10967    case 0x5e: /* FMAXP */
10968    case 0x78: /* FMINNMP */
10969    case 0x7e: /* FMINP */
10970        if (size && !is_q) {
10971            unallocated_encoding(s);
10972            return;
10973        }
10974        handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
10975                               rn, rm, rd);
10976        return;
10977    case 0x1b: /* FMULX */
10978    case 0x1f: /* FRECPS */
10979    case 0x3f: /* FRSQRTS */
10980    case 0x5d: /* FACGE */
10981    case 0x7d: /* FACGT */
10982    case 0x19: /* FMLA */
10983    case 0x39: /* FMLS */
10984    case 0x18: /* FMAXNM */
10985    case 0x1a: /* FADD */
10986    case 0x1c: /* FCMEQ */
10987    case 0x1e: /* FMAX */
10988    case 0x38: /* FMINNM */
10989    case 0x3a: /* FSUB */
10990    case 0x3e: /* FMIN */
10991    case 0x5b: /* FMUL */
10992    case 0x5c: /* FCMGE */
10993    case 0x5f: /* FDIV */
10994    case 0x7a: /* FABD */
10995    case 0x7c: /* FCMGT */
10996        if (!fp_access_check(s)) {
10997            return;
10998        }
10999        handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
11000        return;
11001
11002    case 0x1d: /* FMLAL  */
11003    case 0x3d: /* FMLSL  */
11004    case 0x59: /* FMLAL2 */
11005    case 0x79: /* FMLSL2 */
11006        if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
11007            unallocated_encoding(s);
11008            return;
11009        }
11010        if (fp_access_check(s)) {
11011            int is_s = extract32(insn, 23, 1);
11012            int is_2 = extract32(insn, 29, 1);
11013            int data = (is_2 << 1) | is_s;
11014            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
11015                               vec_full_reg_offset(s, rn),
11016                               vec_full_reg_offset(s, rm), cpu_env,
11017                               is_q ? 16 : 8, vec_full_reg_size(s),
11018                               data, gen_helper_gvec_fmlal_a64);
11019        }
11020        return;
11021
11022    default:
11023        unallocated_encoding(s);
11024        return;
11025    }
11026}
11027
11028/* Integer op subgroup of C3.6.16. */
11029static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
11030{
11031    int is_q = extract32(insn, 30, 1);
11032    int u = extract32(insn, 29, 1);
11033    int size = extract32(insn, 22, 2);
11034    int opcode = extract32(insn, 11, 5);
11035    int rm = extract32(insn, 16, 5);
11036    int rn = extract32(insn, 5, 5);
11037    int rd = extract32(insn, 0, 5);
11038    int pass;
11039    TCGCond cond;
11040
11041    switch (opcode) {
11042    case 0x13: /* MUL, PMUL */
11043        if (u && size != 0) {
11044            unallocated_encoding(s);
11045            return;
11046        }
11047        /* fall through */
11048    case 0x0: /* SHADD, UHADD */
11049    case 0x2: /* SRHADD, URHADD */
11050    case 0x4: /* SHSUB, UHSUB */
11051    case 0xc: /* SMAX, UMAX */
11052    case 0xd: /* SMIN, UMIN */
11053    case 0xe: /* SABD, UABD */
11054    case 0xf: /* SABA, UABA */
11055    case 0x12: /* MLA, MLS */
11056        if (size == 3) {
11057            unallocated_encoding(s);
11058            return;
11059        }
11060        break;
11061    case 0x16: /* SQDMULH, SQRDMULH */
11062        if (size == 0 || size == 3) {
11063            unallocated_encoding(s);
11064            return;
11065        }
11066        break;
11067    default:
11068        if (size == 3 && !is_q) {
11069            unallocated_encoding(s);
11070            return;
11071        }
11072        break;
11073    }
11074
11075    if (!fp_access_check(s)) {
11076        return;
11077    }
11078
11079    switch (opcode) {
11080    case 0x01: /* SQADD, UQADD */
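        /* The second offset passes vfp.qc as an extra operand so the
         * saturating ops can set the cumulative saturation (QC) flag.
         */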
11081        tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
11082                       offsetof(CPUARMState, vfp.qc),
11083                       vec_full_reg_offset(s, rn),
11084                       vec_full_reg_offset(s, rm),
11085                       is_q ? 16 : 8, vec_full_reg_size(s),
11086                       (u ? uqadd_op : sqadd_op) + size);
11087        return;
11088    case 0x05: /* SQSUB, UQSUB */
11089        tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
11090                       offsetof(CPUARMState, vfp.qc),
11091                       vec_full_reg_offset(s, rn),
11092                       vec_full_reg_offset(s, rm),
11093                       is_q ? 16 : 8, vec_full_reg_size(s),
11094                       (u ? uqsub_op : sqsub_op) + size);
11095        return;
11096    case 0x0c: /* SMAX, UMAX */
11097        if (u) {
11098            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
11099        } else {
11100            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
11101        }
11102        return;
11103    case 0x0d: /* SMIN, UMIN */
11104        if (u) {
11105            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
11106        } else {
11107            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
11108        }
11109        return;
11110    case 0x10: /* ADD, SUB */
11111        if (u) {
11112            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
11113        } else {
11114            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
11115        }
11116        return;
11117    case 0x13: /* MUL, PMUL */
11118        if (!u) { /* MUL */
11119            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
11120            return;
11121        }
11122        break;
11123    case 0x12: /* MLA, MLS */
11124        if (u) {
11125            gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
11126        } else {
11127            gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]);
11128        }
11129        return;
11130    case 0x11:
11131        if (!u) { /* CMTST */
11132            gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]);
11133            return;
11134        }
11135        /* else CMEQ */
11136        cond = TCG_COND_EQ;
11137        goto do_gvec_cmp;
11138    case 0x06: /* CMGT, CMHI */
11139        cond = u ? TCG_COND_GTU : TCG_COND_GT;
11140        goto do_gvec_cmp;
11141    case 0x07: /* CMGE, CMHS */
11142        cond = u ? TCG_COND_GEU : TCG_COND_GE;
11143    do_gvec_cmp:
11144        tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
11145                         vec_full_reg_offset(s, rn),
11146                         vec_full_reg_offset(s, rm),
11147                         is_q ? 16 : 8, vec_full_reg_size(s));
11148        return;
11149    }
11150
11151    if (size == 3) {
11152        assert(is_q);
11153        for (pass = 0; pass < 2; pass++) {
11154            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11155            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11156            TCGv_i64 tcg_res = tcg_temp_new_i64();
11157
11158            read_vec_element(s, tcg_op1, rn, pass, MO_64);
11159            read_vec_element(s, tcg_op2, rm, pass, MO_64);
11160
11161            handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
11162
11163            write_vec_element(s, tcg_res, rd, pass, MO_64);
11164
11165            tcg_temp_free_i64(tcg_res);
11166            tcg_temp_free_i64(tcg_op1);
11167            tcg_temp_free_i64(tcg_op2);
11168        }
11169    } else {
11170        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11171            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11172            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11173            TCGv_i32 tcg_res = tcg_temp_new_i32();
11174            NeonGenTwoOpFn *genfn = NULL;
11175            NeonGenTwoOpEnvFn *genenvfn = NULL;
11176
11177            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
11178            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
11179
11180            switch (opcode) {
11181            case 0x0: /* SHADD, UHADD */
11182            {
11183                static NeonGenTwoOpFn * const fns[3][2] = {
11184                    { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
11185                    { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
11186                    { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
11187                };
11188                genfn = fns[size][u];
11189                break;
11190            }
11191            case 0x2: /* SRHADD, URHADD */
11192            {
11193                static NeonGenTwoOpFn * const fns[3][2] = {
11194                    { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
11195                    { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
11196                    { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
11197                };
11198                genfn = fns[size][u];
11199                break;
11200            }
11201            case 0x4: /* SHSUB, UHSUB */
11202            {
11203                static NeonGenTwoOpFn * const fns[3][2] = {
11204                    { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
11205                    { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
11206                    { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
11207                };
11208                genfn = fns[size][u];
11209                break;
11210            }
11211            case 0x8: /* SSHL, USHL */
11212            {
11213                static NeonGenTwoOpFn * const fns[3][2] = {
11214                    { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
11215                    { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
11216                    { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
11217                };
11218                genfn = fns[size][u];
11219                break;
11220            }
11221            case 0x9: /* SQSHL, UQSHL */
11222            {
11223                static NeonGenTwoOpEnvFn * const fns[3][2] = {
11224                    { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
11225                    { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
11226                    { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
11227                };
11228                genenvfn = fns[size][u];
11229                break;
11230            }
11231            case 0xa: /* SRSHL, URSHL */
11232            {
11233                static NeonGenTwoOpFn * const fns[3][2] = {
11234                    { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
11235                    { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
11236                    { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
11237                };
11238                genfn = fns[size][u];
11239                break;
11240            }
11241            case 0xb: /* SQRSHL, UQRSHL */
11242            {
11243                static NeonGenTwoOpEnvFn * const fns[3][2] = {
11244                    { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
11245                    { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
11246                    { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
11247                };
11248                genenvfn = fns[size][u];
11249                break;
11250            }
11251            case 0xe: /* SABD, UABD */
11252            case 0xf: /* SABA, UABA */
11253            {
11254                static NeonGenTwoOpFn * const fns[3][2] = {
11255                    { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
11256                    { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
11257                    { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
11258                };
11259                genfn = fns[size][u];
11260                break;
11261            }
11262            case 0x13: /* MUL, PMUL */
11263                assert(u); /* PMUL */
11264                assert(size == 0);
11265                genfn = gen_helper_neon_mul_p8;
11266                break;
11267            case 0x16: /* SQDMULH, SQRDMULH */
11268            {
11269                static NeonGenTwoOpEnvFn * const fns[2][2] = {
11270                    { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
11271                    { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
11272                };
11273                assert(size == 1 || size == 2);
11274                genenvfn = fns[size - 1][u];
11275                break;
11276            }
11277            default:
11278                g_assert_not_reached();
11279            }
11280
11281            if (genenvfn) {
11282                genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
11283            } else {
11284                genfn(tcg_res, tcg_op1, tcg_op2);
11285            }
11286
11287            if (opcode == 0xf) {
11288                /* SABA, UABA: accumulating ops */
11289                static NeonGenTwoOpFn * const fns[3] = {
11290                    gen_helper_neon_add_u8,
11291                    gen_helper_neon_add_u16,
11292                    tcg_gen_add_i32,
11293                };
11294
11295                read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
11296                fns[size](tcg_res, tcg_op1, tcg_res);
11297            }
11298
11299            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11300
11301            tcg_temp_free_i32(tcg_res);
11302            tcg_temp_free_i32(tcg_op1);
11303            tcg_temp_free_i32(tcg_op2);
11304        }
11305    }
11306    clear_vec_high(s, is_q, rd);
11307}
11308
11309/* AdvSIMD three same
11310 *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
11311 * +---+---+---+-----------+------+---+------+--------+---+------+------+
11312 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
11313 * +---+---+---+-----------+------+---+------+--------+---+------+------+
11314 */
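/* Illustrative decode (an example encoding, not an exhaustive table):
 * insn 0x4e228420 is ADD v0.16b, v1.16b, v2.16b, i.e. Q=1, U=0, size=0,
 * Rm=2, Rn=1, Rd=0, with opcode = extract32(insn, 11, 5) = 0x10, which
 * matches none of the cases below and so is handled by the default
 * (integer) path, disas_simd_3same_int().
 */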
11315static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11316{
11317    int opcode = extract32(insn, 11, 5);
11318
11319    switch (opcode) {
11320    case 0x3: /* logic ops */
11321        disas_simd_3same_logic(s, insn);
11322        break;
11323    case 0x17: /* ADDP */
11324    case 0x14: /* SMAXP, UMAXP */
11325    case 0x15: /* SMINP, UMINP */
11326    {
11327        /* Pairwise operations */
11328        int is_q = extract32(insn, 30, 1);
11329        int u = extract32(insn, 29, 1);
11330        int size = extract32(insn, 22, 2);
11331        int rm = extract32(insn, 16, 5);
11332        int rn = extract32(insn, 5, 5);
11333        int rd = extract32(insn, 0, 5);
11334        if (opcode == 0x17) {
11335            if (u || (size == 3 && !is_q)) {
11336                unallocated_encoding(s);
11337                return;
11338            }
11339        } else {
11340            if (size == 3) {
11341                unallocated_encoding(s);
11342                return;
11343            }
11344        }
11345        handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11346        break;
11347    }
11348    case 0x18 ... 0x31:
11349        /* floating point ops, sz[1] and U are part of opcode */
11350        disas_simd_3same_float(s, insn);
11351        break;
11352    default:
11353        disas_simd_3same_int(s, insn);
11354        break;
11355    }
11356}
11357
11358/*
11359 * Advanced SIMD three same (ARMv8.2 FP16 variants)
11360 *
11361 *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
11362 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11363 * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
11364 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11365 *
11366 * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11367 * (register), FACGE, FABD, FCMGT (register) and FACGT.
11368 *
11369 */
11370static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11371{
11372    int opcode, fpopcode;
11373    int is_q, u, a, rm, rn, rd;
11374    int datasize, elements;
11375    int pass;
11376    TCGv_ptr fpst;
11377    bool pairwise = false;
11378
11379    if (!dc_isar_feature(aa64_fp16, s)) {
11380        unallocated_encoding(s);
11381        return;
11382    }
11383
11384    if (!fp_access_check(s)) {
11385        return;
11386    }
11387
11388    /* For these floating point ops, the U, a and opcode bits
11389     * together indicate the operation.
11390     */
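    /* For example, from the table below:
     *   FADD  is fpopcode 0x02 = 0x2 | (0 << 3) | (0 << 4)
     *   FADDP is fpopcode 0x12 = 0x2 | (0 << 3) | (1 << 4)
     * i.e. for these ops U selects the pairwise form of the same base op.
     */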
11391    opcode = extract32(insn, 11, 3);
11392    u = extract32(insn, 29, 1);
11393    a = extract32(insn, 23, 1);
11394    is_q = extract32(insn, 30, 1);
11395    rm = extract32(insn, 16, 5);
11396    rn = extract32(insn, 5, 5);
11397    rd = extract32(insn, 0, 5);
11398
11399    fpopcode = opcode | (a << 3) | (u << 4);
11400    datasize = is_q ? 128 : 64;
11401    elements = datasize / 16;
11402
11403    switch (fpopcode) {
11404    case 0x10: /* FMAXNMP */
11405    case 0x12: /* FADDP */
11406    case 0x16: /* FMAXP */
11407    case 0x18: /* FMINNMP */
11408    case 0x1e: /* FMINP */
11409        pairwise = true;
11410        break;
11411    }
11412
11413    fpst = get_fpstatus_ptr(true);
11414
11415    if (pairwise) {
11416        int maxpass = is_q ? 8 : 4;
11417        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11418        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11419        TCGv_i32 tcg_res[8];
11420
11421        for (pass = 0; pass < maxpass; pass++) {
11422            int passreg = pass < (maxpass / 2) ? rn : rm;
11423            int passelt = (pass << 1) & (maxpass - 1);
11424
11425            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
11426            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
11427            tcg_res[pass] = tcg_temp_new_i32();
11428
11429            switch (fpopcode) {
11430            case 0x10: /* FMAXNMP */
11431                gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
11432                                           fpst);
11433                break;
11434            case 0x12: /* FADDP */
11435                gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11436                break;
11437            case 0x16: /* FMAXP */
11438                gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11439                break;
11440            case 0x18: /* FMINNMP */
11441                gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
11442                                           fpst);
11443                break;
11444            case 0x1e: /* FMINP */
11445                gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11446                break;
11447            default:
11448                g_assert_not_reached();
11449            }
11450        }
11451
11452        for (pass = 0; pass < maxpass; pass++) {
11453            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
11454            tcg_temp_free_i32(tcg_res[pass]);
11455        }
11456
11457        tcg_temp_free_i32(tcg_op1);
11458        tcg_temp_free_i32(tcg_op2);
11459
11460    } else {
11461        for (pass = 0; pass < elements; pass++) {
11462            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11463            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11464            TCGv_i32 tcg_res = tcg_temp_new_i32();
11465
11466            read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
11467            read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
11468
11469            switch (fpopcode) {
11470            case 0x0: /* FMAXNM */
11471                gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11472                break;
11473            case 0x1: /* FMLA */
11474                read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11475                gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11476                                           fpst);
11477                break;
11478            case 0x2: /* FADD */
11479                gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
11480                break;
11481            case 0x3: /* FMULX */
11482                gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
11483                break;
11484            case 0x4: /* FCMEQ */
11485                gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11486                break;
11487            case 0x6: /* FMAX */
11488                gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
11489                break;
11490            case 0x7: /* FRECPS */
11491                gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11492                break;
11493            case 0x8: /* FMINNM */
11494                gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11495                break;
11496            case 0x9: /* FMLS */
11497                /* As usual for ARM, separate negation for fused multiply-add */
11498                tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
11499                read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11500                gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11501                                           fpst);
11502                break;
11503            case 0xa: /* FSUB */
11504                gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11505                break;
11506            case 0xe: /* FMIN */
11507                gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
11508                break;
11509            case 0xf: /* FRSQRTS */
11510                gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11511                break;
11512            case 0x13: /* FMUL */
11513                gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
11514                break;
11515            case 0x14: /* FCMGE */
11516                gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11517                break;
11518            case 0x15: /* FACGE */
11519                gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11520                break;
11521            case 0x17: /* FDIV */
11522                gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
11523                break;
11524            case 0x1a: /* FABD */
11525                gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11526                tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
11527                break;
11528            case 0x1c: /* FCMGT */
11529                gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11530                break;
11531            case 0x1d: /* FACGT */
11532                gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11533                break;
11534            default:
11535                fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n",
11536                        __func__, insn, fpopcode, s->pc);
11537                g_assert_not_reached();
11538            }
11539
11540            write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11541            tcg_temp_free_i32(tcg_res);
11542            tcg_temp_free_i32(tcg_op1);
11543            tcg_temp_free_i32(tcg_op2);
11544        }
11545    }
11546
11547    tcg_temp_free_ptr(fpst);
11548
11549    clear_vec_high(s, is_q, rd);
11550}
11551
11552/* AdvSIMD three same extra
11553 *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
11554 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11555 * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
11556 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11557 */
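/* The decode below switches on (U << 4) | opcode: e.g. SDOT is U=0,
 * opcode=2 (case 0x02) and UDOT is the same opcode with U=1 (case 0x12).
 */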
11558static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
11559{
11560    int rd = extract32(insn, 0, 5);
11561    int rn = extract32(insn, 5, 5);
11562    int opcode = extract32(insn, 11, 4);
11563    int rm = extract32(insn, 16, 5);
11564    int size = extract32(insn, 22, 2);
11565    bool u = extract32(insn, 29, 1);
11566    bool is_q = extract32(insn, 30, 1);
11567    bool feature;
11568    int rot;
11569
11570    switch (u * 16 + opcode) {
11571    case 0x10: /* SQRDMLAH (vector) */
11572    case 0x11: /* SQRDMLSH (vector) */
11573        if (size != 1 && size != 2) {
11574            unallocated_encoding(s);
11575            return;
11576        }
11577        feature = dc_isar_feature(aa64_rdm, s);
11578        break;
11579    case 0x02: /* SDOT (vector) */
11580    case 0x12: /* UDOT (vector) */
11581        if (size != MO_32) {
11582            unallocated_encoding(s);
11583            return;
11584        }
11585        feature = dc_isar_feature(aa64_dp, s);
11586        break;
11587    case 0x18: /* FCMLA, #0 */
11588    case 0x19: /* FCMLA, #90 */
11589    case 0x1a: /* FCMLA, #180 */
11590    case 0x1b: /* FCMLA, #270 */
11591    case 0x1c: /* FCADD, #90 */
11592    case 0x1e: /* FCADD, #270 */
11593        if (size == 0
11594            || (size == 1 && !dc_isar_feature(aa64_fp16, s))
11595            || (size == 3 && !is_q)) {
11596            unallocated_encoding(s);
11597            return;
11598        }
11599        feature = dc_isar_feature(aa64_fcma, s);
11600        break;
11601    default:
11602        unallocated_encoding(s);
11603        return;
11604    }
11605    if (!feature) {
11606        unallocated_encoding(s);
11607        return;
11608    }
11609    if (!fp_access_check(s)) {
11610        return;
11611    }
11612
11613    switch (opcode) {
11614    case 0x0: /* SQRDMLAH (vector) */
11615        switch (size) {
11616        case 1:
11617            gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s16);
11618            break;
11619        case 2:
11620            gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s32);
11621            break;
11622        default:
11623            g_assert_not_reached();
11624        }
11625        return;
11626
11627    case 0x1: /* SQRDMLSH (vector) */
11628        switch (size) {
11629        case 1:
11630            gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s16);
11631            break;
11632        case 2:
11633            gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s32);
11634            break;
11635        default:
11636            g_assert_not_reached();
11637        }
11638        return;
11639
11640    case 0x2: /* SDOT / UDOT */
11641        gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0,
11642                         u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
11643        return;
11644
11645    case 0x8: /* FCMLA, #0 */
11646    case 0x9: /* FCMLA, #90 */
11647    case 0xa: /* FCMLA, #180 */
11648    case 0xb: /* FCMLA, #270 */
11649        rot = extract32(opcode, 0, 2);
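        /* rot is opcode<1:0>: cases 0x8..0xb map to rotations of
         * 0, 90, 180 and 270 degrees respectively.
         */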
11650        switch (size) {
11651        case 1:
11652            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, true, rot,
11653                              gen_helper_gvec_fcmlah);
11654            break;
11655        case 2:
11656            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
11657                              gen_helper_gvec_fcmlas);
11658            break;
11659        case 3:
11660            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
11661                              gen_helper_gvec_fcmlad);
11662            break;
11663        default:
11664            g_assert_not_reached();
11665        }
11666        return;
11667
11668    case 0xc: /* FCADD, #90 */
11669    case 0xe: /* FCADD, #270 */
11670        rot = extract32(opcode, 1, 1);
11671        switch (size) {
11672        case 1:
11673            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11674                              gen_helper_gvec_fcaddh);
11675            break;
11676        case 2:
11677            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11678                              gen_helper_gvec_fcadds);
11679            break;
11680        case 3:
11681            gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11682                              gen_helper_gvec_fcaddd);
11683            break;
11684        default:
11685            g_assert_not_reached();
11686        }
11687        return;
11688
11689    default:
11690        g_assert_not_reached();
11691    }
11692}
11693
11694static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11695                                  int size, int rn, int rd)
11696{
11697    /* Handle 2-reg-misc ops which are widening (so each size element
11698     * in the source becomes a 2*size element in the destination).
11699     * The only instruction like this is FCVTL.
11700     */
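    /* Concretely: 4 * f16 -> 4 * f32, or 2 * f32 -> 2 * f64; the
     * FCVTL2 form (is_q) takes its source elements from the high half
     * of the input register, selected via srcelt below.
     */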
11701    int pass;
11702
11703    if (size == 3) {
11704        /* 32 -> 64 bit fp conversion */
11705        TCGv_i64 tcg_res[2];
11706        int srcelt = is_q ? 2 : 0;
11707
11708        for (pass = 0; pass < 2; pass++) {
11709            TCGv_i32 tcg_op = tcg_temp_new_i32();
11710            tcg_res[pass] = tcg_temp_new_i64();
11711
11712            read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11713            gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
11714            tcg_temp_free_i32(tcg_op);
11715        }
11716        for (pass = 0; pass < 2; pass++) {
11717            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11718            tcg_temp_free_i64(tcg_res[pass]);
11719        }
11720    } else {
11721        /* 16 -> 32 bit fp conversion */
11722        int srcelt = is_q ? 4 : 0;
11723        TCGv_i32 tcg_res[4];
11724        TCGv_ptr fpst = get_fpstatus_ptr(false);
11725        TCGv_i32 ahp = get_ahp_flag();
11726
11727        for (pass = 0; pass < 4; pass++) {
11728            tcg_res[pass] = tcg_temp_new_i32();
11729
11730            read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11731            gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11732                                           fpst, ahp);
11733        }
11734        for (pass = 0; pass < 4; pass++) {
11735            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11736            tcg_temp_free_i32(tcg_res[pass]);
11737        }
11738
11739        tcg_temp_free_ptr(fpst);
11740        tcg_temp_free_i32(ahp);
11741    }
11742}
11743
11744static void handle_rev(DisasContext *s, int opcode, bool u,
11745                       bool is_q, int size, int rn, int rd)
11746{
11747    int op = (opcode << 1) | u;
11748    int opsz = op + size;
11749    int grp_size = 3 - opsz;
11750    int dsize = is_q ? 128 : 64;
11751    int i;
11752
11753    if (opsz >= 3) {
11754        unallocated_encoding(s);
11755        return;
11756    }
11757
11758    if (!fp_access_check(s)) {
11759        return;
11760    }
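    /* Worked example of the arithmetic above: REV32 of halfwords has
     * opcode=0, u=1, size=1, so op = 1, opsz = 2 and grp_size = 1,
     * i.e. groups of 1 << 1 = 2 adjacent 16-bit elements are reversed.
     * opsz >= 3 would reverse a single element within its group (a
     * no-op), so those encodings are unallocated.
     */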
11761
11762    if (size == 0) {
11763        /* Special case bytes, use bswap op on each group of elements */
11764        int groups = dsize / (8 << grp_size);
11765
11766        for (i = 0; i < groups; i++) {
11767            TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11768
11769            read_vec_element(s, tcg_tmp, rn, i, grp_size);
11770            switch (grp_size) {
11771            case MO_16:
11772                tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
11773                break;
11774            case MO_32:
11775                tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
11776                break;
11777            case MO_64:
11778                tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11779                break;
11780            default:
11781                g_assert_not_reached();
11782            }
11783            write_vec_element(s, tcg_tmp, rd, i, grp_size);
11784            tcg_temp_free_i64(tcg_tmp);
11785        }
11786        clear_vec_high(s, is_q, rd);
11787    } else {
11788        int revmask = (1 << grp_size) - 1;
11789        int esize = 8 << size;
11790        int elements = dsize / esize;
11791        TCGv_i64 tcg_rn = tcg_temp_new_i64();
11792        TCGv_i64 tcg_rd = tcg_const_i64(0);
11793        TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
11794
11795        for (i = 0; i < elements; i++) {
11796            int e_rev = (i & 0xf) ^ revmask;
11797            int off = e_rev * esize;
11798            read_vec_element(s, tcg_rn, rn, i, size);
11799            if (off >= 64) {
11800                tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
11801                                    tcg_rn, off - 64, esize);
11802            } else {
11803                tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
11804            }
11805        }
11806        write_vec_element(s, tcg_rd, rd, 0, MO_64);
11807        write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
11808
11809        tcg_temp_free_i64(tcg_rd_hi);
11810        tcg_temp_free_i64(tcg_rd);
11811        tcg_temp_free_i64(tcg_rn);
11812    }
11813}
11814
11815static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11816                                  bool is_q, int size, int rn, int rd)
11817{
11818    /* Implement the pairwise operations from 2-misc:
11819     * SADDLP, UADDLP, SADALP, UADALP.
11820     * These all add pairs of elements in the input to produce a
11821     * double-width result element in the output (possibly accumulating).
11822     */
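    /* E.g. SADDLP Vd.4S, Vn.8H computes s[i] = h[2*i] + h[2*i+1] with
     * sign extension; SADALP does the same but adds the result into
     * the existing Vd element (accum below).
     */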
11823    bool accum = (opcode == 0x6);
11824    int maxpass = is_q ? 2 : 1;
11825    int pass;
11826    TCGv_i64 tcg_res[2];
11827
11828    if (size == 2) {
11829        /* 32 + 32 -> 64 op */
11830        TCGMemOp memop = size + (u ? 0 : MO_SIGN);
11831
11832        for (pass = 0; pass < maxpass; pass++) {
11833            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11834            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11835
11836            tcg_res[pass] = tcg_temp_new_i64();
11837
11838            read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11839            read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11840            tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11841            if (accum) {
11842                read_vec_element(s, tcg_op1, rd, pass, MO_64);
11843                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11844            }
11845
11846            tcg_temp_free_i64(tcg_op1);
11847            tcg_temp_free_i64(tcg_op2);
11848        }
11849    } else {
11850        for (pass = 0; pass < maxpass; pass++) {
11851            TCGv_i64 tcg_op = tcg_temp_new_i64();
11852            NeonGenOneOpFn *genfn;
11853            static NeonGenOneOpFn * const fns[2][2] = {
11854                { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 },
11855                { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
11856            };
11857
11858            genfn = fns[size][u];
11859
11860            tcg_res[pass] = tcg_temp_new_i64();
11861
11862            read_vec_element(s, tcg_op, rn, pass, MO_64);
11863            genfn(tcg_res[pass], tcg_op);
11864
11865            if (accum) {
11866                read_vec_element(s, tcg_op, rd, pass, MO_64);
11867                if (size == 0) {
11868                    gen_helper_neon_addl_u16(tcg_res[pass],
11869                                             tcg_res[pass], tcg_op);
11870                } else {
11871                    gen_helper_neon_addl_u32(tcg_res[pass],
11872                                             tcg_res[pass], tcg_op);
11873                }
11874            }
11875            tcg_temp_free_i64(tcg_op);
11876        }
11877    }
11878    if (!is_q) {
11879        tcg_res[1] = tcg_const_i64(0);
11880    }
11881    for (pass = 0; pass < 2; pass++) {
11882        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11883        tcg_temp_free_i64(tcg_res[pass]);
11884    }
11885}
11886
11887static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11888{
11889    /* Implement SHLL and SHLL2 */
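    /* Each element is zero-widened and shifted left by its own width:
     * e.g. SHLL Vd.8H, Vn.8B, #8 gives h[i] = zext(b[i]) << 8, with the
     * SHLL2 form reading the high half of Vn (part below).
     */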
11890    int pass;
11891    int part = is_q ? 2 : 0;
11892    TCGv_i64 tcg_res[2];
11893
11894    for (pass = 0; pass < 2; pass++) {
11895        static NeonGenWidenFn * const widenfns[3] = {
11896            gen_helper_neon_widen_u8,
11897            gen_helper_neon_widen_u16,
11898            tcg_gen_extu_i32_i64,
11899        };
11900        NeonGenWidenFn *widenfn = widenfns[size];
11901        TCGv_i32 tcg_op = tcg_temp_new_i32();
11902
11903        read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11904        tcg_res[pass] = tcg_temp_new_i64();
11905        widenfn(tcg_res[pass], tcg_op);
11906        tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11907
11908        tcg_temp_free_i32(tcg_op);
11909    }
11910
11911    for (pass = 0; pass < 2; pass++) {
11912        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11913        tcg_temp_free_i64(tcg_res[pass]);
11914    }
11915}
11916
11917/* AdvSIMD two reg misc
11918 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
11919 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11920 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11921 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11922 */
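/* Example: insn 0x6e205820 (NOT v0.16b, v1.16b) has Q=1, U=1, size=0,
 * opcode = extract32(insn, 12, 5) = 0x5, Rn=1, Rd=0, and takes the
 * "u && size == 0" arm of case 0x5 below.
 */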
11923static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11924{
11925    int size = extract32(insn, 22, 2);
11926    int opcode = extract32(insn, 12, 5);
11927    bool u = extract32(insn, 29, 1);
11928    bool is_q = extract32(insn, 30, 1);
11929    int rn = extract32(insn, 5, 5);
11930    int rd = extract32(insn, 0, 5);
11931    bool need_fpstatus = false;
11932    bool need_rmode = false;
11933    int rmode = -1;
11934    TCGv_i32 tcg_rmode;
11935    TCGv_ptr tcg_fpstatus;
11936
11937    switch (opcode) {
11938    case 0x0: /* REV64, REV32 */
11939    case 0x1: /* REV16 */
11940        handle_rev(s, opcode, u, is_q, size, rn, rd);
11941        return;
11942    case 0x5: /* CNT, NOT, RBIT */
11943        if (u && size == 0) {
11944            /* NOT */
11945            break;
11946        } else if (u && size == 1) {
11947            /* RBIT */
11948            break;
11949        } else if (!u && size == 0) {
11950            /* CNT */
11951            break;
11952        }
11953        unallocated_encoding(s);
11954        return;
11955    case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
11956    case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
11957        if (size == 3) {
11958            unallocated_encoding(s);
11959            return;
11960        }
11961        if (!fp_access_check(s)) {
11962            return;
11963        }
11964
11965        handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
11966        return;
11967    case 0x4: /* CLS, CLZ */
11968        if (size == 3) {
11969            unallocated_encoding(s);
11970            return;
11971        }
11972        break;
11973    case 0x2: /* SADDLP, UADDLP */
11974    case 0x6: /* SADALP, UADALP */
11975        if (size == 3) {
11976            unallocated_encoding(s);
11977            return;
11978        }
11979        if (!fp_access_check(s)) {
11980            return;
11981        }
11982        handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
11983        return;
11984    case 0x13: /* SHLL, SHLL2 */
11985        if (u == 0 || size == 3) {
11986            unallocated_encoding(s);
11987            return;
11988        }
11989        if (!fp_access_check(s)) {
11990            return;
11991        }
11992        handle_shll(s, is_q, size, rn, rd);
11993        return;
11994    case 0xa: /* CMLT (zero) */
11995        if (u == 1) {
11996            unallocated_encoding(s);
11997            return;
11998        }
11999        /* fall through */
12000    case 0x8: /* CMGT, CMGE */
12001    case 0x9: /* CMEQ, CMLE */
12002    case 0xb: /* ABS, NEG */
12003        if (size == 3 && !is_q) {
12004            unallocated_encoding(s);
12005            return;
12006        }
12007        break;
12008    case 0x3: /* SUQADD, USQADD */
12009        if (size == 3 && !is_q) {
12010            unallocated_encoding(s);
12011            return;
12012        }
12013        if (!fp_access_check(s)) {
12014            return;
12015        }
12016        handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
12017        return;
12018    case 0x7: /* SQABS, SQNEG */
12019        if (size == 3 && !is_q) {
12020            unallocated_encoding(s);
12021            return;
12022        }
12023        break;
12024    case 0xc ... 0xf:
12025    case 0x16 ... 0x1f:
12026    {
12027        /* Floating point: U, size[1] and opcode indicate operation;
12028         * size[0] indicates single or double precision.
12029         */
12030        int is_double = extract32(size, 0, 1);
12031        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
12032        size = is_double ? 3 : 2;
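        /* E.g. FNEG: u=1, size<1>=1 and base opcode 0xf recombine to
         * (1 << 6) | (1 << 5) | 0xf = 0x6f, matching case 0x6f below.
         */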
12033        switch (opcode) {
12034        case 0x2f: /* FABS */
12035        case 0x6f: /* FNEG */
12036            if (size == 3 && !is_q) {
12037                unallocated_encoding(s);
12038                return;
12039            }
12040            break;
12041        case 0x1d: /* SCVTF */
12042        case 0x5d: /* UCVTF */
12043        {
12044            bool is_signed = (opcode == 0x1d);
12045            int elements = is_double ? 2 : is_q ? 4 : 2;
12046            if (is_double && !is_q) {
12047                unallocated_encoding(s);
12048                return;
12049            }
12050            if (!fp_access_check(s)) {
12051                return;
12052            }
12053            handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
12054            return;
12055        }
12056        case 0x2c: /* FCMGT (zero) */
12057        case 0x2d: /* FCMEQ (zero) */
12058        case 0x2e: /* FCMLT (zero) */
12059        case 0x6c: /* FCMGE (zero) */
12060        case 0x6d: /* FCMLE (zero) */
12061            if (size == 3 && !is_q) {
12062                unallocated_encoding(s);
12063                return;
12064            }
12065            handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
12066            return;
12067        case 0x7f: /* FSQRT */
12068            if (size == 3 && !is_q) {
12069                unallocated_encoding(s);
12070                return;
12071            }
12072            break;
12073        case 0x1a: /* FCVTNS */
12074        case 0x1b: /* FCVTMS */
12075        case 0x3a: /* FCVTPS */
12076        case 0x3b: /* FCVTZS */
12077        case 0x5a: /* FCVTNU */
12078        case 0x5b: /* FCVTMU */
12079        case 0x7a: /* FCVTPU */
12080        case 0x7b: /* FCVTZU */
12081            need_fpstatus = true;
12082            need_rmode = true;
12083            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
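            /* opcode bits <5,0> select the rounding mode in FPRounding
             * order: FCVTNS 0x1a -> 0 (TIEEVEN), FCVTPS 0x3a -> 1
             * (POSINF), FCVTMS 0x1b -> 2 (NEGINF), FCVTZS 0x3b -> 3
             * (ZERO).
             */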
12084            if (size == 3 && !is_q) {
12085                unallocated_encoding(s);
12086                return;
12087            }
12088            break;
12089        case 0x5c: /* FCVTAU */
12090        case 0x1c: /* FCVTAS */
12091            need_fpstatus = true;
12092            need_rmode = true;
12093            rmode = FPROUNDING_TIEAWAY;
12094            if (size == 3 && !is_q) {
12095                unallocated_encoding(s);
12096                return;
12097            }
12098            break;
12099        case 0x3c: /* URECPE */
12100            if (size == 3) {
12101                unallocated_encoding(s);
12102                return;
12103            }
12104            /* fall through */
12105        case 0x3d: /* FRECPE */
12106        case 0x7d: /* FRSQRTE */
12107            if (size == 3 && !is_q) {
12108                unallocated_encoding(s);
12109                return;
12110            }
12111            if (!fp_access_check(s)) {
12112                return;
12113            }
12114            handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
12115            return;
12116        case 0x56: /* FCVTXN, FCVTXN2 */
12117            if (size == 2) {
12118                unallocated_encoding(s);
12119                return;
12120            }
12121            /* fall through */
12122        case 0x16: /* FCVTN, FCVTN2 */
12123            /* handle_2misc_narrow does a 2*size -> size operation, but these
12124             * instructions encode the source size rather than dest size.
12125             */
12126            if (!fp_access_check(s)) {
12127                return;
12128            }
12129            handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
12130            return;
12131        case 0x17: /* FCVTL, FCVTL2 */
12132            if (!fp_access_check(s)) {
12133                return;
12134            }
12135            handle_2misc_widening(s, opcode, is_q, size, rn, rd);
12136            return;
12137        case 0x18: /* FRINTN */
12138        case 0x19: /* FRINTM */
12139        case 0x38: /* FRINTP */
12140        case 0x39: /* FRINTZ */
12141            need_rmode = true;
12142            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
12143            /* fall through */
12144        case 0x59: /* FRINTX */
12145        case 0x79: /* FRINTI */
12146            need_fpstatus = true;
12147            if (size == 3 && !is_q) {
12148                unallocated_encoding(s);
12149                return;
12150            }
12151            break;
12152        case 0x58: /* FRINTA */
12153            need_rmode = true;
12154            rmode = FPROUNDING_TIEAWAY;
12155            need_fpstatus = true;
12156            if (size == 3 && !is_q) {
12157                unallocated_encoding(s);
12158                return;
12159            }
12160            break;
12161        case 0x7c: /* URSQRTE */
12162            if (size == 3) {
12163                unallocated_encoding(s);
12164                return;
12165            }
12166            need_fpstatus = true;
12167            break;
12168        case 0x1e: /* FRINT32Z */
12169        case 0x1f: /* FRINT64Z */
12170            need_rmode = true;
12171            rmode = FPROUNDING_ZERO;
12172            /* fall through */
12173        case 0x5e: /* FRINT32X */
12174        case 0x5f: /* FRINT64X */
12175            need_fpstatus = true;
12176            if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) {
12177                unallocated_encoding(s);
12178                return;
12179            }
12180            break;
12181        default:
12182            unallocated_encoding(s);
12183            return;
12184        }
12185        break;
12186    }
12187    default:
12188        unallocated_encoding(s);
12189        return;
12190    }
12191
12192    if (!fp_access_check(s)) {
12193        return;
12194    }
12195
12196    if (need_fpstatus || need_rmode) {
12197        tcg_fpstatus = get_fpstatus_ptr(false);
12198    } else {
12199        tcg_fpstatus = NULL;
12200    }
12201    if (need_rmode) {
12202        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
12203        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12204    } else {
12205        tcg_rmode = NULL;
12206    }
12207
12208    switch (opcode) {
12209    case 0x5:
12210        if (u && size == 0) { /* NOT */
12211            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
12212            return;
12213        }
12214        break;
12215    case 0xb:
12216        if (u) { /* ABS, NEG */
12217            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
12218        } else {
12219            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
12220        }
12221        return;
12222    }
12223
12224    if (size == 3) {
12225        /* All 64-bit element operations can be shared with scalar 2misc */
12226        int pass;
12227
12228        /* Coverity claims (size == 3 && !is_q) has been eliminated
12229         * from all paths leading to here.
12230         */
12231        tcg_debug_assert(is_q);
12232        for (pass = 0; pass < 2; pass++) {
12233            TCGv_i64 tcg_op = tcg_temp_new_i64();
12234            TCGv_i64 tcg_res = tcg_temp_new_i64();
12235
12236            read_vec_element(s, tcg_op, rn, pass, MO_64);
12237
12238            handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
12239                            tcg_rmode, tcg_fpstatus);
12240
12241            write_vec_element(s, tcg_res, rd, pass, MO_64);
12242
12243            tcg_temp_free_i64(tcg_res);
12244            tcg_temp_free_i64(tcg_op);
12245        }
12246    } else {
12247        int pass;
12248
12249        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12250            TCGv_i32 tcg_op = tcg_temp_new_i32();
12251            TCGv_i32 tcg_res = tcg_temp_new_i32();
12252            TCGCond cond;
12253
12254            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12255
12256            if (size == 2) {
12257                /* Special cases for 32 bit elements */
12258                switch (opcode) {
12259                case 0xa: /* CMLT */
12260                    /* 32 bit integer comparison against zero, result is
12261                     * test ? (2^32 - 1) : 0. We implement via setcond(test)
12262                     * and then negating the 0/1 result.
12263                     */
12264                    cond = TCG_COND_LT;
12265                do_cmop:
12266                    tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
12267                    tcg_gen_neg_i32(tcg_res, tcg_res);
12268                    break;
12269                case 0x8: /* CMGT, CMGE */
12270                    cond = u ? TCG_COND_GE : TCG_COND_GT;
12271                    goto do_cmop;
12272                case 0x9: /* CMEQ, CMLE */
12273                    cond = u ? TCG_COND_LE : TCG_COND_EQ;
12274                    goto do_cmop;
12275                case 0x4: /* CLS */
12276                    if (u) {
12277                        tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12278                    } else {
12279                        tcg_gen_clrsb_i32(tcg_res, tcg_op);
12280                    }
12281                    break;
12282                case 0x7: /* SQABS, SQNEG */
12283                    if (u) {
12284                        gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
12285                    } else {
12286                        gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
12287                    }
12288                    break;
12289                case 0x2f: /* FABS */
12290                    gen_helper_vfp_abss(tcg_res, tcg_op);
12291                    break;
12292                case 0x6f: /* FNEG */
12293                    gen_helper_vfp_negs(tcg_res, tcg_op);
12294                    break;
12295                case 0x7f: /* FSQRT */
12296                    gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
12297                    break;
12298                case 0x1a: /* FCVTNS */
12299                case 0x1b: /* FCVTMS */
12300                case 0x1c: /* FCVTAS */
12301                case 0x3a: /* FCVTPS */
12302                case 0x3b: /* FCVTZS */
12303                {
12304                    TCGv_i32 tcg_shift = tcg_const_i32(0);
12305                    gen_helper_vfp_tosls(tcg_res, tcg_op,
12306                                         tcg_shift, tcg_fpstatus);
12307                    tcg_temp_free_i32(tcg_shift);
12308                    break;
12309                }
12310                case 0x5a: /* FCVTNU */
12311                case 0x5b: /* FCVTMU */
12312                case 0x5c: /* FCVTAU */
12313                case 0x7a: /* FCVTPU */
12314                case 0x7b: /* FCVTZU */
12315                {
12316                    TCGv_i32 tcg_shift = tcg_const_i32(0);
12317                    gen_helper_vfp_touls(tcg_res, tcg_op,
12318                                         tcg_shift, tcg_fpstatus);
12319                    tcg_temp_free_i32(tcg_shift);
12320                    break;
12321                }
12322                case 0x18: /* FRINTN */
12323                case 0x19: /* FRINTM */
12324                case 0x38: /* FRINTP */
12325                case 0x39: /* FRINTZ */
12326                case 0x58: /* FRINTA */
12327                case 0x79: /* FRINTI */
12328                    gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12329                    break;
12330                case 0x59: /* FRINTX */
12331                    gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12332                    break;
12333                case 0x7c: /* URSQRTE */
12334                    gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
12335                    break;
12336                case 0x1e: /* FRINT32Z */
12337                case 0x5e: /* FRINT32X */
12338                    gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus);
12339                    break;
12340                case 0x1f: /* FRINT64Z */
12341                case 0x5f: /* FRINT64X */
12342                    gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus);
12343                    break;
12344                default:
12345                    g_assert_not_reached();
12346                }
12347            } else {
12348                /* Use helpers for 8 and 16 bit elements */
12349                switch (opcode) {
12350                case 0x5: /* CNT, RBIT */
12351                    /* For these two insns size is part of the opcode specifier
12352                     * (handled earlier); they always operate on byte elements.
12353                     */
12354                    if (u) {
12355                        gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12356                    } else {
12357                        gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12358                    }
12359                    break;
12360                case 0x7: /* SQABS, SQNEG */
12361                {
12362                    NeonGenOneOpEnvFn *genfn;
12363                    static NeonGenOneOpEnvFn * const fns[2][2] = {
12364                        { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12365                        { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12366                    };
12367                    genfn = fns[size][u];
12368                    genfn(tcg_res, cpu_env, tcg_op);
12369                    break;
12370                }
12371                case 0x8: /* CMGT, CMGE */
12372                case 0x9: /* CMEQ, CMLE */
12373                case 0xa: /* CMLT */
12374                {
12375                    static NeonGenTwoOpFn * const fns[3][2] = {
12376                        { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
12377                        { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
12378                        { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
12379                    };
12380                    NeonGenTwoOpFn *genfn;
12381                    int comp;
12382                    bool reverse;
12383                    TCGv_i32 tcg_zero = tcg_const_i32(0);
12384
12385                    /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
12386                    comp = (opcode - 0x8) * 2 + u;
12387                    /* ...but LE, LT are implemented as reverse GE, GT */
12388                    reverse = (comp > 2);
12389                    if (reverse) {
12390                        comp = 4 - comp;
12391                    }
12392                    genfn = fns[comp][size];
12393                    if (reverse) {
12394                        genfn(tcg_res, tcg_zero, tcg_op);
12395                    } else {
12396                        genfn(tcg_res, tcg_op, tcg_zero);
12397                    }
12398                    tcg_temp_free_i32(tcg_zero);
12399                    break;
12400                }
12401                case 0x4: /* CLS, CLZ */
12402                    if (u) {
12403                        if (size == 0) {
12404                            gen_helper_neon_clz_u8(tcg_res, tcg_op);
12405                        } else {
12406                            gen_helper_neon_clz_u16(tcg_res, tcg_op);
12407                        }
12408                    } else {
12409                        if (size == 0) {
12410                            gen_helper_neon_cls_s8(tcg_res, tcg_op);
12411                        } else {
12412                            gen_helper_neon_cls_s16(tcg_res, tcg_op);
12413                        }
12414                    }
12415                    break;
12416                default:
12417                    g_assert_not_reached();
12418                }
12419            }
12420
12421            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12422
12423            tcg_temp_free_i32(tcg_res);
12424            tcg_temp_free_i32(tcg_op);
12425        }
12426    }
12427    clear_vec_high(s, is_q, rd);
12428
12429    if (need_rmode) {
12430        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12431        tcg_temp_free_i32(tcg_rmode);
12432    }
12433    if (need_fpstatus) {
12434        tcg_temp_free_ptr(tcg_fpstatus);
12435    }
12436}
12437
12438/* AdvSIMD [scalar] two register miscellaneous (FP16)
12439 *
12440 *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
12441 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12442 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12443 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12444 *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12445 *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12446 *
12447 * This actually covers two groups where scalar access is governed by
12448 * bit 28. A bunch of the instructions (float to integral) only exist
12449 * in the vector form and are unallocated for the scalar decode. Also
12450 * in the scalar decode Q is always 1.
12451 */
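/* fpop below is opcode with 'a' deposited at bit 5 and U at bit 6:
 * e.g. FABS has a=1, u=0, opcode=0xf, giving fpop 0x2f.
 */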
12452static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12453{
12454    int fpop, opcode, a, u;
12455    int rn, rd;
12456    bool is_q;
12457    bool is_scalar;
12458    bool only_in_vector = false;
12459
12460    int pass;
12461    TCGv_i32 tcg_rmode = NULL;
12462    TCGv_ptr tcg_fpstatus = NULL;
12463    bool need_rmode = false;
12464    bool need_fpst = true;
12465    int rmode;
12466
12467    if (!dc_isar_feature(aa64_fp16, s)) {
12468        unallocated_encoding(s);
12469        return;
12470    }
12471
12472    rd = extract32(insn, 0, 5);
12473    rn = extract32(insn, 5, 5);
12474
12475    a = extract32(insn, 23, 1);
12476    u = extract32(insn, 29, 1);
12477    is_scalar = extract32(insn, 28, 1);
12478    is_q = extract32(insn, 30, 1);
12479
12480    opcode = extract32(insn, 12, 5);
12481    fpop = deposit32(opcode, 5, 1, a);
12482    fpop = deposit32(fpop, 6, 1, u);
12483
12487    switch (fpop) {
12488    case 0x1d: /* SCVTF */
12489    case 0x5d: /* UCVTF */
12490    {
12491        int elements;
12492
12493        if (is_scalar) {
12494            elements = 1;
12495        } else {
12496            elements = (is_q ? 8 : 4);
12497        }
12498
12499        if (!fp_access_check(s)) {
12500            return;
12501        }
12502        handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
12503        return;
12504    }
12506    case 0x2c: /* FCMGT (zero) */
12507    case 0x2d: /* FCMEQ (zero) */
12508    case 0x2e: /* FCMLT (zero) */
12509    case 0x6c: /* FCMGE (zero) */
12510    case 0x6d: /* FCMLE (zero) */
12511        handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
12512        return;
12513    case 0x3d: /* FRECPE */
12514    case 0x3f: /* FRECPX */
12515        break;
12516    case 0x18: /* FRINTN */
12517        need_rmode = true;
12518        only_in_vector = true;
12519        rmode = FPROUNDING_TIEEVEN;
12520        break;
12521    case 0x19: /* FRINTM */
12522        need_rmode = true;
12523        only_in_vector = true;
12524        rmode = FPROUNDING_NEGINF;
12525        break;
12526    case 0x38: /* FRINTP */
12527        need_rmode = true;
12528        only_in_vector = true;
12529        rmode = FPROUNDING_POSINF;
12530        break;
12531    case 0x39: /* FRINTZ */
12532        need_rmode = true;
12533        only_in_vector = true;
12534        rmode = FPROUNDING_ZERO;
12535        break;
12536    case 0x58: /* FRINTA */
12537        need_rmode = true;
12538        only_in_vector = true;
12539        rmode = FPROUNDING_TIEAWAY;
12540        break;
12541    case 0x59: /* FRINTX */
12542    case 0x79: /* FRINTI */
12543        only_in_vector = true;
12544        /* current rounding mode */
12545        break;
12546    case 0x1a: /* FCVTNS */
12547        need_rmode = true;
12548        rmode = FPROUNDING_TIEEVEN;
12549        break;
12550    case 0x1b: /* FCVTMS */
12551        need_rmode = true;
12552        rmode = FPROUNDING_NEGINF;
12553        break;
12554    case 0x1c: /* FCVTAS */
12555        need_rmode = true;
12556        rmode = FPROUNDING_TIEAWAY;
12557        break;
12558    case 0x3a: /* FCVTPS */
12559        need_rmode = true;
12560        rmode = FPROUNDING_POSINF;
12561        break;
12562    case 0x3b: /* FCVTZS */
12563        need_rmode = true;
12564        rmode = FPROUNDING_ZERO;
12565        break;
12566    case 0x5a: /* FCVTNU */
12567        need_rmode = true;
12568        rmode = FPROUNDING_TIEEVEN;
12569        break;
12570    case 0x5b: /* FCVTMU */
12571        need_rmode = true;
12572        rmode = FPROUNDING_NEGINF;
12573        break;
12574    case 0x5c: /* FCVTAU */
12575        need_rmode = true;
12576        rmode = FPROUNDING_TIEAWAY;
12577        break;
12578    case 0x7a: /* FCVTPU */
12579        need_rmode = true;
12580        rmode = FPROUNDING_POSINF;
12581        break;
12582    case 0x7b: /* FCVTZU */
12583        need_rmode = true;
12584        rmode = FPROUNDING_ZERO;
12585        break;
12586    case 0x2f: /* FABS */
12587    case 0x6f: /* FNEG */
12588        need_fpst = false;
12589        break;
12590    case 0x7d: /* FRSQRTE */
12591    case 0x7f: /* FSQRT (vector) */
12592        break;
12593    default:
12594        fprintf(stderr, "%s: insn %#04x fpop %#2x\n", __func__, insn, fpop);
12595        g_assert_not_reached();
12596    }
12597
12599    /* Check additional constraints for the scalar encoding */
12600    if (is_scalar) {
12601        if (!is_q) {
12602            unallocated_encoding(s);
12603            return;
12604        }
12605        /* FRINTxx is only in the vector form */
12606        if (only_in_vector) {
12607            unallocated_encoding(s);
12608            return;
12609        }
12610    }
12611
12612    if (!fp_access_check(s)) {
12613        return;
12614    }
12615
12616    if (need_rmode || need_fpst) {
12617        tcg_fpstatus = get_fpstatus_ptr(true);
12618    }
12619
12620    if (need_rmode) {
12621        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
12622        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12623    }
12624
12625    if (is_scalar) {
12626        TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12627        TCGv_i32 tcg_res = tcg_temp_new_i32();
12628
12629        switch (fpop) {
12630        case 0x1a: /* FCVTNS */
12631        case 0x1b: /* FCVTMS */
12632        case 0x1c: /* FCVTAS */
12633        case 0x3a: /* FCVTPS */
12634        case 0x3b: /* FCVTZS */
12635            gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12636            break;
12637        case 0x3d: /* FRECPE */
12638            gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12639            break;
12640        case 0x3f: /* FRECPX */
12641            gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12642            break;
12643        case 0x5a: /* FCVTNU */
12644        case 0x5b: /* FCVTMU */
12645        case 0x5c: /* FCVTAU */
12646        case 0x7a: /* FCVTPU */
12647        case 0x7b: /* FCVTZU */
12648            gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12649            break;
12650        case 0x6f: /* FNEG */
12651            tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12652            break;
12653        case 0x7d: /* FRSQRTE */
12654            gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12655            break;
12656        default:
12657            g_assert_not_reached();
12658        }
12659
12660        /* limit any sign extension going on */
12661        tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12662        write_fp_sreg(s, rd, tcg_res);
12663
12664        tcg_temp_free_i32(tcg_res);
12665        tcg_temp_free_i32(tcg_op);
12666    } else {
12667        for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
12668            TCGv_i32 tcg_op = tcg_temp_new_i32();
12669            TCGv_i32 tcg_res = tcg_temp_new_i32();
12670
12671            read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12672
12673            switch (fpop) {
12674            case 0x1a: /* FCVTNS */
12675            case 0x1b: /* FCVTMS */
12676            case 0x1c: /* FCVTAS */
12677            case 0x3a: /* FCVTPS */
12678            case 0x3b: /* FCVTZS */
12679                gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12680                break;
12681            case 0x3d: /* FRECPE */
12682                gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12683                break;
12684            case 0x5a: /* FCVTNU */
12685            case 0x5b: /* FCVTMU */
12686            case 0x5c: /* FCVTAU */
12687            case 0x7a: /* FCVTPU */
12688            case 0x7b: /* FCVTZU */
12689                gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12690                break;
12691            case 0x18: /* FRINTN */
12692            case 0x19: /* FRINTM */
12693            case 0x38: /* FRINTP */
12694            case 0x39: /* FRINTZ */
12695            case 0x58: /* FRINTA */
12696            case 0x79: /* FRINTI */
12697                gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12698                break;
12699            case 0x59: /* FRINTX */
12700                gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12701                break;
12702            case 0x2f: /* FABS */
12703                tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12704                break;
12705            case 0x6f: /* FNEG */
12706                tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12707                break;
12708            case 0x7d: /* FRSQRTE */
12709                gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12710                break;
12711            case 0x7f: /* FSQRT */
12712                gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12713                break;
12714            default:
12715                g_assert_not_reached();
12716            }
12717
12718            write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12719
12720            tcg_temp_free_i32(tcg_res);
12721            tcg_temp_free_i32(tcg_op);
12722        }
12723
12724        clear_vec_high(s, is_q, rd);
12725    }
12726
12727    if (tcg_rmode) {
12728        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12729        tcg_temp_free_i32(tcg_rmode);
12730    }
12731
12732    if (tcg_fpstatus) {
12733        tcg_temp_free_ptr(tcg_fpstatus);
12734    }
12735}
12736
12737/* AdvSIMD scalar x indexed element
12738 *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12739 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12740 * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12741 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12742 * AdvSIMD vector x indexed element
12743 *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12744 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12745 * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12746 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12747 */
12748static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12749{
12750    /* This encoding has two kinds of instruction:
12751     *  normal, where we perform elt x idxelt => elt for each
12752     *     element in the vector
12753     *  long, where we perform elt x idxelt and generate a result of
12754     *     double the width of the input element
12755     * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
12756     */
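         /* For example, MUL V0.4S, V1.4S, V2.S[1] is a "normal" op, while
          * SMLAL2 V0.4S, V1.8H, V2.H[1] is a "long" op reading the upper
          * half of Vn (the INSN2 form, selected via the Q bit below).
          */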
12757    bool is_scalar = extract32(insn, 28, 1);
12758    bool is_q = extract32(insn, 30, 1);
12759    bool u = extract32(insn, 29, 1);
12760    int size = extract32(insn, 22, 2);
12761    int l = extract32(insn, 21, 1);
12762    int m = extract32(insn, 20, 1);
12763    /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12764    int rm = extract32(insn, 16, 4);
12765    int opcode = extract32(insn, 12, 4);
12766    int h = extract32(insn, 11, 1);
12767    int rn = extract32(insn, 5, 5);
12768    int rd = extract32(insn, 0, 5);
12769    bool is_long = false;
12770    int is_fp = 0;
12771    bool is_fp16 = false;
12772    int index;
12773    TCGv_ptr fpst;
12774
12775    switch (16 * u + opcode) {
12776    case 0x08: /* MUL */
12777    case 0x10: /* MLA */
12778    case 0x14: /* MLS */
12779        if (is_scalar) {
12780            unallocated_encoding(s);
12781            return;
12782        }
12783        break;
12784    case 0x02: /* SMLAL, SMLAL2 */
12785    case 0x12: /* UMLAL, UMLAL2 */
12786    case 0x06: /* SMLSL, SMLSL2 */
12787    case 0x16: /* UMLSL, UMLSL2 */
12788    case 0x0a: /* SMULL, SMULL2 */
12789    case 0x1a: /* UMULL, UMULL2 */
12790        if (is_scalar) {
12791            unallocated_encoding(s);
12792            return;
12793        }
12794        is_long = true;
12795        break;
12796    case 0x03: /* SQDMLAL, SQDMLAL2 */
12797    case 0x07: /* SQDMLSL, SQDMLSL2 */
12798    case 0x0b: /* SQDMULL, SQDMULL2 */
12799        is_long = true;
12800        break;
12801    case 0x0c: /* SQDMULH */
12802    case 0x0d: /* SQRDMULH */
12803        break;
12804    case 0x01: /* FMLA */
12805    case 0x05: /* FMLS */
12806    case 0x09: /* FMUL */
12807    case 0x19: /* FMULX */
12808        is_fp = 1;
12809        break;
12810    case 0x1d: /* SQRDMLAH */
12811    case 0x1f: /* SQRDMLSH */
12812        if (!dc_isar_feature(aa64_rdm, s)) {
12813            unallocated_encoding(s);
12814            return;
12815        }
12816        break;
12817    case 0x0e: /* SDOT */
12818    case 0x1e: /* UDOT */
12819        if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
12820            unallocated_encoding(s);
12821            return;
12822        }
12823        break;
12824    case 0x11: /* FCMLA #0 */
12825    case 0x13: /* FCMLA #90 */
12826    case 0x15: /* FCMLA #180 */
12827    case 0x17: /* FCMLA #270 */
12828        if (is_scalar || !dc_isar_feature(aa64_fcma, s)) {
12829            unallocated_encoding(s);
12830            return;
12831        }
12832        is_fp = 2;
12833        break;
12834    case 0x00: /* FMLAL */
12835    case 0x04: /* FMLSL */
12836    case 0x18: /* FMLAL2 */
12837    case 0x1c: /* FMLSL2 */
12838        if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) {
12839            unallocated_encoding(s);
12840            return;
12841        }
12842        size = MO_16;
12843        /* is_fp, but we pass cpu_env not fp_status.  */
12844        break;
12845    default:
12846        unallocated_encoding(s);
12847        return;
12848    }
12849
12850    switch (is_fp) {
12851    case 1: /* normal fp */
12852        /* convert insn encoded size to TCGMemOp size */
12853        switch (size) {
12854        case 0: /* half-precision */
12855            size = MO_16;
12856            is_fp16 = true;
12857            break;
12858        case MO_32: /* single precision */
12859        case MO_64: /* double precision */
12860            break;
12861        default:
12862            unallocated_encoding(s);
12863            return;
12864        }
12865        break;
12866
12867    case 2: /* complex fp */
12868        /* Each indexable element is a complex pair.  */
12869        size += 1;
12870        switch (size) {
12871        case MO_32:
12872            if (h && !is_q) {
12873                unallocated_encoding(s);
12874                return;
12875            }
12876            is_fp16 = true;
12877            break;
12878        case MO_64:
12879            break;
12880        default:
12881            unallocated_encoding(s);
12882            return;
12883        }
12884        break;
12885
12886    default: /* integer */
12887        switch (size) {
12888        case MO_8:
12889        case MO_64:
12890            unallocated_encoding(s);
12891            return;
12892        }
12893        break;
12894    }
12895    if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) {
12896        unallocated_encoding(s);
12897        return;
12898    }
12899
12900    /* Given TCGMemOp size, adjust register and indexing.  */
12901    switch (size) {
12902    case MO_16:
12903        index = h << 2 | l << 1 | m;
12904        break;
12905    case MO_32:
12906        index = h << 1 | l;
12907        rm |= m << 4;
12908        break;
12909    case MO_64:
12910        if (l || !is_q) {
12911            unallocated_encoding(s);
12912            return;
12913        }
12914        index = h;
12915        rm |= m << 4;
12916        break;
12917    default:
12918        g_assert_not_reached();
12919    }
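         /* e.g. for MO_16 the index is H:L:M, so H=1, L=0, M=1 selects
          * element 5; for MO_32 and MO_64, M instead becomes bit 4 of Rm.
          */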
12920
12921    if (!fp_access_check(s)) {
12922        return;
12923    }
12924
12925    if (is_fp) {
12926        fpst = get_fpstatus_ptr(is_fp16);
12927    } else {
12928        fpst = NULL;
12929    }
12930
12931    switch (16 * u + opcode) {
12932    case 0x0e: /* SDOT */
12933    case 0x1e: /* UDOT */
12934        gen_gvec_op3_ool(s, is_q, rd, rn, rm, index,
12935                         u ? gen_helper_gvec_udot_idx_b
12936                         : gen_helper_gvec_sdot_idx_b);
12937        return;
12938    case 0x11: /* FCMLA #0 */
12939    case 0x13: /* FCMLA #90 */
12940    case 0x15: /* FCMLA #180 */
12941    case 0x17: /* FCMLA #270 */
12942        {
12943            int rot = extract32(insn, 13, 2);
12944            int data = (index << 2) | rot;
12945            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12946                               vec_full_reg_offset(s, rn),
12947                               vec_full_reg_offset(s, rm), fpst,
12948                               is_q ? 16 : 8, vec_full_reg_size(s), data,
12949                               size == MO_64
12950                               ? gen_helper_gvec_fcmlas_idx
12951                               : gen_helper_gvec_fcmlah_idx);
12952            tcg_temp_free_ptr(fpst);
12953        }
12954        return;
12955
12956    case 0x00: /* FMLAL */
12957    case 0x04: /* FMLSL */
12958    case 0x18: /* FMLAL2 */
12959    case 0x1c: /* FMLSL2 */
12960        {
12961            int is_s = extract32(opcode, 2, 1);
12962            int is_2 = u;
12963            int data = (index << 2) | (is_2 << 1) | is_s;
12964            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12965                               vec_full_reg_offset(s, rn),
12966                               vec_full_reg_offset(s, rm), cpu_env,
12967                               is_q ? 16 : 8, vec_full_reg_size(s),
12968                               data, gen_helper_gvec_fmlal_idx_a64);
12969        }
12970        return;
12971    }
12972
12973    if (size == 3) {
12974        TCGv_i64 tcg_idx = tcg_temp_new_i64();
12975        int pass;
12976
12977        assert(is_fp && is_q && !is_long);
12978
12979        read_vec_element(s, tcg_idx, rm, index, MO_64);
12980
12981        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12982            TCGv_i64 tcg_op = tcg_temp_new_i64();
12983            TCGv_i64 tcg_res = tcg_temp_new_i64();
12984
12985            read_vec_element(s, tcg_op, rn, pass, MO_64);
12986
12987            switch (16 * u + opcode) {
12988            case 0x05: /* FMLS */
12989                /* As usual for ARM, separate negation for fused multiply-add */
12990                gen_helper_vfp_negd(tcg_op, tcg_op);
12991                /* fall through */
12992            case 0x01: /* FMLA */
12993                read_vec_element(s, tcg_res, rd, pass, MO_64);
12994                gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
12995                break;
12996            case 0x09: /* FMUL */
12997                gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
12998                break;
12999            case 0x19: /* FMULX */
13000                gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
13001                break;
13002            default:
13003                g_assert_not_reached();
13004            }
13005
13006            write_vec_element(s, tcg_res, rd, pass, MO_64);
13007            tcg_temp_free_i64(tcg_op);
13008            tcg_temp_free_i64(tcg_res);
13009        }
13010
13011        tcg_temp_free_i64(tcg_idx);
13012        clear_vec_high(s, !is_scalar, rd);
13013    } else if (!is_long) {
13014        /* 32 bit floating point, or 16 or 32 bit integer.
13015         * For the 16 bit scalar case we use the usual Neon helpers and
13016         * rely on the fact that 0 op 0 == 0 with no side effects.
13017         */
13018        TCGv_i32 tcg_idx = tcg_temp_new_i32();
13019        int pass, maxpasses;
13020
13021        if (is_scalar) {
13022            maxpasses = 1;
13023        } else {
13024            maxpasses = is_q ? 4 : 2;
13025        }
13026
13027        read_vec_element_i32(s, tcg_idx, rm, index, size);
13028
13029        if (size == 1 && !is_scalar) {
13030            /* The simplest way to handle the 16x16 indexed ops is to duplicate
13031             * the index into both halves of the 32 bit tcg_idx and then use
13032             * the usual Neon helpers.
13033             */
13034            tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
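                 /* e.g. 0x0000abcd becomes 0xabcdabcd, so both 16-bit
                  * lanes of each pass see the same multiplier.
                  */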
13035        }
13036
13037        for (pass = 0; pass < maxpasses; pass++) {
13038            TCGv_i32 tcg_op = tcg_temp_new_i32();
13039            TCGv_i32 tcg_res = tcg_temp_new_i32();
13040
13041            read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
13042
13043            switch (16 * u + opcode) {
13044            case 0x08: /* MUL */
13045            case 0x10: /* MLA */
13046            case 0x14: /* MLS */
13047            {
13048                static NeonGenTwoOpFn * const fns[2][2] = {
13049                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
13050                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
13051                };
13052                NeonGenTwoOpFn *genfn;
13053                bool is_sub = opcode == 0x4;
13054
13055                if (size == 1) {
13056                    gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
13057                } else {
13058                    tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
13059                }
13060                if (opcode == 0x8) {
13061                    break;
13062                }
13063                read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
13064                genfn = fns[size - 1][is_sub];
13065                genfn(tcg_res, tcg_op, tcg_res);
13066                break;
13067            }
13068            case 0x05: /* FMLS */
13069            case 0x01: /* FMLA */
13070                read_vec_element_i32(s, tcg_res, rd, pass,
13071                                     is_scalar ? size : MO_32);
13072                switch (size) {
13073                case 1:
13074                    if (opcode == 0x5) {
13075                        /* As usual for ARM, separate negation for fused
13076                         * multiply-add */
13077                        tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
13078                    }
13079                    if (is_scalar) {
13080                        gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
13081                                                   tcg_res, fpst);
13082                    } else {
13083                        gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
13084                                                    tcg_res, fpst);
13085                    }
13086                    break;
13087                case 2:
13088                    if (opcode == 0x5) {
13089                        /* As usual for ARM, separate negation for
13090                         * fused multiply-add */
13091                        tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
13092                    }
13093                    gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
13094                                           tcg_res, fpst);
13095                    break;
13096                default:
13097                    g_assert_not_reached();
13098                }
13099                break;
13100            case 0x09: /* FMUL */
13101                switch (size) {
13102                case 1:
13103                    if (is_scalar) {
13104                        gen_helper_advsimd_mulh(tcg_res, tcg_op,
13105                                                tcg_idx, fpst);
13106                    } else {
13107                        gen_helper_advsimd_mul2h(tcg_res, tcg_op,
13108                                                 tcg_idx, fpst);
13109                    }
13110                    break;
13111                case 2:
13112                    gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
13113                    break;
13114                default:
13115                    g_assert_not_reached();
13116                }
13117                break;
13118            case 0x19: /* FMULX */
13119                switch (size) {
13120                case 1:
13121                    if (is_scalar) {
13122                        gen_helper_advsimd_mulxh(tcg_res, tcg_op,
13123                                                 tcg_idx, fpst);
13124                    } else {
13125                        gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
13126                                                  tcg_idx, fpst);
13127                    }
13128                    break;
13129                case 2:
13130                    gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
13131                    break;
13132                default:
13133                    g_assert_not_reached();
13134                }
13135                break;
13136            case 0x0c: /* SQDMULH */
13137                if (size == 1) {
13138                    gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
13139                                               tcg_op, tcg_idx);
13140                } else {
13141                    gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
13142                                               tcg_op, tcg_idx);
13143                }
13144                break;
13145            case 0x0d: /* SQRDMULH */
13146                if (size == 1) {
13147                    gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
13148                                                tcg_op, tcg_idx);
13149                } else {
13150                    gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
13151                                                tcg_op, tcg_idx);
13152                }
13153                break;
13154            case 0x1d: /* SQRDMLAH */
13155                read_vec_element_i32(s, tcg_res, rd, pass,
13156                                     is_scalar ? size : MO_32);
13157                if (size == 1) {
13158                    gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
13159                                                tcg_op, tcg_idx, tcg_res);
13160                } else {
13161                    gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
13162                                                tcg_op, tcg_idx, tcg_res);
13163                }
13164                break;
13165            case 0x1f: /* SQRDMLSH */
13166                read_vec_element_i32(s, tcg_res, rd, pass,
13167                                     is_scalar ? size : MO_32);
13168                if (size == 1) {
13169                    gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
13170                                                tcg_op, tcg_idx, tcg_res);
13171                } else {
13172                    gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
13173                                                tcg_op, tcg_idx, tcg_res);
13174                }
13175                break;
13176            default:
13177                g_assert_not_reached();
13178            }
13179
13180            if (is_scalar) {
13181                write_fp_sreg(s, rd, tcg_res);
13182            } else {
13183                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
13184            }
13185
13186            tcg_temp_free_i32(tcg_op);
13187            tcg_temp_free_i32(tcg_res);
13188        }
13189
13190        tcg_temp_free_i32(tcg_idx);
13191        clear_vec_high(s, is_q, rd);
13192    } else {
13193        /* long ops: 16x16->32 or 32x32->64 */
13194        TCGv_i64 tcg_res[2];
13195        int pass;
13196        bool satop = extract32(opcode, 0, 1);
13197        TCGMemOp memop = MO_32;
13198
13199        if (satop || !u) {
13200            memop |= MO_SIGN;
13201        }
13202
13203        if (size == 2) {
13204            TCGv_i64 tcg_idx = tcg_temp_new_i64();
13205
13206            read_vec_element(s, tcg_idx, rm, index, memop);
13207
13208            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13209                TCGv_i64 tcg_op = tcg_temp_new_i64();
13210                TCGv_i64 tcg_passres;
13211                int passelt;
13212
13213                if (is_scalar) {
13214                    passelt = 0;
13215                } else {
13216                    passelt = pass + (is_q * 2);
13217                }
13218
13219                read_vec_element(s, tcg_op, rn, passelt, memop);
13220
13221                tcg_res[pass] = tcg_temp_new_i64();
13222
13223                if (opcode == 0xa || opcode == 0xb) {
13224                    /* Non-accumulating ops */
13225                    tcg_passres = tcg_res[pass];
13226                } else {
13227                    tcg_passres = tcg_temp_new_i64();
13228                }
13229
13230                tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
13231                tcg_temp_free_i64(tcg_op);
13232
13233                if (satop) {
13234                    /* saturating, doubling */
13235                    gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
13236                                                      tcg_passres, tcg_passres);
13237           for it to be properly cleared -- thus we
13238
13239                if (opcode == 0xa || opcode == 0xb) {
13240                    continue;
13241                }
13242
13243                /* Accumulating op: handle accumulate step */
13244                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13245
13246                switch (opcode) {
13247                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13248                    tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13249                    break;
13250                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13251                    tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13252                    break;
13253                case 0x7: /* SQDMLSL, SQDMLSL2 */
13254                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
13255                    /* fall through */
13256                case 0x3: /* SQDMLAL, SQDMLAL2 */
13257                    gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
13258                                                      tcg_res[pass],
13259                                                      tcg_passres);
13260                    break;
13261                default:
13262                    g_assert_not_reached();
13263                }
13264                tcg_temp_free_i64(tcg_passres);
13265            }
13266            tcg_temp_free_i64(tcg_idx);
13267
13268            clear_vec_high(s, !is_scalar, rd);
13269        } else {
13270            TCGv_i32 tcg_idx = tcg_temp_new_i32();
13271
13272            assert(size == 1);
13273            read_vec_element_i32(s, tcg_idx, rm, index, size);
13274
13275            if (!is_scalar) {
13276                /* The simplest way to handle the 16x16 indexed ops is to
13277                 * duplicate the index into both halves of the 32 bit tcg_idx
13278                 * and then use the usual Neon helpers.
13279                 */
13280                tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13281            }
13282
13283            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13284                TCGv_i32 tcg_op = tcg_temp_new_i32();
13285                TCGv_i64 tcg_passres;
13286
13287                if (is_scalar) {
13288                    read_vec_element_i32(s, tcg_op, rn, pass, size);
13289                } else {
13290                    read_vec_element_i32(s, tcg_op, rn,
13291                                         pass + (is_q * 2), MO_32);
13292                }
13293
13294                tcg_res[pass] = tcg_temp_new_i64();
13295
13296                if (opcode == 0xa || opcode == 0xb) {
13297                    /* Non-accumulating ops */
13298                    tcg_passres = tcg_res[pass];
13299                } else {
13300                    tcg_passres = tcg_temp_new_i64();
13301                }
13302
13303                if (memop & MO_SIGN) {
13304                    gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13305                } else {
13306                    gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13307                }
13308                if (satop) {
13309                    gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
13310                                                      tcg_passres, tcg_passres);
13311                }
13312                tcg_temp_free_i32(tcg_op);
13313
13314                if (opcode == 0xa || opcode == 0xb) {
13315                    continue;
13316                }
13317
13318                /* Accumulating op: handle accumulate step */
13319                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13320
13321                switch (opcode) {
13322                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13323                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13324                                             tcg_passres);
13325                    break;
13326                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13327                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13328                                             tcg_passres);
13329                    break;
13330                case 0x7: /* SQDMLSL, SQDMLSL2 */
13331                    gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13332                    /* fall through */
13333                case 0x3: /* SQDMLAL, SQDMLAL2 */
13334                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
13335                                                      tcg_res[pass],
13336                                                      tcg_passres);
13337                    break;
13338                default:
13339                    g_assert_not_reached();
13340                }
13341                tcg_temp_free_i64(tcg_passres);
13342            }
13343            tcg_temp_free_i32(tcg_idx);
13344
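                 /* The scalar 16x16->32 result occupies only the low
                  * 32 bits; zero-extend so the MO_64 store below writes
                  * zeros above it.
                  */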
13345            if (is_scalar) {
13346                tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13347            }
13348        }
13349
13350        if (is_scalar) {
13351            tcg_res[1] = tcg_const_i64(0);
13352        }
13353
13354        for (pass = 0; pass < 2; pass++) {
13355            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13356            tcg_temp_free_i64(tcg_res[pass]);
13357        }
13358    }
13359
13360    if (fpst) {
13361        tcg_temp_free_ptr(fpst);
13362    }
13363}
13364
13365/* Crypto AES
13366 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13367 * +-----------------+------+-----------+--------+-----+------+------+
13368 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13369 * +-----------------+------+-----------+--------+-----+------+------+
13370 */
13371static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13372{
13373    int size = extract32(insn, 22, 2);
13374    int opcode = extract32(insn, 12, 5);
13375    int rn = extract32(insn, 5, 5);
13376    int rd = extract32(insn, 0, 5);
13377    int decrypt;
13378    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13379    TCGv_i32 tcg_decrypt;
13380    CryptoThreeOpIntFn *genfn;
13381
13382    if (!dc_isar_feature(aa64_aes, s) || size != 0) {
13383        unallocated_encoding(s);
13384        return;
13385    }
13386
13387    switch (opcode) {
13388    case 0x4: /* AESE */
13389        decrypt = 0;
13390        genfn = gen_helper_crypto_aese;
13391        break;
13392    case 0x6: /* AESMC */
13393        decrypt = 0;
13394        genfn = gen_helper_crypto_aesmc;
13395        break;
13396    case 0x5: /* AESD */
13397        decrypt = 1;
13398        genfn = gen_helper_crypto_aese;
13399        break;
13400    case 0x7: /* AESIMC */
13401        decrypt = 1;
13402        genfn = gen_helper_crypto_aesmc;
13403        break;
13404    default:
13405        unallocated_encoding(s);
13406        return;
13407    }
13408
13409    if (!fp_access_check(s)) {
13410        return;
13411    }
13412
13413    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13414    tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13415    tcg_decrypt = tcg_const_i32(decrypt);
13416
13417    genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt);
13418
13419    tcg_temp_free_ptr(tcg_rd_ptr);
13420    tcg_temp_free_ptr(tcg_rn_ptr);
13421    tcg_temp_free_i32(tcg_decrypt);
13422}
13423
13424/* Crypto three-reg SHA
13425 *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
13426 * +-----------------+------+---+------+---+--------+-----+------+------+
13427 * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
13428 * +-----------------+------+---+------+---+--------+-----+------+------+
13429 */
13430static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13431{
13432    int size = extract32(insn, 22, 2);
13433    int opcode = extract32(insn, 12, 3);
13434    int rm = extract32(insn, 16, 5);
13435    int rn = extract32(insn, 5, 5);
13436    int rd = extract32(insn, 0, 5);
13437    CryptoThreeOpFn *genfn;
13438    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13439    bool feature;
13440
13441    if (size != 0) {
13442        unallocated_encoding(s);
13443        return;
13444    }
13445
13446    switch (opcode) {
13447    case 0: /* SHA1C */
13448    case 1: /* SHA1P */
13449    case 2: /* SHA1M */
13450    case 3: /* SHA1SU0 */
13451        genfn = NULL;
13452        feature = dc_isar_feature(aa64_sha1, s);
13453        break;
13454    case 4: /* SHA256H */
13455        genfn = gen_helper_crypto_sha256h;
13456        feature = dc_isar_feature(aa64_sha256, s);
13457        break;
13458    case 5: /* SHA256H2 */
13459        genfn = gen_helper_crypto_sha256h2;
13460        feature = dc_isar_feature(aa64_sha256, s);
13461        break;
13462    case 6: /* SHA256SU1 */
13463        genfn = gen_helper_crypto_sha256su1;
13464        feature = dc_isar_feature(aa64_sha256, s);
13465        break;
13466    default:
13467        unallocated_encoding(s);
13468        return;
13469    }
13470
13471    if (!feature) {
13472        unallocated_encoding(s);
13473        return;
13474    }
13475
13476    if (!fp_access_check(s)) {
13477        return;
13478    }
13479
13480    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13481    tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13482    tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13483
13484    if (genfn) {
13485        genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
13486    } else {
13487        TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
13488
13489        gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr,
13490                                    tcg_rm_ptr, tcg_opcode);
13491        tcg_temp_free_i32(tcg_opcode);
13492    }
13493
13494    tcg_temp_free_ptr(tcg_rd_ptr);
13495    tcg_temp_free_ptr(tcg_rn_ptr);
13496    tcg_temp_free_ptr(tcg_rm_ptr);
13497}
13498
13499/* Crypto two-reg SHA
13500 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13501 * +-----------------+------+-----------+--------+-----+------+------+
13502 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13503 * +-----------------+------+-----------+--------+-----+------+------+
13504 */
13505static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13506{
13507    int size = extract32(insn, 22, 2);
13508    int opcode = extract32(insn, 12, 5);
13509    int rn = extract32(insn, 5, 5);
13510    int rd = extract32(insn, 0, 5);
13511    CryptoTwoOpFn *genfn;
13512    bool feature;
13513    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13514
13515    if (size != 0) {
13516        unallocated_encoding(s);
13517        return;
13518    }
13519
13520    switch (opcode) {
13521    case 0: /* SHA1H */
13522        feature = dc_isar_feature(aa64_sha1, s);
13523        genfn = gen_helper_crypto_sha1h;
13524        break;
13525    case 1: /* SHA1SU1 */
13526        feature = dc_isar_feature(aa64_sha1, s);
13527        genfn = gen_helper_crypto_sha1su1;
13528        break;
13529    case 2: /* SHA256SU0 */
13530        feature = dc_isar_feature(aa64_sha256, s);
13531        genfn = gen_helper_crypto_sha256su0;
13532        break;
13533    default:
13534        unallocated_encoding(s);
13535        return;
13536    }
13537
13538    if (!feature) {
13539        unallocated_encoding(s);
13540        return;
13541    }
13542
13543    if (!fp_access_check(s)) {
13544        return;
13545    }
13546
13547    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13548    tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13549
13550    genfn(tcg_rd_ptr, tcg_rn_ptr);
13551
13552    tcg_temp_free_ptr(tcg_rd_ptr);
13553    tcg_temp_free_ptr(tcg_rn_ptr);
13554}
13555
13556/* Crypto three-reg SHA512
13557 *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
13558 * +-----------------------+------+---+---+-----+--------+------+------+
13559 * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
13560 * +-----------------------+------+---+---+-----+--------+------+------+
13561 */
13562static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13563{
13564    int opcode = extract32(insn, 10, 2);
13565    int o = extract32(insn, 14, 1);
13566    int rm = extract32(insn, 16, 5);
13567    int rn = extract32(insn, 5, 5);
13568    int rd = extract32(insn, 0, 5);
13569    bool feature;
13570    CryptoThreeOpFn *genfn;
13571
13572    if (o == 0) {
13573        switch (opcode) {
13574        case 0: /* SHA512H */
13575            feature = dc_isar_feature(aa64_sha512, s);
13576            genfn = gen_helper_crypto_sha512h;
13577            break;
13578        case 1: /* SHA512H2 */
13579            feature = dc_isar_feature(aa64_sha512, s);
13580            genfn = gen_helper_crypto_sha512h2;
13581            break;
13582        case 2: /* SHA512SU1 */
13583            feature = dc_isar_feature(aa64_sha512, s);
13584            genfn = gen_helper_crypto_sha512su1;
13585            break;
13586        case 3: /* RAX1 */
13587            feature = dc_isar_feature(aa64_sha3, s);
13588            genfn = NULL;
13589            break;
13590        }
13591    } else {
13592        switch (opcode) {
13593        case 0: /* SM3PARTW1 */
13594            feature = dc_isar_feature(aa64_sm3, s);
13595            genfn = gen_helper_crypto_sm3partw1;
13596            break;
13597        case 1: /* SM3PARTW2 */
13598            feature = dc_isar_feature(aa64_sm3, s);
13599            genfn = gen_helper_crypto_sm3partw2;
13600            break;
13601        case 2: /* SM4EKEY */
13602            feature = dc_isar_feature(aa64_sm4, s);
13603            genfn = gen_helper_crypto_sm4ekey;
13604            break;
13605        default:
13606            unallocated_encoding(s);
13607            return;
13608        }
13609    }
13610
13611    if (!feature) {
13612        unallocated_encoding(s);
13613        return;
13614    }
13615
13616    if (!fp_access_check(s)) {
13617        return;
13618    }
13619
13620    if (genfn) {
13621        TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13622
13623        tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13624        tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13625        tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13626
13627        genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
13628
13629        tcg_temp_free_ptr(tcg_rd_ptr);
13630        tcg_temp_free_ptr(tcg_rn_ptr);
13631        tcg_temp_free_ptr(tcg_rm_ptr);
13632    } else {
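             /* RAX1 (genfn == NULL above): Vd = Vn ^ ROL64(Vm, 1),
              * computed per 64-bit lane.
              */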
13633        TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
13634        int pass;
13635
13636        tcg_op1 = tcg_temp_new_i64();
13637        tcg_op2 = tcg_temp_new_i64();
13638        tcg_res[0] = tcg_temp_new_i64();
13639        tcg_res[1] = tcg_temp_new_i64();
13640
13641        for (pass = 0; pass < 2; pass++) {
13642            read_vec_element(s, tcg_op1, rn, pass, MO_64);
13643            read_vec_element(s, tcg_op2, rm, pass, MO_64);
13644
13645            tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1);
13646            tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13647        }
13648        write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13649        write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13650
13651        tcg_temp_free_i64(tcg_op1);
13652        tcg_temp_free_i64(tcg_op2);
13653        tcg_temp_free_i64(tcg_res[0]);
13654        tcg_temp_free_i64(tcg_res[1]);
13655    }
13656}
13657
13658/* Crypto two-reg SHA512
13659 *  31                                     12  11  10  9    5 4    0
13660 * +-----------------------------------------+--------+------+------+
13661 * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
13662 * +-----------------------------------------+--------+------+------+
13663 */
13664static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13665{
13666    int opcode = extract32(insn, 10, 2);
13667    int rn = extract32(insn, 5, 5);
13668    int rd = extract32(insn, 0, 5);
13669    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13670    bool feature;
13671    CryptoTwoOpFn *genfn;
13672
13673    switch (opcode) {
13674    case 0: /* SHA512SU0 */
13675        feature = dc_isar_feature(aa64_sha512, s);
13676        genfn = gen_helper_crypto_sha512su0;
13677        break;
13678    case 1: /* SM4E */
13679        feature = dc_isar_feature(aa64_sm4, s);
13680        genfn = gen_helper_crypto_sm4e;
13681        break;
13682    default:
13683        unallocated_encoding(s);
13684        return;
13685    }
13686
13687    if (!feature) {
13688        unallocated_encoding(s);
13689        return;
13690    }
13691
13692    if (!fp_access_check(s)) {
13693        return;
13694    }
13695
13696    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13697    tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13698
13699    genfn(tcg_rd_ptr, tcg_rn_ptr);
13700
13701    tcg_temp_free_ptr(tcg_rd_ptr);
13702    tcg_temp_free_ptr(tcg_rn_ptr);
13703}
13704
13705/* Crypto four-register
13706 *  31               23 22 21 20  16 15  14  10 9    5 4    0
13707 * +-------------------+-----+------+---+------+------+------+
13708 * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
13709 * +-------------------+-----+------+---+------+------+------+
13710 */
13711static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13712{
13713    int op0 = extract32(insn, 21, 2);
13714    int rm = extract32(insn, 16, 5);
13715    int ra = extract32(insn, 10, 5);
13716    int rn = extract32(insn, 5, 5);
13717    int rd = extract32(insn, 0, 5);
13718    bool feature;
13719
13720    switch (op0) {
13721    case 0: /* EOR3 */
13722    case 1: /* BCAX */
13723        feature = dc_isar_feature(aa64_sha3, s);
13724        break;
13725    case 2: /* SM3SS1 */
13726        feature = dc_isar_feature(aa64_sm3, s);
13727        break;
13728    default:
13729        unallocated_encoding(s);
13730        return;
13731    }
13732
13733    if (!feature) {
13734        unallocated_encoding(s);
13735        return;
13736    }
13737
13738    if (!fp_access_check(s)) {
13739        return;
13740    }
13741
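         /* EOR3: Vd = Vn ^ Vm ^ Va;  BCAX: Vd = Vn ^ (Vm & ~Va).
          * Both are computed 64 bits per pass below; SM3SS1 takes
          * the 32-bit path.
          */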
13742    if (op0 < 2) {
13743        TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13744        int pass;
13745
13746        tcg_op1 = tcg_temp_new_i64();
13747        tcg_op2 = tcg_temp_new_i64();
13748        tcg_op3 = tcg_temp_new_i64();
13749        tcg_res[0] = tcg_temp_new_i64();
13750        tcg_res[1] = tcg_temp_new_i64();
13751
13752        for (pass = 0; pass < 2; pass++) {
13753            read_vec_element(s, tcg_op1, rn, pass, MO_64);
13754            read_vec_element(s, tcg_op2, rm, pass, MO_64);
13755            read_vec_element(s, tcg_op3, ra, pass, MO_64);
13756
13757            if (op0 == 0) {
13758                /* EOR3 */
13759                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13760            } else {
13761                /* BCAX */
13762                tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13763            }
13764            tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13765        }
13766        write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13767        write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13768
13769        tcg_temp_free_i64(tcg_op1);
13770        tcg_temp_free_i64(tcg_op2);
13771        tcg_temp_free_i64(tcg_op3);
13772        tcg_temp_free_i64(tcg_res[0]);
13773        tcg_temp_free_i64(tcg_res[1]);
13774    } else {
13775        TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13776
13777        tcg_op1 = tcg_temp_new_i32();
13778        tcg_op2 = tcg_temp_new_i32();
13779        tcg_op3 = tcg_temp_new_i32();
13780        tcg_res = tcg_temp_new_i32();
13781        tcg_zero = tcg_const_i32(0);
13782
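             /* SM3SS1: Vd.S[3] = ROL(ROL(Vn.S[3], 12) + Vm.S[3] + Va.S[3], 7),
              * expressed below as rotate-rights by 20 and 25, with the
              * other three lanes of Vd zeroed.
              */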
13783        read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13784        read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13785        read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13786
13787        tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13788        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13789        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13790        tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13791
13792        write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13793        write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13794        write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13795        write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13796
13797        tcg_temp_free_i32(tcg_op1);
13798        tcg_temp_free_i32(tcg_op2);
13799        tcg_temp_free_i32(tcg_op3);
13800        tcg_temp_free_i32(tcg_res);
13801        tcg_temp_free_i32(tcg_zero);
13802    }
13803}
13804
13805/* Crypto XAR
13806 *  31                   21 20  16 15    10 9    5 4    0
13807 * +-----------------------+------+--------+------+------+
13808 * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
13809 * +-----------------------+------+--------+------+------+
13810 */
13811static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13812{
13813    int rm = extract32(insn, 16, 5);
13814    int imm6 = extract32(insn, 10, 6);
13815    int rn = extract32(insn, 5, 5);
13816    int rd = extract32(insn, 0, 5);
13817    TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
13818    int pass;
13819
13820    if (!dc_isar_feature(aa64_sha3, s)) {
13821        unallocated_encoding(s);
13822        return;
13823    }
13824
13825    if (!fp_access_check(s)) {
13826        return;
13827    }
13828
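         /* XAR: Vd = ROR64(Vn ^ Vm, imm6), per 64-bit lane. */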
13829    tcg_op1 = tcg_temp_new_i64();
13830    tcg_op2 = tcg_temp_new_i64();
13831    tcg_res[0] = tcg_temp_new_i64();
13832    tcg_res[1] = tcg_temp_new_i64();
13833
13834    for (pass = 0; pass < 2; pass++) {
13835        read_vec_element(s, tcg_op1, rn, pass, MO_64);
13836        read_vec_element(s, tcg_op2, rm, pass, MO_64);
13837
13838        tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
13839        tcg_gen_rotri_i64(tcg_res[pass], tcg_res[pass], imm6);
13840    }
13841    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13842    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13843
13844    tcg_temp_free_i64(tcg_op1);
13845    tcg_temp_free_i64(tcg_op2);
13846    tcg_temp_free_i64(tcg_res[0]);
13847    tcg_temp_free_i64(tcg_res[1]);
13848}
13849
13850/* Crypto three-reg imm2
13851 *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
13852 * +-----------------------+------+-----+------+--------+------+------+
13853 * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
13854 * +-----------------------+------+-----+------+--------+------+------+
13855 */
13856static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
13857{
13858    int opcode = extract32(insn, 10, 2);
13859    int imm2 = extract32(insn, 12, 2);
13860    int rm = extract32(insn, 16, 5);
13861    int rn = extract32(insn, 5, 5);
13862    int rd = extract32(insn, 0, 5);
13863    TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13864    TCGv_i32 tcg_imm2, tcg_opcode;
13865
13866    if (!dc_isar_feature(aa64_sm3, s)) {
13867        unallocated_encoding(s);
13868        return;
13869    }
13870
13871    if (!fp_access_check(s)) {
13872        return;
13873    }
13874
13875    tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13876    tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13877    tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13878    tcg_imm2   = tcg_const_i32(imm2);
13879    tcg_opcode = tcg_const_i32(opcode);
13880
13881    gen_helper_crypto_sm3tt(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2,
13882                            tcg_opcode);
13883
13884    tcg_temp_free_ptr(tcg_rd_ptr);
13885    tcg_temp_free_ptr(tcg_rn_ptr);
13886    tcg_temp_free_ptr(tcg_rm_ptr);
13887    tcg_temp_free_i32(tcg_imm2);
13888    tcg_temp_free_i32(tcg_opcode);
13889}
13890
13891/* C3.6 Data processing - SIMD, inc Crypto
13892 *
13893 * As the decode gets a little complex, we are using a table-based
13894 * approach for this part of the decode.
13895 */
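     /* An insn matches an entry when (insn & mask) == pattern, and the
      * first matching entry wins (hence the simd_mod_imm ordering note
      * below); e.g. 0x4e284820 (AESE V0.16B, V1.16B) reaches
      * disas_crypto_aes via the 0x4e280800/0xff3e0c00 entry.
      */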
13896static const AArch64DecodeTable data_proc_simd[] = {
13897    /* pattern  ,  mask     ,  fn                        */
13898    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
13899    { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
13900    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
13901    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
13902    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
13903    { 0x0e000400, 0x9fe08400, disas_simd_copy },
13904    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
13905    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
13906    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
13907    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
13908    { 0x0e000000, 0xbf208c00, disas_simd_tb },
13909    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
13910    { 0x2e000000, 0xbf208400, disas_simd_ext },
13911    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
13912    { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
13913    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
13914    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
13915    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
13916    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
13917    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
13918    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
13919    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
13920    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
13921    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
13922    { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
13923    { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
13924    { 0xce000000, 0xff808000, disas_crypto_four_reg },
13925    { 0xce800000, 0xffe00000, disas_crypto_xar },
13926    { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
13927    { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
13928    { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
13929    { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
13930    { 0x00000000, 0x00000000, NULL }
13931};
13932
13933static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13934{
13935    /* Note that this is called with all non-FP cases from
13936     * table C3-6 so it must UNDEF for entries not specifically
13937     * allocated to instructions in that table.
13938     */
13939    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13940    if (fn) {
13941        fn(s, insn);
13942    } else {
13943        unallocated_encoding(s);
13944    }
13945}
13946
13947/* C3.6 Data processing - SIMD and floating point */
13948static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13949{
13950    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13951        disas_data_proc_fp(s, insn);
13952    } else {
13953        /* SIMD, including crypto */
13954        disas_data_proc_simd(s, insn);
13955    }
13956}
13957
13958/**
13959 * is_guarded_page:
13960 * @env: The cpu environment
13961 * @s: The DisasContext
13962 *
13963 * Return true if the page is guarded.
13964 */
13965static bool is_guarded_page(CPUARMState *env, DisasContext *s)
13966{
13967#ifdef CONFIG_USER_ONLY
13968    return false;  /* FIXME */
13969#else
13970    uint64_t addr = s->base.pc_first;
13971    int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
13972    unsigned int index = tlb_index(env, mmu_idx, addr);
13973    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
13974
13975    /*
13976     * We test this immediately after reading an insn, which means
13977     * that any normal page must be in the TLB.  The only exception
13978     * would be for executing from flash or device memory, which
13979     * does not retain the TLB entry.
13980     *
13981     * FIXME: Assume false for those, for now.  We could use
13982     * arm_cpu_get_phys_page_attrs_debug to re-read the page
13983     * table entry even for that case.
13984     */
13985    return (tlb_hit(entry->addr_code, addr) &&
13986            env_tlb(env)->d[mmu_idx].iotlb[index].attrs.target_tlb_bit0);
13987#endif
13988}
13989
13990/**
13991 * btype_destination_ok:
13992 * @insn: The instruction at the branch destination
13993 * @bt: SCTLR_ELx.BT
13994 * @btype: PSTATE.BTYPE, and is non-zero
13995 *
13996 * On a guarded page, there are a limited number of insns
13997 * that may be present at the branch target:
13998 *   - branch target identifiers,
13999 *   - paciasp, pacibsp,
14000 *   - BRK insn
14001 *   - HLT insn
14002 * Anything else causes a Branch Target Exception.
14003 *
14004 * Return true if the branch is compatible, false to raise BTITRAP.
14005 */
14006static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
14007{
14008    if ((insn & 0xfffff01fu) == 0xd503201fu) {
14009        /* HINT space */
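             /* Bits [11:5] are the hint's CRm:op2 selector. */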
14010        switch (extract32(insn, 5, 7)) {
14011        case 0b011001: /* PACIASP */
14012        case 0b011011: /* PACIBSP */
14013            /*
14014             * If SCTLR_ELx.BT, then PACI*SP are not compatible
14015             * with btype == 3.  Otherwise all btype are ok.
14016             */
14017            return !bt || btype != 3;
14018        case 0b100000: /* BTI */
14019            /* Not compatible with any btype.  */
14020            return false;
14021        case 0b100010: /* BTI c */
14022            /* Not compatible with btype == 3 */
14023            return btype != 3;
14024        case 0b100100: /* BTI j */
14025            /* Not compatible with btype == 2 */
14026            return btype != 2;
14027        case 0b100110: /* BTI jc */
14028            /* Compatible with any btype.  */
14029            return true;
14030        }
14031    } else {
14032        switch (insn & 0xffe0001fu) {
14033        case 0xd4200000u: /* BRK */
14034        case 0xd4400000u: /* HLT */
14035            /* Give priority to the breakpoint exception.  */
14036            return true;
14037        }
14038    }
14039    return false;
14040}
14041
14042/* C3.1 A64 instruction index by encoding */
14043static void disas_a64_insn(CPUARMState *env, DisasContext *s)
14044{
14045    uint32_t insn;
14046
14047    insn = arm_ldl_code(env, s->pc, s->sctlr_b);
14048    s->insn = insn;
14049    s->pc += 4;
14050
14051    s->fp_access_checked = false;
14052
14053    if (dc_isar_feature(aa64_bti, s)) {
14054        if (s->base.num_insns == 1) {
14055            /*
14056             * At the first insn of the TB, compute s->guarded_page.
14057             * We delayed computing this until successfully reading
14058             * the first insn of the TB, above.  This (mostly) ensures
14059             * that the softmmu tlb entry has been populated, and the
14060             * page table GP bit is available.
14061             *
14062             * Note that we need to compute this even if btype == 0,
14063             * because this value is used for BR instructions later
14064             * where ENV is not available.
14065             */
14066            s->guarded_page = is_guarded_page(env, s);
14067
14068            /* First insn can have btype set to non-zero.  */
14069            tcg_debug_assert(s->btype >= 0);
14070
14071            /*
14072             * Note that the Branch Target Exception has fairly high
14073             * priority -- below debugging exceptions but above almost
14074             * everything else.  This allows us to handle this now
14075             * instead of waiting until the insn is otherwise decoded.
14076             */
14077            if (s->btype != 0
14078                && s->guarded_page
14079                && !btype_destination_ok(insn, s->bt, s->btype)) {
14080                gen_exception_insn(s, 4, EXCP_UDEF, syn_btitrap(s->btype),
14081                                   default_exception_el(s));
14082                return;
14083            }
14084        } else {
14085            /* Not the first insn: btype must be 0.  */
14086            tcg_debug_assert(s->btype == 0);
14087        }
14088    }
14089
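         /* Dispatch on the major opcode field, insn[28:25]. */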
14090    switch (extract32(insn, 25, 4)) {
14091    case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
14092        unallocated_encoding(s);
14093        break;
14094    case 0x2:
14095        if (!dc_isar_feature(aa64_sve, s) || !disas_sve(s, insn)) {
14096            unallocated_encoding(s);
14097        }
14098        break;
14099    case 0x8: case 0x9: /* Data processing - immediate */
14100        disas_data_proc_imm(s, insn);
14101        break;
14102    case 0xa: case 0xb: /* Branch, exception generation and system insns */
14103        disas_b_exc_sys(s, insn);
14104        break;
14105    case 0x4:
14106    case 0x6:
14107    case 0xc:
14108    case 0xe:      /* Loads and stores */
14109        disas_ldst(s, insn);
14110        break;
14111    case 0x5:
14112    case 0xd:      /* Data processing - register */
14113        disas_data_proc_reg(s, insn);
14114        break;
14115    case 0x7:
14116    case 0xf:      /* Data processing - SIMD and floating point */
14117        disas_data_proc_simd_fp(s, insn);
14118        break;
14119    default:
14120        assert(FALSE); /* all 16 cases should be handled above */
14121        break;
14122    }
14123
14124    /* if we allocated any temporaries, free them here */
14125    free_tmp_a64(s);
14126
14127    /*
14128     * After execution of most insns, btype is reset to 0.
14129     * Note that we set btype == -1 when the insn sets btype.
14130     */
14131    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
14132        reset_btype(s);
14133    }
14134}
14135
14136static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
14137                                          CPUState *cpu)
14138{
14139    DisasContext *dc = container_of(dcbase, DisasContext, base);
14140    CPUARMState *env = cpu->env_ptr;
14141    ARMCPU *arm_cpu = env_archcpu(env);
14142    uint32_t tb_flags = dc->base.tb->flags;
14143    int bound, core_mmu_idx;
14144
14145    dc->isar = &arm_cpu->isar;
14146    dc->pc = dc->base.pc_first;
14147    dc->condjmp = 0;
14148
14149    dc->aarch64 = 1;
14150    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
14151     * there is no secure EL1, so we route exceptions to EL3.
14152     */
14153    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
14154                               !arm_el_is_aa64(env, 3);
14155    dc->thumb = 0;
14156    dc->sctlr_b = 0;
14157    dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
14158    dc->condexec_mask = 0;
14159    dc->condexec_cond = 0;
14160    core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
14161    dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
14162    dc->tbii = FIELD_EX32(tb_flags, TBFLAG_A64, TBII);
14163    dc->tbid = FIELD_EX32(tb_flags, TBFLAG_A64, TBID);
14164    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
14165#if !defined(CONFIG_USER_ONLY)
14166    dc->user = (dc->current_el == 0);
14167#endif
14168    dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
14169    dc->sve_excp_el = FIELD_EX32(tb_flags, TBFLAG_A64, SVEEXC_EL);
14170    dc->sve_len = (FIELD_EX32(tb_flags, TBFLAG_A64, ZCR_LEN) + 1) * 16;
14171    dc->pauth_active = FIELD_EX32(tb_flags, TBFLAG_A64, PAUTH_ACTIVE);
14172    dc->bt = FIELD_EX32(tb_flags, TBFLAG_A64, BT);
14173    dc->btype = FIELD_EX32(tb_flags, TBFLAG_A64, BTYPE);
14174    dc->vec_len = 0;
14175    dc->vec_stride = 0;
14176    dc->cp_regs = arm_cpu->cp_regs;
14177    dc->features = env->features;
14178
14179    /* Single step state. The code-generation logic here is:
14180     *  SS_ACTIVE == 0:
14181     *   generate code with no special handling for single-stepping (except
14182     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
14183     *   this happens anyway because those changes are all system register or
14184     *   PSTATE writes).
14185     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
14186     *   emit code for one insn
14187     *   emit code to clear PSTATE.SS
14188     *   emit code to generate software step exception for completed step
14189     *   end TB (as usual for having generated an exception)
14190     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
14191     *   emit code to generate a software step exception
14192     *   end the TB
14193     */
14194    dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
14195    dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
14196    dc->is_ldex = false;
14197    dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
14198
14199    /* Bound the number of insns to execute to those left on the page.  */
14200    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
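         /* e.g. with 4K pages (TARGET_PAGE_MASK == -4096), a pc_first
          * ending in 0xffc gives bound == 1, while a page-aligned
          * pc_first gives 1024.
          */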
14201
14202    /* If architectural single step active, limit to 1.  */
14203    if (dc->ss_active) {
14204        bound = 1;
14205    }
14206    dc->base.max_insns = MIN(dc->base.max_insns, bound);
14207
14208    init_tmp_a64_array(dc);
14209}
14210
14211static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
14212{
14213}
14214
14215static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
14216{
14217    DisasContext *dc = container_of(dcbase, DisasContext, base);
14218
14219    tcg_gen_insn_start(dc->pc, 0, 0);
14220    dc->insn_start = tcg_last_op();
14221}

static bool aarch64_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
                                        const CPUBreakpoint *bp)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (bp->flags & BP_CPU) {
        gen_a64_set_pc_im(dc->pc);
        gen_helper_check_breakpoints(cpu_env);
        /* End the TB early; it likely won't be executed */
        dc->base.is_jmp = DISAS_TOO_MANY;
    } else {
        gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
        /* The address covered by the breakpoint must be
           included in [tb->pc, tb->pc + tb->size) in order
           for it to be properly cleared -- thus we
           increment the PC here so that the logic setting
           tb->size below does the right thing.  */
        dc->pc += 4;
        dc->base.is_jmp = DISAS_NORETURN;
    }

    return true;
}
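
/*
 * Summary of the two paths above: BP_CPU marks breakpoints owned by the
 * guest's own debug registers, so whether one actually fires depends on
 * runtime state and is decided by the check_breakpoints helper; any
 * other breakpoint (e.g. one set via the gdbstub) unconditionally
 * raises EXCP_DEBUG to hand control back to the debugger.
 */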

static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;

    if (dc->ss_active && !dc->pstate_ss) {
        /* Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (eg by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(dc->base.num_insns == 1);
        gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
                      default_exception_el(dc));
        dc->base.is_jmp = DISAS_NORETURN;
    } else {
        disas_a64_insn(env, dc);
    }

    dc->base.pc_next = dc->pc;
    translator_loop_temp_check(&dc->base);
}
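
/*
 * For reference: syn_swstep(same_el, isv, ex) encodes the ESR_ELx
 * syndrome for a software-step debug exception.  ISV says whether the
 * EX bit is valid, and EX flags that the stepped insn was a
 * load-exclusive; both are zero above because, as the comment notes,
 * no insn was actually stepped.
 */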

static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->base.singlestep_enabled || dc->ss_active)) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_set_pc_im(dc->pc);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            if (dc->base.singlestep_enabled) {
                gen_exception_internal(EXCP_DEBUG);
            } else {
                gen_step_complete_exception(dc);
            }
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, dc->pc);
            break;
        default:
        case DISAS_UPDATE:
            gen_a64_set_pc_im(dc->pc);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_set_pc_im(dc->pc);
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_a64_set_pc_im(dc->pc);
            gen_helper_yield(cpu_env);
            break;
        case DISAS_WFI:
        {
            /* This is a special case because we don't want to just halt the CPU
             * if trying to debug across a WFI.
             */
            TCGv_i32 tmp = tcg_const_i32(4);

            gen_a64_set_pc_im(dc->pc);
            gen_helper_wfi(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            /* The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
        }
    }

    /* Functions above can change dc->pc, so re-align db->pc_next */
    dc->base.pc_next = dc->pc;
}
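
/*
 * Quick reference for the is_jmp dispositions handled above (a summary;
 * see the DISAS_* definitions for the authoritative meanings):
 * DISAS_NEXT / DISAS_TOO_MANY chain directly to the next TB via
 * gen_goto_tb(); DISAS_JUMP already has the new PC in cpu_pc, so the
 * next TB is looked up dynamically; DISAS_EXIT and DISAS_UPDATE return
 * to the main loop so that changed CPU state is noticed; DISAS_NORETURN
 * means an exception has already been generated.
 */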

static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
                                 CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
}

const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .breakpoint_check   = aarch64_tr_breakpoint_check,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
    .disas_log          = aarch64_tr_disas_log,
};
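
/*
 * For orientation (a sketch, not code from this file): these hooks are
 * driven by the generic translator_loop() in accel/tcg/translator.c,
 * roughly:
 *
 *     ops->init_disas_context(db, cpu);
 *     ops->tb_start(db, cpu);
 *     do {
 *         ops->insn_start(db, cpu);
 *         ... run ops->breakpoint_check for any breakpoint at db->pc_next ...
 *         ops->translate_insn(db, cpu);
 *     } while (db->is_jmp == DISAS_NEXT && db->num_insns < db->max_insns);
 *     ops->tb_stop(db, cpu);
 */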