qemu/target/arm/translate-a64.c
/*
 *  AArch64 translation
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "qemu/host-utils.h"

#include "hw/semihosting/semihost.h"
#include "exec/gen-icount.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"

#include "trace-tcg.h"
#include "translate-a64.h"
#include "qemu/atomic128.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, MemOp);

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

static inline int get_a64_user_mem_index(DisasContext *s)
{
    /* Return the core mmu_idx to use for A64 "unprivileged load/store" insns:
     *  if EL1, access as if EL0; otherwise access at current EL
     */
    ARMMMUIdx useridx;

    switch (s->mmu_idx) {
    case ARMMMUIdx_S12NSE1:
        useridx = ARMMMUIdx_S12NSE0;
        break;
    case ARMMMUIdx_S1SE1:
        useridx = ARMMMUIdx_S1SE0;
        break;
    case ARMMMUIdx_S2NS:
        g_assert_not_reached();
    default:
        useridx = s->mmu_idx;
        break;
    }
    return arm_to_core_mmu_idx(useridx);
}
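
/*
 * Worked example for get_a64_user_mem_index() (illustrative, not part of
 * the original source): an LDTR executed at EL1 in the non-secure stage
 * 1&2 regime maps ARMMMUIdx_S12NSE1 to ARMMMUIdx_S12NSE0, so the access
 * is permission-checked as if it were made from EL0, while the same insn
 * at EL2 or EL3 falls through to the current mmu_idx.
 */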

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_st_i32(zero, cpu_env, offsetof(CPUARMState, btype));
        tcg_temp_free_i32(zero);
        s->btype = 0;
    }
}

static void set_btype(DisasContext *s, int val)
{
    TCGv_i32 tcg_val;

    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);

    tcg_val = tcg_const_i32(val);
    tcg_gen_st_i32(tcg_val, cpu_env, offsetof(CPUARMState, btype));
    tcg_temp_free_i32(tcg_val);
    s->btype = -1;
}

void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (s->current_el >= 2) {
        /* FIXME: ARMv8.1-VHE S2 translation regime.  */
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55.  */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        if (tbi != 3) {
            TCGv_i64 tcg_zero = tcg_const_i64(0);

            /*
             * The two TBI bits differ.
             * If tbi0, then !tbi1: only use the extension if positive.
             * if !tbi0, then tbi1: only use the extension if negative.
             */
            tcg_gen_movcond_i64(tbi == 1 ? TCG_COND_GE : TCG_COND_LT,
                                dst, dst, tcg_zero, dst, src);
            tcg_temp_free_i64(tcg_zero);
        }
    }
}
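
/*
 * Worked example (illustrative, not part of the original source): with
 * tbi == 1 (TBI0 set, TBI1 clear), a tagged address 0xaa00123456789abc
 * has bit 55 clear, so the sign-extension from bit 55 wins and the
 * result is 0x0000123456789abc; an address with bit 55 set keeps its
 * original top byte because the movcond above falls back to SRC.
 */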

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
}

/*
 * Return a "clean" address for ADDR according to TBID.
 * This is always a fresh temporary, as we need to be able to
 * increment this independently of a dirty write-back address.
 */
static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = new_tmp_a64(s);
    gen_top_byte_ignore(s, clean, addr, s->tbid);
    return clean;
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /* Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.  */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);

    arm_free_cc(&c32);
}

static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}

static void gen_exception_internal(int excp)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);

    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_excp);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception_internal_insn(DisasContext *s, uint64_t pc, int excp)
{
    gen_a64_set_pc_im(pc);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_insn(DisasContext *s, uint64_t pc, int excp,
                               uint32_t syndrome, uint32_t target_el)
{
    gen_a64_set_pc_im(pc);
    gen_exception(excp, syndrome, target_el);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    TCGv_i32 tcg_syn;

    gen_a64_set_pc_im(s->pc_curr);
    tcg_syn = tcg_const_i32(syndrome);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
    tcg_temp_free_i32(tcg_syn);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We have just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    /* No direct tb linking with singlestep (either QEMU's or the ARM
     * debug architecture kind) or deterministic io
     */
    if (s->base.singlestep_enabled || s->ss_active ||
        (tb_cflags(s->base.tb) & CF_LAST_IO)) {
        return false;
    }

#ifndef CONFIG_USER_ONLY
    /* Only link tbs from inside the same guest page */
    if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
        return false;
    }
#endif

    return true;
}

static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    TranslationBlock *tb;

    tb = s->base.tb;
    if (use_goto_tb(s, n, dest)) {
        tcg_gen_goto_tb(n);
        gen_a64_set_pc_im(dest);
        tcg_gen_exit_tb(tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_set_pc_im(dest);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else if (s->base.singlestep_enabled) {
            gen_exception_internal(EXCP_DEBUG);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}

static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
#endif
    s->tmp_a64_count = 0;
}

static void free_tmp_a64(DisasContext *s)
{
    int i;
    for (i = 0; i < s->tmp_a64_count; i++) {
        tcg_temp_free_i64(s->tmp_a64[i]);
    }
    init_tmp_a64_array(s);
}

TCGv_i64 new_tmp_a64(DisasContext *s)
{
    assert(s->tmp_a64_count < TMP_A64_MAX);
    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
}

TCGv_i64 new_tmp_a64_zero(DisasContext *s)
{
    TCGv_i64 t = new_tmp_a64(s);
    tcg_gen_movi_i64(t, 0);
    return t;
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        return new_tmp_a64_zero(s);
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
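
/*
 * Usage sketch (illustrative, not part of the original source): for
 * "ADD X0, SP, #16" the base operand must come from cpu_reg_sp(s, 31),
 * whereas for "ADD X0, XZR, X1" it comes from cpu_reg(s, 31), which
 * returns a discardable zero temporary rather than cpu_X[31].
 */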

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = new_tmp_a64(s);
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    if (!is_q) {
        TCGv_i64 tcg_zero = tcg_const_i64(0);
        tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8);
        tcg_temp_free_i64(tcg_zero);
    }
    if (vsz > 16) {
        tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0);
    }
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, cpu_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
    tcg_temp_free_i64(tmp);
}

TCGv_ptr get_fpstatus_ptr(bool is_f16)
{
    TCGv_ptr statusptr = tcg_temp_new_ptr();
    int offset;

    /* In A64 all instructions (both FP and Neon) use the FPCR; there
     * is no equivalent of the A32 Neon "standard FPSCR value".
     * However half-precision operations operate under a different
     * FZ16 flag and use vfp.fp_status_f16 instead of vfp.fp_status.
     */
    if (is_f16) {
        offset = offsetof(CPUARMState, vfp.fp_status_f16);
    } else {
        offset = offsetof(CPUARMState, vfp.fp_status);
    }
    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
    return statusptr;
}
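
/*
 * Typical usage (illustrative sketch; the helper name below is an
 * example only, not a function in this file):
 *
 *     TCGv_ptr fpst = get_fpstatus_ptr(is_fp16);
 *     gen_helper_some_fp_op(dst, src1, src2, fpst);
 *     tcg_temp_free_ptr(fpst);
 *
 * The caller owns the returned pointer temporary and must free it.
 */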

/* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function.  */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}
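
/*
 * For example (illustrative), a full-vector bitwise AND can be emitted
 * through the 3-operand expander:
 *
 *     gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
 *
 * The expander receives the computed register offsets plus the 8- or
 * 16-byte operation size and the full vector register size.
 */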

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an op descriptor.
 */
static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd,
                          int rn, int64_t imm, const GVecGen2i *gvec_op)
{
    tcg_gen_gvec_2i(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
                    is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op);
}

/* Expand a 3-operand AdvSIMD vector operation using an op descriptor.  */
static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
                         int rn, int rm, const GVecGen3 *gvec_op)
{
    tcg_gen_gvec_3(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
                   vec_full_reg_offset(s, rm), is_q ? 16 : 8,
                   vec_full_reg_size(s), gvec_op);
}

/* Expand a 3-operand operation using an out-of-line helper.  */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + env pointer operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, gen_helper_gvec_3_ptr *fn)
{
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), cpu_env,
                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = get_fpstatus_ptr(is_fp16);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    tcg_temp_free_ptr(fpst);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

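        /*
         * Overflow (V) occurs iff both addends have the same sign but
         * the result's sign differs: V = (result ^ t0) & ~(t0 ^ t1),
         * taken from bit 63 by the extrh below.
         */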
        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);
    tcg_temp_free_i64(flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64();
        cf_64 = tcg_temp_new_i64();
        vf_64 = tcg_temp_new_i64();
        tmp = tcg_const_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32();
        t1_32 = tcg_temp_new_i32();
        tmp = tcg_const_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, int size, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    g_assert(size <= 3);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      size,
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s,
                             TCGv_i64 dest, TCGv_i64 tcg_addr,
                             int size, bool is_signed,
                             bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    MemOp memop = s->be_data + size;

    g_assert(size <= 3);

    if (is_signed) {
        memop += MO_SIGN;
    }

    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && is_signed) {
        g_assert(size < 3);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      size,
                                      is_signed,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s,
                      TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
                     get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmp = tcg_temp_new_i64();
    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
    if (size < 4) {
        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
                            s->be_data + size);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_temp_free_i64(tmp);
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi;

    if (size < 4) {
        MemOp memop = s->be_data + size;
        tmphi = tcg_const_i64(0);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));

    tcg_temp_free_i64(tmplo);
    tcg_temp_free_i64(tmphi);

    clear_vec_high(s, true, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, int size, MemOp endian)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, size);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);

    tcg_temp_free_i64(tcg_tmp);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, int size, MemOp endian)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), endian | size);
    write_vec_element(s, tcg_tmp, destidx, element, size);

    tcg_temp_free_i64(tcg_tmp);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static inline bool fp_access_check(DisasContext *s)
{
    assert(!s->fp_access_checked);
    s->fp_access_checked = true;

    if (!s->fp_excp_el) {
        return true;
    }

    gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
                       syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
    return false;
}

/* Check that SVE access is enabled.  If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 */
bool sve_access_check(DisasContext *s)
{
    if (s->sve_excp_el) {
        gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_sve_access_trap(),
                           s->sve_excp_el);
        return false;
    }
    return fp_access_check(s);
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}
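
/*
 * Worked example (illustrative, not part of the original source): for a
 * register offset of the form "UXTW #2" (option == 2, shift == 2), the
 * helper computes tcg_out = (tcg_in & 0xffffffff) << 2.
 */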

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is
 * intended to be used when the relevant bits for decode are too
 * awkwardly placed and switch/if based logic would be confusing and
 * deeply nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}
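
/*
 * Illustrative sketch of how a caller uses the decoder (the pattern/mask
 * values and handler name here are hypothetical, for the example only):
 *
 *     static const AArch64DecodeTable example_table[] = {
 *         { 0x0e200400, 0x9f200400, disas_example_three_reg },
 *         { 0x00000000, 0x00000000, NULL }
 *     };
 *
 *     AArch64DecodeFn *fn = lookup_disas_fn(&example_table[0], insn);
 *     if (fn) {
 *         fn(s, insn);
 *     } else {
 *         unallocated_encoding(s);
 *     }
 */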

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

/* Unconditional branch (immediate)
 *   31  30       26 25                                  0
 * +----+-----------+-------------------------------------+
 * | op | 0 0 1 0 1 |                 imm26               |
 * +----+-----------+-------------------------------------+
 */
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
{
    uint64_t addr = s->pc_curr + sextract32(insn, 0, 26) * 4;

    if (insn & (1U << 31)) {
        /* BL Branch with link */
        tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
    }

    /* B Branch / BL Branch with link */
    reset_btype(s);
    gen_goto_tb(s, 0, addr);
}
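
/*
 * Worked example (illustrative): insn 0x14000001 is "B .+4", with op == 0
 * and imm26 == 1, so addr = pc_curr + 1 * 4. The signed 26-bit word
 * offset gives a branch range of +/-128 MB.
 */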

/* Compare and branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    addr = s->pc_curr + sextract32(insn, 5, 19) * 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    label_match = gen_new_label();

    reset_btype(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);

    gen_goto_tb(s, 0, s->base.pc_next);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* Test and branch (immediate)
 *   31  30         25  24  23   19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int bit_pos, op, rt;
    uint64_t addr;
    TCGLabel *label_match;
    TCGv_i64 tcg_cmp;

    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    addr = s->pc_curr + sextract32(insn, 5, 14) * 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
    label_match = gen_new_label();

    reset_btype(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);
    tcg_temp_free_i64(tcg_cmp);
    gen_goto_tb(s, 0, s->base.pc_next);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}

/* Conditional branch (immediate)
 *  31           25  24  23                  5   4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int cond;
    uint64_t addr;

    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    addr = s->pc_curr + sextract32(insn, 5, 19) * 4;
    cond = extract32(insn, 0, 4);

    reset_btype(s);
    if (cond < 0x0e) {
        /* genuinely conditional branches */
        TCGLabel *label_match = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        gen_goto_tb(s, 0, s->base.pc_next);
        gen_set_label(label_match);
        gen_goto_tb(s, 1, addr);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, addr);
    }
}

/* HINT instruction group, including various allocated HINTs */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0b00000: /* NOP */
        break;
    case 0b00011: /* WFI */
        s->base.is_jmp = DISAS_WFI;
        break;
    case 0b00001: /* YIELD */
        /* When running in MTTCG we don't generate jumps to the yield and
         * WFE helpers as it won't affect the scheduling of other vCPUs.
         * If we wanted to more completely model WFE/SEV so we don't busy
         * spin unnecessarily we would need to do something more involved.
         */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_YIELD;
        }
        break;
    case 0b00010: /* WFE */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_WFE;
        }
        break;
    case 0b00100: /* SEV */
    case 0b00101: /* SEVL */
        /* we treat all as NOP at least for now */
        break;
    case 0b00111: /* XPACLRI */
        if (s->pauth_active) {
            gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
        }
        break;
    case 0b01000: /* PACIA1716 */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01010: /* PACIB1716 */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01100: /* AUTIA1716 */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01110: /* AUTIB1716 */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b11000: /* PACIAZ */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
                                new_tmp_a64_zero(s));
        }
        break;
    case 0b11001: /* PACIASP */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11010: /* PACIBZ */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
                                new_tmp_a64_zero(s));
        }
        break;
    case 0b11011: /* PACIBSP */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11100: /* AUTIAZ */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
                              new_tmp_a64_zero(s));
        }
        break;
    case 0b11101: /* AUTIASP */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11110: /* AUTIBZ */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
                              new_tmp_a64_zero(s));
        }
        break;
    case 0b11111: /* AUTIBSP */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    default:
        /* default specified as NOP equivalent */
        break;
    }
}

static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}

/* CLREX, DSB, DMB, ISB */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGBar bar;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
        switch (crm & 3) {
        case 1: /* MBReqTypes_Reads */
            bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
            break;
        case 2: /* MBReqTypes_Writes */
            bar = TCG_BAR_SC | TCG_MO_ST_ST;
            break;
        default: /* MBReqTypes_All */
            bar = TCG_BAR_SC | TCG_MO_ALL;
            break;
        }
        tcg_gen_mb(bar);
        return;
    case 6: /* ISB */
        /* We need to break the TB after this insn to execute
         * self-modifying code correctly and also to take
1490         * any pending interrupts immediately.
1491         */
1492        reset_btype(s);
1493        gen_goto_tb(s, 0, s->base.pc_next);
1494        return;
1495
1496    case 7: /* SB */
1497        if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
1498            goto do_unallocated;
1499        }
1500        /*
1501         * TODO: There is no speculation barrier opcode for TCG;
1502         * MB and end the TB instead.
1503         */
1504        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1505        gen_goto_tb(s, 0, s->base.pc_next);
1506        return;
1507
1508    default:
1509    do_unallocated:
1510        unallocated_encoding(s);
1511        return;
1512    }
1513}
1514
1515static void gen_xaflag(void)
1516{
1517    TCGv_i32 z = tcg_temp_new_i32();
1518
1519    tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);
1520
1521    /*
1522     * (!C & !Z) << 31
1523     * (!(C | Z)) << 31
1524     * ~((C | Z) << 31)
1525     * ~-(C | Z)
1526     * (C | Z) - 1
1527     */
1528    tcg_gen_or_i32(cpu_NF, cpu_CF, z);
1529    tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);
1530
1531    /* !(Z & C) */
1532    tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
1533    tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);
1534
1535    /* (!C & Z) << 31 -> -(Z & ~C) */
1536    tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
1537    tcg_gen_neg_i32(cpu_VF, cpu_VF);
1538
1539    /* C | Z */
1540    tcg_gen_or_i32(cpu_CF, cpu_CF, z);
1541
1542    tcg_temp_free_i32(z);
1543}
1544
1545static void gen_axflag(void)
1546{
1547    tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
1548    tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */
1549
1550    /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
1551    tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);
1552
1553    tcg_gen_movi_i32(cpu_NF, 0);
1554    tcg_gen_movi_i32(cpu_VF, 0);
1555}
1556
1557/* MSR (immediate) - move immediate to processor state field */
1558static void handle_msr_i(DisasContext *s, uint32_t insn,
1559                         unsigned int op1, unsigned int op2, unsigned int crm)
1560{
1561    TCGv_i32 t1;
1562    int op = op1 << 3 | op2;
1563
1564    /* End the TB by default, chaining is ok.  */
1565    s->base.is_jmp = DISAS_TOO_MANY;
1566
1567    switch (op) {
1568    case 0x00: /* CFINV */
1569        if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
1570            goto do_unallocated;
1571        }
1572        tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
1573        s->base.is_jmp = DISAS_NEXT;
1574        break;
1575
1576    case 0x01: /* XAFlag */
1577        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1578            goto do_unallocated;
1579        }
1580        gen_xaflag();
1581        s->base.is_jmp = DISAS_NEXT;
1582        break;
1583
1584    case 0x02: /* AXFlag */
1585        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
1586            goto do_unallocated;
1587        }
1588        gen_axflag();
1589        s->base.is_jmp = DISAS_NEXT;
1590        break;
1591
1592    case 0x05: /* SPSel */
1593        if (s->current_el == 0) {
1594            goto do_unallocated;
1595        }
1596        t1 = tcg_const_i32(crm & PSTATE_SP);
1597        gen_helper_msr_i_spsel(cpu_env, t1);
1598        tcg_temp_free_i32(t1);
1599        break;
1600
1601    case 0x1e: /* DAIFSet */
1602        t1 = tcg_const_i32(crm);
1603        gen_helper_msr_i_daifset(cpu_env, t1);
1604        tcg_temp_free_i32(t1);
1605        break;
1606
1607    case 0x1f: /* DAIFClear */
1608        t1 = tcg_const_i32(crm);
1609        gen_helper_msr_i_daifclear(cpu_env, t1);
1610        tcg_temp_free_i32(t1);
1611        /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
1612        s->base.is_jmp = DISAS_UPDATE;
1613        break;
1614
1615    default:
1616    do_unallocated:
1617        unallocated_encoding(s);
1618        return;
1619    }
1620}
1621
1622static void gen_get_nzcv(TCGv_i64 tcg_rt)
1623{
1624    TCGv_i32 tmp = tcg_temp_new_i32();
1625    TCGv_i32 nzcv = tcg_temp_new_i32();
1626
1627    /* build bit 31, N */
1628    tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1629    /* build bit 30, Z */
1630    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1631    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1632    /* build bit 29, C */
1633    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1634    /* build bit 28, V */
1635    tcg_gen_shri_i32(tmp, cpu_VF, 31);
1636    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1637    /* generate result */
1638    tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1639
1640    tcg_temp_free_i32(nzcv);
1641    tcg_temp_free_i32(tmp);
1642}
1643
1644static void gen_set_nzcv(TCGv_i64 tcg_rt)
1645{
1646    TCGv_i32 nzcv = tcg_temp_new_i32();
1647
1648    /* take NZCV from R[t] */
1649    tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1650
1651    /* bit 31, N */
1652    tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1653    /* bit 30, Z */
1654    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1655    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1656    /* bit 29, C */
1657    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1658    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1659    /* bit 28, V */
1660    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1661    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1662    tcg_temp_free_i32(nzcv);
1663}
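
/*
 * Illustrative only: a pure-C model of the packing performed by
 * gen_get_nzcv() above, given the split flag representation (NF and VF
 * carry the flag in bit 31, ZF is zero iff Z is set, CF holds 0 or 1).
 * A sketch for documentation, not used by the translator.
 */
static inline uint32_t model_pack_nzcv(uint32_t nf, uint32_t zf,
                                       uint32_t cf, uint32_t vf)
{
    uint32_t nzcv = nf & (1U << 31);          /* N: bit 31 of NF */
    nzcv |= (uint32_t)(zf == 0) << 30;        /* Z: set iff ZF == 0 */
    nzcv |= (cf & 1) << 29;                   /* C: CF is already 0 or 1 */
    nzcv |= (vf >> 31) << 28;                 /* V: bit 31 of VF */
    return nzcv;
}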
1664
1665/* MRS - move from system register
1666 * MSR (register) - move to system register
1667 * SYS
1668 * SYSL
1669 * These are all essentially the same insn in 'read' and 'write'
1670 * versions, with varying op0 fields.
1671 */
1672static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1673                       unsigned int op0, unsigned int op1, unsigned int op2,
1674                       unsigned int crn, unsigned int crm, unsigned int rt)
1675{
1676    const ARMCPRegInfo *ri;
1677    TCGv_i64 tcg_rt;
1678
1679    ri = get_arm_cp_reginfo(s->cp_regs,
1680                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1681                                               crn, crm, op0, op1, op2));
1682
1683    if (!ri) {
1684        /* Unknown register; this might be a guest error or a QEMU
1685         * unimplemented feature.
1686         */
1687        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1688                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1689                      isread ? "read" : "write", op0, op1, crn, crm, op2);
1690        unallocated_encoding(s);
1691        return;
1692    }
1693
1694    /* Check access permissions */
1695    if (!cp_access_ok(s->current_el, ri, isread)) {
1696        unallocated_encoding(s);
1697        return;
1698    }
1699
1700    if (ri->accessfn) {
1701        /* Emit code to perform further access permissions checks at
1702         * runtime; this may result in an exception.
1703         */
1704        TCGv_ptr tmpptr;
1705        TCGv_i32 tcg_syn, tcg_isread;
1706        uint32_t syndrome;
1707
1708        gen_a64_set_pc_im(s->pc_curr);
1709        tmpptr = tcg_const_ptr(ri);
1710        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1711        tcg_syn = tcg_const_i32(syndrome);
1712        tcg_isread = tcg_const_i32(isread);
1713        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1714        tcg_temp_free_ptr(tmpptr);
1715        tcg_temp_free_i32(tcg_syn);
1716        tcg_temp_free_i32(tcg_isread);
1717    } else if (ri->type & ARM_CP_RAISES_EXC) {
1718        /*
1719         * The readfn or writefn might raise an exception;
1720         * synchronize the CPU state in case it does.
1721         */
1722        gen_a64_set_pc_im(s->pc_curr);
1723    }
1724
1725    /* Handle special cases first */
1726    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1727    case ARM_CP_NOP:
1728        return;
1729    case ARM_CP_NZCV:
1730        tcg_rt = cpu_reg(s, rt);
1731        if (isread) {
1732            gen_get_nzcv(tcg_rt);
1733        } else {
1734            gen_set_nzcv(tcg_rt);
1735        }
1736        return;
1737    case ARM_CP_CURRENTEL:
1738        /* Reads as current EL value from pstate, which is
1739         * guaranteed to be constant by the tb flags.
1740         */
1741        tcg_rt = cpu_reg(s, rt);
1742        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1743        return;
1744    case ARM_CP_DC_ZVA:
1745        /* Writes clear the aligned block of memory which rt points into. */
1746        tcg_rt = cpu_reg(s, rt);
1747        gen_helper_dc_zva(cpu_env, tcg_rt);
1748        return;
1749    default:
1750        break;
1751    }
1752    if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
1753        return;
1754    } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
1755        return;
1756    }
1757
1758    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1759        gen_io_start();
1760    }
1761
1762    tcg_rt = cpu_reg(s, rt);
1763
1764    if (isread) {
1765        if (ri->type & ARM_CP_CONST) {
1766            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1767        } else if (ri->readfn) {
1768            TCGv_ptr tmpptr;
1769            tmpptr = tcg_const_ptr(ri);
1770            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1771            tcg_temp_free_ptr(tmpptr);
1772        } else {
1773            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1774        }
1775    } else {
1776        if (ri->type & ARM_CP_CONST) {
1777            /* If not forbidden by access permissions, treat as WI */
1778            return;
1779        } else if (ri->writefn) {
1780            TCGv_ptr tmpptr;
1781            tmpptr = tcg_const_ptr(ri);
1782            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1783            tcg_temp_free_ptr(tmpptr);
1784        } else {
1785            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1786        }
1787    }
1788
1789    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1790        /* I/O operations must end the TB here (whether read or write) */
1791        s->base.is_jmp = DISAS_UPDATE;
1792    }
1793    if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1794        /*
1795         * A write to any coprocessor register that ends a TB
1796         * must rebuild the hflags for the next TB.
1797         */
1798        TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
1799        gen_helper_rebuild_hflags_a64(cpu_env, tcg_el);
1800        tcg_temp_free_i32(tcg_el);
1801        /*
1802         * We default to ending the TB on a coprocessor register write,
1803         * but allow this to be suppressed by the register definition
1804         * (usually only necessary to work around guest bugs).
1805         */
1806        s->base.is_jmp = DISAS_UPDATE;
1807    }
1808}
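
/*
 * Worked example for handle_sys(): MRS x0, NZCV is encoded with
 * op0 == 3, op1 == 3, CRn == 4, CRm == 2, op2 == 0.  The reginfo
 * lookup resolves it to an entry of type ARM_CP_NZCV, so the access
 * is handled entirely inline by gen_get_nzcv()/gen_set_nzcv() above.
 */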
1809
1810/* System
1811 *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1812 * +---------------------+---+-----+-----+-------+-------+-----+------+
1813 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1814 * +---------------------+---+-----+-----+-------+-------+-----+------+
1815 */
1816static void disas_system(DisasContext *s, uint32_t insn)
1817{
1818    unsigned int l, op0, op1, crn, crm, op2, rt;
1819    l = extract32(insn, 21, 1);
1820    op0 = extract32(insn, 19, 2);
1821    op1 = extract32(insn, 16, 3);
1822    crn = extract32(insn, 12, 4);
1823    crm = extract32(insn, 8, 4);
1824    op2 = extract32(insn, 5, 3);
1825    rt = extract32(insn, 0, 5);
1826
1827    if (op0 == 0) {
1828        if (l || rt != 31) {
1829            unallocated_encoding(s);
1830            return;
1831        }
1832        switch (crn) {
1833        case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
1834            handle_hint(s, insn, op1, op2, crm);
1835            break;
1836        case 3: /* CLREX, DSB, DMB, ISB */
1837            handle_sync(s, insn, op1, op2, crm);
1838            break;
1839        case 4: /* MSR (immediate) */
1840            handle_msr_i(s, insn, op1, op2, crm);
1841            break;
1842        default:
1843            unallocated_encoding(s);
1844            break;
1845        }
1846        return;
1847    }
1848    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1849}
1850
1851/* Exception generation
1852 *
1853 *  31             24 23 21 20                     5 4   2 1  0
1854 * +-----------------+-----+------------------------+-----+----+
1855 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1856 * +-----------------+-----+------------------------+-----+----+
1857 */
1858static void disas_exc(DisasContext *s, uint32_t insn)
1859{
1860    int opc = extract32(insn, 21, 3);
1861    int op2_ll = extract32(insn, 0, 5);
1862    int imm16 = extract32(insn, 5, 16);
1863    TCGv_i32 tmp;
1864
1865    switch (opc) {
1866    case 0:
1867        /* For SVC, HVC and SMC we advance the single-step state
1868         * machine before taking the exception. This is architecturally
1869         * mandated, to ensure that single-stepping a system call
1870         * instruction works properly.
1871         */
1872        switch (op2_ll) {
1873        case 1:                                                     /* SVC */
1874            gen_ss_advance(s);
1875            gen_exception_insn(s, s->base.pc_next, EXCP_SWI,
1876                               syn_aa64_svc(imm16), default_exception_el(s));
1877            break;
1878        case 2:                                                     /* HVC */
1879            if (s->current_el == 0) {
1880                unallocated_encoding(s);
1881                break;
1882            }
1883            /* The pre HVC helper handles cases when HVC gets trapped
1884             * as an undefined insn by runtime configuration.
1885             */
1886            gen_a64_set_pc_im(s->pc_curr);
1887            gen_helper_pre_hvc(cpu_env);
1888            gen_ss_advance(s);
1889            gen_exception_insn(s, s->base.pc_next, EXCP_HVC,
1890                               syn_aa64_hvc(imm16), 2);
1891            break;
1892        case 3:                                                     /* SMC */
1893            if (s->current_el == 0) {
1894                unallocated_encoding(s);
1895                break;
1896            }
1897            gen_a64_set_pc_im(s->pc_curr);
1898            tmp = tcg_const_i32(syn_aa64_smc(imm16));
1899            gen_helper_pre_smc(cpu_env, tmp);
1900            tcg_temp_free_i32(tmp);
1901            gen_ss_advance(s);
1902            gen_exception_insn(s, s->base.pc_next, EXCP_SMC,
1903                               syn_aa64_smc(imm16), 3);
1904            break;
1905        default:
1906            unallocated_encoding(s);
1907            break;
1908        }
1909        break;
1910    case 1:
1911        if (op2_ll != 0) {
1912            unallocated_encoding(s);
1913            break;
1914        }
1915        /* BRK */
1916        gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16));
1917        break;
1918    case 2:
1919        if (op2_ll != 0) {
1920            unallocated_encoding(s);
1921            break;
1922        }
1923        /* HLT. This has two purposes.
1924         * Architecturally, it is an external halting debug instruction.
1925         * Since QEMU doesn't implement external debug, we treat this
1926         * as the halting-debug-disabled case requires: it will UNDEF.
1927         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1928         */
1929        if (semihosting_enabled() && imm16 == 0xf000) {
1930#ifndef CONFIG_USER_ONLY
1931            /* In system mode, don't allow userspace access to semihosting,
1932             * to provide some semblance of security (and for consistency
1933             * with our 32-bit semihosting).
1934             */
1935            if (s->current_el == 0) {
1936                unsupported_encoding(s, insn);
1937                break;
1938            }
1939#endif
1940            gen_exception_internal_insn(s, s->base.pc_next, EXCP_SEMIHOST);
1941        } else {
1942            unsupported_encoding(s, insn);
1943        }
1944        break;
1945    case 5:
1946        if (op2_ll < 1 || op2_ll > 3) {
1947            unallocated_encoding(s);
1948            break;
1949        }
1950        /* DCPS1, DCPS2, DCPS3 */
1951        unsupported_encoding(s, insn);
1952        break;
1953    default:
1954        unallocated_encoding(s);
1955        break;
1956    }
1957}
1958
1959/* Unconditional branch (register)
1960 *  31           25 24   21 20   16 15   10 9    5 4     0
1961 * +---------------+-------+-------+-------+------+-------+
1962 * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1963 * +---------------+-------+-------+-------+------+-------+
1964 */
1965static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1966{
1967    unsigned int opc, op2, op3, rn, op4;
1968    unsigned btype_mod = 2;   /* 0: BR, 1: BLR, 2: other */
1969    TCGv_i64 dst;
1970    TCGv_i64 modifier;
1971
1972    opc = extract32(insn, 21, 4);
1973    op2 = extract32(insn, 16, 5);
1974    op3 = extract32(insn, 10, 6);
1975    rn = extract32(insn, 5, 5);
1976    op4 = extract32(insn, 0, 5);
1977
1978    if (op2 != 0x1f) {
1979        goto do_unallocated;
1980    }
1981
1982    switch (opc) {
1983    case 0: /* BR */
1984    case 1: /* BLR */
1985    case 2: /* RET */
1986        btype_mod = opc;
1987        switch (op3) {
1988        case 0:
1989            /* BR, BLR, RET */
1990            if (op4 != 0) {
1991                goto do_unallocated;
1992            }
1993            dst = cpu_reg(s, rn);
1994            break;
1995
1996        case 2:
1997        case 3:
1998            if (!dc_isar_feature(aa64_pauth, s)) {
1999                goto do_unallocated;
2000            }
2001            if (opc == 2) {
2002                /* RETAA, RETAB */
2003                if (rn != 0x1f || op4 != 0x1f) {
2004                    goto do_unallocated;
2005                }
2006                rn = 30;
2007                modifier = cpu_X[31];
2008            } else {
2009                /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */
2010                if (op4 != 0x1f) {
2011                    goto do_unallocated;
2012                }
2013                modifier = new_tmp_a64_zero(s);
2014            }
2015            if (s->pauth_active) {
2016                dst = new_tmp_a64(s);
2017                if (op3 == 2) {
2018                    gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2019                } else {
2020                    gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2021                }
2022            } else {
2023                dst = cpu_reg(s, rn);
2024            }
2025            break;
2026
2027        default:
2028            goto do_unallocated;
2029        }
2030        gen_a64_set_pc(s, dst);
2031        /* BLR also needs to load return address */
2032        if (opc == 1) {
2033            tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
2034        }
2035        break;
2036
2037    case 8: /* BRAA */
2038    case 9: /* BLRAA */
2039        if (!dc_isar_feature(aa64_pauth, s)) {
2040            goto do_unallocated;
2041        }
2042        if ((op3 & ~1) != 2) {
2043            goto do_unallocated;
2044        }
2045        btype_mod = opc & 1;
2046        if (s->pauth_active) {
2047            dst = new_tmp_a64(s);
2048            modifier = cpu_reg_sp(s, op4);
2049            if (op3 == 2) {
2050                gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
2051            } else {
2052                gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
2053            }
2054        } else {
2055            dst = cpu_reg(s, rn);
2056        }
2057        gen_a64_set_pc(s, dst);
2058        /* BLRAA also needs to load return address */
2059        if (opc == 9) {
2060            tcg_gen_movi_i64(cpu_reg(s, 30), s->base.pc_next);
2061        }
2062        break;
2063
2064    case 4: /* ERET */
2065        if (s->current_el == 0) {
2066            goto do_unallocated;
2067        }
2068        switch (op3) {
2069        case 0: /* ERET */
2070            if (op4 != 0) {
2071                goto do_unallocated;
2072            }
2073            dst = tcg_temp_new_i64();
2074            tcg_gen_ld_i64(dst, cpu_env,
2075                           offsetof(CPUARMState, elr_el[s->current_el]));
2076            break;
2077
2078        case 2: /* ERETAA */
2079        case 3: /* ERETAB */
2080            if (!dc_isar_feature(aa64_pauth, s)) {
2081                goto do_unallocated;
2082            }
2083            if (rn != 0x1f || op4 != 0x1f) {
2084                goto do_unallocated;
2085            }
2086            dst = tcg_temp_new_i64();
2087            tcg_gen_ld_i64(dst, cpu_env,
2088                           offsetof(CPUARMState, elr_el[s->current_el]));
2089            if (s->pauth_active) {
2090                modifier = cpu_X[31];
2091                if (op3 == 2) {
2092                    gen_helper_autia(dst, cpu_env, dst, modifier);
2093                } else {
2094                    gen_helper_autib(dst, cpu_env, dst, modifier);
2095                }
2096            }
2097            break;
2098
2099        default:
2100            goto do_unallocated;
2101        }
2102        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2103            gen_io_start();
2104        }
2105
2106        gen_helper_exception_return(cpu_env, dst);
2107        tcg_temp_free_i64(dst);
2108        /* Must exit loop to check un-masked IRQs */
2109        s->base.is_jmp = DISAS_EXIT;
2110        return;
2111
2112    case 5: /* DRPS */
2113        if (op3 != 0 || op4 != 0 || rn != 0x1f) {
2114            goto do_unallocated;
2115        } else {
2116            unsupported_encoding(s, insn);
2117        }
2118        return;
2119
2120    default:
2121    do_unallocated:
2122        unallocated_encoding(s);
2123        return;
2124    }
2125
2126    switch (btype_mod) {
2127    case 0: /* BR */
2128        if (dc_isar_feature(aa64_bti, s)) {
2129            /* BR to {x16,x17} or !guard -> 1, else 3.  */
2130            set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3);
2131        }
2132        break;
2133
2134    case 1: /* BLR */
2135        if (dc_isar_feature(aa64_bti, s)) {
2136            /* BLR sets BTYPE to 2, regardless of source guarded page.  */
2137            set_btype(s, 2);
2138        }
2139        break;
2140
2141    default: /* RET or none of the above.  */
2142        /* BTYPE will be set to 0 by normal end-of-insn processing.  */
2143        break;
2144    }
2145
2146    s->base.is_jmp = DISAS_JUMP;
2147}
2148
2149/* Branches, exception generating and system instructions */
2150static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
2151{
2152    switch (extract32(insn, 25, 7)) {
2153    case 0x0a: case 0x0b:
2154    case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
2155        disas_uncond_b_imm(s, insn);
2156        break;
2157    case 0x1a: case 0x5a: /* Compare & branch (immediate) */
2158        disas_comp_b_imm(s, insn);
2159        break;
2160    case 0x1b: case 0x5b: /* Test & branch (immediate) */
2161        disas_test_b_imm(s, insn);
2162        break;
2163    case 0x2a: /* Conditional branch (immediate) */
2164        disas_cond_b_imm(s, insn);
2165        break;
2166    case 0x6a: /* Exception generation / System */
2167        if (insn & (1 << 24)) {
2168            if (extract32(insn, 22, 2) == 0) {
2169                disas_system(s, insn);
2170            } else {
2171                unallocated_encoding(s);
2172            }
2173        } else {
2174            disas_exc(s, insn);
2175        }
2176        break;
2177    case 0x6b: /* Unconditional branch (register) */
2178        disas_uncond_b_reg(s, insn);
2179        break;
2180    default:
2181        unallocated_encoding(s);
2182        break;
2183    }
2184}
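
/*
 * Worked example for the decode above: B is encoded as 0 00101 imm26,
 * so extract32(insn, 25, 7) picks up bit 31 (clear), the fixed 00101,
 * and bit 25 of imm26, yielding 0x0a or 0x0b; BL sets bit 31, giving
 * 0x4a or 0x4b.  Hence the four case values for one instruction pair.
 */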
2185
2186/*
2187 * Load/Store exclusive instructions are implemented by remembering
2188 * the value/address loaded, and seeing if these are the same
2189 * when the store is performed. This is not actually the architecturally
2190 * mandated semantics, but it works for typical guest code sequences
2191 * and avoids having to monitor regular stores.
2192 *
2193 * The store exclusive uses the atomic cmpxchg primitives to avoid
2194 * races in multi-threaded linux-user and when MTTCG softmmu is
2195 * enabled.
2196 */
2197static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
2198                               TCGv_i64 addr, int size, bool is_pair)
2199{
2200    int idx = get_mem_index(s);
2201    MemOp memop = s->be_data;
2202
2203    g_assert(size <= 3);
2204    if (is_pair) {
2205        g_assert(size >= 2);
2206        if (size == 2) {
2207            /* The pair must be single-copy atomic for the doubleword.  */
2208            memop |= MO_64 | MO_ALIGN;
2209            tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2210            if (s->be_data == MO_LE) {
2211                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2212                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2213            } else {
2214                tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2215                tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2216            }
2217        } else {
2218            /* The pair must be single-copy atomic for *each* doubleword, not
2219               the entire quadword, however it must be quadword aligned.  */
2220            memop |= MO_64;
2221            tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
2222                                memop | MO_ALIGN_16);
2223
2224            TCGv_i64 addr2 = tcg_temp_new_i64();
2225            tcg_gen_addi_i64(addr2, addr, 8);
2226            tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
2227            tcg_temp_free_i64(addr2);
2228
2229            tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2230            tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2231        }
2232    } else {
2233        memop |= size | MO_ALIGN;
2234        tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2235        tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2236    }
2237    tcg_gen_mov_i64(cpu_exclusive_addr, addr);
2238}
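
/*
 * Illustrative only: for the size == 2 pair case above, both registers
 * come from one 64-bit single-copy-atomic load; a pure-C sketch of the
 * halving (not used by the translator):
 */
static inline void model_ldxp32(uint64_t val, bool little_endian,
                                uint32_t *rt, uint32_t *rt2)
{
    if (little_endian) {
        *rt = (uint32_t)val;             /* low half -> Rt */
        *rt2 = (uint32_t)(val >> 32);    /* high half -> Rt2 */
    } else {
        *rt = (uint32_t)(val >> 32);
        *rt2 = (uint32_t)val;
    }
}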
2239
2240static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
2241                                TCGv_i64 addr, int size, int is_pair)
2242{
2243    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
2244     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
2245     *     [addr] = {Rt};
2246     *     if (is_pair) {
2247     *         [addr + datasize] = {Rt2};
2248     *     }
2249     *     {Rd} = 0;
2250     * } else {
2251     *     {Rd} = 1;
2252     * }
2253     * env->exclusive_addr = -1;
2254     */
2255    TCGLabel *fail_label = gen_new_label();
2256    TCGLabel *done_label = gen_new_label();
2257    TCGv_i64 tmp;
2258
2259    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
2260
2261    tmp = tcg_temp_new_i64();
2262    if (is_pair) {
2263        if (size == 2) {
2264            if (s->be_data == MO_LE) {
2265                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
2266            } else {
2267                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
2268            }
2269            tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
2270                                       cpu_exclusive_val, tmp,
2271                                       get_mem_index(s),
2272                                       MO_64 | MO_ALIGN | s->be_data);
2273            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2274        } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2275            if (!HAVE_CMPXCHG128) {
2276                gen_helper_exit_atomic(cpu_env);
2277                s->base.is_jmp = DISAS_NORETURN;
2278            } else if (s->be_data == MO_LE) {
2279                gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
2280                                                        cpu_exclusive_addr,
2281                                                        cpu_reg(s, rt),
2282                                                        cpu_reg(s, rt2));
2283            } else {
2284                gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
2285                                                        cpu_exclusive_addr,
2286                                                        cpu_reg(s, rt),
2287                                                        cpu_reg(s, rt2));
2288            }
2289        } else if (s->be_data == MO_LE) {
2290            gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
2291                                           cpu_reg(s, rt), cpu_reg(s, rt2));
2292        } else {
2293            gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
2294                                           cpu_reg(s, rt), cpu_reg(s, rt2));
2295        }
2296    } else {
2297        tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
2298                                   cpu_reg(s, rt), get_mem_index(s),
2299                                   size | MO_ALIGN | s->be_data);
2300        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
2301    }
2302    tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
2303    tcg_temp_free_i64(tmp);
2304    tcg_gen_br(done_label);
2305
2306    gen_set_label(fail_label);
2307    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
2308    gen_set_label(done_label);
2309    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
2310}
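
/*
 * Illustrative only: the single-register success path above behaves
 * like a compare-and-swap against the remembered value.  A sketch
 * using the GCC/Clang atomic builtins, with the address check folded
 * out (the real code branches to fail_label first):
 */
static inline int model_stxr64(uint64_t *mem, uint64_t exclusive_val,
                               uint64_t newval)
{
    uint64_t expected = exclusive_val;
    /* 0 on success, 1 on failure, matching the STXR result register */
    return !__atomic_compare_exchange_n(mem, &expected, newval, false,
                                        __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}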
2311
2312static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2313                                 int rn, int size)
2314{
2315    TCGv_i64 tcg_rs = cpu_reg(s, rs);
2316    TCGv_i64 tcg_rt = cpu_reg(s, rt);
2317    int memidx = get_mem_index(s);
2318    TCGv_i64 clean_addr;
2319
2320    if (rn == 31) {
2321        gen_check_sp_alignment(s);
2322    }
2323    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2324    tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
2325                               size | MO_ALIGN | s->be_data);
2326}
2327
2328static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2329                                      int rn, int size)
2330{
2331    TCGv_i64 s1 = cpu_reg(s, rs);
2332    TCGv_i64 s2 = cpu_reg(s, rs + 1);
2333    TCGv_i64 t1 = cpu_reg(s, rt);
2334    TCGv_i64 t2 = cpu_reg(s, rt + 1);
2335    TCGv_i64 clean_addr;
2336    int memidx = get_mem_index(s);
2337
2338    if (rn == 31) {
2339        gen_check_sp_alignment(s);
2340    }
2341    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2342
2343    if (size == 2) {
2344        TCGv_i64 cmp = tcg_temp_new_i64();
2345        TCGv_i64 val = tcg_temp_new_i64();
2346
2347        if (s->be_data == MO_LE) {
2348            tcg_gen_concat32_i64(val, t1, t2);
2349            tcg_gen_concat32_i64(cmp, s1, s2);
2350        } else {
2351            tcg_gen_concat32_i64(val, t2, t1);
2352            tcg_gen_concat32_i64(cmp, s2, s1);
2353        }
2354
2355        tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
2356                                   MO_64 | MO_ALIGN | s->be_data);
2357        tcg_temp_free_i64(val);
2358
2359        if (s->be_data == MO_LE) {
2360            tcg_gen_extr32_i64(s1, s2, cmp);
2361        } else {
2362            tcg_gen_extr32_i64(s2, s1, cmp);
2363        }
2364        tcg_temp_free_i64(cmp);
2365    } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2366        if (HAVE_CMPXCHG128) {
2367            TCGv_i32 tcg_rs = tcg_const_i32(rs);
2368            if (s->be_data == MO_LE) {
2369                gen_helper_casp_le_parallel(cpu_env, tcg_rs,
2370                                            clean_addr, t1, t2);
2371            } else {
2372                gen_helper_casp_be_parallel(cpu_env, tcg_rs,
2373                                            clean_addr, t1, t2);
2374            }
2375            tcg_temp_free_i32(tcg_rs);
2376        } else {
2377            gen_helper_exit_atomic(cpu_env);
2378            s->base.is_jmp = DISAS_NORETURN;
2379        }
2380    } else {
2381        TCGv_i64 d1 = tcg_temp_new_i64();
2382        TCGv_i64 d2 = tcg_temp_new_i64();
2383        TCGv_i64 a2 = tcg_temp_new_i64();
2384        TCGv_i64 c1 = tcg_temp_new_i64();
2385        TCGv_i64 c2 = tcg_temp_new_i64();
2386        TCGv_i64 zero = tcg_const_i64(0);
2387
2388        /* Load the two words, in memory order.  */
2389        tcg_gen_qemu_ld_i64(d1, clean_addr, memidx,
2390                            MO_64 | MO_ALIGN_16 | s->be_data);
2391        tcg_gen_addi_i64(a2, clean_addr, 8);
2392        tcg_gen_qemu_ld_i64(d2, a2, memidx, MO_64 | s->be_data);
2393
2394        /* Compare the two words, also in memory order.  */
2395        tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
2396        tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
2397        tcg_gen_and_i64(c2, c2, c1);
2398
2399        /* If compare equal, write back new data, else write back old data.  */
2400        tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
2401        tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
2402        tcg_gen_qemu_st_i64(c1, clean_addr, memidx, MO_64 | s->be_data);
2403        tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
2404        tcg_temp_free_i64(a2);
2405        tcg_temp_free_i64(c1);
2406        tcg_temp_free_i64(c2);
2407        tcg_temp_free_i64(zero);
2408
2409        /* Write back the data from memory to Rs.  */
2410        tcg_gen_mov_i64(s1, d1);
2411        tcg_gen_mov_i64(s2, d2);
2412        tcg_temp_free_i64(d1);
2413        tcg_temp_free_i64(d2);
2414    }
2415}
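
/*
 * Illustrative only: the size == 2 CASP path above folds each 32-bit
 * register pair into one 64-bit value so that a single cmpxchg
 * suffices.  tcg_gen_concat32_i64() places its first source operand
 * in the low half:
 */
static inline uint64_t model_concat32(uint32_t lo, uint32_t hi)
{
    return (uint64_t)hi << 32 | lo;
}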
2416
2417/* Compute the Sixty-Four bit (SF) register size. This logic is derived
2418 * from the ARMv8 specs for LDR (Shared decode for all encodings).
2419 */
2420static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
2421{
2422    int opc0 = extract32(opc, 0, 1);
2423    int regsize;
2424
2425    if (is_signed) {
2426        regsize = opc0 ? 32 : 64;
2427    } else {
2428        regsize = size == 3 ? 64 : 32;
2429    }
2430    return regsize == 64;
2431}
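
/*
 * Worked examples for the rule above: LDRSW (size 2, opc 10) has
 * opc0 == 0 and so targets a 64-bit register, while LDRSH Wt (opc 11)
 * has opc0 == 1 and targets 32 bits; unsigned loads target 64 bits
 * only when size == 3.
 */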
2432
2433/* Load/store exclusive
2434 *
2435 *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
2436 * +-----+-------------+----+---+----+------+----+-------+------+------+
2437 * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
2438 * +-----+-------------+----+---+----+------+----+-------+------+------+
2439 *
2440 *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2441 *   L: 0 -> store, 1 -> load
2442 *  o2: 0 -> exclusive, 1 -> not
2443 *  o1: 0 -> single register, 1 -> register pair
2444 *  o0: 1 -> load-acquire/store-release, 0 -> not
2445 */
2446static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2447{
2448    int rt = extract32(insn, 0, 5);
2449    int rn = extract32(insn, 5, 5);
2450    int rt2 = extract32(insn, 10, 5);
2451    int rs = extract32(insn, 16, 5);
2452    int is_lasr = extract32(insn, 15, 1);
2453    int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
2454    int size = extract32(insn, 30, 2);
2455    TCGv_i64 clean_addr;
2456
2457    switch (o2_L_o1_o0) {
2458    case 0x0: /* STXR */
2459    case 0x1: /* STLXR */
2460        if (rn == 31) {
2461            gen_check_sp_alignment(s);
2462        }
2463        if (is_lasr) {
2464            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2465        }
2466        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2467        gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
2468        return;
2469
2470    case 0x4: /* LDXR */
2471    case 0x5: /* LDAXR */
2472        if (rn == 31) {
2473            gen_check_sp_alignment(s);
2474        }
2475        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2476        s->is_ldex = true;
2477        gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
2478        if (is_lasr) {
2479            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2480        }
2481        return;
2482
2483    case 0x8: /* STLLR */
2484        if (!dc_isar_feature(aa64_lor, s)) {
2485            break;
2486        }
2487        /* StoreLORelease is the same as Store-Release for QEMU.  */
2488        /* fall through */
2489    case 0x9: /* STLR */
2490        /* Generate ISS for non-exclusive accesses including LASR.  */
2491        if (rn == 31) {
2492            gen_check_sp_alignment(s);
2493        }
2494        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2495        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2496        do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt,
2497                  disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2498        return;
2499
2500    case 0xc: /* LDLAR */
2501        if (!dc_isar_feature(aa64_lor, s)) {
2502            break;
2503        }
2504        /* LoadLOAcquire is the same as Load-Acquire for QEMU.  */
2505        /* fall through */
2506    case 0xd: /* LDAR */
2507        /* Generate ISS for non-exclusive accesses including LASR.  */
2508        if (rn == 31) {
2509            gen_check_sp_alignment(s);
2510        }
2511        clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2512        do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt,
2513                  disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
2514        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2515        return;
2516
2517    case 0x2: case 0x3: /* CASP / STXP */
2518        if (size & 2) { /* STXP / STLXP */
2519            if (rn == 31) {
2520                gen_check_sp_alignment(s);
2521            }
2522            if (is_lasr) {
2523                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2524            }
2525            clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2526            gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
2527            return;
2528        }
2529        if (rt2 == 31
2530            && ((rt | rs) & 1) == 0
2531            && dc_isar_feature(aa64_atomics, s)) {
2532            /* CASP / CASPL */
2533            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2534            return;
2535        }
2536        break;
2537
2538    case 0x6: case 0x7: /* CASPA / LDXP */
2539        if (size & 2) { /* LDXP / LDAXP */
2540            if (rn == 31) {
2541                gen_check_sp_alignment(s);
2542            }
2543            clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
2544            s->is_ldex = true;
2545            gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
2546            if (is_lasr) {
2547                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2548            }
2549            return;
2550        }
2551        if (rt2 == 31
2552            && ((rt | rs) & 1) == 0
2553            && dc_isar_feature(aa64_atomics, s)) {
2554            /* CASPA / CASPAL */
2555            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
2556            return;
2557        }
2558        break;
2559
2560    case 0xa: /* CAS */
2561    case 0xb: /* CASL */
2562    case 0xe: /* CASA */
2563    case 0xf: /* CASAL */
2564        if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) {
2565            gen_compare_and_swap(s, rs, rt, rn, size);
2566            return;
2567        }
2568        break;
2569    }
2570    unallocated_encoding(s);
2571}
2572
2573/*
2574 * Load register (literal)
2575 *
2576 *  31 30 29   27  26 25 24 23                5 4     0
2577 * +-----+-------+---+-----+-------------------+-------+
2578 * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2579 * +-----+-------+---+-----+-------------------+-------+
2580 *
2581 * V: 1 -> vector (simd/fp)
2582 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2583 *                   10 -> 32 bit signed, 11 -> prefetch
2584 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2585 */
2586static void disas_ld_lit(DisasContext *s, uint32_t insn)
2587{
2588    int rt = extract32(insn, 0, 5);
2589    int64_t imm = sextract32(insn, 5, 19) << 2;
2590    bool is_vector = extract32(insn, 26, 1);
2591    int opc = extract32(insn, 30, 2);
2592    bool is_signed = false;
2593    int size = 2;
2594    TCGv_i64 tcg_rt, clean_addr;
2595
2596    if (is_vector) {
2597        if (opc == 3) {
2598            unallocated_encoding(s);
2599            return;
2600        }
2601        size = 2 + opc;
2602        if (!fp_access_check(s)) {
2603            return;
2604        }
2605    } else {
2606        if (opc == 3) {
2607            /* PRFM (literal) : prefetch */
2608            return;
2609        }
2610        size = 2 + extract32(opc, 0, 1);
2611        is_signed = extract32(opc, 1, 1);
2612    }
2613
2614    tcg_rt = cpu_reg(s, rt);
2615
2616    clean_addr = tcg_const_i64(s->pc_curr + imm);
2617    if (is_vector) {
2618        do_fp_ld(s, rt, clean_addr, size);
2619    } else {
2620        /* Only unsigned 32bit loads target 32bit registers.  */
2621        bool iss_sf = opc != 0;
2622
2623        do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, false,
2624                  true, rt, iss_sf, false);
2625    }
2626    tcg_temp_free_i64(clean_addr);
2627}
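
/*
 * Worked example (hypothetical values): a LDR x0, <label> at
 * s->pc_curr == 0x1000 with imm19 == 0x10 loads from
 * 0x1000 + (0x10 << 2) == 0x1040.
 */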
2628
2629/*
2630 * LDNP (Load Pair - non-temporal hint)
2631 * LDP (Load Pair - non vector)
2632 * LDPSW (Load Pair Signed Word - non vector)
2633 * STNP (Store Pair - non-temporal hint)
2634 * STP (Store Pair - non vector)
2635 * LDNP (Load Pair of SIMD&FP - non-temporal hint)
2636 * LDP (Load Pair of SIMD&FP)
2637 * STNP (Store Pair of SIMD&FP - non-temporal hint)
2638 * STP (Store Pair of SIMD&FP)
2639 *
2640 *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2641 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2642 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2643 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2644 *
2645 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2646 *      LDPSW                    01
2647 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2648 *   V: 0 -> GPR, 1 -> Vector
2649 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2650 *      10 -> signed offset, 11 -> pre-index
2651 *   L: 0 -> Store 1 -> Load
2652 *
2653 * Rt, Rt2 = GPR or SIMD registers to be stored
2654 * Rn = general purpose register containing address
2655 * imm7 = signed offset (multiple of 4 or 8 depending on size)
2656 */
2657static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2658{
2659    int rt = extract32(insn, 0, 5);
2660    int rn = extract32(insn, 5, 5);
2661    int rt2 = extract32(insn, 10, 5);
2662    uint64_t offset = sextract64(insn, 15, 7);
2663    int index = extract32(insn, 23, 2);
2664    bool is_vector = extract32(insn, 26, 1);
2665    bool is_load = extract32(insn, 22, 1);
2666    int opc = extract32(insn, 30, 2);
2667
2668    bool is_signed = false;
2669    bool postindex = false;
2670    bool wback = false;
2671
2672    TCGv_i64 clean_addr, dirty_addr;
2673
2674    int size;
2675
2676    if (opc == 3) {
2677        unallocated_encoding(s);
2678        return;
2679    }
2680
2681    if (is_vector) {
2682        size = 2 + opc;
2683    } else {
2684        size = 2 + extract32(opc, 1, 1);
2685        is_signed = extract32(opc, 0, 1);
2686        if (!is_load && is_signed) {
2687            unallocated_encoding(s);
2688            return;
2689        }
2690    }
2691
2692    switch (index) {
2693    case 1: /* post-index */
2694        postindex = true;
2695        wback = true;
2696        break;
2697    case 0:
2698        /* signed offset with "non-temporal" hint. Since we don't emulate
2699         * caches we don't care about hints to the cache system about
2700         * data access patterns, and handle this identically to plain
2701         * signed offset.
2702         */
2703        if (is_signed) {
2704            /* There is no non-temporal-hint version of LDPSW */
2705            unallocated_encoding(s);
2706            return;
2707        }
2708        postindex = false;
2709        break;
2710    case 2: /* signed offset, rn not updated */
2711        postindex = false;
2712        break;
2713    case 3: /* pre-index */
2714        postindex = false;
2715        wback = true;
2716        break;
2717    }
2718
2719    if (is_vector && !fp_access_check(s)) {
2720        return;
2721    }
2722
2723    offset <<= size;
2724
2725    if (rn == 31) {
2726        gen_check_sp_alignment(s);
2727    }
2728
2729    dirty_addr = read_cpu_reg_sp(s, rn, 1);
2730    if (!postindex) {
2731        tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2732    }
2733    clean_addr = clean_data_tbi(s, dirty_addr);
2734
2735    if (is_vector) {
2736        if (is_load) {
2737            do_fp_ld(s, rt, clean_addr, size);
2738        } else {
2739            do_fp_st(s, rt, clean_addr, size);
2740        }
2741        tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2742        if (is_load) {
2743            do_fp_ld(s, rt2, clean_addr, size);
2744        } else {
2745            do_fp_st(s, rt2, clean_addr, size);
2746        }
2747    } else {
2748        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2749        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2750
2751        if (is_load) {
2752            TCGv_i64 tmp = tcg_temp_new_i64();
2753
2754            /* Do not modify tcg_rt before recognizing any exception
2755             * from the second load.
2756             */
2757            do_gpr_ld(s, tmp, clean_addr, size, is_signed, false,
2758                      false, 0, false, false);
2759            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2760            do_gpr_ld(s, tcg_rt2, clean_addr, size, is_signed, false,
2761                      false, 0, false, false);
2762
2763            tcg_gen_mov_i64(tcg_rt, tmp);
2764            tcg_temp_free_i64(tmp);
2765        } else {
2766            do_gpr_st(s, tcg_rt, clean_addr, size,
2767                      false, 0, false, false);
2768            tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
2769            do_gpr_st(s, tcg_rt2, clean_addr, size,
2770                      false, 0, false, false);
2771        }
2772    }
2773
2774    if (wback) {
2775        if (postindex) {
2776            tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
2777        }
2778        tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
2779    }
2780}
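
/*
 * Worked example for the offset scaling above: STP x0, x1, [sp, #16]
 * is a 64-bit pair (size 3), so the instruction encodes imm7 == 2 and
 * the byte offset recovered here is 2 << 3 == 16.
 */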
2781
2782/*
2783 * Load/store (immediate post-indexed)
2784 * Load/store (immediate pre-indexed)
2785 * Load/store (unscaled immediate)
2786 *
2787 * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2788 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2789 * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2790 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2791 *
2792 * idx = 01 -> post-indexed, 11 -> pre-indexed, 00 -> unscaled imm. (no writeback)
2793 *       10 -> unprivileged
2794 * V = 0 -> non-vector
2795 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2796 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2797 */
2798static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2799                                int opc,
2800                                int size,
2801                                int rt,
2802                                bool is_vector)
2803{
2804    int rn = extract32(insn, 5, 5);
2805    int imm9 = sextract32(insn, 12, 9);
2806    int idx = extract32(insn, 10, 2);
2807    bool is_signed = false;
2808    bool is_store = false;
2809    bool is_extended = false;
2810    bool is_unpriv = (idx == 2);
2811    bool iss_valid = !is_vector;
2812    bool post_index;
2813    bool writeback;
2814
2815    TCGv_i64 clean_addr, dirty_addr;
2816
2817    if (is_vector) {
2818        size |= (opc & 2) << 1;
2819        if (size > 4 || is_unpriv) {
2820            unallocated_encoding(s);
2821            return;
2822        }
2823        is_store = ((opc & 1) == 0);
2824        if (!fp_access_check(s)) {
2825            return;
2826        }
2827    } else {
2828        if (size == 3 && opc == 2) {
2829            /* PRFM - prefetch */
2830            if (idx != 0) {
2831                unallocated_encoding(s);
2832                return;
2833            }
2834            return;
2835        }
2836        if (opc == 3 && size > 1) {
2837            unallocated_encoding(s);
2838            return;
2839        }
2840        is_store = (opc == 0);
2841        is_signed = extract32(opc, 1, 1);
2842        is_extended = (size < 3) && extract32(opc, 0, 1);
2843    }
2844
2845    switch (idx) {
2846    case 0:
2847    case 2:
2848        post_index = false;
2849        writeback = false;
2850        break;
2851    case 1:
2852        post_index = true;
2853        writeback = true;
2854        break;
2855    case 3:
2856        post_index = false;
2857        writeback = true;
2858        break;
2859    default:
2860        g_assert_not_reached();
2861    }
2862
2863    if (rn == 31) {
2864        gen_check_sp_alignment(s);
2865    }
2866
2867    dirty_addr = read_cpu_reg_sp(s, rn, 1);
2868    if (!post_index) {
2869        tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
2870    }
2871    clean_addr = clean_data_tbi(s, dirty_addr);
2872
2873    if (is_vector) {
2874        if (is_store) {
2875            do_fp_st(s, rt, clean_addr, size);
2876        } else {
2877            do_fp_ld(s, rt, clean_addr, size);
2878        }
2879    } else {
2880        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2881        int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2882        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2883
2884        if (is_store) {
2885            do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
2886                             iss_valid, rt, iss_sf, false);
2887        } else {
2888            do_gpr_ld_memidx(s, tcg_rt, clean_addr, size,
2889                             is_signed, is_extended, memidx,
2890                             iss_valid, rt, iss_sf, false);
2891        }
2892    }
2893
2894    if (writeback) {
2895        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2896        if (post_index) {
2897            tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
2898        }
2899        tcg_gen_mov_i64(tcg_rn, dirty_addr);
2900    }
2901}
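
/*
 * Worked example for the idx decode above: LDR x0, [x1], #8 is
 * post-indexed (idx 01), so the access uses x1 unmodified and the
 * writeback step at the end of the function then adds 8 to x1.
 */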
2902
2903/*
2904 * Load/store (register offset)
2905 *
2906 * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2907 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2908 * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2909 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2910 *
2911 * For non-vector:
2912 *   size: 00 -> byte, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2913 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2914 * For vector:
2915 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2916 *   opc<0>: 0 -> store, 1 -> load
2917 * V: 1 -> vector/simd
2918 * opt: extend encoding (see DecodeRegExtend)
2919 * S: if S=1 then scale (essentially index by sizeof(size))
2920 * Rt: register to transfer into/out of
2921 * Rn: address register or SP for base
2922 * Rm: offset register or ZR for offset
2923 */
2924static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2925                                   int opc,
2926                                   int size,
2927                                   int rt,
2928                                   bool is_vector)
2929{
2930    int rn = extract32(insn, 5, 5);
2931    int shift = extract32(insn, 12, 1);
2932    int rm = extract32(insn, 16, 5);
2933    int opt = extract32(insn, 13, 3);
2934    bool is_signed = false;
2935    bool is_store = false;
2936    bool is_extended = false;
2937
2938    TCGv_i64 tcg_rm, clean_addr, dirty_addr;
2939
2940    if (extract32(opt, 1, 1) == 0) {
2941        unallocated_encoding(s);
2942        return;
2943    }
2944
2945    if (is_vector) {
2946        size |= (opc & 2) << 1;
2947        if (size > 4) {
2948            unallocated_encoding(s);
2949            return;
2950        }
2951        is_store = !extract32(opc, 0, 1);
2952        if (!fp_access_check(s)) {
2953            return;
2954        }
2955    } else {
2956        if (size == 3 && opc == 2) {
2957            /* PRFM - prefetch */
2958            return;
2959        }
2960        if (opc == 3 && size > 1) {
2961            unallocated_encoding(s);
2962            return;
2963        }
2964        is_store = (opc == 0);
2965        is_signed = extract32(opc, 1, 1);
2966        is_extended = (size < 3) && extract32(opc, 0, 1);
2967    }
2968
2969    if (rn == 31) {
2970        gen_check_sp_alignment(s);
2971    }
2972    dirty_addr = read_cpu_reg_sp(s, rn, 1);
2973
2974    tcg_rm = read_cpu_reg(s, rm, 1);
2975    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2976
2977    tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
2978    clean_addr = clean_data_tbi(s, dirty_addr);
2979
2980    if (is_vector) {
2981        if (is_store) {
2982            do_fp_st(s, rt, clean_addr, size);
2983        } else {
2984            do_fp_ld(s, rt, clean_addr, size);
2985        }
2986    } else {
2987        TCGv_i64 tcg_rt = cpu_reg(s, rt);
2988        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2989        if (is_store) {
2990            do_gpr_st(s, tcg_rt, clean_addr, size,
2991                      true, rt, iss_sf, false);
2992        } else {
2993            do_gpr_ld(s, tcg_rt, clean_addr, size,
2994                      is_signed, is_extended,
2995                      true, rt, iss_sf, false);
2996        }
2997    }
2998}
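
/*
 * Worked example for the extend/shift above: LDR x0, [x1, w2, SXTW #3]
 * encodes opt == 110 and S == 1, so the offset is the sign-extended w2
 * shifted left by size (3), i.e. scaled by the 8-byte access size.
 */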
2999
3000/*
3001 * Load/store (unsigned immediate)
3002 *
3003 * 31 30 29   27  26 25 24 23 22 21        10 9    5 4    0
3004 * +----+-------+---+-----+-----+------------+-------+------+
3005 * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
3006 * +----+-------+---+-----+-----+------------+-------+------+
3007 *
3008 * For non-vector:
3009 *   size: 00 -> byte, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
3010 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
3011 * For vector:
3012 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
3013 *   opc<0>: 0 -> store, 1 -> load
3014 * Rn: base address register (inc SP)
3015 * Rt: target register
3016 */
3017static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
3018                                        int opc,
3019                                        int size,
3020                                        int rt,
3021                                        bool is_vector)
3022{
3023    int rn = extract32(insn, 5, 5);
3024    unsigned int imm12 = extract32(insn, 10, 12);
3025    unsigned int offset;
3026
3027    TCGv_i64 clean_addr, dirty_addr;
3028
3029    bool is_store;
3030    bool is_signed = false;
3031    bool is_extended = false;
3032
3033    if (is_vector) {
3034        size |= (opc & 2) << 1;
3035        if (size > 4) {
3036            unallocated_encoding(s);
3037            return;
3038        }
3039        is_store = !extract32(opc, 0, 1);
3040        if (!fp_access_check(s)) {
3041            return;
3042        }
3043    } else {
3044        if (size == 3 && opc == 2) {
3045            /* PRFM - prefetch */
3046            return;
3047        }
3048        if (opc == 3 && size > 1) {
3049            unallocated_encoding(s);
3050            return;
3051        }
3052        is_store = (opc == 0);
3053        is_signed = extract32(opc, 1, 1);
3054        is_extended = (size < 3) && extract32(opc, 0, 1);
3055    }
3056
3057    if (rn == 31) {
3058        gen_check_sp_alignment(s);
3059    }
3060    dirty_addr = read_cpu_reg_sp(s, rn, 1);
3061    offset = imm12 << size;
3062    tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3063    clean_addr = clean_data_tbi(s, dirty_addr);
3064
3065    if (is_vector) {
3066        if (is_store) {
3067            do_fp_st(s, rt, clean_addr, size);
3068        } else {
3069            do_fp_ld(s, rt, clean_addr, size);
3070        }
3071    } else {
3072        TCGv_i64 tcg_rt = cpu_reg(s, rt);
3073        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
3074        if (is_store) {
3075            do_gpr_st(s, tcg_rt, clean_addr, size,
3076                      true, rt, iss_sf, false);
3077        } else {
3078            do_gpr_ld(s, tcg_rt, clean_addr, size, is_signed, is_extended,
3079                      true, rt, iss_sf, false);
3080        }
3081    }
3082}
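
/*
 * Worked example for the scaled immediate above: LDR x0, [x1, #32]
 * has size 3 and encodes imm12 == 4, so the byte offset recovered
 * here is 4 << 3 == 32.
 */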
3083
3084/* Atomic memory operations
3085 *
3086 *  31  30      27  26    24    22  21   16   15    12    10    5     0
3087 * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3088 * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn |  Rt |
3089 * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
3090 *
3091 * Rt: the result register
3092 * Rn: base address or SP
3093 * Rs: the source register for the operation
3094 * V: vector flag (always 0 as of v8.3)
3095 * A: acquire flag
3096 * R: release flag
3097 */
3098static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
3099                              int size, int rt, bool is_vector)
3100{
3101    int rs = extract32(insn, 16, 5);
3102    int rn = extract32(insn, 5, 5);
3103    int o3_opc = extract32(insn, 12, 4);
3104    TCGv_i64 tcg_rs, clean_addr;
3105    AtomicThreeOpFn *fn;
3106
3107    if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
3108        unallocated_encoding(s);
3109        return;
3110    }
3111    switch (o3_opc) {
3112    case 000: /* LDADD */
3113        fn = tcg_gen_atomic_fetch_add_i64;
3114        break;
3115    case 001: /* LDCLR */
3116        fn = tcg_gen_atomic_fetch_and_i64;
3117        break;
3118    case 002: /* LDEOR */
3119        fn = tcg_gen_atomic_fetch_xor_i64;
3120        break;
3121    case 003: /* LDSET */
3122        fn = tcg_gen_atomic_fetch_or_i64;
3123        break;
3124    case 004: /* LDSMAX */
3125        fn = tcg_gen_atomic_fetch_smax_i64;
3126        break;
3127    case 005: /* LDSMIN */
3128        fn = tcg_gen_atomic_fetch_smin_i64;
3129        break;
3130    case 006: /* LDUMAX */
3131        fn = tcg_gen_atomic_fetch_umax_i64;
3132        break;
3133    case 007: /* LDUMIN */
3134        fn = tcg_gen_atomic_fetch_umin_i64;
3135        break;
3136    case 010: /* SWP */
3137        fn = tcg_gen_atomic_xchg_i64;
3138        break;
3139    default:
3140        unallocated_encoding(s);
3141        return;
3142    }
3143
3144    if (rn == 31) {
3145        gen_check_sp_alignment(s);
3146    }
3147    clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
3148    tcg_rs = read_cpu_reg(s, rs, true);
3149
3150    if (o3_opc == 1) { /* LDCLR */
3151        tcg_gen_not_i64(tcg_rs, tcg_rs);
3152    }
3153
3154    /* The tcg atomic primitives are all full barriers.  Therefore we
3155     * can ignore the Acquire and Release bits of this instruction.
3156     */
3157    fn(cpu_reg(s, rt), clean_addr, tcg_rs, get_mem_index(s),
3158       s->be_data | size | MO_ALIGN);
3159}
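
/*
 * Illustrative only: LDCLR clears the bits that are set in Rs, which
 * is why the operand is complemented before the fetch-and above.  A
 * pure-C sketch using the GCC/Clang builtin (not used by the
 * translator):
 */
static inline uint64_t model_ldclr64(uint64_t *mem, uint64_t rs)
{
    /* Returns the old memory value, as the LD* atomics do. */
    return __atomic_fetch_and(mem, ~rs, __ATOMIC_SEQ_CST);
}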
3160
3161/*
3162 * PAC memory operations
3163 *
3164 *  31  30      27  26    24    22  21       12  11  10    5     0
3165 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3166 * | size | 1 1 1 | V | 0 0 | M S | 1 |  imm9  | W | 1 | Rn |  Rt |
3167 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
3168 *
3169 * Rt: the result register
3170 * Rn: base address or SP
3171 * V: vector flag (always 0 as of v8.3)
3172 * M: clear for key DA, set for key DB
3173 * W: pre-indexing flag
3174 * S: sign for imm9.
3175 */
3176static void disas_ldst_pac(DisasContext *s, uint32_t insn,
3177                           int size, int rt, bool is_vector)
3178{
3179    int rn = extract32(insn, 5, 5);
3180    bool is_wback = extract32(insn, 11, 1);
3181    bool use_key_a = !extract32(insn, 23, 1);
3182    int offset;
3183    TCGv_i64 clean_addr, dirty_addr, tcg_rt;
3184
3185    if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
3186        unallocated_encoding(s);
3187        return;
3188    }
3189
3190    if (rn == 31) {
3191        gen_check_sp_alignment(s);
3192    }
3193    dirty_addr = read_cpu_reg_sp(s, rn, 1);
3194
3195    if (s->pauth_active) {
3196        if (use_key_a) {
3197            gen_helper_autda(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
3198        } else {
3199            gen_helper_autdb(dirty_addr, cpu_env, dirty_addr, cpu_X[31]);
3200        }
3201    }
3202
3203    /* Form the 10-bit signed, scaled offset.  */
3204    offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9);
3205    offset = sextract32(offset << size, 0, 10 + size);
3206    tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
3207
3208    /* Note that "clean" and "dirty" here refer to TBI not PAC.  */
3209    clean_addr = clean_data_tbi(s, dirty_addr);
3210
3211    tcg_rt = cpu_reg(s, rt);
3212    do_gpr_ld(s, tcg_rt, clean_addr, size, /* is_signed */ false,
3213              /* extend */ false, /* iss_valid */ !is_wback,
3214              /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);
3215
3216    if (is_wback) {
3217        tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr);
3218    }
3219}
3220
3221/* Load/store register (all forms) */
3222static void disas_ldst_reg(DisasContext *s, uint32_t insn)
3223{
3224    int rt = extract32(insn, 0, 5);
3225    int opc = extract32(insn, 22, 2);
3226    bool is_vector = extract32(insn, 26, 1);
3227    int size = extract32(insn, 30, 2);
3228
3229    switch (extract32(insn, 24, 2)) {
3230    case 0:
3231        if (extract32(insn, 21, 1) == 0) {
3232            /* Load/store register (unscaled immediate)
3233             * Load/store immediate pre/post-indexed
3234             * Load/store register unprivileged
3235             */
3236            disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
3237            return;
3238        }
3239        switch (extract32(insn, 10, 2)) {
3240        case 0:
3241            disas_ldst_atomic(s, insn, size, rt, is_vector);
3242            return;
3243        case 2:
3244            disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
3245            return;
3246        default:
3247            disas_ldst_pac(s, insn, size, rt, is_vector);
3248            return;
3249        }
3250        break;
3251    case 1:
3252        disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
3253        return;
3254    }
3255    unallocated_encoding(s);
3256}
3257
3258/* AdvSIMD load/store multiple structures
3259 *
3260 *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
3261 * +---+---+---------------+---+-------------+--------+------+------+------+
3262 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
3263 * +---+---+---------------+---+-------------+--------+------+------+------+
3264 *
3265 * AdvSIMD load/store multiple structures (post-indexed)
3266 *
3267 *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
3268 * +---+---+---------------+---+---+---------+--------+------+------+------+
3269 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
3270 * +---+---+---------------+---+---+---------+--------+------+------+------+
3271 *
3272 * Rt: first (or only) SIMD&FP register to be transferred
3273 * Rn: base address or SP
3274 * Rm (post-index only): post-index register (when rm != 31) or size-dependent #imm
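     *
     * For example, LD4 {v0.8h-v3.8h}, [x0] loads eight structures of
     * four halfwords each, de-interleaving member i of structure e
     * into lane e of register Vi.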
3275 */
3276static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
3277{
3278    int rt = extract32(insn, 0, 5);
3279    int rn = extract32(insn, 5, 5);
3280    int rm = extract32(insn, 16, 5);
3281    int size = extract32(insn, 10, 2);
3282    int opcode = extract32(insn, 12, 4);
3283    bool is_store = !extract32(insn, 22, 1);
3284    bool is_postidx = extract32(insn, 23, 1);
3285    bool is_q = extract32(insn, 30, 1);
3286    TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3287    MemOp endian = s->be_data;
3288
3289    int ebytes;   /* bytes per element */
3290    int elements; /* elements per vector */
3291    int rpt;    /* num iterations */
3292    int selem;  /* structure elements */
3293    int r;
3294
3295    if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
3296        unallocated_encoding(s);
3297        return;
3298    }
3299
3300    if (!is_postidx && rm != 0) {
3301        unallocated_encoding(s);
3302        return;
3303    }
3304
3305    /* From the shared decode logic */
3306    switch (opcode) {
3307    case 0x0:
3308        rpt = 1;
3309        selem = 4;
3310        break;
3311    case 0x2:
3312        rpt = 4;
3313        selem = 1;
3314        break;
3315    case 0x4:
3316        rpt = 1;
3317        selem = 3;
3318        break;
3319    case 0x6:
3320        rpt = 3;
3321        selem = 1;
3322        break;
3323    case 0x7:
3324        rpt = 1;
3325        selem = 1;
3326        break;
3327    case 0x8:
3328        rpt = 1;
3329        selem = 2;
3330        break;
3331    case 0xa:
3332        rpt = 2;
3333        selem = 1;
3334        break;
3335    default:
3336        unallocated_encoding(s);
3337        return;
3338    }
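        /* e.g. opcode 0 is LD4/ST4 (rpt = 1, selem = 4), while opcode 2
         * is four-register LD1/ST1 (rpt = 4, selem = 1).
         */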
3339
3340    if (size == 3 && !is_q && selem != 1) {
3341        /* reserved */
3342        unallocated_encoding(s);
3343        return;
3344    }
3345
3346    if (!fp_access_check(s)) {
3347        return;
3348    }
3349
3350    if (rn == 31) {
3351        gen_check_sp_alignment(s);
3352    }
3353
3354    /* For our purposes, bytes are always little-endian.  */
3355    if (size == 0) {
3356        endian = MO_LE;
3357    }
3358
3359    /* Consecutive little-endian elements from a single register
3360     * can be promoted to a larger little-endian operation.
3361     */
3362    if (selem == 1 && endian == MO_LE) {
3363        size = 3;
3364    }
3365    ebytes = 1 << size;
3366    elements = (is_q ? 16 : 8) / ebytes;
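        /* e.g. a little-endian LD1 {v0.16b} is promoted from sixteen
         * one-byte loads to two 8-byte loads (ebytes = 8, elements = 2)
         * with identical register contents.
         */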
3367
3368    tcg_rn = cpu_reg_sp(s, rn);
3369    clean_addr = clean_data_tbi(s, tcg_rn);
3370    tcg_ebytes = tcg_const_i64(ebytes);
3371
3372    for (r = 0; r < rpt; r++) {
3373        int e;
3374        for (e = 0; e < elements; e++) {
3375            int xs;
3376            for (xs = 0; xs < selem; xs++) {
3377                int tt = (rt + r + xs) % 32;
3378                if (is_store) {
3379                    do_vec_st(s, tt, e, clean_addr, size, endian);
3380                } else {
3381                    do_vec_ld(s, tt, e, clean_addr, size, endian);
3382                }
3383                tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3384            }
3385        }
3386    }
3387    tcg_temp_free_i64(tcg_ebytes);
3388
3389    if (!is_store) {
3390        /* For non-quad operations, setting a slice of the low
3391         * 64 bits of the register clears the high 64 bits (in
3392         * the ARM ARM pseudocode this is implicit in the fact
3393         * that 'rval' is a 64 bit wide variable).
3394         * For quad operations, we might still need to zero the
3395         * high bits of the SVE register.
3396         */
3397        for (r = 0; r < rpt * selem; r++) {
3398            int tt = (rt + r) % 32;
3399            clear_vec_high(s, is_q, tt);
3400        }
3401    }
3402
3403    if (is_postidx) {
3404        if (rm == 31) {
3405            tcg_gen_addi_i64(tcg_rn, tcg_rn, rpt * elements * selem * ebytes);
3406        } else {
3407            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3408        }
3409    }
3410}
3411
3412/* AdvSIMD load/store single structure
3413 *
3414 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3415 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3416 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
3417 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3418 *
3419 * AdvSIMD load/store single structure (post-indexed)
3420 *
3421 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3422 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3423 * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
3424 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3425 *
3426 * Rt: first (or only) SIMD&FP register to be transferred
3427 * Rn: base address or SP
3428 * Rm (post-index only): post-index register (when rm != 31) or size-dependent #imm
3429 * index: lane number, encoded in Q:S:size (interpretation depends on size)
3430 *
3431 * lane_size = encoded in R, opc
3432 * transfer width = encoded in opc, S, size
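     *
     * For example, LD1 {v0.s}[3], [x0] loads only lane 3, while the
     * replicating form LD1R {v0.4s}, [x0] loads one 32-bit element
     * and copies it into all four lanes.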
3433 */
3434static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
3435{
3436    int rt = extract32(insn, 0, 5);
3437    int rn = extract32(insn, 5, 5);
3438    int rm = extract32(insn, 16, 5);
3439    int size = extract32(insn, 10, 2);
3440    int S = extract32(insn, 12, 1);
3441    int opc = extract32(insn, 13, 3);
3442    int R = extract32(insn, 21, 1);
3443    int is_load = extract32(insn, 22, 1);
3444    int is_postidx = extract32(insn, 23, 1);
3445    int is_q = extract32(insn, 30, 1);
3446
3447    int scale = extract32(opc, 1, 2);
3448    int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
3449    bool replicate = false;
3450    int index = is_q << 3 | S << 2 | size;
3451    int ebytes, xs;
3452    TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
3453
3454    if (extract32(insn, 31, 1)) {
3455        unallocated_encoding(s);
3456        return;
3457    }
3458    if (!is_postidx && rm != 0) {
3459        unallocated_encoding(s);
3460        return;
3461    }
3462
3463    switch (scale) {
3464    case 3:
3465        if (!is_load || S) {
3466            unallocated_encoding(s);
3467            return;
3468        }
3469        scale = size;
3470        replicate = true;
3471        break;
3472    case 0:
3473        break;
3474    case 1:
3475        if (extract32(size, 0, 1)) {
3476            unallocated_encoding(s);
3477            return;
3478        }
3479        index >>= 1;
3480        break;
3481    case 2:
3482        if (extract32(size, 1, 1)) {
3483            unallocated_encoding(s);
3484            return;
3485        }
3486        if (!extract32(size, 0, 1)) {
3487            index >>= 2;
3488        } else {
3489            if (S) {
3490                unallocated_encoding(s);
3491                return;
3492            }
3493            index >>= 3;
3494            scale = 3;
3495        }
3496        break;
3497    default:
3498        g_assert_not_reached();
3499    }
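        /* e.g. for a 32-bit lane (scale == 2) the lane number is Q:S,
         * so LD1 {v0.s}[3] encodes Q = 1, S = 1, size = 00 and
         * index = 0b1100 >> 2 = 3.
         */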
3500
3501    if (!fp_access_check(s)) {
3502        return;
3503    }
3504
3505    ebytes = 1 << scale;
3506
3507    if (rn == 31) {
3508        gen_check_sp_alignment(s);
3509    }
3510
3511    tcg_rn = cpu_reg_sp(s, rn);
3512    clean_addr = clean_data_tbi(s, tcg_rn);
3513    tcg_ebytes = tcg_const_i64(ebytes);
3514
3515    for (xs = 0; xs < selem; xs++) {
3516        if (replicate) {
3517            /* Load and replicate to all elements */
3518            TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3519
3520            tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr,
3521                                get_mem_index(s), s->be_data + scale);
3522            tcg_gen_gvec_dup_i64(scale, vec_full_reg_offset(s, rt),
3523                                 (is_q + 1) * 8, vec_full_reg_size(s),
3524                                 tcg_tmp);
3525            tcg_temp_free_i64(tcg_tmp);
3526        } else {
3527            /* Load/store one element per register */
3528            if (is_load) {
3529                do_vec_ld(s, rt, index, clean_addr, scale, s->be_data);
3530            } else {
3531                do_vec_st(s, rt, index, clean_addr, scale, s->be_data);
3532            }
3533        }
3534        tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
3535        rt = (rt + 1) % 32;
3536    }
3537    tcg_temp_free_i64(tcg_ebytes);
3538
3539    if (is_postidx) {
3540        if (rm == 31) {
3541            tcg_gen_addi_i64(tcg_rn, tcg_rn, selem * ebytes);
3542        } else {
3543            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3544        }
3545    }
3546}
3547
3548/* Loads and stores */
3549static void disas_ldst(DisasContext *s, uint32_t insn)
3550{
3551    switch (extract32(insn, 24, 6)) {
3552    case 0x08: /* Load/store exclusive */
3553        disas_ldst_excl(s, insn);
3554        break;
3555    case 0x18: case 0x1c: /* Load register (literal) */
3556        disas_ld_lit(s, insn);
3557        break;
3558    case 0x28: case 0x29:
3559    case 0x2c: case 0x2d: /* Load/store pair (all forms) */
3560        disas_ldst_pair(s, insn);
3561        break;
3562    case 0x38: case 0x39:
3563    case 0x3c: case 0x3d: /* Load/store register (all forms) */
3564        disas_ldst_reg(s, insn);
3565        break;
3566    case 0x0c: /* AdvSIMD load/store multiple structures */
3567        disas_ldst_multiple_struct(s, insn);
3568        break;
3569    case 0x0d: /* AdvSIMD load/store single structure */
3570        disas_ldst_single_struct(s, insn);
3571        break;
3572    default:
3573        unallocated_encoding(s);
3574        break;
3575    }
3576}
3577
3578/* PC-rel. addressing
3579 *   31  30   29 28       24 23                5 4    0
3580 * +----+-------+-----------+-------------------+------+
3581 * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
3582 * +----+-------+-----------+-------------------+------+
3583 */
3584static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
3585{
3586    unsigned int page, rd;
3587    uint64_t base;
3588    uint64_t offset;
3589
3590    page = extract32(insn, 31, 1);
3591    /* SignExtend(immhi:immlo) -> offset */
3592    offset = sextract64(insn, 5, 19);
3593    offset = offset << 2 | extract32(insn, 29, 2);
3594    rd = extract32(insn, 0, 5);
3595    base = s->pc_curr;
3596
3597    if (page) {
3598        /* ADRP (page based) */
3599        base &= ~0xfff;
3600        offset <<= 12;
3601    }
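        /* e.g. ADRP at pc_curr = 0x401234 with immhi:immlo = 1 yields
         * (0x401234 & ~0xfff) + (1 << 12) = 0x402000, the start of
         * the next 4KB page.
         */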
3602
3603    tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
3604}
3605
3606/*
3607 * Add/subtract (immediate)
3608 *
3609 *  31 30 29 28       24 23 22 21         10 9   5 4   0
3610 * +--+--+--+-----------+-----+-------------+-----+-----+
3611 * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
3612 * +--+--+--+-----------+-----+-------------+-----+-----+
3613 *
3614 *    sf: 0 -> 32bit, 1 -> 64bit
3615 *    op: 0 -> add  , 1 -> sub
3616 *     S: 1 -> set flags
3617 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
3618 */
3619static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
3620{
3621    int rd = extract32(insn, 0, 5);
3622    int rn = extract32(insn, 5, 5);
3623    uint64_t imm = extract32(insn, 10, 12);
3624    int shift = extract32(insn, 22, 2);
3625    bool setflags = extract32(insn, 29, 1);
3626    bool sub_op = extract32(insn, 30, 1);
3627    bool is_64bit = extract32(insn, 31, 1);
3628
3629    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3630    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
3631    TCGv_i64 tcg_result;
3632
3633    switch (shift) {
3634    case 0x0:
3635        break;
3636    case 0x1:
3637        imm <<= 12;
3638        break;
3639    default:
3640        unallocated_encoding(s);
3641        return;
3642    }
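        /* e.g. ADD x0, x1, #5, LSL #12 adds 0x5000; shift values 2
         * and 3 are unallocated.
         */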
3643
3644    tcg_result = tcg_temp_new_i64();
3645    if (!setflags) {
3646        if (sub_op) {
3647            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
3648        } else {
3649            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
3650        }
3651    } else {
3652        TCGv_i64 tcg_imm = tcg_const_i64(imm);
3653        if (sub_op) {
3654            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3655        } else {
3656            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3657        }
3658        tcg_temp_free_i64(tcg_imm);
3659    }
3660
3661    if (is_64bit) {
3662        tcg_gen_mov_i64(tcg_rd, tcg_result);
3663    } else {
3664        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3665    }
3666
3667    tcg_temp_free_i64(tcg_result);
3668}
3669
3670/* The input should be a value in the bottom e bits (with higher
3671 * bits zero); returns that value replicated into every element
3672 * of size e in a 64 bit integer.
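     * e.g. bitfield_replicate(0x3, 4) == 0x3333333333333333.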
3673 */
3674static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
3675{
3676    assert(e != 0);
3677    while (e < 64) {
3678        mask |= mask << e;
3679        e *= 2;
3680    }
3681    return mask;
3682}
3683
3684/* Return a value with the bottom length bits set (where 0 < length <= 64) */
3685static inline uint64_t bitmask64(unsigned int length)
3686{
3687    assert(length > 0 && length <= 64);
3688    return ~0ULL >> (64 - length);
3689}
3690
3691/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
3692 * only require the wmask. Returns false if the imms/immr/immn are a reserved
3693 * value (ie should cause a guest UNDEF exception), and true if they are
3694 * valid, in which case the decoded bit pattern is written to result.
3695 */
3696bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
3697                            unsigned int imms, unsigned int immr)
3698{
3699    uint64_t mask;
3700    unsigned e, levels, s, r;
3701    int len;
3702
3703    assert(immn < 2 && imms < 64 && immr < 64);
3704
3705    /* The bit patterns we create here are 64 bit patterns which
3706     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
3707     * 64 bits each. Each element contains the same value: a run
3708     * of between 1 and e-1 non-zero bits, rotated within the
3709     * element by between 0 and e-1 bits.
3710     *
3711     * The element size and run length are encoded into immn (1 bit)
3712     * and imms (6 bits) as follows:
3713     * 64 bit elements: immn = 1, imms = <length of run - 1>
3714     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
3715     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
3716     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
3717     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
3718     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
3719     * Notice that immn = 0, imms = 11111x is the only combination
3720     * not covered by one of the above options; this is reserved.
3721     * Further, <length of run - 1> all-ones is a reserved pattern.
3722     *
3723     * In all cases the rotation is by immr % e (and immr is 6 bits).
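         *
         * Worked example: immn = 0, imms = 0b001111, immr = 0 selects
         * e = 32 with a run of 16 ones and no rotation: each element
         * is 0x0000ffff, giving wmask = 0x0000ffff0000ffff.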
3724     */
3725
3726    /* First determine the element size */
3727    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
3728    if (len < 1) {
3729        /* This is the immn == 0, imms == 11111x case */
3730        return false;
3731    }
3732    e = 1 << len;
3733
3734    levels = e - 1;
3735    s = imms & levels;
3736    r = immr & levels;
3737
3738    if (s == levels) {
3739        /* <length of run - 1> mustn't be all-ones. */
3740        return false;
3741    }
3742
3743    /* Create the value of one element: s+1 set bits rotated
3744     * by r within the element (which is e bits wide)...
3745     */
3746    mask = bitmask64(s + 1);
3747    if (r) {
3748        mask = (mask >> r) | (mask << (e - r));
3749        mask &= bitmask64(e);
3750    }
3751    /* ...then replicate the element over the whole 64 bit value */
3752    mask = bitfield_replicate(mask, e);
3753    *result = mask;
3754    return true;
3755}
3756
3757/* Logical (immediate)
3758 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3759 * +----+-----+-------------+---+------+------+------+------+
3760 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
3761 * +----+-----+-------------+---+------+------+------+------+
3762 */
3763static void disas_logic_imm(DisasContext *s, uint32_t insn)
3764{
3765    unsigned int sf, opc, is_n, immr, imms, rn, rd;
3766    TCGv_i64 tcg_rd, tcg_rn;
3767    uint64_t wmask;
3768    bool is_and = false;
3769
3770    sf = extract32(insn, 31, 1);
3771    opc = extract32(insn, 29, 2);
3772    is_n = extract32(insn, 22, 1);
3773    immr = extract32(insn, 16, 6);
3774    imms = extract32(insn, 10, 6);
3775    rn = extract32(insn, 5, 5);
3776    rd = extract32(insn, 0, 5);
3777
3778    if (!sf && is_n) {
3779        unallocated_encoding(s);
3780        return;
3781    }
3782
3783    if (opc == 0x3) { /* ANDS */
3784        tcg_rd = cpu_reg(s, rd);
3785    } else {
3786        tcg_rd = cpu_reg_sp(s, rd);
3787    }
3788    tcg_rn = cpu_reg(s, rn);
3789
3790    if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3791        /* some immediate field values are reserved */
3792        unallocated_encoding(s);
3793        return;
3794    }
3795
3796    if (!sf) {
3797        wmask &= 0xffffffff;
3798    }
3799
3800    switch (opc) {
3801    case 0x3: /* ANDS */
3802    case 0x0: /* AND */
3803        tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3804        is_and = true;
3805        break;
3806    case 0x1: /* ORR */
3807        tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3808        break;
3809    case 0x2: /* EOR */
3810        tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3811        break;
3812    default:
3813        g_assert_not_reached(); /* must handle all above */
3814        break;
3815    }
3816
3817    if (!sf && !is_and) {
3818        /* zero extend final result; we know we can skip this for AND
3819         * since the immediate had the high 32 bits clear.
3820         */
3821        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3822    }
3823
3824    if (opc == 3) { /* ANDS */
3825        gen_logic_CC(sf, tcg_rd);
3826    }
3827}
3828
3829/*
3830 * Move wide (immediate)
3831 *
3832 *  31 30 29 28         23 22 21 20             5 4    0
3833 * +--+-----+-------------+-----+----------------+------+
3834 * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
3835 * +--+-----+-------------+-----+----------------+------+
3836 *
3837 * sf: 0 -> 32 bit, 1 -> 64 bit
3838 * opc: 00 -> MOVN, 10 -> MOVZ, 11 -> MOVK (01 -> unallocated)
3839 * hw: shift/16 (shift of 0 or 16; with sf also 32 or 48)
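     *
     * An arbitrary 64-bit constant is typically built with one MOVZ
     * and up to three MOVKs, e.g. for 0x123456789abcdef0:
     *   movz x0, #0xdef0
     *   movk x0, #0x9abc, lsl #16
     *   movk x0, #0x5678, lsl #32
     *   movk x0, #0x1234, lsl #48
     * each MOVK being a 16-bit deposit at bit position "pos" below.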
3840 */
3841static void disas_movw_imm(DisasContext *s, uint32_t insn)
3842{
3843    int rd = extract32(insn, 0, 5);
3844    uint64_t imm = extract32(insn, 5, 16);
3845    int sf = extract32(insn, 31, 1);
3846    int opc = extract32(insn, 29, 2);
3847    int pos = extract32(insn, 21, 2) << 4;
3848    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3849    TCGv_i64 tcg_imm;
3850
3851    if (!sf && (pos >= 32)) {
3852        unallocated_encoding(s);
3853        return;
3854    }
3855
3856    switch (opc) {
3857    case 0: /* MOVN */
3858    case 2: /* MOVZ */
3859        imm <<= pos;
3860        if (opc == 0) {
3861            imm = ~imm;
3862        }
3863        if (!sf) {
3864            imm &= 0xffffffffu;
3865        }
3866        tcg_gen_movi_i64(tcg_rd, imm);
3867        break;
3868    case 3: /* MOVK */
3869        tcg_imm = tcg_const_i64(imm);
3870        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3871        tcg_temp_free_i64(tcg_imm);
3872        if (!sf) {
3873            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3874        }
3875        break;
3876    default:
3877        unallocated_encoding(s);
3878        break;
3879    }
3880}
3881
3882/* Bitfield
3883 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3884 * +----+-----+-------------+---+------+------+------+------+
3885 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
3886 * +----+-----+-------------+---+------+------+------+------+
3887 */
3888static void disas_bitfield(DisasContext *s, uint32_t insn)
3889{
3890    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3891    TCGv_i64 tcg_rd, tcg_tmp;
3892
3893    sf = extract32(insn, 31, 1);
3894    opc = extract32(insn, 29, 2);
3895    n = extract32(insn, 22, 1);
3896    ri = extract32(insn, 16, 6);
3897    si = extract32(insn, 10, 6);
3898    rn = extract32(insn, 5, 5);
3899    rd = extract32(insn, 0, 5);
3900    bitsize = sf ? 64 : 32;
3901
3902    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3903        unallocated_encoding(s);
3904        return;
3905    }
3906
3907    tcg_rd = cpu_reg(s, rd);
3908
3909    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3910       to be smaller than bitsize, we'll never reference data outside the
3911       low 32-bits anyway.  */
3912    tcg_tmp = read_cpu_reg(s, rn, 1);
3913
3914    /* Recognize simple(r) extractions.  */
3915    if (si >= ri) {
3916        /* Wd<s-r:0> = Wn<s:r> */
3917        len = (si - ri) + 1;
3918        if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
3919            tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
3920            goto done;
3921        } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
3922            tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
3923            return;
3924        }
3925        /* opc == 1, BFXIL: fall through to deposit */
3926        tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
3927        pos = 0;
3928    } else {
3929        /* Handle the ri > si case with a deposit
3930         * Wd<32+s-r,32-r> = Wn<s:0>
3931         */
3932        len = si + 1;
3933        pos = (bitsize - ri) & (bitsize - 1);
3934    }
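        /* e.g. BFI w0, w1, #8, #4 arrives as ri = 24, si = 3: si < ri,
         * so len = 4 and pos = (32 - 24) & 31 = 8, depositing the low
         * four bits of w1 at bit 8 of w0.
         */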
3935
3936    if (opc == 0 && len < ri) {
3937        /* SBFM: sign extend the destination field from len to fill
3938           the balance of the word.  Let the deposit below insert all
3939           of those sign bits.  */
3940        tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
3941        len = ri;
3942    }
3943
3944    if (opc == 1) { /* BFM, BFXIL */
3945        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3946    } else {
3947        /* SBFM or UBFM: We start with zero, and we haven't modified
3948           any bits outside bitsize, therefore the zero-extension
3949           below is unneeded.  */
3950        tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
3951        return;
3952    }
3953
3954 done:
3955    if (!sf) { /* zero extend final result */
3956        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3957    }
3958}
3959
3960/* Extract
3961 *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3962 * +----+------+-------------+---+----+------+--------+------+------+
3963 * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3964 * +----+------+-------------+---+----+------+--------+------+------+
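     *
     * EXTR extracts bitsize bits from the concatenation Rn:Rm starting
     * at bit imms of Rm; with Rn == Rm it is the ROR-immediate alias,
     * e.g. EXTR x0, x1, x1, #8 == ROR x0, x1, #8.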
3965 */
3966static void disas_extract(DisasContext *s, uint32_t insn)
3967{
3968    unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3969
3970    sf = extract32(insn, 31, 1);
3971    n = extract32(insn, 22, 1);
3972    rm = extract32(insn, 16, 5);
3973    imm = extract32(insn, 10, 6);
3974    rn = extract32(insn, 5, 5);
3975    rd = extract32(insn, 0, 5);
3976    op21 = extract32(insn, 29, 2);
3977    op0 = extract32(insn, 21, 1);
3978    bitsize = sf ? 64 : 32;
3979
3980    if (sf != n || op21 || op0 || imm >= bitsize) {
3981        unallocated_encoding(s);
3982    } else {
3983        TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3984
3985        tcg_rd = cpu_reg(s, rd);
3986
3987        if (unlikely(imm == 0)) {
3988            /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3989             * so an extract from bit 0 is a special case.
3990             */
3991            if (sf) {
3992                tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3993            } else {
3994                tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3995            }
3996        } else {
3997            tcg_rm = cpu_reg(s, rm);
3998            tcg_rn = cpu_reg(s, rn);
3999
4000            if (sf) {
4001                /* Specialization to ROR happens in EXTRACT2.  */
4002                tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, imm);
4003            } else {
4004                TCGv_i32 t0 = tcg_temp_new_i32();
4005
4006                tcg_gen_extrl_i64_i32(t0, tcg_rm);
4007                if (rm == rn) {
4008                    tcg_gen_rotri_i32(t0, t0, imm);
4009                } else {
4010                    TCGv_i32 t1 = tcg_temp_new_i32();
4011                    tcg_gen_extrl_i64_i32(t1, tcg_rn);
4012                    tcg_gen_extract2_i32(t0, t0, t1, imm);
4013                    tcg_temp_free_i32(t1);
4014                }
4015                tcg_gen_extu_i32_i64(tcg_rd, t0);
4016                tcg_temp_free_i32(t0);
4017            }
4018        }
4019    }
4020}
4021
4022/* Data processing - immediate */
4023static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
4024{
4025    switch (extract32(insn, 23, 6)) {
4026    case 0x20: case 0x21: /* PC-rel. addressing */
4027        disas_pc_rel_adr(s, insn);
4028        break;
4029    case 0x22: case 0x23: /* Add/subtract (immediate) */
4030        disas_add_sub_imm(s, insn);
4031        break;
4032    case 0x24: /* Logical (immediate) */
4033        disas_logic_imm(s, insn);
4034        break;
4035    case 0x25: /* Move wide (immediate) */
4036        disas_movw_imm(s, insn);
4037        break;
4038    case 0x26: /* Bitfield */
4039        disas_bitfield(s, insn);
4040        break;
4041    case 0x27: /* Extract */
4042        disas_extract(s, insn);
4043        break;
4044    default:
4045        unallocated_encoding(s);
4046        break;
4047    }
4048}
4049
4050/* Shift a TCGv src by TCGv shift_amount, put result in dst.
4051 * Note that it is the caller's responsibility to ensure that the
4052 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
4053 * mandated semantics for out of range shifts.
4054 */
4055static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4056                      enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4057{
4058    switch (shift_type) {
4059    case A64_SHIFT_TYPE_LSL:
4060        tcg_gen_shl_i64(dst, src, shift_amount);
4061        break;
4062    case A64_SHIFT_TYPE_LSR:
4063        tcg_gen_shr_i64(dst, src, shift_amount);
4064        break;
4065    case A64_SHIFT_TYPE_ASR:
4066        if (!sf) {
4067            tcg_gen_ext32s_i64(dst, src);
4068        }
4069        tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4070        break;
4071    case A64_SHIFT_TYPE_ROR:
4072        if (sf) {
4073            tcg_gen_rotr_i64(dst, src, shift_amount);
4074        } else {
4075            TCGv_i32 t0, t1;
4076            t0 = tcg_temp_new_i32();
4077            t1 = tcg_temp_new_i32();
4078            tcg_gen_extrl_i64_i32(t0, src);
4079            tcg_gen_extrl_i64_i32(t1, shift_amount);
4080            tcg_gen_rotr_i32(t0, t0, t1);
4081            tcg_gen_extu_i32_i64(dst, t0);
4082            tcg_temp_free_i32(t0);
4083            tcg_temp_free_i32(t1);
4084        }
4085        break;
4086    default:
4087        g_assert_not_reached(); /* all shift types should be handled */
4088        break;
4089    }
4090
4091    if (!sf) { /* zero extend final result */
4092        tcg_gen_ext32u_i64(dst, dst);
4093    }
4094}
4095
4096/* Shift a TCGv src by immediate, put result in dst.
4097 * The shift amount must be in range (this should always be true as the
4098 * relevant instructions will UNDEF on bad shift immediates).
4099 */
4100static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4101                          enum a64_shift_type shift_type, unsigned int shift_i)
4102{
4103    assert(shift_i < (sf ? 64 : 32));
4104
4105    if (shift_i == 0) {
4106        tcg_gen_mov_i64(dst, src);
4107    } else {
4108        TCGv_i64 shift_const;
4109
4110        shift_const = tcg_const_i64(shift_i);
4111        shift_reg(dst, src, sf, shift_type, shift_const);
4112        tcg_temp_free_i64(shift_const);
4113    }
4114}
4115
4116/* Logical (shifted register)
4117 *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
4118 * +----+-----+-----------+-------+---+------+--------+------+------+
4119 * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
4120 * +----+-----+-----------+-------+---+------+--------+------+------+
4121 */
4122static void disas_logic_reg(DisasContext *s, uint32_t insn)
4123{
4124    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4125    unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4126
4127    sf = extract32(insn, 31, 1);
4128    opc = extract32(insn, 29, 2);
4129    shift_type = extract32(insn, 22, 2);
4130    invert = extract32(insn, 21, 1);
4131    rm = extract32(insn, 16, 5);
4132    shift_amount = extract32(insn, 10, 6);
4133    rn = extract32(insn, 5, 5);
4134    rd = extract32(insn, 0, 5);
4135
4136    if (!sf && (shift_amount & (1 << 5))) {
4137        unallocated_encoding(s);
4138        return;
4139    }
4140
4141    tcg_rd = cpu_reg(s, rd);
4142
4143    if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4144        /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
4145         * register-register MOV and MVN, so it is worth special casing.
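             * (MOV Xd, Xm == ORR Xd, XZR, Xm; MVN Xd, Xm == ORN Xd, XZR, Xm.)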
4146         */
4147        tcg_rm = cpu_reg(s, rm);
4148        if (invert) {
4149            tcg_gen_not_i64(tcg_rd, tcg_rm);
4150            if (!sf) {
4151                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4152            }
4153        } else {
4154            if (sf) {
4155                tcg_gen_mov_i64(tcg_rd, tcg_rm);
4156            } else {
4157                tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4158            }
4159        }
4160        return;
4161    }
4162
4163    tcg_rm = read_cpu_reg(s, rm, sf);
4164
4165    if (shift_amount) {
4166        shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4167    }
4168
4169    tcg_rn = cpu_reg(s, rn);
4170
4171    switch (opc | (invert << 2)) {
4172    case 0: /* AND */
4173    case 3: /* ANDS */
4174        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4175        break;
4176    case 1: /* ORR */
4177        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4178        break;
4179    case 2: /* EOR */
4180        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4181        break;
4182    case 4: /* BIC */
4183    case 7: /* BICS */
4184        tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4185        break;
4186    case 5: /* ORN */
4187        tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4188        break;
4189    case 6: /* EON */
4190        tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4191        break;
4192    default:
4193        g_assert_not_reached();
4194        break;
4195    }
4196
4197    if (!sf) {
4198        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4199    }
4200
4201    if (opc == 3) {
4202        gen_logic_CC(sf, tcg_rd);
4203    }
4204}
4205
4206/*
4207 * Add/subtract (extended register)
4208 *
4209 *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
4210 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4211 * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
4212 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4213 *
4214 *  sf: 0 -> 32bit, 1 -> 64bit
4215 *  op: 0 -> add  , 1 -> sub
4216 *   S: 1 -> set flags
4217 * opt: 00
4218 * option: extension type (see DecodeRegExtend)
4219 * imm3: optional shift to Rm
4220 *
4221 * Rd = Rn + LSL(extend(Rm), amount)
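     *
     * e.g. ADD x0, x1, w2, UXTW #2 computes x0 = x1 + (ZeroExtend(w2) << 2),
     * the usual scaled array-index idiom.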
4222 */
4223static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4224{
4225    int rd = extract32(insn, 0, 5);
4226    int rn = extract32(insn, 5, 5);
4227    int imm3 = extract32(insn, 10, 3);
4228    int option = extract32(insn, 13, 3);
4229    int rm = extract32(insn, 16, 5);
4230    int opt = extract32(insn, 22, 2);
4231    bool setflags = extract32(insn, 29, 1);
4232    bool sub_op = extract32(insn, 30, 1);
4233    bool sf = extract32(insn, 31, 1);
4234
4235    TCGv_i64 tcg_rm, tcg_rn; /* temps */
4236    TCGv_i64 tcg_rd;
4237    TCGv_i64 tcg_result;
4238
4239    if (imm3 > 4 || opt != 0) {
4240        unallocated_encoding(s);
4241        return;
4242    }
4243
4244    /* non-flag setting ops may use SP */
4245    if (!setflags) {
4246        tcg_rd = cpu_reg_sp(s, rd);
4247    } else {
4248        tcg_rd = cpu_reg(s, rd);
4249    }
4250    tcg_rn = read_cpu_reg_sp(s, rn, sf);
4251
4252    tcg_rm = read_cpu_reg(s, rm, sf);
4253    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4254
4255    tcg_result = tcg_temp_new_i64();
4256
4257    if (!setflags) {
4258        if (sub_op) {
4259            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4260        } else {
4261            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4262        }
4263    } else {
4264        if (sub_op) {
4265            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4266        } else {
4267            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4268        }
4269    }
4270
4271    if (sf) {
4272        tcg_gen_mov_i64(tcg_rd, tcg_result);
4273    } else {
4274        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4275    }
4276
4277    tcg_temp_free_i64(tcg_result);
4278}
4279
4280/*
4281 * Add/subtract (shifted register)
4282 *
4283 *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4284 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4285 * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4286 * +--+--+--+-----------+-----+--+-------+---------+------+------+
4287 *
4288 *    sf: 0 -> 32bit, 1 -> 64bit
4289 *    op: 0 -> add  , 1 -> sub
4290 *     S: 1 -> set flags
4291 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
4292 *  imm6: Shift amount to apply to Rm before the add/sub
4293 */
4294static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
4295{
4296    int rd = extract32(insn, 0, 5);
4297    int rn = extract32(insn, 5, 5);
4298    int imm6 = extract32(insn, 10, 6);
4299    int rm = extract32(insn, 16, 5);
4300    int shift_type = extract32(insn, 22, 2);
4301    bool setflags = extract32(insn, 29, 1);
4302    bool sub_op = extract32(insn, 30, 1);
4303    bool sf = extract32(insn, 31, 1);
4304
4305    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4306    TCGv_i64 tcg_rn, tcg_rm;
4307    TCGv_i64 tcg_result;
4308
4309    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
4310        unallocated_encoding(s);
4311        return;
4312    }
4313
4314    tcg_rn = read_cpu_reg(s, rn, sf);
4315    tcg_rm = read_cpu_reg(s, rm, sf);
4316
4317    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
4318
4319    tcg_result = tcg_temp_new_i64();
4320
4321    if (!setflags) {
4322        if (sub_op) {
4323            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4324        } else {
4325            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4326        }
4327    } else {
4328        if (sub_op) {
4329            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4330        } else {
4331            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4332        }
4333    }
4334
4335    if (sf) {
4336        tcg_gen_mov_i64(tcg_rd, tcg_result);
4337    } else {
4338        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4339    }
4340
4341    tcg_temp_free_i64(tcg_result);
4342}
4343
4344/* Data-processing (3 source)
4345 *
4346 *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
4347 *  +--+------+-----------+------+------+----+------+------+------+
4348 *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4349 *  +--+------+-----------+------+------+----+------+------+------+
4350 */
4351static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4352{
4353    int rd = extract32(insn, 0, 5);
4354    int rn = extract32(insn, 5, 5);
4355    int ra = extract32(insn, 10, 5);
4356    int rm = extract32(insn, 16, 5);
4357    int op_id = (extract32(insn, 29, 3) << 4) |
4358        (extract32(insn, 21, 3) << 1) |
4359        extract32(insn, 15, 1);
4360    bool sf = extract32(insn, 31, 1);
4361    bool is_sub = extract32(op_id, 0, 1);
4362    bool is_high = extract32(op_id, 2, 1);
4363    bool is_signed = false;
4364    TCGv_i64 tcg_op1;
4365    TCGv_i64 tcg_op2;
4366    TCGv_i64 tcg_tmp;
4367
4368    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4369    switch (op_id) {
4370    case 0x42: /* SMADDL */
4371    case 0x43: /* SMSUBL */
4372    case 0x44: /* SMULH */
4373        is_signed = true;
4374        break;
4375    case 0x0: /* MADD (32bit) */
4376    case 0x1: /* MSUB (32bit) */
4377    case 0x40: /* MADD (64bit) */
4378    case 0x41: /* MSUB (64bit) */
4379    case 0x4a: /* UMADDL */
4380    case 0x4b: /* UMSUBL */
4381    case 0x4c: /* UMULH */
4382        break;
4383    default:
4384        unallocated_encoding(s);
4385        return;
4386    }
4387
4388    if (is_high) {
4389        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4390        TCGv_i64 tcg_rd = cpu_reg(s, rd);
4391        TCGv_i64 tcg_rn = cpu_reg(s, rn);
4392        TCGv_i64 tcg_rm = cpu_reg(s, rm);
4393
4394        if (is_signed) {
4395            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4396        } else {
4397            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4398        }
4399
4400        tcg_temp_free_i64(low_bits);
4401        return;
4402    }
4403
4404    tcg_op1 = tcg_temp_new_i64();
4405    tcg_op2 = tcg_temp_new_i64();
4406    tcg_tmp = tcg_temp_new_i64();
4407
4408    if (op_id < 0x42) {
4409        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4410        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4411    } else {
4412        if (is_signed) {
4413            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4414            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4415        } else {
4416            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
4417            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
4418        }
4419    }
4420
4421    if (ra == 31 && !is_sub) {
4422        /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4423        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
4424    } else {
4425        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
4426        if (is_sub) {
4427            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4428        } else {
4429            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4430        }
4431    }
4432
4433    if (!sf) {
4434        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
4435    }
4436
4437    tcg_temp_free_i64(tcg_op1);
4438    tcg_temp_free_i64(tcg_op2);
4439    tcg_temp_free_i64(tcg_tmp);
4440}
4441
4442/* Add/subtract (with carry)
4443 *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
4444 * +--+--+--+------------------------+------+-------------+------+-----+
4445 * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
4446 * +--+--+--+------------------------+------+-------------+------+-----+
4447 */
4448
4449static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4450{
4451    unsigned int sf, op, setflags, rm, rn, rd;
4452    TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4453
4454    sf = extract32(insn, 31, 1);
4455    op = extract32(insn, 30, 1);
4456    setflags = extract32(insn, 29, 1);
4457    rm = extract32(insn, 16, 5);
4458    rn = extract32(insn, 5, 5);
4459    rd = extract32(insn, 0, 5);
4460
4461    tcg_rd = cpu_reg(s, rd);
4462    tcg_rn = cpu_reg(s, rn);
4463
4464    if (op) {
4465        tcg_y = new_tmp_a64(s);
4466        tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4467    } else {
4468        tcg_y = cpu_reg(s, rm);
4469    }
4470
4471    if (setflags) {
4472        gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4473    } else {
4474        gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4475    }
4476}
4477
4478/*
4479 * Rotate right into flags
4480 *  31 30 29                21       15          10      5  4      0
4481 * +--+--+--+-----------------+--------+-----------+------+--+------+
4482 * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
4483 * +--+--+--+-----------------+--------+-----------+------+--+------+
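     *
     * RMIF rotates Rn right by imm6 and copies bits 3..0 of the
     * result into whichever of N,Z,C,V the mask selects, e.g.
     * RMIF x0, #63, #2 copies bit 0 of x0 into C.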
4484 */
4485static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
4486{
4487    int mask = extract32(insn, 0, 4);
4488    int o2 = extract32(insn, 4, 1);
4489    int rn = extract32(insn, 5, 5);
4490    int imm6 = extract32(insn, 15, 6);
4491    int sf_op_s = extract32(insn, 29, 3);
4492    TCGv_i64 tcg_rn;
4493    TCGv_i32 nzcv;
4494
4495    if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
4496        unallocated_encoding(s);
4497        return;
4498    }
4499
4500    tcg_rn = read_cpu_reg(s, rn, 1);
4501    tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);
4502
4503    nzcv = tcg_temp_new_i32();
4504    tcg_gen_extrl_i64_i32(nzcv, tcg_rn);
4505
4506    if (mask & 8) { /* N */
4507        tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
4508    }
4509    if (mask & 4) { /* Z */
4510        tcg_gen_not_i32(cpu_ZF, nzcv);
4511        tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
4512    }
4513    if (mask & 2) { /* C */
4514        tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
4515    }
4516    if (mask & 1) { /* V */
4517        tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
4518    }
4519
4520    tcg_temp_free_i32(nzcv);
4521}
4522
4523/*
4524 * Evaluate into flags
4525 *  31 30 29                21        15   14        10      5  4      0
4526 * +--+--+--+-----------------+---------+----+---------+------+--+------+
4527 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
4528 * +--+--+--+-----------------+---------+----+---------+------+--+------+
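     *
     * SETF8/SETF16 (sz = 0/1) set N from bit 7/15 of Rn, Z if the
     * low 8/16 bits of Rn are zero, V to bit 8/16 XOR bit 7/15, and
     * leave C unchanged.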
4529 */
4530static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
4531{
4532    int o3_mask = extract32(insn, 0, 5);
4533    int rn = extract32(insn, 5, 5);
4534    int o2 = extract32(insn, 15, 6);
4535    int sz = extract32(insn, 14, 1);
4536    int sf_op_s = extract32(insn, 29, 3);
4537    TCGv_i32 tmp;
4538    int shift;
4539
4540    if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
4541        !dc_isar_feature(aa64_condm_4, s)) {
4542        unallocated_encoding(s);
4543        return;
4544    }
4545    shift = sz ? 16 : 24;  /* SETF16 or SETF8 */
4546
4547    tmp = tcg_temp_new_i32();
4548    tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
4549    tcg_gen_shli_i32(cpu_NF, tmp, shift);
4550    tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
4551    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
4552    tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
4553    tcg_temp_free_i32(tmp);
4554}
4555
4556/* Conditional compare (immediate / register)
4557 *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
4558 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4559 * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
4560 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4561 *        [1]                             y                [0]       [0]
4562 */
4563static void disas_cc(DisasContext *s, uint32_t insn)
4564{
4565    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
4566    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
4567    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
4568    DisasCompare c;
4569
4570    if (!extract32(insn, 29, 1)) {
4571        unallocated_encoding(s);
4572        return;
4573    }
4574    if (insn & (1 << 10 | 1 << 4)) {
4575        unallocated_encoding(s);
4576        return;
4577    }
4578    sf = extract32(insn, 31, 1);
4579    op = extract32(insn, 30, 1);
4580    is_imm = extract32(insn, 11, 1);
4581    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
4582    cond = extract32(insn, 12, 4);
4583    rn = extract32(insn, 5, 5);
4584    nzcv = extract32(insn, 0, 4);
4585
4586    /* Set T0 = !COND.  */
4587    tcg_t0 = tcg_temp_new_i32();
4588    arm_test_cc(&c, cond);
4589    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
4590    arm_free_cc(&c);
4591
4592    /* Load the arguments for the new comparison.  */
4593    if (is_imm) {
4594        tcg_y = new_tmp_a64(s);
4595        tcg_gen_movi_i64(tcg_y, y);
4596    } else {
4597        tcg_y = cpu_reg(s, y);
4598    }
4599    tcg_rn = cpu_reg(s, rn);
4600
4601    /* Set the flags for the new comparison.  */
4602    tcg_tmp = tcg_temp_new_i64();
4603    if (op) {
4604        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4605    } else {
4606        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4607    }
4608    tcg_temp_free_i64(tcg_tmp);
4609
4610    /* If COND was false, force the flags to #nzcv.  Compute two masks
4611     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
4612     * For tcg hosts that support ANDC, we can make do with just T1.
4613     * In either case, allow the tcg optimizer to delete any unused mask.
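         *
         * e.g. when COND is false: T0 = 1, T1 = -1, T2 = 0, so OR-ing
         * in T1 forces a flag on and AND-ing with T2 (or ANDC with T1)
         * forces it off; when COND is true both masks are no-ops.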
4614     */
4615    tcg_t1 = tcg_temp_new_i32();
4616    tcg_t2 = tcg_temp_new_i32();
4617    tcg_gen_neg_i32(tcg_t1, tcg_t0);
4618    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
4619
4620    if (nzcv & 8) { /* N */
4621        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
4622    } else {
4623        if (TCG_TARGET_HAS_andc_i32) {
4624            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
4625        } else {
4626            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
4627        }
4628    }
4629    if (nzcv & 4) { /* Z */
4630        if (TCG_TARGET_HAS_andc_i32) {
4631            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
4632        } else {
4633            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
4634        }
4635    } else {
4636        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
4637    }
4638    if (nzcv & 2) { /* C */
4639        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
4640    } else {
4641        if (TCG_TARGET_HAS_andc_i32) {
4642            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
4643        } else {
4644            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
4645        }
4646    }
4647    if (nzcv & 1) { /* V */
4648        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
4649    } else {
4650        if (TCG_TARGET_HAS_andc_i32) {
4651            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
4652        } else {
4653            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
4654        }
4655    }
4656    tcg_temp_free_i32(tcg_t0);
4657    tcg_temp_free_i32(tcg_t1);
4658    tcg_temp_free_i32(tcg_t2);
4659}
4660
4661/* Conditional select
4662 *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
4663 * +----+----+---+-----------------+------+------+-----+------+------+
4664 * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
4665 * +----+----+---+-----------------+------+------+-----+------+------+
4666 */
4667static void disas_cond_select(DisasContext *s, uint32_t insn)
4668{
4669    unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
4670    TCGv_i64 tcg_rd, zero;
4671    DisasCompare64 c;
4672
4673    if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
4674        /* S == 1 or op2<1> == 1 */
4675        unallocated_encoding(s);
4676        return;
4677    }
4678    sf = extract32(insn, 31, 1);
4679    else_inv = extract32(insn, 30, 1);
4680    rm = extract32(insn, 16, 5);
4681    cond = extract32(insn, 12, 4);
4682    else_inc = extract32(insn, 10, 1);
4683    rn = extract32(insn, 5, 5);
4684    rd = extract32(insn, 0, 5);
4685
4686    tcg_rd = cpu_reg(s, rd);
4687
4688    a64_test_cc(&c, cond);
4689    zero = tcg_const_i64(0);
4690
4691    if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
4692        /* CSET & CSETM.  */
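            /* CSET Rd, cc == CSINC Rd, ZR, ZR, invert(cc);
             * CSETM Rd, cc == CSINV Rd, ZR, ZR, invert(cc).
             */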
4693        tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
4694        if (else_inv) {
4695            tcg_gen_neg_i64(tcg_rd, tcg_rd);
4696        }
4697    } else {
4698        TCGv_i64 t_true = cpu_reg(s, rn);
4699        TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
4700        if (else_inv && else_inc) {
4701            tcg_gen_neg_i64(t_false, t_false);
4702        } else if (else_inv) {
4703            tcg_gen_not_i64(t_false, t_false);
4704        } else if (else_inc) {
4705            tcg_gen_addi_i64(t_false, t_false, 1);
4706        }
4707        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
4708    }
4709
4710    tcg_temp_free_i64(zero);
4711    a64_free_cc(&c);
4712
4713    if (!sf) {
4714        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4715    }
4716}
4717
4718static void handle_clz(DisasContext *s, unsigned int sf,
4719                       unsigned int rn, unsigned int rd)
4720{
4721    TCGv_i64 tcg_rd, tcg_rn;
4722    tcg_rd = cpu_reg(s, rd);
4723    tcg_rn = cpu_reg(s, rn);
4724
4725    if (sf) {
4726        tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
4727    } else {
4728        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4729        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4730        tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
4731        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4732        tcg_temp_free_i32(tcg_tmp32);
4733    }
4734}
4735
4736static void handle_cls(DisasContext *s, unsigned int sf,
4737                       unsigned int rn, unsigned int rd)
4738{
4739    TCGv_i64 tcg_rd, tcg_rn;
4740    tcg_rd = cpu_reg(s, rd);
4741    tcg_rn = cpu_reg(s, rn);
4742
4743    if (sf) {
4744        tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
4745    } else {
4746        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4747        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4748        tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
4749        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4750        tcg_temp_free_i32(tcg_tmp32);
4751    }
4752}
4753
4754static void handle_rbit(DisasContext *s, unsigned int sf,
4755                        unsigned int rn, unsigned int rd)
4756{
4757    TCGv_i64 tcg_rd, tcg_rn;
4758    tcg_rd = cpu_reg(s, rd);
4759    tcg_rn = cpu_reg(s, rn);
4760
4761    if (sf) {
4762        gen_helper_rbit64(tcg_rd, tcg_rn);
4763    } else {
4764        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4765        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4766        gen_helper_rbit(tcg_tmp32, tcg_tmp32);
4767        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4768        tcg_temp_free_i32(tcg_tmp32);
4769    }
4770}
4771
4772/* REV with sf==1, opcode==3 ("REV64") */
4773static void handle_rev64(DisasContext *s, unsigned int sf,
4774                         unsigned int rn, unsigned int rd)
4775{
4776    if (!sf) {
4777        unallocated_encoding(s);
4778        return;
4779    }
4780    tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
4781}
4782
4783/* REV with sf==0, opcode==2
4784 * REV32 (sf==1, opcode==2)
4785 */
4786static void handle_rev32(DisasContext *s, unsigned int sf,
4787                         unsigned int rn, unsigned int rd)
4788{
4789    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4790
4791    if (sf) {
4792        TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4793        TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4794
4795        /* bswap32_i64 requires zero high word */
4796        tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
4797        tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
4798        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4799        tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
4800        tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
4801
4802        tcg_temp_free_i64(tcg_tmp);
4803    } else {
4804        tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
4805        tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
4806    }
4807}
4808
4809/* REV16 (opcode==1) */
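    /* Byte-swaps each halfword independently via the 0x00ff00ff mask
     * trick below, e.g. REV16 w0, w0 turns 0xaabbccdd into 0xbbaaddcc.
     */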
4810static void handle_rev16(DisasContext *s, unsigned int sf,
4811                         unsigned int rn, unsigned int rd)
4812{
4813    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4814    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4815    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4816    TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
4817
4818    tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
4819    tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
4820    tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
4821    tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
4822    tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
4823
4824    tcg_temp_free_i64(mask);
4825    tcg_temp_free_i64(tcg_tmp);
4826}
4827
4828/* Data-processing (1 source)
4829 *   31  30  29  28             21 20     16 15    10 9    5 4    0
4830 * +----+---+---+-----------------+---------+--------+------+------+
4831 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
4832 * +----+---+---+-----------------+---------+--------+------+------+
4833 */
4834static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4835{
4836    unsigned int sf, opcode, opcode2, rn, rd;
4837    TCGv_i64 tcg_rd;
4838
4839    if (extract32(insn, 29, 1)) {
4840        unallocated_encoding(s);
4841        return;
4842    }
4843
4844    sf = extract32(insn, 31, 1);
4845    opcode = extract32(insn, 10, 6);
4846    opcode2 = extract32(insn, 16, 5);
4847    rn = extract32(insn, 5, 5);
4848    rd = extract32(insn, 0, 5);
4849
4850#define MAP(SF, O2, O1) ((SF) | ((O1) << 1) | ((O2) << 7))
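    /* e.g. MAP(1, 0x01, 0x08) (PACIZA) packs to 1 | (0x08 << 1) | (0x01 << 7) = 0x91. */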
4851
4852    switch (MAP(sf, opcode2, opcode)) {
4853    case MAP(0, 0x00, 0x00): /* RBIT */
4854    case MAP(1, 0x00, 0x00):
4855        handle_rbit(s, sf, rn, rd);
4856        break;
4857    case MAP(0, 0x00, 0x01): /* REV16 */
4858    case MAP(1, 0x00, 0x01):
4859        handle_rev16(s, sf, rn, rd);
4860        break;
4861    case MAP(0, 0x00, 0x02): /* REV/REV32 */
4862    case MAP(1, 0x00, 0x02):
4863        handle_rev32(s, sf, rn, rd);
4864        break;
4865    case MAP(1, 0x00, 0x03): /* REV64 */
4866        handle_rev64(s, sf, rn, rd);
4867        break;
4868    case MAP(0, 0x00, 0x04): /* CLZ */
4869    case MAP(1, 0x00, 0x04):
4870        handle_clz(s, sf, rn, rd);
4871        break;
4872    case MAP(0, 0x00, 0x05): /* CLS */
4873    case MAP(1, 0x00, 0x05):
4874        handle_cls(s, sf, rn, rd);
4875        break;
4876    case MAP(1, 0x01, 0x00): /* PACIA */
4877        if (s->pauth_active) {
4878            tcg_rd = cpu_reg(s, rd);
4879            gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4880        } else if (!dc_isar_feature(aa64_pauth, s)) {
4881            goto do_unallocated;
4882        }
4883        break;
4884    case MAP(1, 0x01, 0x01): /* PACIB */
4885        if (s->pauth_active) {
4886            tcg_rd = cpu_reg(s, rd);
4887            gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4888        } else if (!dc_isar_feature(aa64_pauth, s)) {
4889            goto do_unallocated;
4890        }
4891        break;
4892    case MAP(1, 0x01, 0x02): /* PACDA */
4893        if (s->pauth_active) {
4894            tcg_rd = cpu_reg(s, rd);
4895            gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4896        } else if (!dc_isar_feature(aa64_pauth, s)) {
4897            goto do_unallocated;
4898        }
4899        break;
4900    case MAP(1, 0x01, 0x03): /* PACDB */
4901        if (s->pauth_active) {
4902            tcg_rd = cpu_reg(s, rd);
4903            gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4904        } else if (!dc_isar_feature(aa64_pauth, s)) {
4905            goto do_unallocated;
4906        }
4907        break;
4908    case MAP(1, 0x01, 0x04): /* AUTIA */
4909        if (s->pauth_active) {
4910            tcg_rd = cpu_reg(s, rd);
4911            gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4912        } else if (!dc_isar_feature(aa64_pauth, s)) {
4913            goto do_unallocated;
4914        }
4915        break;
4916    case MAP(1, 0x01, 0x05): /* AUTIB */
4917        if (s->pauth_active) {
4918            tcg_rd = cpu_reg(s, rd);
4919            gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4920        } else if (!dc_isar_feature(aa64_pauth, s)) {
4921            goto do_unallocated;
4922        }
4923        break;
4924    case MAP(1, 0x01, 0x06): /* AUTDA */
4925        if (s->pauth_active) {
4926            tcg_rd = cpu_reg(s, rd);
4927            gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4928        } else if (!dc_isar_feature(aa64_pauth, s)) {
4929            goto do_unallocated;
4930        }
4931        break;
4932    case MAP(1, 0x01, 0x07): /* AUTDB */
4933        if (s->pauth_active) {
4934            tcg_rd = cpu_reg(s, rd);
4935            gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
4936        } else if (!dc_isar_feature(aa64_pauth, s)) {
4937            goto do_unallocated;
4938        }
4939        break;
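        /*
         * The Z variants below use a fixed zero as the modifier and
         * are only allocated with Rn == 31.
         */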
4940    case MAP(1, 0x01, 0x08): /* PACIZA */
4941        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4942            goto do_unallocated;
4943        } else if (s->pauth_active) {
4944            tcg_rd = cpu_reg(s, rd);
4945            gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4946        }
4947        break;
4948    case MAP(1, 0x01, 0x09): /* PACIZB */
4949        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4950            goto do_unallocated;
4951        } else if (s->pauth_active) {
4952            tcg_rd = cpu_reg(s, rd);
4953            gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4954        }
4955        break;
4956    case MAP(1, 0x01, 0x0a): /* PACDZA */
4957        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4958            goto do_unallocated;
4959        } else if (s->pauth_active) {
4960            tcg_rd = cpu_reg(s, rd);
4961            gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4962        }
4963        break;
4964    case MAP(1, 0x01, 0x0b): /* PACDZB */
4965        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4966            goto do_unallocated;
4967        } else if (s->pauth_active) {
4968            tcg_rd = cpu_reg(s, rd);
4969            gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4970        }
4971        break;
4972    case MAP(1, 0x01, 0x0c): /* AUTIZA */
4973        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4974            goto do_unallocated;
4975        } else if (s->pauth_active) {
4976            tcg_rd = cpu_reg(s, rd);
4977            gen_helper_autia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4978        }
4979        break;
4980    case MAP(1, 0x01, 0x0d): /* AUTIZB */
4981        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4982            goto do_unallocated;
4983        } else if (s->pauth_active) {
4984            tcg_rd = cpu_reg(s, rd);
4985            gen_helper_autib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4986        }
4987        break;
4988    case MAP(1, 0x01, 0x0e): /* AUTDZA */
4989        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4990            goto do_unallocated;
4991        } else if (s->pauth_active) {
4992            tcg_rd = cpu_reg(s, rd);
4993            gen_helper_autda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
4994        }
4995        break;
4996    case MAP(1, 0x01, 0x0f): /* AUTDZB */
4997        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
4998            goto do_unallocated;
4999        } else if (s->pauth_active) {
5000            tcg_rd = cpu_reg(s, rd);
5001            gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
5002        }
5003        break;
5004    case MAP(1, 0x01, 0x10): /* XPACI */
5005        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5006            goto do_unallocated;
5007        } else if (s->pauth_active) {
5008            tcg_rd = cpu_reg(s, rd);
5009            gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
5010        }
5011        break;
5012    case MAP(1, 0x01, 0x11): /* XPACD */
5013        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
5014            goto do_unallocated;
5015        } else if (s->pauth_active) {
5016            tcg_rd = cpu_reg(s, rd);
5017            gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
5018        }
5019        break;
5020    default:
5021    do_unallocated:
5022        unallocated_encoding(s);
5023        break;
5024    }
5025
5026#undef MAP
5027}
5028
5029static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
5030                       unsigned int rm, unsigned int rn, unsigned int rd)
5031{
5032    TCGv_i64 tcg_n, tcg_m, tcg_rd;
5033    tcg_rd = cpu_reg(s, rd);
5034
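        /*
         * For a 32-bit signed divide, sign-extend the inputs so the
         * 64-bit helper produces the architecturally correct result
         * (including the INT32_MIN / -1 overflow case); the result is
         * zero-extended below.
         */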
5035    if (!sf && is_signed) {
5036        tcg_n = new_tmp_a64(s);
5037        tcg_m = new_tmp_a64(s);
5038        tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
5039        tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
5040    } else {
5041        tcg_n = read_cpu_reg(s, rn, sf);
5042        tcg_m = read_cpu_reg(s, rm, sf);
5043    }
5044
5045    if (is_signed) {
5046        gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
5047    } else {
5048        gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
5049    }
5050
5051    if (!sf) { /* zero extend final result */
5052        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5053    }
5054}
5055
5056/* LSLV, LSRV, ASRV, RORV */
5057static void handle_shift_reg(DisasContext *s,
5058                             enum a64_shift_type shift_type, unsigned int sf,
5059                             unsigned int rm, unsigned int rn, unsigned int rd)
5060{
5061    TCGv_i64 tcg_shift = tcg_temp_new_i64();
5062    TCGv_i64 tcg_rd = cpu_reg(s, rd);
5063    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
5064
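        /* The shift amount is taken modulo the register width. */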
5065    tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
5066    shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
5067    tcg_temp_free_i64(tcg_shift);
5068}
5069
5070/* CRC32[BHWX], CRC32C[BHWX] */
5071static void handle_crc32(DisasContext *s,
5072                         unsigned int sf, unsigned int sz, bool crc32c,
5073                         unsigned int rm, unsigned int rn, unsigned int rd)
5074{
5075    TCGv_i64 tcg_acc, tcg_val;
5076    TCGv_i32 tcg_bytes;
5077
5078    if (!dc_isar_feature(aa64_crc32, s)
5079        || (sf == 1 && sz != 3)
5080        || (sf == 0 && sz == 3)) {
5081        unallocated_encoding(s);
5082        return;
5083    }
5084
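        /*
         * sz encodes the source width: 0 = byte, 1 = halfword,
         * 2 = word, 3 = doubleword; narrower sources are masked
         * down from Rm before the helper call.
         */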
5085    if (sz == 3) {
5086        tcg_val = cpu_reg(s, rm);
5087    } else {
5088        uint64_t mask;
5089        switch (sz) {
5090        case 0:
5091            mask = 0xFF;
5092            break;
5093        case 1:
5094            mask = 0xFFFF;
5095            break;
5096        case 2:
5097            mask = 0xFFFFFFFF;
5098            break;
5099        default:
5100            g_assert_not_reached();
5101        }
5102        tcg_val = new_tmp_a64(s);
5103        tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
5104    }
5105
5106    tcg_acc = cpu_reg(s, rn);
5107    tcg_bytes = tcg_const_i32(1 << sz);
5108
5109    if (crc32c) {
5110        gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5111    } else {
5112        gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
5113    }
5114
5115    tcg_temp_free_i32(tcg_bytes);
5116}
5117
5118/* Data-processing (2 source)
5119 *   31   30  29 28             21 20  16 15    10 9    5 4    0
5120 * +----+---+---+-----------------+------+--------+------+------+
5121 * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
5122 * +----+---+---+-----------------+------+--------+------+------+
5123 */
5124static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
5125{
5126    unsigned int sf, rm, opcode, rn, rd;
5127    sf = extract32(insn, 31, 1);
5128    rm = extract32(insn, 16, 5);
5129    opcode = extract32(insn, 10, 6);
5130    rn = extract32(insn, 5, 5);
5131    rd = extract32(insn, 0, 5);
5132
5133    if (extract32(insn, 29, 1)) {
5134        unallocated_encoding(s);
5135        return;
5136    }
5137
5138    switch (opcode) {
5139    case 2: /* UDIV */
5140        handle_div(s, false, sf, rm, rn, rd);
5141        break;
5142    case 3: /* SDIV */
5143        handle_div(s, true, sf, rm, rn, rd);
5144        break;
5145    case 8: /* LSLV */
5146        handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
5147        break;
5148    case 9: /* LSRV */
5149        handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
5150        break;
5151    case 10: /* ASRV */
5152        handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
5153        break;
5154    case 11: /* RORV */
5155        handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
5156        break;
5157    case 12: /* PACGA */
5158        if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
5159            goto do_unallocated;
5160        }
5161        gen_helper_pacga(cpu_reg(s, rd), cpu_env,
5162                         cpu_reg(s, rn), cpu_reg_sp(s, rm));
5163        break;
5164    case 16:
5165    case 17:
5166    case 18:
5167    case 19:
5168    case 20:
5169    case 21:
5170    case 22:
5171    case 23: /* CRC32 */
5172    {
5173        int sz = extract32(opcode, 0, 2);
5174        bool crc32c = extract32(opcode, 2, 1);
5175        handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
5176        break;
5177    }
5178    default:
5179    do_unallocated:
5180        unallocated_encoding(s);
5181        break;
5182    }
5183}
5184
5185/*
5186 * Data processing - register
5187 *  31  30 29  28      25    21  20  16      10         0
5188 * +--+---+--+---+-------+-----+-------+-------+---------+
5189 * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
5190 * +--+---+--+---+-------+-----+-------+-------+---------+
5191 */
5192static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
5193{
5194    int op0 = extract32(insn, 30, 1);
5195    int op1 = extract32(insn, 28, 1);
5196    int op2 = extract32(insn, 21, 4);
5197    int op3 = extract32(insn, 10, 6);
5198
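        /*
         * op1 == 0 selects the add/sub (shifted or extended register)
         * and logical (shifted register) groups; everything else is
         * decoded from op2/op3 below.
         */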
5199    if (!op1) {
5200        if (op2 & 8) {
5201            if (op2 & 1) {
5202                /* Add/sub (extended register) */
5203                disas_add_sub_ext_reg(s, insn);
5204            } else {
5205                /* Add/sub (shifted register) */
5206                disas_add_sub_reg(s, insn);
5207            }
5208        } else {
5209            /* Logical (shifted register) */
5210            disas_logic_reg(s, insn);
5211        }
5212        return;
5213    }
5214
5215    switch (op2) {
5216    case 0x0:
5217        switch (op3) {
5218        case 0x00: /* Add/subtract (with carry) */
5219            disas_adc_sbc(s, insn);
5220            break;
5221
5222        case 0x01: /* Rotate right into flags */
5223        case 0x21:
5224            disas_rotate_right_into_flags(s, insn);
5225            break;
5226
5227        case 0x02: /* Evaluate into flags */
5228        case 0x12:
5229        case 0x22:
5230        case 0x32:
5231            disas_evaluate_into_flags(s, insn);
5232            break;
5233
5234        default:
5235            goto do_unallocated;
5236        }
5237        break;
5238
5239    case 0x2: /* Conditional compare */
5240        disas_cc(s, insn); /* both imm and reg forms */
5241        break;
5242
5243    case 0x4: /* Conditional select */
5244        disas_cond_select(s, insn);
5245        break;
5246
5247    case 0x6: /* Data-processing */
5248        if (op0) {    /* (1 source) */
5249            disas_data_proc_1src(s, insn);
5250        } else {      /* (2 source) */
5251            disas_data_proc_2src(s, insn);
5252        }
5253        break;
5254    case 0x8 ... 0xf: /* (3 source) */
5255        disas_data_proc_3src(s, insn);
5256        break;
5257
5258    default:
5259    do_unallocated:
5260        unallocated_encoding(s);
5261        break;
5262    }
5263}
5264
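    /*
     * Shared by FCMP/FCMPE and the conditional compares: run the
     * comparison at the given element size and copy the resulting
     * flags into NZCV.  signal_all_nans selects the signalling
     * (FCMPE) form, which raises Invalid Operation on quiet NaN
     * inputs as well.
     */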
5265static void handle_fp_compare(DisasContext *s, int size,
5266                              unsigned int rn, unsigned int rm,
5267                              bool cmp_with_zero, bool signal_all_nans)
5268{
5269    TCGv_i64 tcg_flags = tcg_temp_new_i64();
5270    TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
5271
5272    if (size == MO_64) {
5273        TCGv_i64 tcg_vn, tcg_vm;
5274
5275        tcg_vn = read_fp_dreg(s, rn);
5276        if (cmp_with_zero) {
5277            tcg_vm = tcg_const_i64(0);
5278        } else {
5279            tcg_vm = read_fp_dreg(s, rm);
5280        }
5281        if (signal_all_nans) {
5282            gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5283        } else {
5284            gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5285        }
5286        tcg_temp_free_i64(tcg_vn);
5287        tcg_temp_free_i64(tcg_vm);
5288    } else {
5289        TCGv_i32 tcg_vn = tcg_temp_new_i32();
5290        TCGv_i32 tcg_vm = tcg_temp_new_i32();
5291
5292        read_vec_element_i32(s, tcg_vn, rn, 0, size);
5293        if (cmp_with_zero) {
5294            tcg_gen_movi_i32(tcg_vm, 0);
5295        } else {
5296            read_vec_element_i32(s, tcg_vm, rm, 0, size);
5297        }
5298
5299        switch (size) {
5300        case MO_32:
5301            if (signal_all_nans) {
5302                gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5303            } else {
5304                gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5305            }
5306            break;
5307        case MO_16:
5308            if (signal_all_nans) {
5309                gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5310            } else {
5311                gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
5312            }
5313            break;
5314        default:
5315            g_assert_not_reached();
5316        }
5317
5318        tcg_temp_free_i32(tcg_vn);
5319        tcg_temp_free_i32(tcg_vm);
5320    }
5321
5322    tcg_temp_free_ptr(fpst);
5323
5324    gen_set_nzcv(tcg_flags);
5325
5326    tcg_temp_free_i64(tcg_flags);
5327}
5328
5329/* Floating point compare
5330 *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
5331 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5332 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
5333 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
5334 */
5335static void disas_fp_compare(DisasContext *s, uint32_t insn)
5336{
5337    unsigned int mos, type, rm, op, rn, opc, op2r;
5338    int size;
5339
5340    mos = extract32(insn, 29, 3);
5341    type = extract32(insn, 22, 2);
5342    rm = extract32(insn, 16, 5);
5343    op = extract32(insn, 14, 2);
5344    rn = extract32(insn, 5, 5);
5345    opc = extract32(insn, 3, 2);
5346    op2r = extract32(insn, 0, 3);
5347
5348    if (mos || op || op2r) {
5349        unallocated_encoding(s);
5350        return;
5351    }
5352
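        /* type: 0 = single, 1 = double, 3 = half (needs FEAT_FP16). */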
5353    switch (type) {
5354    case 0:
5355        size = MO_32;
5356        break;
5357    case 1:
5358        size = MO_64;
5359        break;
5360    case 3:
5361        size = MO_16;
5362        if (dc_isar_feature(aa64_fp16, s)) {
5363            break;
5364        }
5365        /* fallthru */
5366    default:
5367        unallocated_encoding(s);
5368        return;
5369    }
5370
5371    if (!fp_access_check(s)) {
5372        return;
5373    }
5374
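        /* opc bit 0: compare with +0.0; bit 1: signalling (FCMPE) form. */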
5375    handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
5376}
5377
5378/* Floating point conditional compare
5379 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
5380 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5381 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
5382 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
5383 */
5384static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
5385{
5386    unsigned int mos, type, rm, cond, rn, op, nzcv;
5387    TCGv_i64 tcg_flags;
5388    TCGLabel *label_continue = NULL;
5389    int size;
5390
5391    mos = extract32(insn, 29, 3);
5392    type = extract32(insn, 22, 2);
5393    rm = extract32(insn, 16, 5);
5394    cond = extract32(insn, 12, 4);
5395    rn = extract32(insn, 5, 5);
5396    op = extract32(insn, 4, 1);
5397    nzcv = extract32(insn, 0, 4);
5398
5399    if (mos) {
5400        unallocated_encoding(s);
5401        return;
5402    }
5403
5404    switch (type) {
5405    case 0:
5406        size = MO_32;
5407        break;
5408    case 1:
5409        size = MO_64;
5410        break;
5411    case 3:
5412        size = MO_16;
5413        if (dc_isar_feature(aa64_fp16, s)) {
5414            break;
5415        }
5416        /* fallthru */
5417    default:
5418        unallocated_encoding(s);
5419        return;
5420    }
5421
5422    if (!fp_access_check(s)) {
5423        return;
5424    }
5425
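        /*
         * cond 0xe/0xf means "always", so the compare is unconditional.
         * Otherwise, when the condition fails, NZCV is set directly
         * from the immediate nzcv field (gen_set_nzcv expects the
         * flags in bits [31:28]).
         */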
5426    if (cond < 0x0e) { /* not always */
5427        TCGLabel *label_match = gen_new_label();
5428        label_continue = gen_new_label();
5429        arm_gen_test_cc(cond, label_match);
5430        /* nomatch: */
5431        tcg_flags = tcg_const_i64(nzcv << 28);
5432        gen_set_nzcv(tcg_flags);
5433        tcg_temp_free_i64(tcg_flags);
5434        tcg_gen_br(label_continue);
5435        gen_set_label(label_match);
5436    }
5437
5438    handle_fp_compare(s, size, rn, rm, false, op);
5439
5440    if (cond < 0x0e) {
5441        gen_set_label(label_continue);
5442    }
5443}
5444
5445/* Floating point conditional select
5446 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
5447 * +---+---+---+-----------+------+---+------+------+-----+------+------+
5448 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
5449 * +---+---+---+-----------+------+---+------+------+-----+------+------+
5450 */
5451static void disas_fp_csel(DisasContext *s, uint32_t insn)
5452{
5453    unsigned int mos, type, rm, cond, rn, rd;
5454    TCGv_i64 t_true, t_false, t_zero;
5455    DisasCompare64 c;
5456    MemOp sz;
5457
5458    mos = extract32(insn, 29, 3);
5459    type = extract32(insn, 22, 2);
5460    rm = extract32(insn, 16, 5);
5461    cond = extract32(insn, 12, 4);
5462    rn = extract32(insn, 5, 5);
5463    rd = extract32(insn, 0, 5);
5464
5465    if (mos) {
5466        unallocated_encoding(s);
5467        return;
5468    }
5469
5470    switch (type) {
5471    case 0:
5472        sz = MO_32;
5473        break;
5474    case 1:
5475        sz = MO_64;
5476        break;
5477    case 3:
5478        sz = MO_16;
5479        if (dc_isar_feature(aa64_fp16, s)) {
5480            break;
5481        }
5482        /* fallthru */
5483    default:
5484        unallocated_encoding(s);
5485        return;
5486    }
5487
5488    if (!fp_access_check(s)) {
5489        return;
5490    }
5491
5492    /* Zero extend sreg & hreg inputs to 64 bits now.  */
5493    t_true = tcg_temp_new_i64();
5494    t_false = tcg_temp_new_i64();
5495    read_vec_element(s, t_true, rn, 0, sz);
5496    read_vec_element(s, t_false, rm, 0, sz);
5497
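        /*
         * a64_test_cc evaluates the condition into (c.cond, c.value);
         * the movcond keeps Rn when "c.value c.cond 0" holds and
         * selects Rm otherwise.
         */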
5498    a64_test_cc(&c, cond);
5499    t_zero = tcg_const_i64(0);
5500    tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
5501    tcg_temp_free_i64(t_zero);
5502    tcg_temp_free_i64(t_false);
5503    a64_free_cc(&c);
5504
5505    /* Note that sregs & hregs write back zeros to the high bits,
5506       and we've already done the zero-extension.  */
5507    write_fp_dreg(s, rd, t_true);
5508    tcg_temp_free_i64(t_true);
5509}
5510
5511/* Floating-point data-processing (1 source) - half precision */
5512static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
5513{
5514    TCGv_ptr fpst = NULL;
5515    TCGv_i32 tcg_op = read_fp_hreg(s, rn);
5516    TCGv_i32 tcg_res = tcg_temp_new_i32();
5517
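        /*
         * FMOV/FABS/FNEG are pure bit manipulations on the
         * half-precision pattern (bit 15 is the sign), so fpst is only
         * allocated for the opcodes that can raise FP exceptions.
         */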
5518    switch (opcode) {
5519    case 0x0: /* FMOV */
5520        tcg_gen_mov_i32(tcg_res, tcg_op);
5521        break;
5522    case 0x1: /* FABS */
5523        tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
5524        break;
5525    case 0x2: /* FNEG */
5526        tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
5527        break;
5528    case 0x3: /* FSQRT */
5529        fpst = get_fpstatus_ptr(true);
5530        gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
5531        break;
5532    case 0x8: /* FRINTN */
5533    case 0x9: /* FRINTP */
5534    case 0xa: /* FRINTM */
5535    case 0xb: /* FRINTZ */
5536    case 0xc: /* FRINTA */
5537    {
5538        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
5539        fpst = get_fpstatus_ptr(true);
5540
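            /*
             * set_rmode installs the new rounding mode and returns the
             * old one in tcg_rmode, so the second call after the
             * operation restores the original mode.
             */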
5541        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5542        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5543
5544        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5545        tcg_temp_free_i32(tcg_rmode);
5546        break;
5547    }
5548    case 0xe: /* FRINTX */
5549        fpst = get_fpstatus_ptr(true);
5550        gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
5551        break;
5552    case 0xf: /* FRINTI */
5553        fpst = get_fpstatus_ptr(true);
5554        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
5555        break;
5556    default:
5557        g_assert_not_reached();
5558    }
5559
5560    write_fp_sreg(s, rd, tcg_res);
5561
5562    if (fpst) {