/* qemu/target-arm/translate-a64.c */
   1/*
   2 *  AArch64 translation
   3 *
   4 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "qemu/osdep.h"
  20
  21#include "cpu.h"
  22#include "tcg-op.h"
  23#include "qemu/log.h"
  24#include "arm_ldst.h"
  25#include "translate.h"
  26#include "internals.h"
  27#include "qemu/host-utils.h"
  28
  29#include "exec/semihost.h"
  30#include "exec/gen-icount.h"
  31
  32#include "exec/helper-proto.h"
  33#include "exec/helper-gen.h"
  34#include "exec/log.h"
  35
  36#include "trace-tcg.h"
  37
/* TCG globals backing the AArch64 integer register file and PC.
 * cpu_X[0..30] are X0..X30; index 31 is mapped to SP (see regnames[]
 * and a64_translate_init()).  The "reads-as-zero" ZR interpretation of
 * register number 31 is handled by the cpu_reg()/read_cpu_reg()
 * accessors, not here.
 */
static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

/* Names used when registering the cpu_X[] globals with TCG; note
 * index 30 is "lr" and index 31 is "sp".
 */
static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

/* Shift kinds for shifted-register operands; the numeric values are
 * explicit and used directly by the decoders.
 */
enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

/* One decode-table entry: insn is matched when (insn & mask) == pattern. */
typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
  83
  84/* initialize TCG globals.  */
  85void a64_translate_init(void)
  86{
  87    int i;
  88
  89    cpu_pc = tcg_global_mem_new_i64(cpu_env,
  90                                    offsetof(CPUARMState, pc),
  91                                    "pc");
  92    for (i = 0; i < 32; i++) {
  93        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
  94                                          offsetof(CPUARMState, xregs[i]),
  95                                          regnames[i]);
  96    }
  97
  98    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
  99        offsetof(CPUARMState, exclusive_high), "exclusive_high");
 100}
 101
 102static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
 103{
 104    /* Return the mmu_idx to use for A64 "unprivileged load/store" insns:
 105     *  if EL1, access as if EL0; otherwise access at current EL
 106     */
 107    switch (s->mmu_idx) {
 108    case ARMMMUIdx_S12NSE1:
 109        return ARMMMUIdx_S12NSE0;
 110    case ARMMMUIdx_S1SE1:
 111        return ARMMMUIdx_S1SE0;
 112    case ARMMMUIdx_S2NS:
 113        g_assert_not_reached();
 114    default:
 115        return s->mmu_idx;
 116    }
 117}
 118
/* Dump the AArch64 CPU state (PC, X0-X30, PSTATE and, if requested via
 * CPU_DUMP_FPU, the Q registers and FPCR/FPSR) to 'f' via 'cpu_fprintf'.
 */
void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
                            fprintf_function cpu_fprintf, int flags)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    uint32_t psr = pstate_read(env);
    int i;
    int el = arm_current_el(env);
    const char *ns_status;

    cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
            env->pc, env->xregs[31]);
    /* X0..X30, four registers per output line */
    for (i = 0; i < 31; i++) {
        cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
        if ((i % 4) == 3) {
            cpu_fprintf(f, "\n");
        } else {
            cpu_fprintf(f, " ");
        }
    }

    /* Security state is only meaningful if EL3 exists; at EL3 itself
     * it is not printed.
     */
    if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
        ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
    } else {
        ns_status = "";
    }

    /* 'h' when PSTATE.SP is set (using SP_ELx), 't' otherwise */
    cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
                psr,
                psr & PSTATE_N ? 'N' : '-',
                psr & PSTATE_Z ? 'Z' : '-',
                psr & PSTATE_C ? 'C' : '-',
                psr & PSTATE_V ? 'V' : '-',
                ns_status,
                el,
                psr & PSTATE_SP ? 'h' : 't');

    if (flags & CPU_DUMP_FPU) {
        int numvfpregs = 32;
        /* Two Q registers per line; vfp.regs[] stores each 128-bit Q
         * register as two consecutive 64-bit halves (low half first).
         */
        for (i = 0; i < numvfpregs; i += 2) {
            uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
            uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
                        i, vhi, vlo);
            vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
            vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
                        i + 1, vhi, vlo);
        }
        cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
                    vfp_get_fpcr(env), vfp_get_fpsr(env));
    }
}
 172
/* Set the emulated PC to the immediate value 'val'. */
void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}
 177
/* A condition-code test widened to 64 bits: 'cond' holds with respect
 * to 'value' compared against zero (see a64_test_cc()).
 */
typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;
 182
 183static void a64_test_cc(DisasCompare64 *c64, int cc)
 184{
 185    DisasCompare c32;
 186
 187    arm_test_cc(&c32, cc);
 188
 189    /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 190       * properly.  The NE/EQ comparisons are also fine with this choice.  */
 191    c64->cond = c32.cond;
 192    c64->value = tcg_temp_new_i64();
 193    tcg_gen_ext_i32_i64(c64->value, c32.value);
 194
 195    arm_free_cc(&c32);
 196}
 197
/* Release the temporary allocated by a64_test_cc(). */
static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}
 202
 203static void gen_exception_internal(int excp)
 204{
 205    TCGv_i32 tcg_excp = tcg_const_i32(excp);
 206
 207    assert(excp_is_internal(excp));
 208    gen_helper_exception_internal(cpu_env, tcg_excp);
 209    tcg_temp_free_i32(tcg_excp);
 210}
 211
 212static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 213{
 214    TCGv_i32 tcg_excp = tcg_const_i32(excp);
 215    TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 216    TCGv_i32 tcg_el = tcg_const_i32(target_el);
 217
 218    gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 219                                       tcg_syn, tcg_el);
 220    tcg_temp_free_i32(tcg_el);
 221    tcg_temp_free_i32(tcg_syn);
 222    tcg_temp_free_i32(tcg_excp);
 223}
 224
/* Rewind the PC by 'offset' bytes (back to the faulting insn) and raise
 * an internal exception; translation of this TB then stops.
 */
static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception_internal(excp);
    s->is_jmp = DISAS_EXC;
}
 231
/* Rewind the PC by 'offset' bytes and raise a guest exception with the
 * given syndrome at 'target_el'; translation of this TB then stops.
 */
static void gen_exception_insn(DisasContext *s, int offset, int excp,
                               uint32_t syndrome, uint32_t target_el)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception(excp, syndrome, target_el);
    s->is_jmp = DISAS_EXC;
}
 239
 240static void gen_ss_advance(DisasContext *s)
 241{
 242    /* If the singlestep state is Active-not-pending, advance to
 243     * Active-pending.
 244     */
 245    if (s->ss_active) {
 246        s->pstate_ss = 0;
 247        gen_helper_clear_pstate_ss(cpu_env);
 248    }
 249}
 250
static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
                  default_exception_el(s));
    /* stop translating this TB */
    s->is_jmp = DISAS_EXC;
}
 267
 268static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 269{
 270    /* No direct tb linking with singlestep (either QEMU's or the ARM
 271     * debug architecture kind) or deterministic io
 272     */
 273    if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
 274        return false;
 275    }
 276
 277    /* Only link tbs from inside the same guest page */
 278    if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 279        return false;
 280    }
 281
 282    return true;
 283}
 284
 285static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 286{
 287    TranslationBlock *tb;
 288
 289    tb = s->tb;
 290    if (use_goto_tb(s, n, dest)) {
 291        tcg_gen_goto_tb(n);
 292        gen_a64_set_pc_im(dest);
 293        tcg_gen_exit_tb((intptr_t)tb + n);
 294        s->is_jmp = DISAS_TB_JUMP;
 295    } else {
 296        gen_a64_set_pc_im(dest);
 297        if (s->ss_active) {
 298            gen_step_complete_exception(s);
 299        } else if (s->singlestep_enabled) {
 300            gen_exception_internal(EXCP_DEBUG);
 301        } else {
 302            tcg_gen_exit_tb(0);
 303            s->is_jmp = DISAS_TB_JUMP;
 304        }
 305    }
 306}
 307
/* Attach syndrome information 'syn' to the current insn_start op so the
 * exception path can recover it on a fault.
 */
static void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
{
    /* We don't need to save all of the syndrome so we mask and shift
     * out unneeded bits to help the sleb128 encoder do a better job.
     */
    syn &= ARM_INSN_START_WORD2_MASK;
    syn >>= ARM_INSN_START_WORD2_SHIFT;

    /* We check and clear insn_start_idx to catch multiple updates.  */
    assert(s->insn_start_idx != 0);
    tcg_set_insn_param(s->insn_start_idx, 2, syn);
    s->insn_start_idx = 0;
}
 321
/* Raise the UNDEF exception for an unallocated/reserved encoding. */
static void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}
 328
 329#define unsupported_encoding(s, insn)                                    \
 330    do {                                                                 \
 331        qemu_log_mask(LOG_UNIMP,                                         \
 332                      "%s:%d: unsupported instruction encoding 0x%08x "  \
 333                      "at pc=%016" PRIx64 "\n",                          \
 334                      __FILE__, __LINE__, insn, s->pc - 4);              \
 335        unallocated_encoding(s);                                         \
 336    } while (0);
 337
/* Reset the per-insn temporary tracking.  In debug-TCG builds also
 * poison the slots so stale use is caught.
 */
static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    int i;
    for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
        TCGV_UNUSED_I64(s->tmp_a64[i]);
    }
#endif
    s->tmp_a64_count = 0;
}
 348
 349static void free_tmp_a64(DisasContext *s)
 350{
 351    int i;
 352    for (i = 0; i < s->tmp_a64_count; i++) {
 353        tcg_temp_free_i64(s->tmp_a64[i]);
 354    }
 355    init_tmp_a64_array(s);
 356}
 357
/* Allocate a tracked i64 temporary; it is freed automatically by
 * free_tmp_a64() at the end of the instruction.
 */
static TCGv_i64 new_tmp_a64(DisasContext *s)
{
    assert(s->tmp_a64_count < TMP_A64_MAX);
    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
}
 363
 364static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 365{
 366    TCGv_i64 t = new_tmp_a64(s);
 367    tcg_gen_movi_i64(t, 0);
 368    return t;
 369}
 370
 371/*
 372 * Register access functions
 373 *
 374 * These functions are used for directly accessing a register in where
 375 * changes to the final register value are likely to be made. If you
 376 * need to use a register for temporary calculation (e.g. index type
 377 * operations) use the read_* form.
 378 *
 379 * B1.2.1 Register mappings
 380 *
 381 * In instruction register encoding 31 can refer to ZR (zero register) or
 382 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 383 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 384 * This is the point of the _sp forms.
 385 */
 386static TCGv_i64 cpu_reg(DisasContext *s, int reg)
 387{
 388    if (reg == 31) {
 389        return new_tmp_a64_zero(s);
 390    } else {
 391        return cpu_X[reg];
 392    }
 393}
 394
/* register access for when 31 == SP */
static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    /* cpu_X[31] is mapped to SP, so no special-casing is needed */
    return cpu_X[reg];
}
 400
 401/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 402 * representing the register contents. This TCGv is an auto-freed
 403 * temporary so it need not be explicitly freed, and may be modified.
 404 */
 405static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 406{
 407    TCGv_i64 v = new_tmp_a64(s);
 408    if (reg != 31) {
 409        if (sf) {
 410            tcg_gen_mov_i64(v, cpu_X[reg]);
 411        } else {
 412            tcg_gen_ext32u_i64(v, cpu_X[reg]);
 413        }
 414    } else {
 415        tcg_gen_movi_i64(v, 0);
 416    }
 417    return v;
 418}
 419
 420static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 421{
 422    TCGv_i64 v = new_tmp_a64(s);
 423    if (sf) {
 424        tcg_gen_mov_i64(v, cpu_X[reg]);
 425    } else {
 426        tcg_gen_ext32u_i64(v, cpu_X[reg]);
 427    }
 428    return v;
 429}
 430
/* We should have at some point before trying to access an FP register
 * done the necessary access check, so assert that
 * (a) we did the check and
 * (b) we didn't then just plough ahead anyway if it failed.
 * Print the instruction pattern in the abort message so we can figure
 * out what we need to fix if a user encounters this problem in the wild.
 */
static inline void assert_fp_access_checked(DisasContext *s)
{
/* Compiled to a no-op unless CONFIG_DEBUG_TCG is defined. */
#ifdef CONFIG_DEBUG_TCG
    if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
        fprintf(stderr, "target-arm: FP access check missing for "
                "instruction 0x%08x\n", s->insn);
        abort();
    }
#endif
}
 448
/* Return the offset into CPUARMState of an element of specified
 * size, 'element' places in from the least significant end of
 * the FP/vector register Qn.
 */
static inline int vec_reg_offset(DisasContext *s, int regno,
                                 int element, TCGMemOp size)
{
    /* Base of Qn: each 128-bit register occupies two vfp.regs[] slots */
    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
#ifdef HOST_WORDS_BIGENDIAN
    /* This is complicated slightly because vfp.regs[2n] is
     * still the low half and  vfp.regs[2n+1] the high half
     * of the 128 bit vector, even on big endian systems.
     * Calculate the offset assuming a fully bigendian 128 bits,
     * then XOR to account for the order of the two 64 bit halves.
     */
    offs += (16 - ((element + 1) * (1 << size)));
    offs ^= 8;
#else
    /* Little-endian host: elements are laid out in increasing address order */
    offs += element * (1 << size);
#endif
    assert_fp_access_checked(s);
    return offs;
}
 472
/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
{
    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
#ifdef HOST_WORDS_BIGENDIAN
    /* On big-endian hosts the least significant slice sits at the end
     * of the 64-bit low half.
     */
    offs += (8 - (1 << size));
#endif
    assert_fp_access_checked(s);
    return offs;
}
 487
/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    assert_fp_access_checked(s);
    return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
}
 494
 495/* Convenience accessors for reading and writing single and double
 496 * FP registers. Writing clears the upper parts of the associated
 497 * 128 bit vector register, as required by the architecture.
 498 * Note that unlike the GP register accessors, the values returned
 499 * by the read functions must be manually freed.
 500 */
 501static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 502{
 503    TCGv_i64 v = tcg_temp_new_i64();
 504
 505    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 506    return v;
 507}
 508
 509static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 510{
 511    TCGv_i32 v = tcg_temp_new_i32();
 512
 513    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 514    return v;
 515}
 516
 517static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 518{
 519    TCGv_i64 tcg_zero = tcg_const_i64(0);
 520
 521    tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 522    tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
 523    tcg_temp_free_i64(tcg_zero);
 524}
 525
 526static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 527{
 528    TCGv_i64 tmp = tcg_temp_new_i64();
 529
 530    tcg_gen_extu_i32_i64(tmp, v);
 531    write_fp_dreg(s, reg, tmp);
 532    tcg_temp_free_i64(tmp);
 533}
 534
 535static TCGv_ptr get_fpstatus_ptr(void)
 536{
 537    TCGv_ptr statusptr = tcg_temp_new_ptr();
 538    int offset;
 539
 540    /* In A64 all instructions (both FP and Neon) use the FPCR;
 541     * there is no equivalent of the A32 Neon "standard FPSCR value"
 542     * and all operations use vfp.fp_status.
 543     */
 544    offset = offsetof(CPUARMState, vfp.fp_status);
 545    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 546    return statusptr;
 547}
 548
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    /* Split: ZF gets the low half, NF the high half (sign bit in place).
     * Then OR the halves into ZF so it is zero iff the full 64-bit
     * result is zero.
     */
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
 557
 558/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 559static inline void gen_logic_CC(int sf, TCGv_i64 result)
 560{
 561    if (sf) {
 562        gen_set_NZ64(result);
 563    } else {
 564        tcg_gen_extrl_i64_i32(cpu_ZF, result);
 565        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 566    }
 567    tcg_gen_movi_i32(cpu_CF, 0);
 568    tcg_gen_movi_i32(cpu_VF, 0);
 569}
 570
/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        /* 128-bit add with zero high words: the high word of the
         * result ('flag') is the carry out of the 64-bit add.
         */
        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        /* V = (result ^ t0) & ~(t0 ^ t1): set when the operands have the
         * same sign but the result's sign differs; VF is the top bit.
         */
        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        /* add2 leaves the 32-bit sum in NF and the carry in CF */
        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF)
;
        /* V = (result ^ t0) & ~(t0 ^ t1), as in the 64-bit case */
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        /* zero-extend the 32-bit result into dest */
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}
 617
/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        /* ARM subtract sets C when there is NO borrow, i.e. t0 >= t1
         * unsigned.
         */
        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        /* V = (result ^ t0) & (t0 ^ t1): set when the operands differ in
         * sign and the result's sign differs from t0; VF is the top bit.
         */
        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        /* NF holds the 32-bit result; ZF mirrors it */
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        /* C set iff no borrow (t0 >= t1 unsigned) */
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        /* zero-extend the 32-bit result into dest */
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
 664
 665/* dest = T0 + T1 + CF; do not compute flags. */
 666static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 667{
 668    TCGv_i64 flag = tcg_temp_new_i64();
 669    tcg_gen_extu_i32_i64(flag, cpu_CF);
 670    tcg_gen_add_i64(dest, t0, t1);
 671    tcg_gen_add_i64(dest, dest, flag);
 672    tcg_temp_free_i64(flag);
 673
 674    if (!sf) {
 675        tcg_gen_ext32u_i64(dest, dest);
 676    }
 677}
 678
/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64();
        cf_64 = tcg_temp_new_i64();
        vf_64 = tcg_temp_new_i64();
        tmp = tcg_const_i64(0);

        /* Two chained 128-bit adds (t0 + CF, then + t1); cf_64's high
         * word accumulates the carry out of the 64-bit additions.
         */
        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        /* V = (result ^ t0) & ~(t0 ^ t1); VF is the top bit */
        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32();
        t1_32 = tcg_temp_new_i32();
        tmp = tcg_const_i32(0);

        /* Chained 32-bit add2: NF gets the sum, CF the carry out */
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        /* V = (result ^ t0) & ~(t0 ^ t1) */
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        /* zero-extend the 32-bit result into dest */
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}
 728
/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, int size, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    /* size is log2 of the access width; up to 8 bytes here */
    g_assert(size <= 3);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);

    if (iss_valid) {
        uint32_t syn;

        /* Record ISS syndrome info for this access so a data abort can
         * report it (SRT register, SF width, AR/acquire-release flags).
         */
        syn = syn_data_abort_with_iss(0,
                                      size,
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}
 758
/* Store from a GPR using the current (non-unprivileged) mmu index. */
static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}
 768
/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s,
                             TCGv_i64 dest, TCGv_i64 tcg_addr,
                             int size, bool is_signed,
                             bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    TCGMemOp memop = s->be_data + size;

    /* size is log2 of the access width; up to 8 bytes here */
    g_assert(size <= 3);

    if (is_signed) {
        memop += MO_SIGN;
    }

    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && is_signed) {
        /* Signed load into a W register: the loaded value was
         * sign-extended within 64 bits; clear the upper 32 bits.
         */
        g_assert(size < 3);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        /* Record ISS syndrome info for data-abort reporting */
        syn = syn_data_abort_with_iss(0,
                                      size,
                                      is_signed,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}
 807
/* Load into a GPR using the current (non-unprivileged) mmu index. */
static void do_gpr_ld(DisasContext *s,
                      TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
                     get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}
 818
/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmp = tcg_temp_new_i64();
    tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
    if (size < 4) {
        /* <= 64-bit store: single memory access */
        tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
                            s->be_data + size);
    } else {
        /* 128-bit store: two 64-bit accesses; on a big-endian guest the
         * high half goes to the lower address, so swap the addresses.
         */
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_temp_free_i64(tmp);
}
 845
/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi;

    if (size < 4) {
        /* <= 64-bit load: single access, high half forced to zero */
        TCGMemOp memop = s->be_data + size;
        tmphi = tcg_const_i64(0);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
    } else {
        /* 128-bit load: two 64-bit accesses; on a big-endian guest the
         * high half comes from the lower address, so swap the addresses.
         */
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
                            s->be_data | MO_Q);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));

    tcg_temp_free_i64(tmplo);
    tcg_temp_free_i64(tmphi);
}
 880
/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    /* Dispatch on size and signedness to the matching typed load */
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        /* full-width: signedness is irrelevant */
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}
 925
 926static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
 927                                 int element, TCGMemOp memop)
 928{
 929    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 930    switch (memop) {
 931    case MO_8:
 932        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
 933        break;
 934    case MO_16:
 935        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
 936        break;
 937    case MO_8|MO_SIGN:
 938        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
 939        break;
 940    case MO_16|MO_SIGN:
 941        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
 942        break;
 943    case MO_32:
 944    case MO_32|MO_SIGN:
 945        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
 946        break;
 947    default:
 948        g_assert_not_reached();
 949    }
 950}
 951
 952/* Set value of an element within a vector register */
 953static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
 954                              int element, TCGMemOp memop)
 955{
 956    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 957    switch (memop) {
 958    case MO_8:
 959        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
 960        break;
 961    case MO_16:
 962        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
 963        break;
 964    case MO_32:
 965        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
 966        break;
 967    case MO_64:
 968        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
 969        break;
 970    default:
 971        g_assert_not_reached();
 972    }
 973}
 974
 975static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
 976                                  int destidx, int element, TCGMemOp memop)
 977{
 978    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 979    switch (memop) {
 980    case MO_8:
 981        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
 982        break;
 983    case MO_16:
 984        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
 985        break;
 986    case MO_32:
 987        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
 988        break;
 989    default:
 990        g_assert_not_reached();
 991    }
 992}
 993
 994/* Clear the high 64 bits of a 128 bit vector (in general non-quad
 995 * vector ops all need to do this).
 996 */
 997static void clear_vec_high(DisasContext *s, int rd)
 998{
 999    TCGv_i64 tcg_zero = tcg_const_i64(0);
1000
1001    write_vec_element(s, tcg_zero, rd, 1, MO_64);
1002    tcg_temp_free_i64(tcg_zero);
1003}
1004
1005/* Store from vector register to memory */
1006static void do_vec_st(DisasContext *s, int srcidx, int element,
1007                      TCGv_i64 tcg_addr, int size)
1008{
1009    TCGMemOp memop = s->be_data + size;
1010    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1011
1012    read_vec_element(s, tcg_tmp, srcidx, element, size);
1013    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1014
1015    tcg_temp_free_i64(tcg_tmp);
1016}
1017
1018/* Load from memory to vector register */
1019static void do_vec_ld(DisasContext *s, int destidx, int element,
1020                      TCGv_i64 tcg_addr, int size)
1021{
1022    TCGMemOp memop = s->be_data + size;
1023    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1024
1025    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1026    write_vec_element(s, tcg_tmp, destidx, element, size);
1027
1028    tcg_temp_free_i64(tcg_tmp);
1029}
1030
1031/* Check that FP/Neon access is enabled. If it is, return
1032 * true. If not, emit code to generate an appropriate exception,
1033 * and return false; the caller should not emit any code for
1034 * the instruction. Note that this check must happen after all
1035 * unallocated-encoding checks (otherwise the syndrome information
1036 * for the resulting exception will be incorrect).
1037 */
1038static inline bool fp_access_check(DisasContext *s)
1039{
1040    assert(!s->fp_access_checked);
1041    s->fp_access_checked = true;
1042
1043    if (!s->fp_excp_el) {
1044        return true;
1045    }
1046
1047    gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
1048                       s->fp_excp_el);
1049    return false;
1050}
1051
1052/*
1053 * This utility function is for doing register extension with an
1054 * optional shift. You will likely want to pass a temporary for the
1055 * destination register. See DecodeRegExtend() in the ARM ARM.
1056 */
1057static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1058                              int option, unsigned int shift)
1059{
1060    int extsize = extract32(option, 0, 2);
1061    bool is_signed = extract32(option, 2, 1);
1062
1063    if (is_signed) {
1064        switch (extsize) {
1065        case 0:
1066            tcg_gen_ext8s_i64(tcg_out, tcg_in);
1067            break;
1068        case 1:
1069            tcg_gen_ext16s_i64(tcg_out, tcg_in);
1070            break;
1071        case 2:
1072            tcg_gen_ext32s_i64(tcg_out, tcg_in);
1073            break;
1074        case 3:
1075            tcg_gen_mov_i64(tcg_out, tcg_in);
1076            break;
1077        }
1078    } else {
1079        switch (extsize) {
1080        case 0:
1081            tcg_gen_ext8u_i64(tcg_out, tcg_in);
1082            break;
1083        case 1:
1084            tcg_gen_ext16u_i64(tcg_out, tcg_in);
1085            break;
1086        case 2:
1087            tcg_gen_ext32u_i64(tcg_out, tcg_in);
1088            break;
1089        case 3:
1090            tcg_gen_mov_i64(tcg_out, tcg_in);
1091            break;
1092        }
1093    }
1094
1095    if (shift) {
1096        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1097    }
1098}
1099
/* Hook for SP 16-byte alignment checks; intentionally a no-op for now. */
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}
1112
1113/*
1114 * This provides a simple table based table lookup decoder. It is
1115 * intended to be used when the relevant bits for decode are too
1116 * awkwardly placed and switch/if based logic would be confusing and
1117 * deeply nested. Since it's a linear search through the table, tables
1118 * should be kept small.
1119 *
1120 * It returns the first handler where insn & mask == pattern, or
1121 * NULL if there is no match.
1122 * The table is terminated by an empty mask (i.e. 0)
1123 */
1124static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1125                                               uint32_t insn)
1126{
1127    const AArch64DecodeTable *tptr = table;
1128
1129    while (tptr->mask) {
1130        if ((insn & tptr->mask) == tptr->pattern) {
1131            return tptr->disas_fn;
1132        }
1133        tptr++;
1134    }
1135    return NULL;
1136}
1137
1138/*
1139 * the instruction disassembly implemented here matches
1140 * the instruction encoding classifications in chapter 3 (C3)
1141 * of the ARM Architecture Reference Manual (DDI0487A_a)
1142 */
1143
1144/* C3.2.7 Unconditional branch (immediate)
1145 *   31  30       26 25                                  0
1146 * +----+-----------+-------------------------------------+
1147 * | op | 0 0 1 0 1 |                 imm26               |
1148 * +----+-----------+-------------------------------------+
1149 */
1150static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1151{
1152    uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1153
1154    if (insn & (1U << 31)) {
1155        /* C5.6.26 BL Branch with link */
1156        tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1157    }
1158
1159    /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
1160    gen_goto_tb(s, 0, addr);
1161}
1162
1163/* C3.2.1 Compare & branch (immediate)
1164 *   31  30         25  24  23                  5 4      0
1165 * +----+-------------+----+---------------------+--------+
1166 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1167 * +----+-------------+----+---------------------+--------+
1168 */
1169static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1170{
1171    unsigned int sf, op, rt;
1172    uint64_t addr;
1173    TCGLabel *label_match;
1174    TCGv_i64 tcg_cmp;
1175
1176    sf = extract32(insn, 31, 1);
1177    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1178    rt = extract32(insn, 0, 5);
1179    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1180
1181    tcg_cmp = read_cpu_reg(s, rt, sf);
1182    label_match = gen_new_label();
1183
1184    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1185                        tcg_cmp, 0, label_match);
1186
1187    gen_goto_tb(s, 0, s->pc);
1188    gen_set_label(label_match);
1189    gen_goto_tb(s, 1, addr);
1190}
1191
1192/* C3.2.5 Test & branch (immediate)
1193 *   31  30         25  24  23   19 18          5 4    0
1194 * +----+-------------+----+-------+-------------+------+
1195 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1196 * +----+-------------+----+-------+-------------+------+
1197 */
1198static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1199{
1200    unsigned int bit_pos, op, rt;
1201    uint64_t addr;
1202    TCGLabel *label_match;
1203    TCGv_i64 tcg_cmp;
1204
1205    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1206    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1207    addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1208    rt = extract32(insn, 0, 5);
1209
1210    tcg_cmp = tcg_temp_new_i64();
1211    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1212    label_match = gen_new_label();
1213    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1214                        tcg_cmp, 0, label_match);
1215    tcg_temp_free_i64(tcg_cmp);
1216    gen_goto_tb(s, 0, s->pc);
1217    gen_set_label(label_match);
1218    gen_goto_tb(s, 1, addr);
1219}
1220
1221/* C3.2.2 / C5.6.19 Conditional branch (immediate)
1222 *  31           25  24  23                  5   4  3    0
1223 * +---------------+----+---------------------+----+------+
1224 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1225 * +---------------+----+---------------------+----+------+
1226 */
1227static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1228{
1229    unsigned int cond;
1230    uint64_t addr;
1231
1232    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1233        unallocated_encoding(s);
1234        return;
1235    }
1236    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1237    cond = extract32(insn, 0, 4);
1238
1239    if (cond < 0x0e) {
1240        /* genuinely conditional branches */
1241        TCGLabel *label_match = gen_new_label();
1242        arm_gen_test_cc(cond, label_match);
1243        gen_goto_tb(s, 0, s->pc);
1244        gen_set_label(label_match);
1245        gen_goto_tb(s, 1, addr);
1246    } else {
1247        /* 0xe and 0xf are both "always" conditions */
1248        gen_goto_tb(s, 0, addr);
1249    }
1250}
1251
1252/* C5.6.68 HINT */
1253static void handle_hint(DisasContext *s, uint32_t insn,
1254                        unsigned int op1, unsigned int op2, unsigned int crm)
1255{
1256    unsigned int selector = crm << 3 | op2;
1257
1258    if (op1 != 3) {
1259        unallocated_encoding(s);
1260        return;
1261    }
1262
1263    switch (selector) {
1264    case 0: /* NOP */
1265        return;
1266    case 3: /* WFI */
1267        s->is_jmp = DISAS_WFI;
1268        return;
1269    case 1: /* YIELD */
1270        s->is_jmp = DISAS_YIELD;
1271        return;
1272    case 2: /* WFE */
1273        s->is_jmp = DISAS_WFE;
1274        return;
1275    case 4: /* SEV */
1276        gen_helper_sev(cpu_env);
1277        return;
1278    case 5: /* SEVL */
1279        gen_helper_sevl(cpu_env);
1280        return;
1281    default:
1282        /* default specified as NOP equivalent */
1283        return;
1284    }
1285}
1286
/* CLREX: clear this CPU's local exclusive monitor by invalidating the
 * recorded exclusive address (-1 never matches a real address).
 */
static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
    /* NOTE(review): signalling an event here goes beyond CLREX's
     * architected behaviour — confirm this wake-up is intentional.
     */
    gen_helper_sev(cpu_env);
}
1292
1293/* CLREX, DSB, DMB, ISB */
1294static void handle_sync(DisasContext *s, uint32_t insn,
1295                        unsigned int op1, unsigned int op2, unsigned int crm)
1296{
1297    if (op1 != 3) {
1298        unallocated_encoding(s);
1299        return;
1300    }
1301
1302    switch (op2) {
1303    case 2: /* CLREX */
1304        gen_clrex(s, insn);
1305        return;
1306    case 4: /* DSB */
1307    case 5: /* DMB */
1308        /* We don't emulate caches so barriers are no-ops */
1309        return;
1310    case 6: /* ISB */
1311        /* We need to break the TB after this insn to execute
1312         * a self-modified code correctly and also to take
1313         * any pending interrupts immediately.
1314         */
1315        s->is_jmp = DISAS_UPDATE;
1316        return;
1317    default:
1318        unallocated_encoding(s);
1319        return;
1320    }
1321}
1322
1323/* C5.6.130 MSR (immediate) - move immediate to processor state field */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    /* Only a few PSTATE fields are writable via MSR (immediate);
     * the selector is op1:op2.
     */
    int op = op1 << 3 | op2;
    switch (op) {
    case 0x05: /* SPSel */
        /* SPSel is UNDEFINED at EL0; otherwise handled like DAIF below */
        if (s->current_el == 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x1e: /* DAIFSet */
    case 0x1f: /* DAIFClear */
    {
        /* Pass the field selector and CRm immediate to the helper */
        TCGv_i32 tcg_imm = tcg_const_i32(crm);
        TCGv_i32 tcg_op = tcg_const_i32(op);
        /* Point PC back at this insn in case the helper raises an exception */
        gen_a64_set_pc_im(s->pc - 4);
        gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
        tcg_temp_free_i32(tcg_imm);
        tcg_temp_free_i32(tcg_op);
        /* PSTATE changed, so end the TB and re-evaluate translation state */
        s->is_jmp = DISAS_UPDATE;
        break;
    }
    default:
        unallocated_encoding(s);
        return;
    }
}
1352
1353static void gen_get_nzcv(TCGv_i64 tcg_rt)
1354{
1355    TCGv_i32 tmp = tcg_temp_new_i32();
1356    TCGv_i32 nzcv = tcg_temp_new_i32();
1357
1358    /* build bit 31, N */
1359    tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1360    /* build bit 30, Z */
1361    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1362    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1363    /* build bit 29, C */
1364    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1365    /* build bit 28, V */
1366    tcg_gen_shri_i32(tmp, cpu_VF, 31);
1367    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1368    /* generate result */
1369    tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1370
1371    tcg_temp_free_i32(nzcv);
1372    tcg_temp_free_i32(tmp);
1373}
1374
1375static void gen_set_nzcv(TCGv_i64 tcg_rt)
1376
1377{
1378    TCGv_i32 nzcv = tcg_temp_new_i32();
1379
1380    /* take NZCV from R[t] */
1381    tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1382
1383    /* bit 31, N */
1384    tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1385    /* bit 30, Z */
1386    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1387    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1388    /* bit 29, C */
1389    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1390    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1391    /* bit 28, V */
1392    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1393    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1394    tcg_temp_free_i32(nzcv);
1395}
1396
1397/* C5.6.129 MRS - move from system register
1398 * C5.6.131 MSR (register) - move to system register
1399 * C5.6.204 SYS
1400 * C5.6.205 SYSL
1401 * These are all essentially the same insn in 'read' and 'write'
1402 * versions, with varying op0 fields.
1403 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    const ARMCPRegInfo *ri;
    TCGv_i64 tcg_rt;

    /* Look the register up by its encoded (op0,op1,crn,crm,op2) tuple */
    ri = get_arm_cp_reginfo(s->cp_regs,
                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                               crn, crm, op0, op1, op2));

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions (static, per translation-time EL) */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if (ri->accessfn) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        TCGv_ptr tmpptr;
        TCGv_i32 tcg_syn, tcg_isread;
        uint32_t syndrome;

        /* Point PC at this insn so any trap reports the right address */
        gen_a64_set_pc_im(s->pc - 4);
        tmpptr = tcg_const_ptr(ri);
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        tcg_syn = tcg_const_i32(syndrome);
        tcg_isread = tcg_const_i32(isread);
        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
        tcg_temp_free_ptr(tmpptr);
        tcg_temp_free_i32(tcg_syn);
        tcg_temp_free_i32(tcg_isread);
    }

    /* Handle special cases first */
    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        /* NZCV has no backing storage: synthesize from / scatter into
         * the cached flag variables.
         */
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        return;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        tcg_rt = cpu_reg(s, rt);
        gen_helper_dc_zva(cpu_env, tcg_rt);
        return;
    default:
        break;
    }

    /* I/O registers need icount start/stop bracketing when counting insns */
    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            /* Constant register: materialize its reset value directly */
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            /* Registers with a read hook go via the runtime helper */
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
            tcg_temp_free_ptr(tmpptr);
        } else {
            /* Plain register: load straight from the CPU state struct */
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        /* I/O operations must end the TB here (whether read or write) */
        gen_io_end();
        s->is_jmp = DISAS_UPDATE;
    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /* We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        s->is_jmp = DISAS_UPDATE;
    }
}
1522
1523/* C3.2.4 System
1524 *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1525 * +---------------------+---+-----+-----+-------+-------+-----+------+
1526 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1527 * +---------------------+---+-----+-----+-------+-------+-----+------+
1528 */
1529static void disas_system(DisasContext *s, uint32_t insn)
1530{
1531    unsigned int l, op0, op1, crn, crm, op2, rt;
1532    l = extract32(insn, 21, 1);
1533    op0 = extract32(insn, 19, 2);
1534    op1 = extract32(insn, 16, 3);
1535    crn = extract32(insn, 12, 4);
1536    crm = extract32(insn, 8, 4);
1537    op2 = extract32(insn, 5, 3);
1538    rt = extract32(insn, 0, 5);
1539
1540    if (op0 == 0) {
1541        if (l || rt != 31) {
1542            unallocated_encoding(s);
1543            return;
1544        }
1545        switch (crn) {
1546        case 2: /* C5.6.68 HINT */
1547            handle_hint(s, insn, op1, op2, crm);
1548            break;
1549        case 3: /* CLREX, DSB, DMB, ISB */
1550            handle_sync(s, insn, op1, op2, crm);
1551            break;
1552        case 4: /* C5.6.130 MSR (immediate) */
1553            handle_msr_i(s, insn, op1, op2, crm);
1554            break;
1555        default:
1556            unallocated_encoding(s);
1557            break;
1558        }
1559        return;
1560    }
1561    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1562}
1563
1564/* C3.2.3 Exception generation
1565 *
1566 *  31             24 23 21 20                     5 4   2 1  0
1567 * +-----------------+-----+------------------------+-----+----+
1568 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1569 * +-----------------------+------------------------+----------+
1570 */
static void disas_exc(DisasContext *s, uint32_t insn)
{
    int opc = extract32(insn, 21, 3);
    int op2_ll = extract32(insn, 0, 5);
    int imm16 = extract32(insn, 5, 16);
    TCGv_i32 tmp;

    switch (opc) {
    case 0:
        /* For SVC, HVC and SMC we advance the single-step state
         * machine before taking the exception. This is architecturally
         * mandated, to ensure that single-stepping a system call
         * instruction works properly.
         */
        switch (op2_ll) {
        case 1: /* SVC */
            gen_ss_advance(s);
            gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
                               default_exception_el(s));
            break;
        case 2: /* HVC */
            /* HVC is UNDEFINED at EL0 */
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            /* The pre HVC helper handles cases when HVC gets trapped
             * as an undefined insn by runtime configuration.
             */
            gen_a64_set_pc_im(s->pc - 4);
            gen_helper_pre_hvc(cpu_env);
            gen_ss_advance(s);
            /* HVC always targets EL2 */
            gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
            break;
        case 3: /* SMC */
            /* SMC is UNDEFINED at EL0 */
            if (s->current_el == 0) {
                unallocated_encoding(s);
                break;
            }
            /* The pre SMC helper may trap SMC per runtime configuration */
            gen_a64_set_pc_im(s->pc - 4);
            tmp = tcg_const_i32(syn_aa64_smc(imm16));
            gen_helper_pre_smc(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            gen_ss_advance(s);
            /* SMC always targets EL3 */
            gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        break;
    case 1:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* BRK */
        gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
                           default_exception_el(s));
        break;
    case 2:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* HLT. This has two purposes.
         * Architecturally it is an external halting debug instruction;
         * since QEMU doesn't implement external debug we treat it as
         * required when halting debug is disabled: it will UNDEF.
         * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
         */
        if (semihosting_enabled() && imm16 == 0xf000) {
#ifndef CONFIG_USER_ONLY
            /* In system mode, don't allow userspace access to semihosting,
             * to provide some semblance of security (and for consistency
             * with our 32-bit semihosting).
             */
            if (s->current_el == 0) {
                unsupported_encoding(s, insn);
                break;
            }
#endif
            gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
        } else {
            unsupported_encoding(s, insn);
        }
        break;
    case 5:
        if (op2_ll < 1 || op2_ll > 3) {
            unallocated_encoding(s);
            break;
        }
        /* DCPS1, DCPS2, DCPS3 */
        unsupported_encoding(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}
1670
1671/* C3.2.7 Unconditional branch (register)
1672 *  31           25 24   21 20   16 15   10 9    5 4     0
1673 * +---------------+-------+-------+-------+------+-------+
1674 * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1675 * +---------------+-------+-------+-------+------+-------+
1676 */
1677static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1678{
1679    unsigned int opc, op2, op3, rn, op4;
1680
1681    opc = extract32(insn, 21, 4);
1682    op2 = extract32(insn, 16, 5);
1683    op3 = extract32(insn, 10, 6);
1684    rn = extract32(insn, 5, 5);
1685    op4 = extract32(insn, 0, 5);
1686
1687    if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1688        unallocated_encoding(s);
1689        return;
1690    }
1691
1692    switch (opc) {
1693    case 0: /* BR */
1694    case 2: /* RET */
1695        tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1696        break;
1697    case 1: /* BLR */
1698        tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1699        tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1700        break;
1701    case 4: /* ERET */
1702        if (s->current_el == 0) {
1703            unallocated_encoding(s);
1704            return;
1705        }
1706        gen_helper_exception_return(cpu_env);
1707        s->is_jmp = DISAS_JUMP;
1708        return;
1709    case 5: /* DRPS */
1710        if (rn != 0x1f) {
1711            unallocated_encoding(s);
1712        } else {
1713            unsupported_encoding(s, insn);
1714        }
1715        return;
1716    default:
1717        unallocated_encoding(s);
1718        return;
1719    }
1720
1721    s->is_jmp = DISAS_JUMP;
1722}
1723
1724/* C3.2 Branches, exception generating and system instructions */
1725static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1726{
1727    switch (extract32(insn, 25, 7)) {
1728    case 0x0a: case 0x0b:
1729    case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1730        disas_uncond_b_imm(s, insn);
1731        break;
1732    case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1733        disas_comp_b_imm(s, insn);
1734        break;
1735    case 0x1b: case 0x5b: /* Test & branch (immediate) */
1736        disas_test_b_imm(s, insn);
1737        break;
1738    case 0x2a: /* Conditional branch (immediate) */
1739        disas_cond_b_imm(s, insn);
1740        break;
1741    case 0x6a: /* Exception generation / System */
1742        if (insn & (1 << 24)) {
1743            disas_system(s, insn);
1744        } else {
1745            disas_exc(s, insn);
1746        }
1747        break;
1748    case 0x6b: /* Unconditional branch (register) */
1749        disas_uncond_b_reg(s, insn);
1750        break;
1751    default:
1752        unallocated_encoding(s);
1753        break;
1754    }
1755}
1756
1757/*
1758 * Load/Store exclusive instructions are implemented by remembering
1759 * the value/address loaded, and seeing if these are the same
1760 * when the store is performed. This is not actually the architecturally
1761 * mandated semantics, but it works for typical guest code sequences
1762 * and avoids having to monitor regular stores.
1763 *
1764 * In system emulation mode only one CPU will be running at once, so
1765 * this sequence is effectively atomic.  In user emulation mode we
1766 * throw an exception and handle the atomic operation elsewhere.
1767 */
/* Emit a load-exclusive: load into Rt (and Rt2 for pairs) and record
 * the address and loaded value(s) so a later store-exclusive can check
 * that the monitored data is unchanged.
 */
static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
                               TCGv_i64 addr, int size, bool is_pair)
{
    TCGv_i64 tmp = tcg_temp_new_i64();
    TCGMemOp memop = s->be_data + size;

    /* size is log2(bytes); byte up to doubleword only */
    g_assert(size <= 3);
    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);

    if (is_pair) {
        TCGv_i64 addr2 = tcg_temp_new_i64();
        TCGv_i64 hitmp = tcg_temp_new_i64();

        /* Pair forms only exist for 32-bit and 64-bit elements */
        g_assert(size >= 2);
        /* Second element sits one element-size past the first */
        tcg_gen_addi_i64(addr2, addr, 1 << size);
        tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
        tcg_temp_free_i64(addr2);
        /* Record the high value for the store-exclusive comparison */
        tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
        tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
        tcg_temp_free_i64(hitmp);
    }

    /* Record low value and address for the store-exclusive comparison */
    tcg_gen_mov_i64(cpu_exclusive_val, tmp);
    tcg_gen_mov_i64(cpu_reg(s, rt), tmp);

    tcg_temp_free_i64(tmp);
    tcg_gen_mov_i64(cpu_exclusive_addr, addr);
}
1796
1797#ifdef CONFIG_USER_ONLY
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 addr, int size, int is_pair)
{
    /* User-only mode: punt the whole store-exclusive to the exception
     * handler. Record the address, pack size/pair flag and the three
     * register numbers into exclusive_info, then raise EXCP_STREX.
     */
    tcg_gen_mov_i64(cpu_exclusive_test, addr);
    tcg_gen_movi_i32(cpu_exclusive_info,
                     size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
    gen_exception_internal_insn(s, 4, EXCP_STREX);
}
1806#else
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 inaddr, int size, int is_pair)
{
    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
     *     [addr] = {Rt};
     *     if (is_pair) {
     *         [addr + datasize] = {Rt2};
     *     }
     *     {Rd} = 0;
     * } else {
     *     {Rd} = 1;
     * }
     * env->exclusive_addr = -1;
     */
    TCGLabel *fail_label = gen_new_label();
    TCGLabel *done_label = gen_new_label();
    TCGv_i64 addr = tcg_temp_local_new_i64();
    TCGv_i64 tmp;

    /* Copy input into a local temp so it is not trashed when the
     * basic block ends at the branch insn.
     */
    tcg_gen_mov_i64(addr, inaddr);
    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);

    /* Re-load the data and fail if it no longer matches the value the
     * load-exclusive recorded (value-based monitor, see comment above).
     */
    tmp = tcg_temp_new_i64();
    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), s->be_data + size);
    tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
    tcg_temp_free_i64(tmp);

    if (is_pair) {
        TCGv_i64 addrhi = tcg_temp_new_i64();
        TCGv_i64 tmphi = tcg_temp_new_i64();

        /* Second element of the pair is one element-size further on */
        tcg_gen_addi_i64(addrhi, addr, 1 << size);
        tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s),
                            s->be_data + size);
        tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label);

        tcg_temp_free_i64(tmphi);
        tcg_temp_free_i64(addrhi);
    }

    /* We seem to still have the exclusive monitor, so do the store */
    tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s),
                        s->be_data + size);
    if (is_pair) {
        TCGv_i64 addrhi = tcg_temp_new_i64();

        tcg_gen_addi_i64(addrhi, addr, 1 << size);
        tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
                            get_mem_index(s), s->be_data + size);
        tcg_temp_free_i64(addrhi);
    }

    tcg_temp_free_i64(addr);

    /* Rd reports status: 0 on success, 1 on failure */
    tcg_gen_movi_i64(cpu_reg(s, rd), 0);
    tcg_gen_br(done_label);
    gen_set_label(fail_label);
    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
    gen_set_label(done_label);
    /* Success or failure, the monitor is now clear */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);

    /* NOTE(review): signalling an event here goes beyond the architected
     * behaviour of a store-exclusive — confirm this wake-up is intentional.
     */
    gen_helper_sev(cpu_env);
}
1874#endif
1875
/* Compute the ISS.SF (Sixty-Four bit register) field for a load/store
 * instruction syndrome.  Derived from the ARMv8 shared decode for LDR:
 * returns true when the transfer register is treated as 64 bits wide.
 */
static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
{
    if (is_signed) {
        /* Sign-extending loads: opc<0> clear selects a 64-bit regsize */
        return (opc & 1) == 0;
    }
    /* Zero-extending loads: only the doubleword size targets 64 bits */
    return size == 3;
}
1891
/* C3.3.6 Load/store exclusive
 *
 *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
 * +-----+-------------+----+---+----+------+----+-------+------+------+
 * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
 * +-----+-------------+----+---+----+------+----+-------+------+------+
 *
 *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
 *   L: 0 -> store, 1 -> load
 *  o2: 0 -> exclusive, 1 -> not
 *  o1: 0 -> single register, 1 -> register pair
 *  o0: 1 -> load-acquire/store-release, 0 -> not
 */
static void disas_ldst_excl(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rt2 = extract32(insn, 10, 5);
    int is_lasr = extract32(insn, 15, 1);
    int rs = extract32(insn, 16, 5);
    int is_pair = extract32(insn, 21, 1);
    int is_store = !extract32(insn, 22, 1);
    int is_excl = !extract32(insn, 23, 1);
    int size = extract32(insn, 30, 2);
    TCGv_i64 tcg_addr;

    /* Reject unallocated combinations: non-exclusive forms must be
     * acquire/release (o0 == 1) and have no register pair; pair forms
     * require a 32- or 64-bit element size.
     */
    if ((!is_excl && !is_pair && !is_lasr) ||
        (!is_excl && is_pair) ||
        (is_pair && size < 2)) {
        unallocated_encoding(s);
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    /* Note that since TCG is single threaded load-acquire/store-release
     * semantics require no extra if (is_lasr) { ... } handling.
     */

    if (is_excl) {
        if (!is_store) {
            s->is_ldex = true;
            gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
        } else {
            /* Rs receives the store-exclusive status result */
            gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
        }
    } else {
        /* LDAR/STLR: plain access plus acquire/release ordering */
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);

        /* Generate ISS for non-exclusive accesses including LASR.  */
        if (is_store) {
            do_gpr_st(s, tcg_rt, tcg_addr, size,
                      true, rt, iss_sf, is_lasr);
        } else {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
                      true, rt, iss_sf, is_lasr);
        }
    }
}
1955
1956/*
1957 * C3.3.5 Load register (literal)
1958 *
1959 *  31 30 29   27  26 25 24 23                5 4     0
1960 * +-----+-------+---+-----+-------------------+-------+
1961 * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
1962 * +-----+-------+---+-----+-------------------+-------+
1963 *
1964 * V: 1 -> vector (simd/fp)
1965 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
1966 *                   10-> 32 bit signed, 11 -> prefetch
1967 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
1968 */
1969static void disas_ld_lit(DisasContext *s, uint32_t insn)
1970{
1971    int rt = extract32(insn, 0, 5);
1972    int64_t imm = sextract32(insn, 5, 19) << 2;
1973    bool is_vector = extract32(insn, 26, 1);
1974    int opc = extract32(insn, 30, 2);
1975    bool is_signed = false;
1976    int size = 2;
1977    TCGv_i64 tcg_rt, tcg_addr;
1978
1979    if (is_vector) {
1980        if (opc == 3) {
1981            unallocated_encoding(s);
1982            return;
1983        }
1984        size = 2 + opc;
1985        if (!fp_access_check(s)) {
1986            return;
1987        }
1988    } else {
1989        if (opc == 3) {
1990            /* PRFM (literal) : prefetch */
1991            return;
1992        }
1993        size = 2 + extract32(opc, 0, 1);
1994        is_signed = extract32(opc, 1, 1);
1995    }
1996
1997    tcg_rt = cpu_reg(s, rt);
1998
1999    tcg_addr = tcg_const_i64((s->pc - 4) + imm);
2000    if (is_vector) {
2001        do_fp_ld(s, rt, tcg_addr, size);
2002    } else {
2003        /* Only unsigned 32bit loads target 32bit registers.  */
2004        bool iss_sf = opc == 0 ? 32 : 64;
2005
2006        do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2007                  true, rt, iss_sf, false);
2008    }
2009    tcg_temp_free_i64(tcg_addr);
2010}
2011
2012/*
2013 * C5.6.80 LDNP (Load Pair - non-temporal hint)
2014 * C5.6.81 LDP (Load Pair - non vector)
2015 * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
2016 * C5.6.176 STNP (Store Pair - non-temporal hint)
2017 * C5.6.177 STP (Store Pair - non vector)
2018 * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
2019 * C6.3.165 LDP (Load Pair of SIMD&FP)
2020 * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
2021 * C6.3.284 STP (Store Pair of SIMD&FP)
2022 *
2023 *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2024 * +-----+-------+---+---+-------+---+-----------------------------+
2025 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2026 * +-----+-------+---+---+-------+---+-------+-------+------+------+
2027 *
2028 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2029 *      LDPSW                    01
2030 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2031 *   V: 0 -> GPR, 1 -> Vector
2032 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2033 *      10 -> signed offset, 11 -> pre-index
2034 *   L: 0 -> Store 1 -> Load
2035 *
2036 * Rt, Rt2 = GPR or SIMD registers to be stored
2037 * Rn = general purpose register containing address
2038 * imm7 = signed offset (multiple of 4 or 8 depending on size)
2039 */
static void disas_ldst_pair(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rt2 = extract32(insn, 10, 5);
    uint64_t offset = sextract64(insn, 15, 7);
    int index = extract32(insn, 23, 2);
    bool is_vector = extract32(insn, 26, 1);
    bool is_load = extract32(insn, 22, 1);
    int opc = extract32(insn, 30, 2);

    bool is_signed = false;
    bool postindex = false;
    bool wback = false;

    TCGv_i64 tcg_addr; /* calculated address */
    int size;

    if (opc == 3) {
        unallocated_encoding(s);
        return;
    }

    if (is_vector) {
        /* SIMD&FP: opc directly scales 32/64/128-bit element size */
        size = 2 + opc;
    } else {
        /* GPR: opc<1> selects 64-bit, opc<0> set means LDPSW (signed) */
        size = 2 + extract32(opc, 1, 1);
        is_signed = extract32(opc, 0, 1);
        if (!is_load && is_signed) {
            /* There is no signed store-pair form */
            unallocated_encoding(s);
            return;
        }
    }

    switch (index) {
    case 1: /* post-index */
        postindex = true;
        wback = true;
        break;
    case 0:
        /* signed offset with "non-temporal" hint. Since we don't emulate
         * caches we don't care about hints to the cache system about
         * data access patterns, and handle this identically to plain
         * signed offset.
         */
        if (is_signed) {
            /* There is no non-temporal-hint version of LDPSW */
            unallocated_encoding(s);
            return;
        }
        postindex = false;
        break;
    case 2: /* signed offset, rn not updated */
        postindex = false;
        break;
    case 3: /* pre-index */
        postindex = false;
        wback = true;
        break;
    }

    if (is_vector && !fp_access_check(s)) {
        return;
    }

    /* imm7 is scaled by the element size */
    offset <<= size;

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    if (!postindex) {
        tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
    }

    /* First element of the pair at tcg_addr */
    if (is_vector) {
        if (is_load) {
            do_fp_ld(s, rt, tcg_addr, size);
        } else {
            do_fp_st(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        if (is_load) {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
                      false, 0, false, false);
        } else {
            do_gpr_st(s, tcg_rt, tcg_addr, size,
                      false, 0, false, false);
        }
    }
    /* Second element one datasize further on */
    tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
    if (is_vector) {
        if (is_load) {
            do_fp_ld(s, rt2, tcg_addr, size);
        } else {
            do_fp_st(s, rt2, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
        if (is_load) {
            do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false,
                      false, 0, false, false);
        } else {
            do_gpr_st(s, tcg_rt2, tcg_addr, size,
                      false, 0, false, false);
        }
    }

    if (wback) {
        /* Undo the accumulated adjustments so that the written-back
         * address is base+offset in both pre- and post-index forms.
         */
        if (postindex) {
            tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
        } else {
            tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
        }
        tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
    }
}
2160
2161/*
2162 * C3.3.8 Load/store (immediate post-indexed)
2163 * C3.3.9 Load/store (immediate pre-indexed)
2164 * C3.3.12 Load/store (unscaled immediate)
2165 *
2166 * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2167 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2168 * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2169 * +----+-------+---+-----+-----+---+--------+-----+------+------+
2170 *
2171 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
 *       10 -> unprivileged
2173 * V = 0 -> non-vector
2174 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2175 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2176 */
static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm9 = sextract32(insn, 12, 9);
    int opc = extract32(insn, 22, 2);
    int size = extract32(insn, 30, 2);
    int idx = extract32(insn, 10, 2);
    bool is_signed = false;
    bool is_store = false;
    bool is_extended = false;
    bool is_unpriv = (idx == 2);
    bool is_vector = extract32(insn, 26, 1);
    bool iss_valid = !is_vector;
    bool post_index;
    bool writeback;

    TCGv_i64 tcg_addr;

    if (is_vector) {
        /* SIMD&FP: element size is opc<1>:size, max 128-bit (size 4) */
        size |= (opc & 2) << 1;
        if (size > 4 || is_unpriv) {
            unallocated_encoding(s);
            return;
        }
        is_store = ((opc & 1) == 0);
        if (!fp_access_check(s)) {
            return;
        }
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch */
            if (is_unpriv) {
                unallocated_encoding(s);
                return;
            }
            return;
        }
        if (opc == 3 && size > 1) {
            /* loads-signed-to-32bit only exist for byte/halfword */
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = opc & (1<<1);
        is_extended = (size < 3) && (opc & 1);
    }

    /* idx is 2 bits wide so this switch is exhaustive */
    switch (idx) {
    case 0:
    case 2:
        post_index = false;
        writeback = false;
        break;
    case 1:
        post_index = true;
        writeback = true;
        break;
    case 3:
        post_index = false;
        writeback = true;
        break;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    if (!post_index) {
        tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
    }

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, tcg_addr, size);
        } else {
            do_fp_ld(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        /* LDTR/STTR (unprivileged) use the EL0 mmu index */
        int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);

        if (is_store) {
            do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx,
                             iss_valid, rt, iss_sf, false);
        } else {
            do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
                             is_signed, is_extended, memidx,
                             iss_valid, rt, iss_sf, false);
        }
    }

    if (writeback) {
        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
        if (post_index) {
            tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
        }
        tcg_gen_mov_i64(tcg_rn, tcg_addr);
    }
}
2278
2279/*
2280 * C3.3.10 Load/store (register offset)
2281 *
2282 * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2283 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2284 * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2285 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2286 *
2287 * For non-vector:
2288 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2289 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2290 * For vector:
2291 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2292 *   opc<0>: 0 -> store, 1 -> load
2293 * V: 1 -> vector/simd
2294 * opt: extend encoding (see DecodeRegExtend)
2295 * S: if S=1 then scale (essentially index by sizeof(size))
2296 * Rt: register to transfer into/out of
2297 * Rn: address register or SP for base
2298 * Rm: offset register or ZR for offset
2299 */
static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int shift = extract32(insn, 12, 1);
    int rm = extract32(insn, 16, 5);
    int opc = extract32(insn, 22, 2);
    int opt = extract32(insn, 13, 3);
    int size = extract32(insn, 30, 2);
    bool is_signed = false;
    bool is_store = false;
    bool is_extended = false;
    bool is_vector = extract32(insn, 26, 1);

    TCGv_i64 tcg_rm;
    TCGv_i64 tcg_addr;

    /* option<1> must be set for a valid extend encoding */
    if (extract32(opt, 1, 1) == 0) {
        unallocated_encoding(s);
        return;
    }

    if (is_vector) {
        /* SIMD&FP: element size is opc<1>:size, max 128-bit (size 4) */
        size |= (opc & 2) << 1;
        if (size > 4) {
            unallocated_encoding(s);
            return;
        }
        is_store = !extract32(opc, 0, 1);
        if (!fp_access_check(s)) {
            return;
        }
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch */
            return;
        }
        if (opc == 3 && size > 1) {
            /* loads-signed-to-32bit only exist for byte/halfword */
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = extract32(opc, 1, 1);
        is_extended = (size < 3) && extract32(opc, 0, 1);
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    /* Offset register extended per 'opt', optionally scaled by the
     * access size when S is set.
     */
    tcg_rm = read_cpu_reg(s, rm, 1);
    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);

    tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, tcg_addr, size);
        } else {
            do_fp_ld(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
        if (is_store) {
            do_gpr_st(s, tcg_rt, tcg_addr, size,
                      true, rt, iss_sf, false);
        } else {
            do_gpr_ld(s, tcg_rt, tcg_addr, size,
                      is_signed, is_extended,
                      true, rt, iss_sf, false);
        }
    }
}
2375
2376/*
2377 * C3.3.13 Load/store (unsigned immediate)
2378 *
2379 * 31 30 29   27  26 25 24 23 22 21        10 9     5
2380 * +----+-------+---+-----+-----+------------+-------+------+
2381 * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2382 * +----+-------+---+-----+-----+------------+-------+------+
2383 *
2384 * For non-vector:
2385 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2386 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2387 * For vector:
2388 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2389 *   opc<0>: 0 -> store, 1 -> load
2390 * Rn: base address register (inc SP)
2391 * Rt: target register
2392 */
static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    unsigned int imm12 = extract32(insn, 10, 12);
    bool is_vector = extract32(insn, 26, 1);
    int size = extract32(insn, 30, 2);
    int opc = extract32(insn, 22, 2);
    unsigned int offset;

    TCGv_i64 tcg_addr;

    bool is_store;
    bool is_signed = false;
    bool is_extended = false;

    if (is_vector) {
        /* SIMD&FP: element size is opc<1>:size, max 128-bit (size 4) */
        size |= (opc & 2) << 1;
        if (size > 4) {
            unallocated_encoding(s);
            return;
        }
        is_store = !extract32(opc, 0, 1);
        if (!fp_access_check(s)) {
            return;
        }
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch */
            return;
        }
        if (opc == 3 && size > 1) {
            /* loads-signed-to-32bit only exist for byte/halfword */
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = extract32(opc, 1, 1);
        is_extended = (size < 3) && extract32(opc, 0, 1);
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);
    /* imm12 is an unsigned offset scaled by the access size */
    offset = imm12 << size;
    tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, tcg_addr, size);
        } else {
            do_fp_ld(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
        if (is_store) {
            do_gpr_st(s, tcg_rt, tcg_addr, size,
                      true, rt, iss_sf, false);
        } else {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended,
                      true, rt, iss_sf, false);
        }
    }
}
2458
2459/* Load/store register (all forms) */
2460static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2461{
2462    switch (extract32(insn, 24, 2)) {
2463    case 0:
2464        if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2465            disas_ldst_reg_roffset(s, insn);
2466        } else {
2467            /* Load/store register (unscaled immediate)
2468             * Load/store immediate pre/post-indexed
2469             * Load/store register unprivileged
2470             */
2471            disas_ldst_reg_imm9(s, insn);
2472        }
2473        break;
2474    case 1:
2475        disas_ldst_reg_unsigned_imm(s, insn);
2476        break;
2477    default:
2478        unallocated_encoding(s);
2479        break;
2480    }
2481}
2482
2483/* C3.3.1 AdvSIMD load/store multiple structures
2484 *
2485 *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2486 * +---+---+---------------+---+-------------+--------+------+------+------+
2487 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2488 * +---+---+---------------+---+-------------+--------+------+------+------+
2489 *
2490 * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2491 *
2492 *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2493 * +---+---+---------------+---+---+---------+--------+------+------+------+
2494 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2495 * +---+---+---------------+---+---+---------+--------+------+------+------+
2496 *
2497 * Rt: first (or only) SIMD&FP register to be transferred
2498 * Rn: base address or SP
2499 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2500 */
static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int size = extract32(insn, 10, 2);
    int opcode = extract32(insn, 12, 4);
    bool is_store = !extract32(insn, 22, 1);
    bool is_postidx = extract32(insn, 23, 1);
    bool is_q = extract32(insn, 30, 1);
    TCGv_i64 tcg_addr, tcg_rn;

    int ebytes = 1 << size;                       /* bytes per element */
    int elements = (is_q ? 128 : 64) / (8 << size); /* elements per register */
    int rpt;    /* num iterations */
    int selem;  /* structure elements */
    int r;

    if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
        unallocated_encoding(s);
        return;
    }

    /* From the shared decode logic: opcode -> (register repeat count,
     * elements per structure).
     */
    switch (opcode) {
    case 0x0:
        /* LD4/ST4 */
        rpt = 1;
        selem = 4;
        break;
    case 0x2:
        /* LD1/ST1, four registers */
        rpt = 4;
        selem = 1;
        break;
    case 0x4:
        /* LD3/ST3 */
        rpt = 1;
        selem = 3;
        break;
    case 0x6:
        /* LD1/ST1, three registers */
        rpt = 3;
        selem = 1;
        break;
    case 0x7:
        /* LD1/ST1, one register */
        rpt = 1;
        selem = 1;
        break;
    case 0x8:
        /* LD2/ST2 */
        rpt = 1;
        selem = 2;
        break;
    case 0xa:
        /* LD1/ST1, two registers */
        rpt = 2;
        selem = 1;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == 3 && !is_q && selem != 1) {
        /* reserved */
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    /* Work on a copy of the address; rn itself is only updated in the
     * post-index case below.
     */
    tcg_rn = cpu_reg_sp(s, rn);
    tcg_addr = tcg_temp_new_i64();
    tcg_gen_mov_i64(tcg_addr, tcg_rn);

    for (r = 0; r < rpt; r++) {
        int e;
        for (e = 0; e < elements; e++) {
            int tt = (rt + r) % 32;
            int xs;
            for (xs = 0; xs < selem; xs++) {
                if (is_store) {
                    do_vec_st(s, tt, e, tcg_addr, size);
                } else {
                    do_vec_ld(s, tt, e, tcg_addr, size);

                    /* For non-quad operations, setting a slice of the low
                     * 64 bits of the register clears the high 64 bits (in
                     * the ARM ARM pseudocode this is implicit in the fact
                     * that 'rval' is a 64 bit wide variable). We optimize
                     * by noticing that we only need to do this the first
                     * time we touch a register.
                     */
                    if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
                        clear_vec_high(s, tt);
                    }
                }
                tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
                tt = (tt + 1) % 32;
            }
        }
    }

    if (is_postidx) {
        int rm = extract32(insn, 16, 5);
        if (rm == 31) {
            /* Rm == 31 means advance by the total transfer size,
             * which is exactly what tcg_addr has accumulated.
             */
            tcg_gen_mov_i64(tcg_rn, tcg_addr);
        } else {
            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
        }
    }
    tcg_temp_free_i64(tcg_addr);
}
2614
2615/* C3.3.3 AdvSIMD load/store single structure
2616 *
2617 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2618 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2619 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2620 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2621 *
2622 * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2623 *
2624 *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2625 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2626 * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2627 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2628 *
2629 * Rt: first (or only) SIMD&FP register to be transferred
2630 * Rn: base address or SP
2631 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2632 * index = encoded in Q:S:size dependent on size
2633 *
2634 * lane_size = encoded in R, opc
2635 * transfer width = encoded in opc, S, size
2636 */
static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int size = extract32(insn, 10, 2);
    int S = extract32(insn, 12, 1);
    int opc = extract32(insn, 13, 3);
    int R = extract32(insn, 21, 1);
    int is_load = extract32(insn, 22, 1);
    int is_postidx = extract32(insn, 23, 1);
    int is_q = extract32(insn, 30, 1);

    int scale = extract32(opc, 1, 2);   /* log2(lane size), refined below */
    int selem = (extract32(opc, 0, 1) << 1 | R) + 1; /* structure elements */
    bool replicate = false;             /* LD*R: load and replicate */
    int index = is_q << 3 | S << 2 | size; /* lane index, narrowed below */
    int ebytes, xs;
    TCGv_i64 tcg_addr, tcg_rn;

    /* Refine scale/index per lane size; unused low bits of Q:S:size
     * must be zero or the encoding is unallocated.
     */
    switch (scale) {
    case 3:
        /* LD1R..LD4R (load single structure and replicate) */
        if (!is_load || S) {
            unallocated_encoding(s);
            return;
        }
        scale = size;
        replicate = true;
        break;
    case 0:
        /* 8-bit lanes: index is Q:S:size as-is */
        break;
    case 1:
        /* 16-bit lanes */
        if (extract32(size, 0, 1)) {
            unallocated_encoding(s);
            return;
        }
        index >>= 1;
        break;
    case 2:
        if (extract32(size, 1, 1)) {
            unallocated_encoding(s);
            return;
        }
        if (!extract32(size, 0, 1)) {
            /* 32-bit lanes */
            index >>= 2;
        } else {
            /* 64-bit lanes */
            if (S) {
                unallocated_encoding(s);
                return;
            }
            index >>= 3;
            scale = 3;
        }
        break;
    default:
        g_assert_not_reached();
    }

    if (!fp_access_check(s)) {
        return;
    }

    ebytes = 1 << scale;   /* bytes transferred per register */

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    /* Work on a copy of the address; rn is only updated for post-index */
    tcg_rn = cpu_reg_sp(s, rn);
    tcg_addr = tcg_temp_new_i64();
    tcg_gen_mov_i64(tcg_addr, tcg_rn);

    for (xs = 0; xs < selem; xs++) {
        if (replicate) {
            /* Load and replicate to all elements */
            uint64_t mulconst;
            TCGv_i64 tcg_tmp = tcg_temp_new_i64();

            tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
                                get_mem_index(s), s->be_data + scale);
            /* Multiplying by the repeated-1s constant broadcasts the
             * loaded element into every lane of the 64-bit value.
             */
            switch (scale) {
            case 0:
                mulconst = 0x0101010101010101ULL;
                break;
            case 1:
                mulconst = 0x0001000100010001ULL;
                break;
            case 2:
                mulconst = 0x0000000100000001ULL;
                break;
            case 3:
                /* 64-bit element: already fills the value, no multiply */
                mulconst = 0;
                break;
            default:
                g_assert_not_reached();
            }
            if (mulconst) {
                tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
            }
            write_vec_element(s, tcg_tmp, rt, 0, MO_64);
            if (is_q) {
                write_vec_element(s, tcg_tmp, rt, 1, MO_64);
            } else {
                clear_vec_high(s, rt);
            }
            tcg_temp_free_i64(tcg_tmp);
        } else {
            /* Load/store one element per register */
            if (is_load) {
                do_vec_ld(s, rt, index, tcg_addr, s->be_data + scale);
            } else {
                do_vec_st(s, rt, index, tcg_addr, s->be_data + scale);
            }
        }
        tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
        rt = (rt + 1) % 32;
    }

    if (is_postidx) {
        int rm = extract32(insn, 16, 5);
        if (rm == 31) {
            /* Rm == 31 means advance by the total transfer size */
            tcg_gen_mov_i64(tcg_rn, tcg_addr);
        } else {
            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
        }
    }
    tcg_temp_free_i64(tcg_addr);
}
2764
2765/* C3.3 Loads and stores */
2766static void disas_ldst(DisasContext *s, uint32_t insn)
2767{
2768    switch (extract32(insn, 24, 6)) {
2769    case 0x08: /* Load/store exclusive */
2770        disas_ldst_excl(s, insn);
2771        break;
2772    case 0x18: case 0x1c: /* Load register (literal) */
2773        disas_ld_lit(s, insn);
2774        break;
2775    case 0x28: case 0x29:
2776    case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2777        disas_ldst_pair(s, insn);
2778        break;
2779    case 0x38: case 0x39:
2780    case 0x3c: case 0x3d: /* Load/store register (all forms) */
2781        disas_ldst_reg(s, insn);
2782        break;
2783    case 0x0c: /* AdvSIMD load/store multiple structures */
2784        disas_ldst_multiple_struct(s, insn);
2785        break;
2786    case 0x0d: /* AdvSIMD load/store single structure */
2787        disas_ldst_single_struct(s, insn);
2788        break;
2789    default:
2790        unallocated_encoding(s);
2791        break;
2792    }
2793}
2794
2795/* C3.4.6 PC-rel. addressing
2796 *   31  30   29 28       24 23                5 4    0
2797 * +----+-------+-----------+-------------------+------+
2798 * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2799 * +----+-------+-----------+-------------------+------+
2800 */
2801static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2802{
2803    unsigned int page, rd;
2804    uint64_t base;
2805    uint64_t offset;
2806
2807    page = extract32(insn, 31, 1);
2808    /* SignExtend(immhi:immlo) -> offset */
2809    offset = sextract64(insn, 5, 19);
2810    offset = offset << 2 | extract32(insn, 29, 2);
2811    rd = extract32(insn, 0, 5);
2812    base = s->pc - 4;
2813
2814    if (page) {
2815        /* ADRP (page based) */
2816        base &= ~0xfff;
2817        offset <<= 12;
2818    }
2819
2820    tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2821}
2822
2823/*
2824 * C3.4.1 Add/subtract (immediate)
2825 *
2826 *  31 30 29 28       24 23 22 21         10 9   5 4   0
2827 * +--+--+--+-----------+-----+-------------+-----+-----+
2828 * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2829 * +--+--+--+-----------+-----+-------------+-----+-----+
2830 *
2831 *    sf: 0 -> 32bit, 1 -> 64bit
2832 *    op: 0 -> add  , 1 -> sub
2833 *     S: 1 -> set flags
2834 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2835 */
2836static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2837{
2838    int rd = extract32(insn, 0, 5);
2839    int rn = extract32(insn, 5, 5);
2840    uint64_t imm = extract32(insn, 10, 12);
2841    int shift = extract32(insn, 22, 2);
2842    bool setflags = extract32(insn, 29, 1);
2843    bool sub_op = extract32(insn, 30, 1);
2844    bool is_64bit = extract32(insn, 31, 1);
2845
2846    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2847    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2848    TCGv_i64 tcg_result;
2849
2850    switch (shift) {
2851    case 0x0:
2852        break;
2853    case 0x1:
2854        imm <<= 12;
2855        break;
2856    default:
2857        unallocated_encoding(s);
2858        return;
2859    }
2860
2861    tcg_result = tcg_temp_new_i64();
2862    if (!setflags) {
2863        if (sub_op) {
2864            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2865        } else {
2866            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2867        }
2868    } else {
2869        TCGv_i64 tcg_imm = tcg_const_i64(imm);
2870        if (sub_op) {
2871            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2872        } else {
2873            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2874        }
2875        tcg_temp_free_i64(tcg_imm);
2876    }
2877
2878    if (is_64bit) {
2879        tcg_gen_mov_i64(tcg_rd, tcg_result);
2880    } else {
2881        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2882    }
2883
2884    tcg_temp_free_i64(tcg_result);
2885}
2886
/* The input should be a value in the bottom e bits (with higher
 * bits zero); returns that value replicated into every element
 * of size e in a 64 bit integer.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    /* Double the populated width each pass until the whole
     * 64 bit value is filled.
     */
    for (width = e; width < 64; width *= 2) {
        mask |= mask << width;
    }
    return mask;
}
2900
/* Return a value with the bottom len bits set (where 0 < len <= 64) */
static inline uint64_t bitmask64(unsigned int length)
{
    assert(length > 0 && length <= 64);
    /* A 64 bit shift of a 64 bit value is undefined behaviour,
     * so handle the full-width case explicitly.
     */
    return length == 64 ? ~0ULL : (1ULL << length) - 1;
}
2907
/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask. Returns false if the imms/immr/immn are a reserved
 * value (ie should cause a guest UNDEF exception), and true if they are
 * valid, in which case the decoded bit pattern is written to result.
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    unsigned int esize, levels, runlen, rot;
    uint64_t elt;
    int hsb;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The decoded pattern is a 64 bit value made of identical elements
     * of size esize = 2, 4, 8, 16, 32 or 64 bits.  Each element holds a
     * run of between 1 and esize-1 set bits, rotated within the element
     * by between 0 and esize-1 positions.
     *
     * immn:imms encode the element size and run length:
     * 64 bit elements: immn = 1, imms = <run length - 1>
     * 32 bit elements: immn = 0, imms = 0 : <run length - 1>
     * 16 bit elements: immn = 0, imms = 10 : <run length - 1>
     *  8 bit elements: immn = 0, imms = 110 : <run length - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <run length - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <run length - 1>
     * immn = 0, imms = 11111x is the only combination left over and is
     * reserved, as is a <run length - 1> field of all-ones (which would
     * mean every bit of the element is set).
     *
     * The rotation is always immr % esize (immr is 6 bits).
     */

    /* Element size is given by the highest set bit of immn:NOT(imms) */
    hsb = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (hsb < 1) {
        /* immn == 0, imms == 11111x: reserved */
        return false;
    }
    esize = 1 << hsb;

    levels = esize - 1;
    runlen = imms & levels;  /* number of set bits, minus one */
    rot = immr & levels;     /* rotation within the element */

    if (runlen == levels) {
        /* A run of esize set bits is reserved */
        return false;
    }

    /* Build one element: runlen+1 set bits rotated right by rot
     * within an esize-bit field...
     */
    elt = bitmask64(runlen + 1);
    if (rot) {
        elt = (elt >> rot) | (elt << (esize - rot));
        elt &= bitmask64(esize);
    }
    /* ...then tile it across the full 64 bits */
    *result = bitfield_replicate(elt, esize);
    return true;
}
2973
2974/* C3.4.4 Logical (immediate)
2975 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2976 * +----+-----+-------------+---+------+------+------+------+
2977 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
2978 * +----+-----+-------------+---+------+------+------+------+
2979 */
2980static void disas_logic_imm(DisasContext *s, uint32_t insn)
2981{
2982    unsigned int sf, opc, is_n, immr, imms, rn, rd;
2983    TCGv_i64 tcg_rd, tcg_rn;
2984    uint64_t wmask;
2985    bool is_and = false;
2986
2987    sf = extract32(insn, 31, 1);
2988    opc = extract32(insn, 29, 2);
2989    is_n = extract32(insn, 22, 1);
2990    immr = extract32(insn, 16, 6);
2991    imms = extract32(insn, 10, 6);
2992    rn = extract32(insn, 5, 5);
2993    rd = extract32(insn, 0, 5);
2994
2995    if (!sf && is_n) {
2996        unallocated_encoding(s);
2997        return;
2998    }
2999
3000    if (opc == 0x3) { /* ANDS */
3001        tcg_rd = cpu_reg(s, rd);
3002    } else {
3003        tcg_rd = cpu_reg_sp(s, rd);
3004    }
3005    tcg_rn = cpu_reg(s, rn);
3006
3007    if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3008        /* some immediate field values are reserved */
3009        unallocated_encoding(s);
3010        return;
3011    }
3012
3013    if (!sf) {
3014        wmask &= 0xffffffff;
3015    }
3016
3017    switch (opc) {
3018    case 0x3: /* ANDS */
3019    case 0x0: /* AND */
3020        tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3021        is_and = true;
3022        break;
3023    case 0x1: /* ORR */
3024        tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3025        break;
3026    case 0x2: /* EOR */
3027        tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3028        break;
3029    default:
3030        assert(FALSE); /* must handle all above */
3031        break;
3032    }
3033
3034    if (!sf && !is_and) {
3035        /* zero extend final result; we know we can skip this for AND
3036         * since the immediate had the high 32 bits clear.
3037         */
3038        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3039    }
3040
3041    if (opc == 3) { /* ANDS */
3042        gen_logic_CC(sf, tcg_rd);
3043    }
3044}
3045
3046/*
3047 * C3.4.5 Move wide (immediate)
3048 *
3049 *  31 30 29 28         23 22 21 20             5 4    0
3050 * +--+-----+-------------+-----+----------------+------+
3051 * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
3052 * +--+-----+-------------+-----+----------------+------+
3053 *
3054 * sf: 0 -> 32 bit, 1 -> 64 bit
3055 * opc: 00 -> N, 10 -> Z, 11 -> K
3056 * hw: shift/16 (0,16, and sf only 32, 48)
3057 */
3058static void disas_movw_imm(DisasContext *s, uint32_t insn)
3059{
3060    int rd = extract32(insn, 0, 5);
3061    uint64_t imm = extract32(insn, 5, 16);
3062    int sf = extract32(insn, 31, 1);
3063    int opc = extract32(insn, 29, 2);
3064    int pos = extract32(insn, 21, 2) << 4;
3065    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3066    TCGv_i64 tcg_imm;
3067
3068    if (!sf && (pos >= 32)) {
3069        unallocated_encoding(s);
3070        return;
3071    }
3072
3073    switch (opc) {
3074    case 0: /* MOVN */
3075    case 2: /* MOVZ */
3076        imm <<= pos;
3077        if (opc == 0) {
3078            imm = ~imm;
3079        }
3080        if (!sf) {
3081            imm &= 0xffffffffu;
3082        }
3083        tcg_gen_movi_i64(tcg_rd, imm);
3084        break;
3085    case 3: /* MOVK */
3086        tcg_imm = tcg_const_i64(imm);
3087        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3088        tcg_temp_free_i64(tcg_imm);
3089        if (!sf) {
3090            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3091        }
3092        break;
3093    default:
3094        unallocated_encoding(s);
3095        break;
3096    }
3097}
3098
/* C3.4.2 Bitfield
 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
 * +----+-----+-------------+---+------+------+------+------+
 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
 * +----+-----+-------------+---+------+------+------+------+
 *
 * Decodes SBFM (opc=00), BFM (opc=01) and UBFM (opc=10); opc=11 is
 * reserved.  The common aliases (SXTB/SXTH/SXTW, ASR, UXTB/UXTH,
 * LSR, LSL) are recognized first so they generate simpler TCG ops;
 * everything else falls through to the generic shift+deposit path.
 */
static void disas_bitfield(DisasContext *s, uint32_t insn)
{
    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
    TCGv_i64 tcg_rd, tcg_tmp;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);
    n = extract32(insn, 22, 1);
    ri = extract32(insn, 16, 6);  /* immr: rotate/shift amount */
    si = extract32(insn, 10, 6);  /* imms: msb of source bitfield */
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);
    bitsize = sf ? 64 : 32;

    /* N must equal sf, the immediates must fit the register width,
     * and opc=11 is reserved.
     */
    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
        unallocated_encoding(s);
        return;
    }

    tcg_rd = cpu_reg(s, rd);

    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
       to be smaller than bitsize, we'll never reference data outside the
       low 32-bits anyway.  */
    tcg_tmp = read_cpu_reg(s, rn, 1);

    /* Recognize the common aliases.  */
    if (opc == 0) { /* SBFM */
        if (ri == 0) {
            if (si == 7) { /* SXTB */
                tcg_gen_ext8s_i64(tcg_rd, tcg_tmp);
                goto done;
            } else if (si == 15) { /* SXTH */
                tcg_gen_ext16s_i64(tcg_rd, tcg_tmp);
                goto done;
            } else if (si == 31) { /* SXTW */
                tcg_gen_ext32s_i64(tcg_rd, tcg_tmp);
                goto done;
            }
        }
        if (si == 63 || (si == 31 && ri <= si)) { /* ASR */
            if (si == 31) {
                /* 32 bit ASR: sign-extend first so the 64 bit shift
                 * sees the right sign bit.
                 */
                tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp);
            }
            tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri);
            goto done;
        }
    } else if (opc == 2) { /* UBFM */
        if (ri == 0) { /* UXTB, UXTH, plus non-canonical AND */
            /* The mask is at most 32 bits when !sf, so the result is
             * already zero-extended: return, skipping the 'done' ext.
             */
            tcg_gen_andi_i64(tcg_rd, tcg_tmp, bitmask64(si + 1));
            return;
        }
        if (si == 63 || (si == 31 && ri <= si)) { /* LSR */
            if (si == 31) {
                tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp);
            }
            /* Shifting a zero-extended value right leaves the high
             * bits clear, so no final extend is needed: return.
             */
            tcg_gen_shri_i64(tcg_rd, tcg_tmp, ri);
            return;
        }
        if (si + 1 == ri && si != bitsize - 1) { /* LSL */
            int shift = bitsize - 1 - si;
            tcg_gen_shli_i64(tcg_rd, tcg_tmp, shift);
            goto done;
        }
    }

    /* Generic path.  For SBFM/UBFM the untouched destination bits are
     * zero; for BFM (opc == 1) they keep Rd's old value.
     */
    if (opc != 1) { /* SBFM or UBFM */
        tcg_gen_movi_i64(tcg_rd, 0);
    }

    /* do the bit move operation */
    if (si >= ri) {
        /* Wd<s-r:0> = Wn<s:r> */
        tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
        pos = 0;
        len = (si - ri) + 1;
    } else {
        /* Wd<32+s-r,32-r> = Wn<s:0> */
        pos = bitsize - ri;
        len = si + 1;
    }

    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);

    if (opc == 0) { /* SBFM - sign extend the destination field */
        tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
        tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
    }

 done:
    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}
3199
3200/* C3.4.3 Extract
3201 *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3202 * +----+------+-------------+---+----+------+--------+------+------+
3203 * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3204 * +----+------+-------------+---+----+------+--------+------+------+
3205 */
3206static void disas_extract(DisasContext *s, uint32_t insn)
3207{
3208    unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3209
3210    sf = extract32(insn, 31, 1);
3211    n = extract32(insn, 22, 1);
3212    rm = extract32(insn, 16, 5);
3213    imm = extract32(insn, 10, 6);
3214    rn = extract32(insn, 5, 5);
3215    rd = extract32(insn, 0, 5);
3216    op21 = extract32(insn, 29, 2);
3217    op0 = extract32(insn, 21, 1);
3218    bitsize = sf ? 64 : 32;
3219
3220    if (sf != n || op21 || op0 || imm >= bitsize) {
3221        unallocated_encoding(s);
3222    } else {
3223        TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3224
3225        tcg_rd = cpu_reg(s, rd);
3226
3227        if (unlikely(imm == 0)) {
3228            /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3229             * so an extract from bit 0 is a special case.
3230             */
3231            if (sf) {
3232                tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3233            } else {
3234                tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3235            }
3236        } else if (rm == rn) { /* ROR */
3237            tcg_rm = cpu_reg(s, rm);
3238            if (sf) {
3239                tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3240            } else {
3241                TCGv_i32 tmp = tcg_temp_new_i32();
3242                tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3243                tcg_gen_rotri_i32(tmp, tmp, imm);
3244                tcg_gen_extu_i32_i64(tcg_rd, tmp);
3245                tcg_temp_free_i32(tmp);
3246            }
3247        } else {
3248            tcg_rm = read_cpu_reg(s, rm, sf);
3249            tcg_rn = read_cpu_reg(s, rn, sf);
3250            tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3251            tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3252            tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3253            if (!sf) {
3254                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3255            }
3256        }
3257    }
3258}
3259
3260/* C3.4 Data processing - immediate */
3261static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3262{
3263    switch (extract32(insn, 23, 6)) {
3264    case 0x20: case 0x21: /* PC-rel. addressing */
3265        disas_pc_rel_adr(s, insn);
3266        break;
3267    case 0x22: case 0x23: /* Add/subtract (immediate) */
3268        disas_add_sub_imm(s, insn);
3269        break;
3270    case 0x24: /* Logical (immediate) */
3271        disas_logic_imm(s, insn);
3272        break;
3273    case 0x25: /* Move wide (immediate) */
3274        disas_movw_imm(s, insn);
3275        break;
3276    case 0x26: /* Bitfield */
3277        disas_bitfield(s, insn);
3278        break;
3279    case 0x27: /* Extract */
3280        disas_extract(s, insn);
3281        break;
3282    default:
3283        unallocated_encoding(s);
3284        break;
3285    }
3286}
3287
3288/* Shift a TCGv src by TCGv shift_amount, put result in dst.
3289 * Note that it is the caller's responsibility to ensure that the
3290 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3291 * mandated semantics for out of range shifts.
3292 */
3293static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3294                      enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3295{
3296    switch (shift_type) {
3297    case A64_SHIFT_TYPE_LSL:
3298        tcg_gen_shl_i64(dst, src, shift_amount);
3299        break;
3300    case A64_SHIFT_TYPE_LSR:
3301        tcg_gen_shr_i64(dst, src, shift_amount);
3302        break;
3303    case A64_SHIFT_TYPE_ASR:
3304        if (!sf) {
3305            tcg_gen_ext32s_i64(dst, src);
3306        }
3307        tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3308        break;
3309    case A64_SHIFT_TYPE_ROR:
3310        if (sf) {
3311            tcg_gen_rotr_i64(dst, src, shift_amount);
3312        } else {
3313            TCGv_i32 t0, t1;
3314            t0 = tcg_temp_new_i32();
3315            t1 = tcg_temp_new_i32();
3316            tcg_gen_extrl_i64_i32(t0, src);
3317            tcg_gen_extrl_i64_i32(t1, shift_amount);
3318            tcg_gen_rotr_i32(t0, t0, t1);
3319            tcg_gen_extu_i32_i64(dst, t0);
3320            tcg_temp_free_i32(t0);
3321            tcg_temp_free_i32(t1);
3322        }
3323        break;
3324    default:
3325        assert(FALSE); /* all shift types should be handled */
3326        break;
3327    }
3328
3329    if (!sf) { /* zero extend final result */
3330        tcg_gen_ext32u_i64(dst, dst);
3331    }
3332}
3333
3334/* Shift a TCGv src by immediate, put result in dst.
3335 * The shift amount must be in range (this should always be true as the
3336 * relevant instructions will UNDEF on bad shift immediates).
3337 */
3338static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3339                          enum a64_shift_type shift_type, unsigned int shift_i)
3340{
3341    assert(shift_i < (sf ? 64 : 32));
3342
3343    if (shift_i == 0) {
3344        tcg_gen_mov_i64(dst, src);
3345    } else {
3346        TCGv_i64 shift_const;
3347
3348        shift_const = tcg_const_i64(shift_i);
3349        shift_reg(dst, src, sf, shift_type, shift_const);
3350        tcg_temp_free_i64(shift_const);
3351    }
3352}
3353
3354/* C3.5.10 Logical (shifted register)
3355 *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3356 * +----+-----+-----------+-------+---+------+--------+------+------+
3357 * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3358 * +----+-----+-----------+-------+---+------+--------+------+------+
3359 */
3360static void disas_logic_reg(DisasContext *s, uint32_t insn)
3361{
3362    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3363    unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3364
3365    sf = extract32(insn, 31, 1);
3366    opc = extract32(insn, 29, 2);
3367    shift_type = extract32(insn, 22, 2);
3368    invert = extract32(insn, 21, 1);
3369    rm = extract32(insn, 16, 5);
3370    shift_amount = extract32(insn, 10, 6);
3371    rn = extract32(insn, 5, 5);
3372    rd = extract32(insn, 0, 5);
3373
3374    if (!sf && (shift_amount & (1 << 5))) {
3375        unallocated_encoding(s);
3376        return;
3377    }
3378
3379    tcg_rd = cpu_reg(s, rd);
3380
3381    if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3382        /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3383         * register-register MOV and MVN, so it is worth special casing.
3384         */
3385        tcg_rm = cpu_reg(s, rm);
3386        if (invert) {
3387            tcg_gen_not_i64(tcg_rd, tcg_rm);
3388            if (!sf) {
3389                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3390            }
3391        } else {
3392            if (sf) {
3393                tcg_gen_mov_i64(tcg_rd, tcg_rm);
3394            } else {
3395                tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3396            }
3397        }
3398        return;
3399    }
3400
3401    tcg_rm = read_cpu_reg(s, rm, sf);
3402
3403    if (shift_amount) {
3404        shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3405    }
3406
3407    tcg_rn = cpu_reg(s, rn);
3408
3409    switch (opc | (invert << 2)) {
3410    case 0: /* AND */
3411    case 3: /* ANDS */
3412        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3413        break;
3414    case 1: /* ORR */
3415        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3416        break;
3417    case 2: /* EOR */
3418        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3419        break;
3420    case 4: /* BIC */
3421    case 7: /* BICS */
3422        tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3423        break;
3424    case 5: /* ORN */
3425        tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3426        break;
3427    case 6: /* EON */
3428        tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3429        break;
3430    default:
3431        assert(FALSE);
3432        break;
3433    }
3434
3435    if (!sf) {
3436        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3437    }
3438
3439    if (opc == 3) {
3440        gen_logic_CC(sf, tcg_rd);
3441    }
3442}
3443
3444/*
3445 * C3.5.1 Add/subtract (extended register)
3446 *
3447 *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3448 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3449 * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3450 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3451 *
3452 *  sf: 0 -> 32bit, 1 -> 64bit
3453 *  op: 0 -> add  , 1 -> sub
3454 *   S: 1 -> set flags
3455 * opt: 00
3456 * option: extension type (see DecodeRegExtend)
3457 * imm3: optional shift to Rm
3458 *
3459 * Rd = Rn + LSL(extend(Rm), amount)
3460 */
3461static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3462{
3463    int rd = extract32(insn, 0, 5);
3464    int rn = extract32(insn, 5, 5);
3465    int imm3 = extract32(insn, 10, 3);
3466    int option = extract32(insn, 13, 3);
3467    int rm = extract32(insn, 16, 5);
3468    bool setflags = extract32(insn, 29, 1);
3469    bool sub_op = extract32(insn, 30, 1);
3470    bool sf = extract32(insn, 31, 1);
3471
3472    TCGv_i64 tcg_rm, tcg_rn; /* temps */
3473    TCGv_i64 tcg_rd;
3474    TCGv_i64 tcg_result;
3475
3476    if (imm3 > 4) {
3477        unallocated_encoding(s);
3478        return;
3479    }
3480
3481    /* non-flag setting ops may use SP */
3482    if (!setflags) {
3483        tcg_rd = cpu_reg_sp(s, rd);
3484    } else {
3485        tcg_rd = cpu_reg(s, rd);
3486    }
3487    tcg_rn = read_cpu_reg_sp(s, rn, sf);
3488
3489    tcg_rm = read_cpu_reg(s, rm, sf);
3490    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3491
3492    tcg_result = tcg_temp_new_i64();
3493
3494    if (!setflags) {
3495        if (sub_op) {
3496            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3497        } else {
3498            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3499        }
3500    } else {
3501        if (sub_op) {
3502            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3503        } else {
3504            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3505        }
3506    }
3507
3508    if (sf) {
3509        tcg_gen_mov_i64(tcg_rd, tcg_result);
3510    } else {
3511        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3512    }
3513
3514    tcg_temp_free_i64(tcg_result);
3515}
3516
3517/*
3518 * C3.5.2 Add/subtract (shifted register)
3519 *
3520 *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3521 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3522 * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3523 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3524 *
3525 *    sf: 0 -> 32bit, 1 -> 64bit
3526 *    op: 0 -> add  , 1 -> sub
3527 *     S: 1 -> set flags
3528 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3529 *  imm6: Shift amount to apply to Rm before the add/sub
3530 */
3531static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3532{
3533    int rd = extract32(insn, 0, 5);
3534    int rn = extract32(insn, 5, 5);
3535    int imm6 = extract32(insn, 10, 6);
3536    int rm = extract32(insn, 16, 5);
3537    int shift_type = extract32(insn, 22, 2);
3538    bool setflags = extract32(insn, 29, 1);
3539    bool sub_op = extract32(insn, 30, 1);
3540    bool sf = extract32(insn, 31, 1);
3541
3542    TCGv_i64 tcg_rd = cpu_reg(s, rd);
3543    TCGv_i64 tcg_rn, tcg_rm;
3544    TCGv_i64 tcg_result;
3545
3546    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3547        unallocated_encoding(s);
3548        return;
3549    }
3550
3551    tcg_rn = read_cpu_reg(s, rn, sf);
3552    tcg_rm = read_cpu_reg(s, rm, sf);
3553
3554    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3555
3556    tcg_result = tcg_temp_new_i64();
3557
3558    if (!setflags) {
3559        if (sub_op) {
3560            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3561        } else {
3562            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3563        }
3564    } else {
3565        if (sub_op) {
3566            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3567        } else {
3568            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3569        }
3570    }
3571
3572    if (sf) {
3573        tcg_gen_mov_i64(tcg_rd, tcg_result);
3574    } else {
3575        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3576    }
3577
3578    tcg_temp_free_i64(tcg_result);
3579}
3580
/* C3.5.9 Data-processing (3 source)

   31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
  +--+------+-----------+------+------+----+------+------+------+
  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
  +--+------+-----------+------+------+----+------+------+------+

   Handles MADD/MSUB (32 and 64 bit), the widening multiplies
   SMADDL/SMSUBL/UMADDL/UMSUBL, and the high-half multiplies
   SMULH/UMULH.
 */
static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int ra = extract32(insn, 10, 5);
    int rm = extract32(insn, 16, 5);
    /* Pack the scattered opcode fields into one value for dispatch */
    int op_id = (extract32(insn, 29, 3) << 4) |
        (extract32(insn, 21, 3) << 1) |
        extract32(insn, 15, 1);
    bool sf = extract32(insn, 31, 1);
    bool is_sub = extract32(op_id, 0, 1);   /* o0: 0 -> accumulate-add, 1 -> subtract */
    bool is_high = extract32(op_id, 2, 1);  /* SMULH/UMULH: keep only the high 64 bits */
    bool is_signed = false;
    TCGv_i64 tcg_op1;
    TCGv_i64 tcg_op2;
    TCGv_i64 tcg_tmp;

    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
    switch (op_id) {
    case 0x42: /* SMADDL */
    case 0x43: /* SMSUBL */
    case 0x44: /* SMULH */
        is_signed = true;
        break;
    case 0x0: /* MADD (32bit) */
    case 0x1: /* MSUB (32bit) */
    case 0x40: /* MADD (64bit) */
    case 0x41: /* MSUB (64bit) */
    case 0x4a: /* UMADDL */
    case 0x4b: /* UMSUBL */
    case 0x4c: /* UMULH */
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (is_high) {
        /* SMULH/UMULH: 64x64 -> 128 multiply, Rd gets the high half */
        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
        TCGv_i64 tcg_rd = cpu_reg(s, rd);
        TCGv_i64 tcg_rn = cpu_reg(s, rn);
        TCGv_i64 tcg_rm = cpu_reg(s, rm);

        if (is_signed) {
            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        } else {
            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        }

        tcg_temp_free_i64(low_bits);
        return;
    }

    tcg_op1 = tcg_temp_new_i64();
    tcg_op2 = tcg_temp_new_i64();
    tcg_tmp = tcg_temp_new_i64();

    /* op_id < 0x42 covers the non-widening MADD/MSUB forms; the
     * widening forms take the low 32 bits of Rn/Rm, sign- or
     * zero-extended to 64 bits.
     */
    if (op_id < 0x42) {
        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
    } else {
        if (is_signed) {
            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
        } else {
            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
        }
    }

    if (ra == 31 && !is_sub) {
        /* Special-case MADD with rA == XZR; it is the standard MUL alias */
        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
    } else {
        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
        if (is_sub) {
            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        } else {
            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        }
    }

    /* 32 bit forms zero-extend the result into the 64 bit register */
    if (!sf) {
        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
    }

    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_tmp);
}
3679
3680/* C3.5.3 - Add/subtract (with carry)
3681 *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3682 * +--+--+--+------------------------+------+---------+------+-----+
3683 * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3684 * +--+--+--+------------------------+------+---------+------+-----+
3685 *                                            [000000]
3686 */
3687
3688static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3689{
3690    unsigned int sf, op, setflags, rm, rn, rd;
3691    TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3692
3693    if (extract32(insn, 10, 6) != 0) {
3694        unallocated_encoding(s);
3695        return;
3696    }
3697
3698    sf = extract32(insn, 31, 1);
3699    op = extract32(insn, 30, 1);
3700    setflags = extract32(insn, 29, 1);
3701    rm = extract32(insn, 16, 5);
3702    rn = extract32(insn, 5, 5);
3703    rd = extract32(insn, 0, 5);
3704
3705    tcg_rd = cpu_reg(s, rd);
3706    tcg_rn = cpu_reg(s, rn);
3707
3708    if (op) {
3709        tcg_y = new_tmp_a64(s);
3710        tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3711    } else {
3712        tcg_y = cpu_reg(s, rm);
3713    }
3714
3715    if (setflags) {
3716        gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3717    } else {
3718        gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3719    }
3720}
3721
/* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
 *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
 * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
 *        [1]                             y                [0]       [0]
 */
static void disas_cc(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
    DisasCompare c;

    /* S must be 1 for CCMN/CCMP */
    if (!extract32(insn, 29, 1)) {
        unallocated_encoding(s);
        return;
    }
    /* o2 (bit 10) and o3 (bit 4) must both be 0 */
    if (insn & (1 << 10 | 1 << 4)) {
        unallocated_encoding(s);
        return;
    }
    sf = extract32(insn, 31, 1);
    op = extract32(insn, 30, 1);      /* 0 = CCMN (add), 1 = CCMP (sub) */
    is_imm = extract32(insn, 11, 1);
    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    nzcv = extract32(insn, 0, 4);

    /* Set T0 = !COND.  */
    tcg_t0 = tcg_temp_new_i32();
    arm_test_cc(&c, cond);
    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
    arm_free_cc(&c);

    /* Load the arguments for the new comparison.  */
    if (is_imm) {
        tcg_y = new_tmp_a64(s);
        tcg_gen_movi_i64(tcg_y, y);
    } else {
        tcg_y = cpu_reg(s, y);
    }
    tcg_rn = cpu_reg(s, rn);

    /* Set the flags for the new comparison.  The comparison result itself
     * is discarded; only the NZCV side effects matter.
     */
    tcg_tmp = tcg_temp_new_i64();
    if (op) {
        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    } else {
        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    }
    tcg_temp_free_i64(tcg_tmp);

    /* If COND was false, force the flags to #nzcv.  Compute two masks
     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
     * For tcg hosts that support ANDC, we can make do with just T1.
     * In either case, allow the tcg optimizer to delete any unused mask.
     */
    tcg_t1 = tcg_temp_new_i32();
    tcg_t2 = tcg_temp_new_i32();
    tcg_gen_neg_i32(tcg_t1, tcg_t0);
    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);

    /* NF is significant in its sign bit only, so OR in the all-ones mask
     * (T1) to set it, or AND it away to clear it.
     */
    if (nzcv & 8) { /* N */
        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
        }
    }
    /* ZF means "Z set" when it is zero, hence the sense of the ops is
     * inverted relative to NF: AND to zero it (Z=1), OR in T0 to make it
     * non-zero (Z=0).
     */
    if (nzcv & 4) { /* Z */
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
        }
    } else {
        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
    }
    /* CF holds 0 or 1, so OR in T0 (0/1) rather than the -1 mask */
    if (nzcv & 2) { /* C */
        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
        }
    }
    /* VF, like NF, is significant in its sign bit only */
    if (nzcv & 1) { /* V */
        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
        }
    }
    tcg_temp_free_i32(tcg_t0);
    tcg_temp_free_i32(tcg_t1);
    tcg_temp_free_i32(tcg_t2);
}
3826
/* C3.5.6 Conditional select
 *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
 * +----+----+---+-----------------+------+------+-----+------+------+
 * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
 * +----+----+---+-----------------+------+------+-----+------+------+
 */
static void disas_cond_select(DisasContext *s, uint32_t insn)
{
    unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
    TCGv_i64 tcg_rd, zero;
    DisasCompare64 c;

    if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
        /* S == 1 or op2<1> == 1 */
        unallocated_encoding(s);
        return;
    }
    sf = extract32(insn, 31, 1);
    else_inv = extract32(insn, 30, 1);  /* op: bitwise-invert the else value */
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    else_inc = extract32(insn, 10, 1);  /* op2<0>: increment the else value */
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    tcg_rd = cpu_reg(s, rd);

    a64_test_cc(&c, cond);
    zero = tcg_const_i64(0);

    if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
        /* CSET & CSETM: both sources are XZR, so the result is just 0/1
         * from the inverted condition (CSINC alias), negated to 0/-1 for
         * the CSINV alias.
         */
        tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
        if (else_inv) {
            tcg_gen_neg_i64(tcg_rd, tcg_rd);
        }
    } else {
        TCGv_i64 t_true = cpu_reg(s, rn);
        TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
        /* Transform the else value in place (it is a temporary copy from
         * read_cpu_reg), then select with a single movcond.
         */
        if (else_inv && else_inc) {
            tcg_gen_neg_i64(t_false, t_false);
        } else if (else_inv) {
            tcg_gen_not_i64(t_false, t_false);
        } else if (else_inc) {
            tcg_gen_addi_i64(t_false, t_false, 1);
        }
        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
    }

    tcg_temp_free_i64(zero);
    a64_free_cc(&c);

    if (!sf) {
        /* 32-bit form: writes to Wd zero-extend into the X register */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}
3883
3884static void handle_clz(DisasContext *s, unsigned int sf,
3885                       unsigned int rn, unsigned int rd)
3886{
3887    TCGv_i64 tcg_rd, tcg_rn;
3888    tcg_rd = cpu_reg(s, rd);
3889    tcg_rn = cpu_reg(s, rn);
3890
3891    if (sf) {
3892        gen_helper_clz64(tcg_rd, tcg_rn);
3893    } else {
3894        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3895        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3896        gen_helper_clz(tcg_tmp32, tcg_tmp32);
3897        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3898        tcg_temp_free_i32(tcg_tmp32);
3899    }
3900}
3901
3902static void handle_cls(DisasContext *s, unsigned int sf,
3903                       unsigned int rn, unsigned int rd)
3904{
3905    TCGv_i64 tcg_rd, tcg_rn;
3906    tcg_rd = cpu_reg(s, rd);
3907    tcg_rn = cpu_reg(s, rn);
3908
3909    if (sf) {
3910        gen_helper_cls64(tcg_rd, tcg_rn);
3911    } else {
3912        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3913        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3914        gen_helper_cls32(tcg_tmp32, tcg_tmp32);
3915        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3916        tcg_temp_free_i32(tcg_tmp32);
3917    }
3918}
3919
3920static void handle_rbit(DisasContext *s, unsigned int sf,
3921                        unsigned int rn, unsigned int rd)
3922{
3923    TCGv_i64 tcg_rd, tcg_rn;
3924    tcg_rd = cpu_reg(s, rd);
3925    tcg_rn = cpu_reg(s, rn);
3926
3927    if (sf) {
3928        gen_helper_rbit64(tcg_rd, tcg_rn);
3929    } else {
3930        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3931        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3932        gen_helper_rbit(tcg_tmp32, tcg_tmp32);
3933        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3934        tcg_temp_free_i32(tcg_tmp32);
3935    }
3936}
3937
3938/* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3939static void handle_rev64(DisasContext *s, unsigned int sf,
3940                         unsigned int rn, unsigned int rd)
3941{
3942    if (!sf) {
3943        unallocated_encoding(s);
3944        return;
3945    }
3946    tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
3947}
3948
/* C5.6.149 REV with sf==0, opcode==2
 * C5.6.151 REV32 (sf==1, opcode==2)
 */
static void handle_rev32(DisasContext *s, unsigned int sf,
                         unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd = cpu_reg(s, rd);

    if (sf) {
        /* 64-bit REV32: byte-reverse each 32-bit half independently,
         * then reassemble the two halves.
         */
        TCGv_i64 tcg_tmp = tcg_temp_new_i64();
        TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);

        /* bswap32_i64 requires zero high word */
        tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
        tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
        tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
        tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);

        tcg_temp_free_i64(tcg_tmp);
    } else {
        /* 32-bit REV: zero-extend (bswap32_i64 needs a clear high word),
         * then byte-reverse the low word in place.
         */
        tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
        tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
    }
}
3974
/* C5.6.150 REV16 (opcode==1)
 * Byte-reverse each 16-bit halfword of Rn independently, building the
 * result one halfword at a time with deposits.
 */
static void handle_rev16(DisasContext *s, unsigned int sf,
                         unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);

    /* Halfword 0: isolate, swap, and start the result */
    tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
    tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);

    /* Halfword 1 */
    tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
    tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
    tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);

    if (sf) {
        /* 64-bit form: halfwords 2 and 3 as well */
        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
        tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
        tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);

        /* Halfword 3: the shift by 48 already leaves only 16 bits,
         * so no masking is needed.
         */
        tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
        tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
    }

    tcg_temp_free_i64(tcg_tmp);
}
4004
4005/* C3.5.7 Data-processing (1 source)
4006 *   31  30  29  28             21 20     16 15    10 9    5 4    0
4007 * +----+---+---+-----------------+---------+--------+------+------+
4008 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
4009 * +----+---+---+-----------------+---------+--------+------+------+
4010 */
4011static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4012{
4013    unsigned int sf, opcode, rn, rd;
4014
4015    if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
4016        unallocated_encoding(s);
4017        return;
4018    }
4019
4020    sf = extract32(insn, 31, 1);
4021    opcode = extract32(insn, 10, 6);
4022    rn = extract32(insn, 5, 5);
4023    rd = extract32(insn, 0, 5);
4024
4025    switch (opcode) {
4026    case 0: /* RBIT */
4027        handle_rbit(s, sf, rn, rd);
4028        break;
4029    case 1: /* REV16 */
4030        handle_rev16(s, sf, rn, rd);
4031        break;
4032    case 2: /* REV32 */
4033        handle_rev32(s, sf, rn, rd);
4034        break;
4035    case 3: /* REV64 */
4036        handle_rev64(s, sf, rn, rd);
4037        break;
4038    case 4: /* CLZ */
4039        handle_clz(s, sf, rn, rd);
4040        break;
4041    case 5: /* CLS */
4042        handle_cls(s, sf, rn, rd);
4043        break;
4044    }
4045}
4046
4047static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
4048                       unsigned int rm, unsigned int rn, unsigned int rd)
4049{
4050    TCGv_i64 tcg_n, tcg_m, tcg_rd;
4051    tcg_rd = cpu_reg(s, rd);
4052
4053    if (!sf && is_signed) {
4054        tcg_n = new_tmp_a64(s);
4055        tcg_m = new_tmp_a64(s);
4056        tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
4057        tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
4058    } else {
4059        tcg_n = read_cpu_reg(s, rn, sf);
4060        tcg_m = read_cpu_reg(s, rm, sf);
4061    }
4062
4063    if (is_signed) {
4064        gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
4065    } else {
4066        gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
4067    }
4068
4069    if (!sf) { /* zero extend final result */
4070        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4071    }
4072}
4073
4074/* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
4075static void handle_shift_reg(DisasContext *s,
4076                             enum a64_shift_type shift_type, unsigned int sf,
4077                             unsigned int rm, unsigned int rn, unsigned int rd)
4078{
4079    TCGv_i64 tcg_shift = tcg_temp_new_i64();
4080    TCGv_i64 tcg_rd = cpu_reg(s, rd);
4081    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4082
4083    tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
4084    shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
4085    tcg_temp_free_i64(tcg_shift);
4086}
4087
4088/* CRC32[BHWX], CRC32C[BHWX] */
4089static void handle_crc32(DisasContext *s,
4090                         unsigned int sf, unsigned int sz, bool crc32c,
4091                         unsigned int rm, unsigned int rn, unsigned int rd)
4092{
4093    TCGv_i64 tcg_acc, tcg_val;
4094    TCGv_i32 tcg_bytes;
4095
4096    if (!arm_dc_feature(s, ARM_FEATURE_CRC)
4097        || (sf == 1 && sz != 3)
4098        || (sf == 0 && sz == 3)) {
4099        unallocated_encoding(s);
4100        return;
4101    }
4102
4103    if (sz == 3) {
4104        tcg_val = cpu_reg(s, rm);
4105    } else {
4106        uint64_t mask;
4107        switch (sz) {
4108        case 0:
4109            mask = 0xFF;
4110            break;
4111        case 1:
4112            mask = 0xFFFF;
4113            break;
4114        case 2:
4115            mask = 0xFFFFFFFF;
4116            break;
4117        default:
4118            g_assert_not_reached();
4119        }
4120        tcg_val = new_tmp_a64(s);
4121        tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4122    }
4123
4124    tcg_acc = cpu_reg(s, rn);
4125    tcg_bytes = tcg_const_i32(1 << sz);
4126
4127    if (crc32c) {
4128        gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4129    } else {
4130        gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4131    }
4132
4133    tcg_temp_free_i32(tcg_bytes);
4134}
4135
4136/* C3.5.8 Data-processing (2 source)
4137 *   31   30  29 28             21 20  16 15    10 9    5 4    0
4138 * +----+---+---+-----------------+------+--------+------+------+
4139 * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4140 * +----+---+---+-----------------+------+--------+------+------+
4141 */
4142static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4143{
4144    unsigned int sf, rm, opcode, rn, rd;
4145    sf = extract32(insn, 31, 1);
4146    rm = extract32(insn, 16, 5);
4147    opcode = extract32(insn, 10, 6);
4148    rn = extract32(insn, 5, 5);
4149    rd = extract32(insn, 0, 5);
4150
4151    if (extract32(insn, 29, 1)) {
4152        unallocated_encoding(s);
4153        return;
4154    }
4155
4156    switch (opcode) {
4157    case 2: /* UDIV */
4158        handle_div(s, false, sf, rm, rn, rd);
4159        break;
4160    case 3: /* SDIV */
4161        handle_div(s, true, sf, rm, rn, rd);
4162        break;
4163    case 8: /* LSLV */
4164        handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4165        break;
4166    case 9: /* LSRV */
4167        handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4168        break;
4169    case 10: /* ASRV */
4170        handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4171        break;
4172    case 11: /* RORV */
4173        handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4174        break;
4175    case 16:
4176    case 17:
4177    case 18:
4178    case 19:
4179    case 20:
4180    case 21:
4181    case 22:
4182    case 23: /* CRC32 */
4183    {
4184        int sz = extract32(opcode, 0, 2);
4185        bool crc32c = extract32(opcode, 2, 1);
4186        handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4187        break;
4188    }
4189    default:
4190        unallocated_encoding(s);
4191        break;
4192    }
4193}
4194
/* C3.5 Data processing - register
 * Second-level decode: dispatch on insn<28:24> (and a few further bits)
 * to the individual instruction-class decoders.
 */
static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
{
    switch (extract32(insn, 24, 5)) {
    case 0x0a: /* Logical (shifted register) */
        disas_logic_reg(s, insn);
        break;
    case 0x0b: /* Add/subtract */
        if (insn & (1 << 21)) { /* (extended register) */
            disas_add_sub_ext_reg(s, insn);
        } else {
            disas_add_sub_reg(s, insn);
        }
        break;
    case 0x1b: /* Data-processing (3 source) */
        disas_data_proc_3src(s, insn);
        break;
    case 0x1a:
        /* Further decode on insn<23:21> */
        switch (extract32(insn, 21, 3)) {
        case 0x0: /* Add/subtract (with carry) */
            disas_adc_sbc(s, insn);
            break;
        case 0x2: /* Conditional compare */
            disas_cc(s, insn); /* both imm and reg forms */
            break;
        case 0x4: /* Conditional select */
            disas_cond_select(s, insn);
            break;
        case 0x6: /* Data-processing */
            if (insn & (1 << 30)) { /* (1 source) */
                disas_data_proc_1src(s, insn);
            } else {            /* (2 source) */
                disas_data_proc_2src(s, insn);
            }
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}
4240
/* Compare FP register Rn against Rm (or against +0.0 when cmp_with_zero)
 * and copy the resulting flags into NZCV.  signal_all_nans selects the
 * signalling-compare helpers (the FCMPE/FCCMPE forms).
 */
static void handle_fp_compare(DisasContext *s, bool is_double,
                              unsigned int rn, unsigned int rm,
                              bool cmp_with_zero, bool signal_all_nans)
{
    TCGv_i64 tcg_flags = tcg_temp_new_i64();
    TCGv_ptr fpst = get_fpstatus_ptr();

    if (is_double) {
        TCGv_i64 tcg_vn, tcg_vm;

        tcg_vn = read_fp_dreg(s, rn);
        if (cmp_with_zero) {
            tcg_vm = tcg_const_i64(0);
        } else {
            tcg_vm = read_fp_dreg(s, rm);
        }
        if (signal_all_nans) {
            gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        } else {
            gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        }
        tcg_temp_free_i64(tcg_vn);
        tcg_temp_free_i64(tcg_vm);
    } else {
        TCGv_i32 tcg_vn, tcg_vm;

        tcg_vn = read_fp_sreg(s, rn);
        if (cmp_with_zero) {
            tcg_vm = tcg_const_i32(0);
        } else {
            tcg_vm = read_fp_sreg(s, rm);
        }
        if (signal_all_nans) {
            gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        } else {
            gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        }
        tcg_temp_free_i32(tcg_vn);
        tcg_temp_free_i32(tcg_vm);
    }

    tcg_temp_free_ptr(fpst);

    /* The helpers return the flag value; commit it to NZCV */
    gen_set_nzcv(tcg_flags);

    tcg_temp_free_i64(tcg_flags);
}
4288
4289/* C3.6.22 Floating point compare
4290 *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4291 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4292 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4293 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4294 */
4295static void disas_fp_compare(DisasContext *s, uint32_t insn)
4296{
4297    unsigned int mos, type, rm, op, rn, opc, op2r;
4298
4299    mos = extract32(insn, 29, 3);
4300    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4301    rm = extract32(insn, 16, 5);
4302    op = extract32(insn, 14, 2);
4303    rn = extract32(insn, 5, 5);
4304    opc = extract32(insn, 3, 2);
4305    op2r = extract32(insn, 0, 3);
4306
4307    if (mos || op || op2r || type > 1) {
4308        unallocated_encoding(s);
4309        return;
4310    }
4311
4312    if (!fp_access_check(s)) {
4313        return;
4314    }
4315
4316    handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4317}
4318
/* C3.6.23 Floating point conditional compare
 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
 */
static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, cond, rn, op, nzcv;
    TCGv_i64 tcg_flags;
    TCGLabel *label_continue = NULL;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    op = extract32(insn, 4, 1);    /* selects the signalling form (FCCMPE) */
    nzcv = extract32(insn, 0, 4);

    if (mos || type > 1) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (cond < 0x0e) { /* not always */
        /* Branch to label_match when the condition holds; on the
         * fall-through (no-match) path just load #nzcv into the flags
         * and jump past the compare.
         */
        TCGLabel *label_match = gen_new_label();
        label_continue = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        /* nomatch: */
        tcg_flags = tcg_const_i64(nzcv << 28);
        gen_set_nzcv(tcg_flags);
        tcg_temp_free_i64(tcg_flags);
        tcg_gen_br(label_continue);
        gen_set_label(label_match);
    }

    handle_fp_compare(s, type, rn, rm, false, op);

    if (cond < 0x0e) {
        gen_set_label(label_continue);
    }
}
4366
/* C3.6.24 Floating point conditional select
 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
 * +---+---+---+-----------+------+---+------+------+-----+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+------+-----+------+------+
 */
static void disas_fp_csel(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, cond, rn, rd;
    TCGv_i64 t_true, t_false, t_zero;
    DisasCompare64 c;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    if (mos || type > 1) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* Zero extend sreg inputs to 64 bits now.  */
    t_true = tcg_temp_new_i64();
    t_false = tcg_temp_new_i64();
    read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
    read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);

    /* Select into t_true: keep it when the condition holds, otherwise
     * replace it with t_false.
     */
    a64_test_cc(&c, cond);
    t_zero = tcg_const_i64(0);
    tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
    tcg_temp_free_i64(t_zero);
    tcg_temp_free_i64(t_false);
    a64_free_cc(&c);

    /* Note that sregs write back zeros to the high bits,
       and we've already done the zero-extension.  */
    write_fp_dreg(s, rd, t_true);
    tcg_temp_free_i64(t_true);
}
4413
/* C3.6.25 Floating-point data-processing (1 source) - single precision */
static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
{
    TCGv_ptr fpst;
    TCGv_i32 tcg_op;
    TCGv_i32 tcg_res;

    fpst = get_fpstatus_ptr();
    tcg_op = read_fp_sreg(s, rn);
    tcg_res = tcg_temp_new_i32();

    switch (opcode) {
    case 0x0: /* FMOV */
        tcg_gen_mov_i32(tcg_res, tcg_op);
        break;
    case 0x1: /* FABS */
        gen_helper_vfp_abss(tcg_res, tcg_op);
        break;
    case 0x2: /* FNEG */
        gen_helper_vfp_negs(tcg_res, tcg_op);
        break;
    case 0x3: /* FSQRT */
        gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
        break;
    case 0x8: /* FRINTN */
    case 0x9: /* FRINTP */
    case 0xa: /* FRINTM */
    case 0xb: /* FRINTZ */
    case 0xc: /* FRINTA */
    {
        /* Directed rounding: opcode<2:0> encodes the rounding mode */
        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));

        /* set_rmode swaps in the new rounding mode and hands back the
         * previous one in tcg_rmode, so the second call restores it.
         */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        gen_helper_rints(tcg_res, tcg_op, fpst);

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_temp_free_i32(tcg_rmode);
        break;
    }
    case 0xe: /* FRINTX */
        gen_helper_rints_exact(tcg_res, tcg_op, fpst);
        break;
    case 0xf: /* FRINTI */
        /* Round using the current FPCR rounding mode */
        gen_helper_rints(tcg_res, tcg_op, fpst);
        break;
    default:
        abort();
    }

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_op);
    tcg_temp_free_i32(tcg_res);
}
4469
/* C3.6.25 Floating-point data-processing (1 source) - double precision */
static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
{
    TCGv_ptr fpst;
    TCGv_i64 tcg_op;
    TCGv_i64 tcg_res;

    fpst = get_fpstatus_ptr();
    tcg_op = read_fp_dreg(s, rn);
    tcg_res = tcg_temp_new_i64();

    switch (opcode) {
    case 0x0: /* FMOV */
        tcg_gen_mov_i64(tcg_res, tcg_op);
        break;
    case 0x1: /* FABS */
        gen_helper_vfp_absd(tcg_res, tcg_op);
        break;
    case 0x2: /* FNEG */
        gen_helper_vfp_negd(tcg_res, tcg_op);
        break;
    case 0x3: /* FSQRT */
        gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
        break;
    case 0x8: /* FRINTN */
    case 0x9: /* FRINTP */
    case 0xa: /* FRINTM */
    case 0xb: /* FRINTZ */
    case 0xc: /* FRINTA */
    {
        /* Directed rounding: opcode<2:0> encodes the rounding mode */
        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));

        /* set_rmode swaps in the new rounding mode and hands back the
         * previous one in tcg_rmode, so the second call restores it.
         */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        gen_helper_rintd(tcg_res, tcg_op, fpst);

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_temp_free_i32(tcg_rmode);
        break;
    }
    case 0xe: /* FRINTX */
        gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
        break;
    case 0xf: /* FRINTI */
        /* Round using the current FPCR rounding mode */
        gen_helper_rintd(tcg_res, tcg_op, fpst);
        break;
    default:
        abort();
    }

    write_fp_dreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tcg_op);
    tcg_temp_free_i64(tcg_res);
}
4525
/* FCVT between half, single and double precision.
 * ntype is the source type and dtype the destination type, both using the
 * FP "type" field encoding: 0 = single, 1 = double, 3 = half.
 */
static void handle_fp_fcvt(DisasContext *s, int opcode,
                           int rd, int rn, int dtype, int ntype)
{
    switch (ntype) {
    case 0x0:
    {
        /* Source is single precision */
        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
        if (dtype == 1) {
            /* Single to double */
            TCGv_i64 tcg_rd = tcg_temp_new_i64();
            gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
            write_fp_dreg(s, rd, tcg_rd);
            tcg_temp_free_i64(tcg_rd);
        } else {
            /* Single to half */
            TCGv_i32 tcg_rd = tcg_temp_new_i32();
            gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
            write_fp_sreg(s, rd, tcg_rd);
            tcg_temp_free_i32(tcg_rd);
        }
        tcg_temp_free_i32(tcg_rn);
        break;
    }
    case 0x1:
    {
        /* Source is double precision */
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i32 tcg_rd = tcg_temp_new_i32();
        if (dtype == 0) {
            /* Double to single */
            gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
        } else {
            /* Double to half */
            gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
        }
        write_fp_sreg(s, rd, tcg_rd);
        tcg_temp_free_i32(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
        break;
    }
    case 0x3:
    {
        /* Source is half precision: only the low 16 bits are significant */
        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
        tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
        if (dtype == 0) {
            /* Half to single */
            TCGv_i32 tcg_rd = tcg_temp_new_i32();
            gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
            write_fp_sreg(s, rd, tcg_rd);
            tcg_temp_free_i32(tcg_rd);
        } else {
            /* Half to double */
            TCGv_i64 tcg_rd = tcg_temp_new_i64();
            gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
            write_fp_dreg(s, rd, tcg_rd);
            tcg_temp_free_i64(tcg_rd);
        }
        tcg_temp_free_i32(tcg_rn);
        break;
    }
    default:
        abort();
    }
}
4591
/* C3.6.25 Floating point data-processing (1 source)
 *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
 * +---+---+---+-----------+------+---+--------+-----------+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+--------+-----------+------+------+
 */
static void disas_fp_1src(DisasContext *s, uint32_t insn)
{
    int type = extract32(insn, 22, 2);   /* 0 = single, 1 = double */
    int opcode = extract32(insn, 15, 6);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    switch (opcode) {
    case 0x4: case 0x5: case 0x7:
    {
        /* FCVT between half, single and double precision */
        int dtype = extract32(opcode, 0, 2);
        /* type == 2 is reserved, and converting to the same type
         * is unallocated.
         */
        if (type == 2 || dtype == type) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }

        handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
        break;
    }
    case 0x0 ... 0x3:
    case 0x8 ... 0xc:
    case 0xe ... 0xf:
        /* 32-to-32 and 64-to-64 ops */
        switch (type) {
        case 0:
            if (!fp_access_check(s)) {
                return;
            }

            handle_fp_1src_single(s, opcode, rd, rn);
            break;
        case 1:
            if (!fp_access_check(s)) {
                return;
            }

            handle_fp_1src_double(s, opcode, rd, rn);
            break;
        default:
            unallocated_encoding(s);
        }
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}
4649
/* C3.6.26 Floating-point data-processing (2 source) - single precision */
static void handle_fp_2src_single(DisasContext *s, int opcode,
                                  int rd, int rn, int rm)
{
    TCGv_i32 tcg_op1;
    TCGv_i32 tcg_op2;
    TCGv_i32 tcg_res;
    TCGv_ptr fpst;

    tcg_res = tcg_temp_new_i32();
    fpst = get_fpstatus_ptr();
    tcg_op1 = read_fp_sreg(s, rn);
    tcg_op2 = read_fp_sreg(s, rm);

    switch (opcode) {
    case 0x0: /* FMUL */
        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x1: /* FDIV */
        gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x2: /* FADD */
        gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x3: /* FSUB */
        gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x4: /* FMAX */
        gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x5: /* FMIN */
        gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x6: /* FMAXNM */
        gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x7: /* FMINNM */
        gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x8: /* FNMUL */
        /* FNMUL is a multiply followed by a negation of the product */
        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
        gen_helper_vfp_negs(tcg_res, tcg_res);
        break;
    }

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_op1);
    tcg_temp_free_i32(tcg_op2);
    tcg_temp_free_i32(tcg_res);
}
4702
4703/* C3.6.26 Floating-point data-processing (2 source) - double precision */
4704static void handle_fp_2src_double(DisasContext *s, int opcode,
4705                                  int rd, int rn, int rm)
4706{
4707    TCGv_i64 tcg_op1;
4708    TCGv_i64 tcg_op2;
4709    TCGv_i64 tcg_res;
4710    TCGv_ptr fpst;
4711
4712    tcg_res = tcg_temp_new_i64();
4713    fpst = get_fpstatus_ptr();
4714    tcg_op1 = read_fp_dreg(s, rn);
4715    tcg_op2 = read_fp_dreg(s, rm);
4716
4717    switch (opcode) {
4718    case 0x0: /* FMUL */
4719        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4720        break;
4721    case 0x1: /* FDIV */
4722        gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4723        break;
4724    case 0x2: /* FADD */
4725        gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4726        break;
4727    case 0x3: /* FSUB */
4728        gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4729        break;
4730    case 0x4: /* FMAX */
4731        gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4732        break;
4733    case 0x5: /* FMIN */
4734        gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4735        break;
4736    case 0x6: /* FMAXNM */
4737        gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4738        break;
4739    case 0x7: /* FMINNM */
4740        gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4741        break;
4742    case 0x8: /* FNMUL */
4743        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4744        gen_helper_vfp_negd(tcg_res, tcg_res);
4745        break;
4746    }
4747
4748    write_fp_dreg(s, rd, tcg_res);
4749
4750    tcg_temp_free_ptr(fpst);
4751    tcg_temp_free_i64(tcg_op1);
4752    tcg_temp_free_i64(tcg_op2);
4753    tcg_temp_free_i64(tcg_res);
4754}
4755
4756/* C3.6.26 Floating point data-processing (2 source)
4757 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
4758 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4759 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
4760 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4761 */
4762static void disas_fp_2src(DisasContext *s, uint32_t insn)
4763{
4764    int type = extract32(insn, 22, 2);
4765    int rd = extract32(insn, 0, 5);
4766    int rn = extract32(insn, 5, 5);
4767    int rm = extract32(insn, 16, 5);
4768    int opcode = extract32(insn, 12, 4);
4769
4770    if (opcode > 8) {
4771        unallocated_encoding(s);
4772        return;
4773    }
4774
4775    switch (type) {
4776    case 0:
4777        if (!fp_access_check(s)) {
4778            return;
4779        }
4780        handle_fp_2src_single(s, opcode, rd, rn, rm);
4781        break;
4782    case 1:
4783        if (!fp_access_check(s)) {
4784            return;
4785        }
4786        handle_fp_2src_double(s, opcode, rd, rn, rm);
4787        break;
4788    default:
4789        unallocated_encoding(s);
4790    }
4791}
4792
4793/* C3.6.27 Floating-point data-processing (3 source) - single precision */
4794static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4795                                  int rd, int rn, int rm, int ra)
4796{
4797    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4798    TCGv_i32 tcg_res = tcg_temp_new_i32();
4799    TCGv_ptr fpst = get_fpstatus_ptr();
4800
4801    tcg_op1 = read_fp_sreg(s, rn);
4802    tcg_op2 = read_fp_sreg(s, rm);
4803    tcg_op3 = read_fp_sreg(s, ra);
4804
4805    /* These are fused multiply-add, and must be done as one
4806     * floating point operation with no rounding between the
4807     * multiplication and addition steps.
4808     * NB that doing the negations here as separate steps is
4809     * correct : an input NaN should come out with its sign bit
4810     * flipped if it is a negated-input.
4811     */
4812    if (o1 == true) {
4813        gen_helper_vfp_negs(tcg_op3, tcg_op3);
4814    }
4815
4816    if (o0 != o1) {
4817        gen_helper_vfp_negs(tcg_op1, tcg_op1);
4818    }
4819
4820    gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4821
4822    write_fp_sreg(s, rd, tcg_res);
4823
4824    tcg_temp_free_ptr(fpst);
4825    tcg_temp_free_i32(tcg_op1);
4826    tcg_temp_free_i32(tcg_op2);
4827    tcg_temp_free_i32(tcg_op3);
4828    tcg_temp_free_i32(tcg_res);
4829}
4830
4831/* C3.6.27 Floating-point data-processing (3 source) - double precision */
4832static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4833                                  int rd, int rn, int rm, int ra)
4834{
4835    TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4836    TCGv_i64 tcg_res = tcg_temp_new_i64();
4837    TCGv_ptr fpst = get_fpstatus_ptr();
4838
4839    tcg_op1 = read_fp_dreg(s, rn);
4840    tcg_op2 = read_fp_dreg(s, rm);
4841    tcg_op3 = read_fp_dreg(s, ra);
4842
4843    /* These are fused multiply-add, and must be done as one
4844     * floating point operation with no rounding between the
4845     * multiplication and addition steps.
4846     * NB that doing the negations here as separate steps is
4847     * correct : an input NaN should come out with its sign bit
4848     * flipped if it is a negated-input.
4849     */
4850    if (o1 == true) {
4851        gen_helper_vfp_negd(tcg_op3, tcg_op3);
4852    }
4853
4854    if (o0 != o1) {
4855        gen_helper_vfp_negd(tcg_op1, tcg_op1);
4856    }
4857
4858    gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4859
4860    write_fp_dreg(s, rd, tcg_res);
4861
4862    tcg_temp_free_ptr(fpst);
4863    tcg_temp_free_i64(tcg_op1);
4864    tcg_temp_free_i64(tcg_op2);
4865    tcg_temp_free_i64(tcg_op3);
4866    tcg_temp_free_i64(tcg_res);
4867}
4868
4869/* C3.6.27 Floating point data-processing (3 source)
4870 *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4871 * +---+---+---+-----------+------+----+------+----+------+------+------+
4872 * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4873 * +---+---+---+-----------+------+----+------+----+------+------+------+
4874 */
4875static void disas_fp_3src(DisasContext *s, uint32_t insn)
4876{
4877    int type = extract32(insn, 22, 2);
4878    int rd = extract32(insn, 0, 5);
4879    int rn = extract32(insn, 5, 5);
4880    int ra = extract32(insn, 10, 5);
4881    int rm = extract32(insn, 16, 5);
4882    bool o0 = extract32(insn, 15, 1);
4883    bool o1 = extract32(insn, 21, 1);
4884
4885    switch (type) {
4886    case 0:
4887        if (!fp_access_check(s)) {
4888            return;
4889        }
4890        handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4891        break;
4892    case 1:
4893        if (!fp_access_check(s)) {
4894            return;
4895        }
4896        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4897        break;
4898    default:
4899        unallocated_encoding(s);
4900    }
4901}
4902
4903/* C3.6.28 Floating point immediate
4904 *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
4905 * +---+---+---+-----------+------+---+------------+-------+------+------+
4906 * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
4907 * +---+---+---+-----------+------+---+------------+-------+------+------+
4908 */
4909static void disas_fp_imm(DisasContext *s, uint32_t insn)
4910{
4911    int rd = extract32(insn, 0, 5);
4912    int imm8 = extract32(insn, 13, 8);
4913    int is_double = extract32(insn, 22, 2);
4914    uint64_t imm;
4915    TCGv_i64 tcg_res;
4916
4917    if (is_double > 1) {
4918        unallocated_encoding(s);
4919        return;
4920    }
4921
4922    if (!fp_access_check(s)) {
4923        return;
4924    }
4925
4926    /* The imm8 encodes the sign bit, enough bits to represent
4927     * an exponent in the range 01....1xx to 10....0xx,
4928     * and the most significant 4 bits of the mantissa; see
4929     * VFPExpandImm() in the v8 ARM ARM.
4930     */
4931    if (is_double) {
4932        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4933            (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4934            extract32(imm8, 0, 6);
4935        imm <<= 48;
4936    } else {
4937        imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4938            (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4939            (extract32(imm8, 0, 6) << 3);
4940        imm <<= 16;
4941    }
4942
4943    tcg_res = tcg_const_i64(imm);
4944    write_fp_dreg(s, rd, tcg_res);
4945    tcg_temp_free_i64(tcg_res);
4946}
4947
/* Handle floating point <=> fixed point conversions. Note that we can
 * also deal with fp <=> integer conversions as a special case (scale == 64)
 * OPTME: consider handling that special case specially or at least skipping
 * the call to scalbn in the helpers for zero shifts.
 *
 * itof selects the direction (true: integer reg -> FP reg), rmode is
 * the FPROUNDING_* rounding mode, sf is the general-register width
 * (0: 32 bit, 1: 64 bit) and type the FP precision (0: single, 1: double).
 */
static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
                           bool itof, int rmode, int scale, int sf, int type)
{
    /* Even opcodes (SCVTF, FCVTxS) are the signed variants */
    bool is_signed = !(opcode & 1);
    bool is_double = type;
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_shift;

    tcg_fpstatus = get_fpstatus_ptr();

    /* The helpers take a count of fractional bits, i.e. 64 - scale
     * (which is zero for the pure int <-> fp case where scale == 64).
     */
    tcg_shift = tcg_const_i32(64 - scale);

    if (itof) {
        /* Integer (general register) to floating point */
        TCGv_i64 tcg_int = cpu_reg(s, rn);
        if (!sf) {
            /* 32-bit source: widen to 64 bits with the appropriate
             * extension so we can share the 64-bit source helpers.
             */
            TCGv_i64 tcg_extend = new_tmp_a64(s);

            if (is_signed) {
                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
            } else {
                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
            }

            tcg_int = tcg_extend;
        }

        if (is_double) {
            TCGv_i64 tcg_double = tcg_temp_new_i64();
            if (is_signed) {
                gen_helper_vfp_sqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_dreg(s, rd, tcg_double);
            tcg_temp_free_i64(tcg_double);
        } else {
            TCGv_i32 tcg_single = tcg_temp_new_i32();
            if (is_signed) {
                gen_helper_vfp_sqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_sreg(s, rd, tcg_single);
            tcg_temp_free_i32(tcg_single);
        }
    } else {
        /* Floating point to integer (general register) */
        TCGv_i64 tcg_int = cpu_reg(s, rd);
        TCGv_i32 tcg_rmode;

        if (extract32(opcode, 2, 1)) {
            /* There are too many rounding modes to all fit into rmode,
             * so FCVTA[US] is a special case.
             */
            rmode = FPROUNDING_TIEAWAY;
        }

        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));

        /* Install the requested rounding mode; set_rmode writes the
         * previous mode back into tcg_rmode so the second call below
         * restores it.
         */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);

        if (is_double) {
            TCGv_i64 tcg_double = read_fp_dreg(s, rn);
            if (is_signed) {
                if (!sf) {
                    gen_helper_vfp_tosld(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                if (!sf) {
                    gen_helper_vfp_tould(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            }
            tcg_temp_free_i64(tcg_double);
        } else {
            TCGv_i32 tcg_single = read_fp_sreg(s, rn);
            if (sf) {
                if (is_signed) {
                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                /* 32-bit result: helper produces an i32, which we
                 * then widen into the 64-bit destination.
                 */
                TCGv_i32 tcg_dest = tcg_temp_new_i32();
                if (is_signed) {
                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
                tcg_temp_free_i32(tcg_dest);
            }
            tcg_temp_free_i32(tcg_single);
        }

        /* Restore the rounding mode that was saved above */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_temp_free_i32(tcg_rmode);

        if (!sf) {
            /* 32-bit results are zero-extended into the X register */
            tcg_gen_ext32u_i64(tcg_int, tcg_int);
        }
    }

    tcg_temp_free_ptr(tcg_fpstatus);
    tcg_temp_free_i32(tcg_shift);
}
5073
5074/* C3.6.29 Floating point <-> fixed point conversions
5075 *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
5076 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5077 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
5078 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5079 */
5080static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
5081{
5082    int rd = extract32(insn, 0, 5);
5083    int rn = extract32(insn, 5, 5);
5084    int scale = extract32(insn, 10, 6);
5085    int opcode = extract32(insn, 16, 3);
5086    int rmode = extract32(insn, 19, 2);
5087    int type = extract32(insn, 22, 2);
5088    bool sbit = extract32(insn, 29, 1);
5089    bool sf = extract32(insn, 31, 1);
5090    bool itof;
5091
5092    if (sbit || (type > 1)
5093        || (!sf && scale < 32)) {
5094        unallocated_encoding(s);
5095        return;
5096    }
5097
5098    switch ((rmode << 3) | opcode) {
5099    case 0x2: /* SCVTF */
5100    case 0x3: /* UCVTF */
5101        itof = true;
5102        break;
5103    case 0x18: /* FCVTZS */
5104    case 0x19: /* FCVTZU */
5105        itof = false;
5106        break;
5107    default:
5108        unallocated_encoding(s);
5109        return;
5110    }
5111
5112    if (!fp_access_check(s)) {
5113        return;
5114    }
5115
5116    handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5117}
5118
static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
{
    /* FMOV: gpr to or from float, double, or top half of quad fp reg,
     * without conversion.
     * type: 0 = 32-bit, 1 = 64-bit, 2 = 64-bit to/from the high half
     * of the 128-bit vector register. The caller (disas_fp_int_conv)
     * has already validated the type/sf/rmode combination.
     */

    if (itof) {
        /* General register -> FP/vector register */
        TCGv_i64 tcg_rn = cpu_reg(s, rn);

        switch (type) {
        case 0:
        {
            /* 32 bit: store Wn zero-extended to the low doubleword,
             * then explicitly clear bits [127:64] of the register.
             */
            TCGv_i64 tmp = tcg_temp_new_i64();
            tcg_gen_ext32u_i64(tmp, tcg_rn);
            tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
            tcg_gen_movi_i64(tmp, 0);
            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
            tcg_temp_free_i64(tmp);
            break;
        }
        case 1:
        {
            /* 64 bit: store Xn to the low doubleword and clear
             * bits [127:64].
             */
            TCGv_i64 tmp = tcg_const_i64(0);
            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
            tcg_temp_free_i64(tmp);
            break;
        }
        case 2:
            /* 64 bit to top half: only bits [127:64] are written */
            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
            break;
        }
    } else {
        /* FP/vector register -> general register */
        TCGv_i64 tcg_rd = cpu_reg(s, rd);

        switch (type) {
        case 0:
            /* 32 bit: Wd gets Sn zero-extended */
            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
            break;
        case 1:
            /* 64 bit */
            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
            break;
        case 2:
            /* 64 bits from top half */
            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
            break;
        }
    }
}
5173
5174/* C3.6.30 Floating point <-> integer conversions
5175 *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5176 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5177 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5178 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5179 */
5180static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5181{
5182    int rd = extract32(insn, 0, 5);
5183    int rn = extract32(insn, 5, 5);
5184    int opcode = extract32(insn, 16, 3);
5185    int rmode = extract32(insn, 19, 2);
5186    int type = extract32(insn, 22, 2);
5187    bool sbit = extract32(insn, 29, 1);
5188    bool sf = extract32(insn, 31, 1);
5189
5190    if (sbit) {
5191        unallocated_encoding(s);
5192        return;
5193    }
5194
5195    if (opcode > 5) {
5196        /* FMOV */
5197        bool itof = opcode & 1;
5198
5199        if (rmode >= 2) {
5200            unallocated_encoding(s);
5201            return;
5202        }
5203
5204        switch (sf << 3 | type << 1 | rmode) {
5205        case 0x0: /* 32 bit */
5206        case 0xa: /* 64 bit */
5207        case 0xd: /* 64 bit to top half of quad */
5208            break;
5209        default:
5210            /* all other sf/type/rmode combinations are invalid */
5211            unallocated_encoding(s);
5212            break;
5213        }
5214
5215        if (!fp_access_check(s)) {
5216            return;
5217        }
5218        handle_fmov(s, rd, rn, type, itof);
5219    } else {
5220        /* actual FP conversions */
5221        bool itof = extract32(opcode, 1, 1);
5222
5223        if (type > 1 || (rmode != 0 && opcode > 1)) {
5224            unallocated_encoding(s);
5225            return;
5226        }
5227
5228        if (!fp_access_check(s)) {
5229            return;
5230        }
5231        handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5232    }
5233}
5234
/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
 *   31  30  29 28     25 24                          0
 * +---+---+---+---------+-----------------------------+
 * |   | 0 |   | 1 1 1 1 |                             |
 * +---+---+---+---------+-----------------------------+
 *
 * Second-level decode for the scalar FP instruction groups: inspects
 * a few disambiguating bits and hands off to the per-group decoders.
 */
static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
{
    if (extract32(insn, 24, 1)) {
        /* Floating point data-processing (3 source) */
        disas_fp_3src(s, insn);
    } else if (extract32(insn, 21, 1) == 0) {
        /* Floating point to fixed point conversions */
        disas_fp_fixed_conv(s, insn);
    } else {
        switch (extract32(insn, 10, 2)) {
        case 1:
            /* Floating point conditional compare */
            disas_fp_ccomp(s, insn);
            break;
        case 2:
            /* Floating point data-processing (2 source) */
            disas_fp_2src(s, insn);
            break;
        case 3:
            /* Floating point conditional select */
            disas_fp_csel(s, insn);
            break;
        case 0:
            /* Disambiguate on the position of the lowest set bit of
             * insn[15:12]. Note ctz32(0) == 32, so the all-zeroes
             * pattern falls into the default case.
             */
            switch (ctz32(extract32(insn, 12, 4))) {
            case 0: /* [15:12] == xxx1 */
                /* Floating point immediate */
                disas_fp_imm(s, insn);
                break;
            case 1: /* [15:12] == xx10 */
                /* Floating point compare */
                disas_fp_compare(s, insn);
                break;
            case 2: /* [15:12] == x100 */
                /* Floating point data-processing (1 source) */
                disas_fp_1src(s, insn);
                break;
            case 3: /* [15:12] == 1000 */
                unallocated_encoding(s);
                break;
            default: /* [15:12] == 0000 */
                /* Floating point <-> integer conversions */
                disas_fp_int_conv(s, insn);
                break;
            }
            break;
        }
    }
}
5289
5290static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5291                     int pos)
5292{
5293    /* Extract 64 bits from the middle of two concatenated 64 bit
5294     * vector register slices left:right. The extracted bits start
5295     * at 'pos' bits into the right (least significant) side.
5296     * We return the result in tcg_right, and guarantee not to
5297     * trash tcg_left.
5298     */
5299    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5300    assert(pos > 0 && pos < 64);
5301
5302    tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5303    tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5304    tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5305
5306    tcg_temp_free_i64(tcg_tmp);
5307}
5308
/* C3.6.1 EXT
 *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
 * +---+---+-------------+-----+---+------+---+------+---+------+------+
 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
 * +---+---+-------------+-----+---+------+---+------+---+------+------+
 */
static void disas_simd_ext(DisasContext *s, uint32_t insn)
{
    int is_q = extract32(insn, 30, 1);
    int op2 = extract32(insn, 22, 2);
    int imm4 = extract32(insn, 11, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int pos = imm4 << 3; /* extraction offset in bits (imm4 is in bytes) */
    TCGv_i64 tcg_resl, tcg_resh;

    /* op2 must be zero; in the 64-bit (!Q) variant the index may not
     * point past the single 64-bit source, so imm4<3> must be zero.
     */
    if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_resh = tcg_temp_new_i64();
    tcg_resl = tcg_temp_new_i64();

    /* Vd gets bits starting at pos bits into Vm:Vn. This is
     * either extracting 128 bits from a 128:128 concatenation, or
     * extracting 64 bits from a 64:64 concatenation.
     */
    if (!is_q) {
        read_vec_element(s, tcg_resl, rn, 0, MO_64);
        if (pos != 0) {
            read_vec_element(s, tcg_resh, rm, 0, MO_64);
            do_ext64(s, tcg_resh, tcg_resl, pos);
        }
        /* The high half of the destination is always zeroed */
        tcg_gen_movi_i64(tcg_resh, 0);
    } else {
        TCGv_i64 tcg_hh;
        typedef struct {
            int reg;
            int elt;
        } EltPosns;
        /* The four 64-bit slices of the Vm:Vn concatenation,
         * least significant first.
         */
        EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
        EltPosns *elt = eltposns;

        if (pos >= 64) {
            /* Skip a whole doubleword: start one slice further up */
            elt++;
            pos -= 64;
        }

        read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
        elt++;
        read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
        elt++;
        if (pos != 0) {
            /* Shift the low result, pulling bits in from the middle
             * slice, then do the same for the high result using the
             * next slice up.
             */
            do_ext64(s, tcg_resh, tcg_resl, pos);
            tcg_hh = tcg_temp_new_i64();
            read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
            do_ext64(s, tcg_hh, tcg_resh, pos);
            tcg_temp_free_i64(tcg_hh);
        }
    }

    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_resl);
    write_vec_element(s, tcg_resh, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_resh);
}
5381
5382/* C3.6.2 TBL/TBX
5383 *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5384 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5385 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5386 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5387 */
5388static void disas_simd_tb(DisasContext *s, uint32_t insn)
5389{
5390    int op2 = extract32(insn, 22, 2);
5391    int is_q = extract32(insn, 30, 1);
5392    int rm = extract32(insn, 16, 5);
5393    int rn = extract32(insn, 5, 5);
5394    int rd = extract32(insn, 0, 5);
5395    int is_tblx = extract32(insn, 12, 1);
5396    int len = extract32(insn, 13, 2);
5397    TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5398    TCGv_i32 tcg_regno, tcg_numregs;
5399
5400    if (op2 != 0) {
5401        unallocated_encoding(s);
5402        return;
5403    }
5404
5405    if (!fp_access_check(s)) {
5406        return;
5407    }
5408
5409    /* This does a table lookup: for every byte element in the input
5410     * we index into a table formed from up to four vector registers,
5411     * and then the output is the result of the lookups. Our helper
5412     * function does the lookup operation for a single 64 bit part of
5413     * the input.
5414     */
5415    tcg_resl = tcg_temp_new_i64();
5416    tcg_resh = tcg_temp_new_i64();
5417
5418    if (is_tblx) {
5419        read_vec_element(s, tcg_resl, rd, 0, MO_64);
5420    } else {
5421        tcg_gen_movi_i64(tcg_resl, 0);
5422    }
5423    if (is_tblx && is_q) {
5424        read_vec_element(s, tcg_resh, rd, 1, MO_64);
5425    } else {
5426        tcg_gen_movi_i64(tcg_resh, 0);
5427    }
5428
5429    tcg_idx = tcg_temp_new_i64();
5430    tcg_regno = tcg_const_i32(rn);
5431    tcg_numregs = tcg_const_i32(len + 1);
5432    read_vec_element(s, tcg_idx, rm, 0, MO_64);
5433    gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5434                        tcg_regno, tcg_numregs);
5435    if (is_q) {
5436        read_vec_element(s, tcg_idx, rm, 1, MO_64);
5437        gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5438                            tcg_regno, tcg_numregs);
5439    }
5440    tcg_temp_free_i64(tcg_idx);
5441    tcg_temp_free_i32(tcg_regno);
5442    tcg_temp_free_i32(tcg_numregs);
5443
5444    write_vec_element(s, tcg_resl, rd, 0, MO_64);
5445    tcg_temp_free_i64(tcg_resl);
5446    write_vec_element(s, tcg_resh, rd, 1, MO_64);
5447    tcg_temp_free_i64(tcg_resh);
5448}
5449
/* C3.6.3 ZIP/UZP/TRN
 *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
 * +---+---+-------------+------+---+------+---+------------------+------+
 * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
 * +---+---+-------------+------+---+------+---+------------------+------+
 */
static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    /* opc field bits [1:0] indicate ZIP/UZP/TRN;
     * bit 2 indicates 1 vs 2 variant of the insn.
     */
    int opcode = extract32(insn, 12, 2);
    bool part = extract32(insn, 14, 1);
    bool is_q = extract32(insn, 30, 1);
    int esize = 8 << size;  /* element size in bits */
    int i, ofs;
    int datasize = is_q ? 128 : 64;
    int elements = datasize / esize;
    TCGv_i64 tcg_res, tcg_resl, tcg_resh;

    /* opc 0 is unallocated; 64-bit elements require the Q form */
    if (opcode == 0 || (size == 3 && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* Build the result one element at a time, ORing each element
     * into the low or high 64-bit accumulator at its destination
     * offset; only the source element index differs per operation.
     */
    tcg_resl = tcg_const_i64(0);
    tcg_resh = tcg_const_i64(0);
    tcg_res = tcg_temp_new_i64();

    for (i = 0; i < elements; i++) {
        switch (opcode) {
        case 1: /* UZP1/2 */
        {
            /* Even (part=0) or odd (part=1) elements of Vn then Vm */
            int midpoint = elements / 2;
            if (i < midpoint) {
                read_vec_element(s, tcg_res, rn, 2 * i + part, size);
            } else {
                read_vec_element(s, tcg_res, rm,
                                 2 * (i - midpoint) + part, size);
            }
            break;
        }
        case 2: /* TRN1/2 */
            /* Interleave corresponding even (part=0) or odd (part=1)
             * element pairs from Vn and Vm.
             */
            if (i & 1) {
                read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
            } else {
                read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
            }
            break;
        case 3: /* ZIP1/2 */
        {
            /* Interleave the lower (part=0) or upper (part=1) halves
             * of Vn and Vm.
             */
            int base = part * elements / 2;
            if (i & 1) {
                read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
            } else {
                read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
            }
            break;
        }
        default:
            g_assert_not_reached();
        }

        ofs = i * esize;
        if (ofs < 64) {
            tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
            tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
        } else {
            tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
            tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
        }
    }

    tcg_temp_free_i64(tcg_res);

    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_resl);
    write_vec_element(s, tcg_resh, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_resh);
}
5538
5539static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5540                        int opc, bool is_min, TCGv_ptr fpst)
5541{
5542    /* Helper function for disas_simd_across_lanes: do a single precision
5543     * min/max operation on the specified two inputs,
5544     * and return the result in tcg_elt1.
5545     */
5546    if (opc == 0xc) {
5547        if (is_min) {
5548            gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5549        } else {
5550            gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5551        }
5552    } else {
5553        assert(opc == 0xf);
5554        if (is_min) {
5555            gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5556        } else {
5557            gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5558        }
5559    }
5560}
5561
5562/* C3.6.4 AdvSIMD across lanes
5563 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5564 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5565 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5566 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5567 */
5568static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5569{
5570    int rd = extract32(insn, 0, 5);
5571    int rn = extract32(insn, 5, 5);
5572    int size = extract32(insn, 22, 2);
5573    int opcode = extract32(insn, 12, 5);
5574    bool is_q = extract32(insn, 30, 1);
5575    bool is_u = extract32(insn, 29, 1);
5576    bool is_fp = false;
5577    bool is_min = false;
5578    int esize;
5579    int elements;
5580    int i;
5581    TCGv_i64 tcg_res, tcg_elt;
5582
5583    switch (opcode) {
5584    case 0x1b: /* ADDV */
5585        if (is_u) {
5586            unallocated_encoding(s);
5587            return;
5588        }
5589        /* fall through */
5590    case 0x3: /* SADDLV, UADDLV */
5591    case 0xa: /* SMAXV, UMAXV */
5592    case 0x1a: /* SMINV, UMINV */
5593        if (size == 3 || (size == 2 && !is_q)) {
5594            unallocated_encoding(s);
5595            return;
5596        }
5597        break;
5598    case 0xc: /* FMAXNMV, FMINNMV */
5599    case 0xf: /* FMAXV, FMINV */
5600        if (!is_u || !is_q || extract32(size, 0, 1)) {
5601            unallocated_encoding(s);
5602            return;
5603        }
5604        /* Bit 1 of size field encodes min vs max, and actual size is always
5605         * 32 bits: adjust the size variable so following code can rely on it
5606         */
5607        is_min = extract32(size, 1, 1);
5608        is_fp = true;
5609        size =